From b491eebb18cc430167eff63823fb408e16fc6d5a Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 19 Mar 2026 16:32:30 +0000
Subject: [PATCH 01/17] TransferConsensus()

---
 NAMESPACE                                |   3 +
 R/transfer_consensus.R                   | 566 +++++++++++++++++++++++
 inst/REFERENCES.bib                      |  19 +
 man/TransferConsensus.Rd                 |  68 +++
 tests/testthat/test-transfer_consensus.R |  80 ++++
 5 files changed, 736 insertions(+)
 create mode 100644 R/transfer_consensus.R
 create mode 100644 man/TransferConsensus.Rd
 create mode 100644 tests/testthat/test-transfer_consensus.R

diff --git a/NAMESPACE b/NAMESPACE
index 640491ff7..90a22ab81 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -140,6 +140,7 @@ export(StrainCol)
 export(SumOfRanges)
 export(SumOfVariances)
 export(SumOfVars)
+export(TransferConsensus)
 export(TreeDistPlot)
 export(TreeDistance)
 export(TreesConsistentWithTwoSplits)
@@ -151,6 +152,7 @@ export(entropy_int)
 export(is.HPart)
 importFrom(Rdpack,reprompt)
 importFrom(TreeTools,AllAncestors)
+importFrom(TreeTools,Consensus)
 importFrom(TreeTools,DropTip)
 importFrom(TreeTools,FirstMatchingSplit)
 importFrom(TreeTools,KeepTip)
@@ -177,6 +179,7 @@ importFrom(TreeTools,RootOnNode)
 importFrom(TreeTools,RootTree)
 importFrom(TreeTools,SplitInformation)
 importFrom(TreeTools,SplitsInBinaryTree)
+importFrom(TreeTools,StarTree)
 importFrom(TreeTools,TipLabels)
 importFrom(TreeTools,TipsInSplits)
 importFrom(TreeTools,TreeIsRooted)
diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R
new file mode 100644
index 000000000..bd1d9f65e
--- /dev/null
+++ b/R/transfer_consensus.R
@@ -0,0 +1,566 @@
+#' Consensus tree minimizing transfer distance
+#'
+#' Construct a consensus tree that minimizes the sum of transfer distances
+#' to a set of input trees, using a greedy add-and-prune heuristic.
+#'
+#' Unlike the majority-rule consensus, which minimizes Robinson-Foulds
+#' distance and can be highly unresolved when phylogenetic signal is low,
+#' `TransferConsensus()` uses the finer-grained transfer distance
+#' \insertCite{Lemoine2018}{TreeDist} to construct a more resolved consensus
+#' tree.
+#'
+#' The algorithm pools all splits observed across input trees, computes
+#' pairwise transfer distances between them, and greedily adds or removes
+#' splits to minimize total transfer dissimilarity cost.  The approach
+#' follows \insertCite{Takazawa2025;textual}{TreeDist}, reimplemented for
+#' TreeDist's infrastructure.
+#'
+#' @param trees An object of class `multiPhylo`: the input trees.
+#'   All trees must share the same tip labels.
+#' @param scale Logical; if `TRUE` (default), use the scaled transfer
+#'   distance (normalized by light-side size minus one).  If `FALSE`, use
+#'   the unscaled (raw Hamming) transfer distance.
+#' @param greedy Character string specifying the greedy strategy:
+#'   `"best"` (default) picks the single highest-benefit action at each step;
+#'   `"first"` picks the first improving action encountered (faster but
+#'   potentially lower quality).
+#' @param init Character string specifying the initial consensus:
+#'   `"empty"` (default) starts with no splits (purely additive);
+#'   `"majority"` starts with the majority-rule consensus and refines.
+#'
+#' @return A tree of class `phylo`.
+#'
+#' @references
+#' \insertAllCited{}
+#'
+#' @examples
+#' library(TreeTools)
+#' # Generate bootstrap-like trees
+#' trees <- as.phylo(1:20, nTip = 12)
+#'
+#' # Transfer consensus (more resolved than majority rule)
+#' tc <- TransferConsensus(trees)
+#' plot(tc)
+#'
+#' # Compare resolution
+#' mr <- Consensus(trees, p = 0.5)
+#' cat("Majority-rule splits:", NSplits(mr), "\n")
+#' cat("Transfer consensus splits:", NSplits(tc), "\n")
+#'
+#' @importFrom TreeTools as.Splits TipLabels NSplits Consensus StarTree
+#' @export
+TransferConsensus <- function(trees,
+                              scale = TRUE,
+                              greedy = c("best", "first"),
+                              init = c("empty", "majority")) {
+  greedy <- match.arg(greedy)
+  init <- match.arg(init)
+
+  if (!inherits(trees, "multiPhylo")) {
+    stop("`trees` must be an object of class 'multiPhylo'.")
+  }
+  nTree <- length(trees)
+  if (nTree < 2L) stop("Need at least 2 trees.")
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+
+  splitInfo <- .PoolSplits(trees, tipLabels)
+  splitMat   <- splitInfo$splits     # integer matrix: nSplits x nTip (logical)
+  counts     <- splitInfo$counts
+  lightSide  <- splitInfo$lightSide
+  treeMembers <- splitInfo$treeMembers
+  nSplits <- nrow(splitMat)
+
+  # Pairwise transfer distance matrix (integer, nSplits x nSplits)
+  DIST <- .TransferDistMat(splitMat, nTip)
+
+  # Augment with sentinel column/row (distance to leaf bipartitions)
+  # sentinel index = nSplits + 1
+  sentDist <- lightSide - 1L  # length nSplits
+
+  # Transfer dissimilarity cost (FP cost of including each split)
+  TD <- .ComputeTD(DIST, sentDist, treeMembers, lightSide, nTree, scale)
+
+  # Compatibility matrix
+  compat <- .CompatMat(splitMat, nTip)
+
+  # Mutable state
+  st <- new.env(parent = emptyenv())
+  st$MATCH  <- rep(NA_integer_, nSplits)  # NA = sentinel
+
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+  st$incl   <- logical(nSplits)
+
+  if (init == "majority") {
+    majIdx <- which(counts > nTree / 2)
+    if (length(majIdx)) {
+      for (idx in majIdx) st$incl[idx] <- TRUE
+      .InitMatches(st, DIST, sentDist, lightSide, scale)
+    }
+  }
+
+  sortOrd <- order(counts, decreasing = TRUE)
+
+  if (greedy == "best") {
+    .GreedyBest(st, DIST, sentDist, TD, counts, lightSide, compat,
+                sortOrd, scale, nSplits, nTip)
+  } else {
+    .GreedyFirst(st, DIST, sentDist, TD, counts, lightSide, compat,
+                 sortOrd, scale, nSplits, nTip)
+  }
+
+  .SplitsToPhylo(splitInfo$rawSplits, st$incl, tipLabels, nTip)
+}
+
+
+# ===========================================================================
+# Internal helpers
+# ===========================================================================
+
+# popcount lookup for 0:255
+.POPCOUNT <- as.integer(sapply(0:255, function(x) sum(as.integer(intToBits(x)))))
+
+#' Pool unique splits, returning an integer (logical) matrix
+#' @noRd
+.PoolSplits <- function(trees, tipLabels) {
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  # Collect all splits as logical matrices and hash to unique set
+  env <- new.env(hash = TRUE, parent = emptyenv())
+  splitsList <- list()
+  rawSplitsList <- list()
+  countVec <- integer(0)
+  treeMembers <- vector("list", nTree)
+  nextIdx <- 1L
+
+  for (i in seq_len(nTree)) {
+    sp <- as.Splits(trees[[i]], tipLabels)
+    logMat <- as.logical(sp)  # matrix: nSplits x nTip
+    # The Splits object is a raw matrix internally; preserve structure
+    rawMat <- unclass(sp)
+    if (is.null(dim(logMat))) {
+      logMat <- matrix(logMat, nrow = 1)
+      rawMat <- matrix(rawMat, nrow = 1)
+    }
+    nSp <- nrow(logMat)
+    # Canonicalise: ensure tip 1 is FALSE
+    nSp <- nrow(logMat)
+    members <- integer(nSp)
+    for (j in seq_len(nSp)) {
+      row <- logMat[j, ]
+      rawRow <- rawMat[j, ]
+      if (row[1]) {
+        row <- !row
+        rawRow <- .FlipRaw(rawRow, nTip)
+      }
+      key <- paste0(which(row), collapse = ",")
+      idx <- env[[key]]
+      if (is.null(idx)) {
+        env[[key]] <- nextIdx
+        splitsList[[nextIdx]] <- row
+        rawSplitsList[[nextIdx]] <- rawRow
+        countVec[nextIdx] <- 1L
+        members[j] <- nextIdx
+        nextIdx <- nextIdx + 1L
+      } else {
+        countVec[idx] <- countVec[idx] + 1L
+        members[j] <- idx
+      }
+    }
+    treeMembers[[i]] <- unique(members)
+  }
+
+  nSplits <- length(splitsList)
+  splitMat <- matrix(FALSE, nrow = nSplits, ncol = nTip)
+  nBytes <- length(rawSplitsList[[1]])
+  rawMat <- matrix(as.raw(0), nrow = nSplits, ncol = nBytes)
+  for (k in seq_len(nSplits)) {
+    splitMat[k, ] <- splitsList[[k]]
+    rawMat[k, ] <- rawSplitsList[[k]]
+  }
+
+  lightSide <- pmin(rowSums(splitMat), nTip - rowSums(splitMat))
+
+  list(
+    splits = splitMat,         # logical matrix nSplits x nTip
+    rawSplits = rawMat,        # raw matrix nSplits x nBytes
+    counts = countVec,
+    lightSide = as.integer(lightSide),
+    treeMembers = treeMembers
+  )
+}
+
+
+.FlipRaw <- function(rawVec, nTip) {
+  nBytes <- length(rawVec)
+  usedBits <- ((nTip - 1L) %% 8L) + 1L
+  lastMask <- as.raw(sum(2^(0:(usedBits - 1L))))
+  out <- xor(rawVec, as.raw(0xff))
+  out[nBytes] <- out[nBytes] & lastMask
+  out
+}
+
+
+#' Pairwise transfer distance matrix using logical split matrix
+#' Transfer distance = min(hamming(a, b), n - hamming(a, b))
+#' hamming(a,b) when both are logical = sum(xor(a,b))
+#' @noRd
+.TransferDistMat <- function(splitMat, nTip) {
+  # splitMat is logical: nSplits x nTip
+  # hamming = number of differing positions
+  # Use tcrossprod trick: hamming(a,b) = sum(a) + sum(b) - 2*sum(a&b)
+  # = nAgreeing... actually let's just compute XOR directly.
+  # Faster: hamming = nTip - 2 * (a %*% t(b) + (1-a) %*% t(1-b))
+  #       = nTip - 2 * (a %*% t(b) + (nTip - rowSums(a) - colSums(b) + a %*% t(b)))
+  # Simpler: agreement = a %*% t(b) + (1-a) %*% t(1-b)
+  #                    = 2 * (a %*% t(b)) - rowSums(a) - rep(rowSums(b)) + nTip
+  # hamming = nTip - agreement
+  sm <- splitMat + 0L  # convert to integer
+  ab <- tcrossprod(sm)  # sm %*% t(sm)
+  rs <- rowSums(sm)
+  hamming <- nTip - 2L * ab + outer(rs, rs, "+") - nTip
+  # Simplifies to: hamming = outer(rs, rs, "+") - 2 * ab
+  # Wait, let me re-derive:
+  # agreement_ij = sum(a_i == b_j) = sum(a&b) + sum(!a & !b)
+  #              = ab[i,j] + (nTip - rs[i] - rs[j] + ab[i,j])
+  #              = 2*ab[i,j] + nTip - rs[i] - rs[j]
+  # hamming_ij = nTip - agreement_ij = rs[i] + rs[j] - 2*ab[i,j]
+  hamming <- outer(rs, rs, "+") - 2L * ab
+
+  # Transfer distance = min(hamming, nTip - hamming)
+  pmin(hamming, nTip - hamming)
+}
+
+
+#' Compute TD (transfer dissimilarity cost) for each split
+#' @noRd
+.ComputeTD <- function(DIST, sentDist, treeMembers, lightSide, nTree, scale) {
+  nSplits <- nrow(DIST)
+  TD <- numeric(nSplits)
+  pMinus1 <- lightSide - 1L
+
+  for (i in seq_len(nTree)) {
+    idx <- treeMembers[[i]]
+    # For each split b, min distance to any split in tree i
+    if (length(idx) == 1L) {
+      minD <- DIST[, idx]
+    } else {
+      minD <- apply(DIST[, idx, drop = FALSE], 1, min)
+    }
+    # Also consider sentinel distance
+    minD <- pmin(minD, sentDist)
+    if (scale) {
+      TD <- TD + pmin(minD / pMinus1, 1)
+    } else {
+      TD <- TD + pmin(minD, pMinus1)
+    }
+  }
+  TD
+}
+
+
+#' Compatibility matrix via vectorized bipartition check
+#' @noRd
+.CompatMat <- function(splitMat, nTip) {
+  # Two splits compatible iff one of the 4 intersections (A&B, A&~B, ~A&B, ~A&~B) is empty
+  sm <- splitMat + 0L  # integer 0/1 matrix
+  notSm <- 1L - sm
+
+  # A&B: a[i,] & b[j,] -> any nonzero -> tcrossprod(sm, sm) > 0
+  ab   <- tcrossprod(sm, sm) > 0L
+  anb  <- tcrossprod(sm, notSm) > 0L
+  nab  <- tcrossprod(notSm, sm) > 0L
+  nanb <- tcrossprod(notSm, notSm) > 0L
+
+  # Compatible if at least one intersection is empty
+  !ab | !anb | !nab | !nanb
+}
+
+
+#' Initialize MATCH/MATCH2 from currently included splits
+#' @noRd
+.InitMatches <- function(st, DIST, sentDist, lightSide, scale) {
+  nSplits <- length(st$MATCH)
+  incIdx <- which(st$incl)
+  pMinus1 <- lightSide - 1L
+
+  if (length(incIdx) == 0L) return(invisible())
+
+  # For each split b, find 1st and 2nd closest among included
+  subDIST <- DIST[, incIdx, drop = FALSE]
+  for (b in seq_len(nSplits)) {
+    dists <- subDIST[b, ]
+    ord <- order(dists)
+    bestDist <- dists[ord[1]]
+    thresh <- pMinus1[b]
+    if (scale && bestDist / thresh >= 1) next
+    if (!scale && bestDist >= thresh) next
+    st$MATCH[b] <- incIdx[ord[1]]
+    if (length(ord) > 1L) {
+      secDist <- dists[ord[2]]
+      if (scale && secDist / thresh < 1) {
+        st$MATCH2[b] <- incIdx[ord[2]]
+      } else if (!scale && secDist < thresh) {
+        st$MATCH2[b] <- incIdx[ord[2]]
+      }
+    }
+  }
+}
+
+
+#' Get distance from split b to its match (NA = sentinel)
+#' @noRd
+.Dist <- function(b, idx, DIST, sentDist) {
+  if (is.na(idx)) sentDist[b] else DIST[b, idx]
+}
+
+
+#' Vectorized add-benefit: returns benefit for each candidate
+#' @noRd
+.AddBenefitVec <- function(candidates, st, DIST, sentDist, TD, counts,
+                           lightSide, scale) {
+  nSplits <- length(st$MATCH)
+  pMinus1 <- lightSide - 1L
+
+  # Current match distances for all splits
+  matchDist <- ifelse(is.na(st$MATCH), sentDist, .DiagDist(st$MATCH, DIST, sentDist))
+
+  benefits <- numeric(length(candidates))
+  for (ci in seq_along(candidates)) {
+    c <- candidates[ci]
+    newDist <- DIST[, c]
+    if (scale) {
+      diff <- (matchDist - newDist) / pMinus1
+    } else {
+      diff <- matchDist - newDist
+    }
+    diff[diff < 0] <- 0
+    benefits[ci] <- sum(diff * counts) - TD[c]
+  }
+  benefits
+}
+
+#' Helper: get DIST\[b, MATCH\[b\]\] for all b, vectorized
+#' @noRd
+.DiagDist <- function(matchVec, DIST, sentDist) {
+  n <- length(matchVec)
+  out <- numeric(n)
+  notNA <- !is.na(matchVec)
+  if (any(notNA)) {
+    out[notNA] <- DIST[cbind(which(notNA), matchVec[notNA])]
+  }
+  out[!notNA] <- sentDist[!notNA]
+  out
+}
+
+
+#' Vectorized remove-benefit
+#' @noRd
+.RemoveBenefitVec <- function(candidates, st, DIST, sentDist, TD, counts,
+                              lightSide, scale) {
+  nSplits <- length(st$MATCH)
+  pMinus1 <- lightSide - 1L
+
+  # For remove, only splits whose MATCH == candidate are affected
+  benefits <- numeric(length(candidates))
+  matchDist <- .DiagDist(st$MATCH, DIST, sentDist)
+  match2Dist <- .DiagDist(st$MATCH2, DIST, sentDist)
+
+  for (ci in seq_along(candidates)) {
+    c <- candidates[ci]
+    affected <- st$MATCH == c & !is.na(st$MATCH)
+    if (any(affected)) {
+      if (scale) {
+        fn_cost <- sum((DIST[affected, c] - match2Dist[affected]) /
+                         pMinus1[affected] * counts[affected])
+      } else {
+        fn_cost <- sum((DIST[affected, c] - match2Dist[affected]) *
+                         counts[affected])
+      }
+    } else {
+      fn_cost <- 0
+    }
+    benefits[ci] <- TD[c] + fn_cost
+  }
+  benefits
+}
+
+
+.DoAdd <- function(branchIdx, st, DIST, sentDist) {
+  st$incl[branchIdx] <- TRUE
+  nSplits <- length(st$MATCH)
+
+  curMatchDist <- .DiagDist(st$MATCH, DIST, sentDist)
+  newDist <- DIST[, branchIdx]
+
+  better <- newDist < curMatchDist
+  if (any(better)) {
+    st$MATCH2[better] <- st$MATCH[better]
+    st$MATCH[better] <- branchIdx
+  }
+
+  # Check if it becomes second match for others
+  notBetter <- !better
+  secDist <- .DiagDist(st$MATCH2, DIST, sentDist)
+  betterSec <- notBetter & (newDist < secDist)
+  if (any(betterSec)) {
+    st$MATCH2[betterSec] <- branchIdx
+  }
+}
+
+
+.DoRemove <- function(branchIdx, st, DIST, sentDist, lightSide, scale) {
+  st$incl[branchIdx] <- FALSE
+  nSplits <- length(st$MATCH)
+  curInc <- which(st$incl)
+  pMinus1 <- lightSide - 1L
+
+  # Splits whose first match was branchIdx
+  affected1 <- which(st$MATCH == branchIdx & !is.na(st$MATCH))
+  if (length(affected1)) {
+    st$MATCH[affected1] <- st$MATCH2[affected1]
+    # Find new second match for these
+    for (b in affected1) {
+      st$MATCH2[b] <- .FindSecond(b, st$MATCH[b], curInc, DIST,
+                                  pMinus1, scale)
+    }
+  }
+
+  # Splits whose second match was branchIdx
+  affected2 <- which(st$MATCH2 == branchIdx & !is.na(st$MATCH2))
+  if (length(affected2)) {
+    for (b in affected2) {
+      st$MATCH2[b] <- .FindSecond(b, st$MATCH[b], curInc, DIST,
+                                  pMinus1, scale)
+    }
+  }
+}
+
+
+.FindSecond <- function(b, matchIdx, curInc, DIST, pMinus1, scale) {
+  cands <- if (is.na(matchIdx)) curInc else setdiff(curInc, matchIdx)
+  if (length(cands) == 0L) return(NA_integer_)
+  dists <- DIST[b, cands]
+  best <- cands[which.min(dists)]
+  bestDist <- DIST[b, best]
+  if (scale && bestDist / pMinus1[b] >= 1) return(NA_integer_)
+  if (!scale && bestDist >= pMinus1[b]) return(NA_integer_)
+  best
+}
+
+
+.IsCompat <- function(idx, incl, compat, nTip) {
+  incIdx <- which(incl)
+  if (length(incIdx) == 0L) return(TRUE)
+  if (length(incIdx) >= nTip - 3L) return(FALSE)
+  all(compat[idx, incIdx])
+}
+
+
+.GreedyBest <- function(st, DIST, sentDist, TD, counts, lightSide,
+                        compat, sortOrd, scale, nSplits, nTip) {
+  repeat {
+    # Evaluate all candidates
+    addCands <- integer(0)
+    remCands <- integer(0)
+    for (idx in sortOrd) {
+      if (st$incl[idx]) {
+        remCands <- c(remCands, idx)
+      } else if (.IsCompat(idx, st$incl, compat, nTip)) {
+        addCands <- c(addCands, idx)
+      }
+    }
+
+    bestBen <- 0
+    bestIdx <- 0L
+    bestAction <- ""
+
+    if (length(addCands)) {
+      addBen <- .AddBenefitVec(addCands, st, DIST, sentDist, TD, counts,
+                               lightSide, scale)
+      mx <- max(addBen)
+      if (mx > bestBen) {
+        bestBen <- mx
+        bestIdx <- addCands[which.max(addBen)]
+        bestAction <- "add"
+      }
+    }
+    if (length(remCands)) {
+      remBen <- .RemoveBenefitVec(remCands, st, DIST, sentDist, TD, counts,
+                                  lightSide, scale)
+      mx <- max(remBen)
+      if (mx > bestBen) {
+        bestBen <- mx
+        bestIdx <- remCands[which.max(remBen)]
+        bestAction <- "remove"
+      }
+    }
+
+    if (bestBen <= 0) break
+    if (bestAction == "add") {
+      .DoAdd(bestIdx, st, DIST, sentDist)
+    } else {
+      .DoRemove(bestIdx, st, DIST, sentDist, lightSide, scale)
+    }
+  }
+}
+
+
+.GreedyFirst <- function(st, DIST, sentDist, TD, counts, lightSide,
+                         compat, sortOrd, scale, nSplits, nTip) {
+  improving <- TRUE
+  while (improving) {
+    improving <- FALSE
+    matchDist <- .DiagDist(st$MATCH, DIST, sentDist)
+    match2Dist <- .DiagDist(st$MATCH2, DIST, sentDist)
+    pMinus1 <- lightSide - 1L
+
+    for (idx in sortOrd) {
+      if (st$incl[idx]) {
+        # Quick remove benefit
+        affected <- st$MATCH == idx & !is.na(st$MATCH)
+        if (any(affected)) {
+          if (scale) {
+            fn <- sum((DIST[affected, idx] - match2Dist[affected]) /
+                        pMinus1[affected] * counts[affected])
+          } else {
+            fn <- sum((DIST[affected, idx] - match2Dist[affected]) *
+                        counts[affected])
+          }
+        } else {
+          fn <- 0
+        }
+        if (TD[idx] + fn > 0) {
+          .DoRemove(idx, st, DIST, sentDist, lightSide, scale)
+          improving <- TRUE
+          break
+        }
+      } else if (.IsCompat(idx, st$incl, compat, nTip)) {
+        newDist <- DIST[, idx]
+        if (scale) {
+          diff <- pmax((matchDist - newDist) / pMinus1, 0)
+        } else {
+          diff <- pmax(matchDist - newDist, 0)
+        }
+        if (sum(diff * counts) - TD[idx] > 0) {
+          .DoAdd(idx, st, DIST, sentDist)
+          improving <- TRUE
+          break
+        }
+      }
+    }
+  }
+}
+
+
+.SplitsToPhylo <- function(rawSplits, included, tipLabels, nTip) {
+  selectedIdx <- which(included)
+  if (length(selectedIdx) == 0L) {
+    return(TreeTools::StarTree(tipLabels))
+  }
+  selectedRaw <- rawSplits[selectedIdx, , drop = FALSE]
+  sp <- structure(selectedRaw, nTip = nTip, tip.label = tipLabels,
+                  class = "Splits")
+  as.phylo(sp)
+}
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 6614f6450..9cb926b7d 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -1,3 +1,22 @@
+@article{Lemoine2018,
+  author = {Lemoine, Fr{\'e}d{\'e}ric and Domelevo Entfellner, Jean-Baka and Wilkinson, Eduan and Correia, Daniela and D{\'a}vila Felipe, Miraine and De Oliveira, Tulio and Gascuel, Olivier},
+  title = {{Renewing Felsenstein's phylogenetic bootstrap in the era of big data}},
+  journal = {Nature},
+  year = {2018},
+  volume = {556},
+  number = {7702},
+  pages = {452--456},
+  doi = {10.1038/s41586-018-0043-0}
+}
+
+@article{Takazawa2025,
+  author = {Takazawa, Yuuki and Takeda, Akiko and Hayamizu, Momoko and Gascuel, Olivier},
+  title = {{Outperforming the majority-rule consensus tree using fine-grained dissimilarity measures}},
+  journal = {bioRxiv},
+  year = {2025},
+  doi = {10.64898/2026.03.16.712085}
+}
+
 @article{Allen2001,
   author = {Allen, Benjamin L. and Steel, Mike A.},
   doi = {10.1007/s00026-001-8006-8},
diff --git a/man/TransferConsensus.Rd b/man/TransferConsensus.Rd
new file mode 100644
index 000000000..9f69a55da
--- /dev/null
+++ b/man/TransferConsensus.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transfer_consensus.R
+\name{TransferConsensus}
+\alias{TransferConsensus}
+\title{Consensus tree minimizing transfer distance}
+\usage{
+TransferConsensus(
+  trees,
+  scale = TRUE,
+  greedy = c("best", "first"),
+  init = c("empty", "majority")
+)
+}
+\arguments{
+\item{trees}{An object of class \code{multiPhylo}: the input trees.
+All trees must share the same tip labels.}
+
+\item{scale}{Logical; if \code{TRUE} (default), use the scaled transfer
+distance (normalized by light-side size minus one).  If \code{FALSE}, use
+the unscaled (raw Hamming) transfer distance.}
+
+\item{greedy}{Character string specifying the greedy strategy:
+\code{"best"} (default) picks the single highest-benefit action at each step;
+\code{"first"} picks the first improving action encountered (faster but
+potentially lower quality).}
+
+\item{init}{Character string specifying the initial consensus:
+\code{"empty"} (default) starts with no splits (purely additive);
+\code{"majority"} starts with the majority-rule consensus and refines.}
+}
+\value{
+A tree of class \code{phylo}.
+}
+\description{
+Construct a consensus tree that minimizes the sum of transfer distances
+to a set of input trees, using a greedy add-and-prune heuristic.
+}
+\details{
+Unlike the majority-rule consensus, which minimizes Robinson-Foulds
+distance and can be highly unresolved when phylogenetic signal is low,
+\code{TransferConsensus()} uses the finer-grained transfer distance
+\insertCite{Lemoine2018}{TreeDist} to construct a more resolved consensus
+tree.
+
+The algorithm pools all splits observed across input trees, computes
+pairwise transfer distances between them, and greedily adds or removes
+splits to minimize total transfer dissimilarity cost.  The approach
+follows \insertCite{Takazawa2025;textual}{TreeDist}, reimplemented for
+TreeDist's infrastructure.
+}
+\examples{
+library(TreeTools)
+# Generate bootstrap-like trees
+trees <- as.phylo(1:20, nTip = 12)
+
+# Transfer consensus (more resolved than majority rule)
+tc <- TransferConsensus(trees)
+plot(tc)
+
+# Compare resolution
+mr <- Consensus(trees, p = 0.5)
+cat("Majority-rule splits:", NSplits(mr), "\n")
+cat("Transfer consensus splits:", NSplits(tc), "\n")
+
+}
+\references{
+\insertAllCited{}
+}
diff --git a/tests/testthat/test-transfer_consensus.R b/tests/testthat/test-transfer_consensus.R
new file mode 100644
index 000000000..9e3d8e353
--- /dev/null
+++ b/tests/testthat/test-transfer_consensus.R
@@ -0,0 +1,80 @@
+test_that("Identical trees return fully resolved consensus", {
+  tree <- as.phylo(0, nTip = 10)
+  trees <- structure(rep(list(tree), 10), class = "multiPhylo")
+
+  tc <- TransferConsensus(trees, greedy = "first")
+  expect_equal(NSplits(tc), NSplits(tree))
+
+  tc_best <- TransferConsensus(trees, greedy = "best")
+  expect_equal(NSplits(tc_best), NSplits(tree))
+})
+
+test_that("Star tree returned when no signal", {
+  set.seed(6129)
+  # Fully random trees with many tips — negligible split overlap
+  trees <- structure(lapply(1:30, function(i) rtree(50)),
+                     class = "multiPhylo")
+  tc <- TransferConsensus(trees, greedy = "first")
+
+  # Should be very unresolved (0 or near-0 splits)
+  expect_lte(NSplits(tc), 5)
+})
+
+test_that("Transfer consensus is at least as resolved as majority-rule for structured trees",
+{
+  # Trees from as.phylo with moderate overlap
+  trees <- as.phylo(0:29, nTip = 20)
+  tc <- TransferConsensus(trees, greedy = "best")
+  mr <- Consensus(trees, p = 0.5)
+
+  # Transfer consensus should be at least as resolved
+  expect_gte(NSplits(tc), NSplits(mr))
+})
+
+test_that("Both greedy strategies produce valid trees", {
+  trees <- as.phylo(1:15, nTip = 12)
+
+  tc_first <- TransferConsensus(trees, greedy = "first")
+  tc_best  <- TransferConsensus(trees, greedy = "best")
+
+  expect_s3_class(tc_first, "phylo")
+  expect_s3_class(tc_best, "phylo")
+  expect_equal(sort(TipLabels(tc_first)), sort(TipLabels(trees[[1]])))
+  expect_equal(sort(TipLabels(tc_best)), sort(TipLabels(trees[[1]])))
+})
+
+test_that("scale = FALSE (unscaled) works", {
+  trees <- as.phylo(1:10, nTip = 10)
+
+  tc_scaled <- TransferConsensus(trees, scale = TRUE)
+  tc_unscaled <- TransferConsensus(trees, scale = FALSE)
+
+  expect_s3_class(tc_scaled, "phylo")
+  expect_s3_class(tc_unscaled, "phylo")
+})
+
+test_that("init = 'majority' works", {
+  trees <- as.phylo(0:19, nTip = 15)
+
+  tc_empty <- TransferConsensus(trees, init = "empty")
+  tc_maj   <- TransferConsensus(trees, init = "majority")
+
+  expect_s3_class(tc_empty, "phylo")
+  expect_s3_class(tc_maj, "phylo")
+  # Both should produce reasonable trees
+  expect_gte(NSplits(tc_empty), 1)
+  expect_gte(NSplits(tc_maj), 1)
+})
+
+test_that("Error on bad input", {
+  expect_error(TransferConsensus(list(rtree(5))), "multiPhylo")
+  expect_error(TransferConsensus(structure(list(rtree(5)), class = "multiPhylo")),
+               "at least 2")
+})
+
+test_that("Two-tree consensus returns a valid tree", {
+  trees <- as.phylo(1:2, nTip = 8)
+  tc <- TransferConsensus(trees)
+  expect_s3_class(tc, "phylo")
+  expect_equal(length(TipLabels(tc)), 8)
+})

From 351e337a80b750a22cf5f22ecc6db4819f65f689 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 19 Mar 2026 20:18:40 +0000
Subject: [PATCH 02/17] TransferConsensus opt

---
 .positai/settings.json     |   4 +
 AGENTS.md                  |  20 +
 R/RcppExports.R            |  21 +
 R/transfer_consensus.R     |  62 +--
 inst/REFERENCES.bib        |   4 +-
 src/RcppExports.cpp        |  33 ++
 src/transfer_consensus.cpp | 840 +++++++++++++++++++++++++++++++++++++
 7 files changed, 939 insertions(+), 45 deletions(-)
 create mode 100644 src/transfer_consensus.cpp

diff --git a/.positai/settings.json b/.positai/settings.json
index 7aba4f95e..627e8f7f2 100644
--- a/.positai/settings.json
+++ b/.positai/settings.json
@@ -28,6 +28,10 @@
       "rm -f src/*.o src/*.dll": "allow",
       "tail *": "allow",
       "wc *": "allow"
+    },
+    "external_directory": {
+      "../Quartet/*": "allow",
+      "../TreeTools/*": "allow"
     }
   },
   "model": {
diff --git a/AGENTS.md b/AGENTS.md
index 7ed0028b9..6208160a7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -271,6 +271,26 @@ covr::file_coverage(cov, "src/pairwise_distances.cpp")  # per-file summary
 Aim for full line coverage of new C++ and R code.  If a new code path is not
 exercised by the existing test suite, add targeted tests in `tests/testthat/`.
 
+### ⚠ Exploratory / risky R code — use a subprocess
+
+When running experimental R code that may be slow, allocate large objects,
+or involve complex loops (e.g., hill-climbing over tree space, brute-force
+evaluation of many candidate trees), **run it in a subprocess** rather than
+the interactive RStudio session.  Long-running or memory-heavy computations
+in the main session can freeze or crash RStudio.
+
+```r
+# Write the experiment to a temp script, then run via Rscript:
+writeLines(code, tmp <- tempfile(fileext = ".R"))
+system2("Rscript", tmp, stdout = TRUE, stderr = TRUE)
+
+# Or use callr for structured subprocess execution:
+callr::r(function() { ... }, timeout = 120)
+```
+
+This applies especially to prototype algorithm exploration (e.g., CID
+hill-climbing over split space) where per-iteration cost is uncertain.
+
 ---
 
 ## Completed Optimizations (this dev cycle)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 140dd0e98..da2b7b5ee 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -200,6 +200,27 @@ spr_table_7 <- function(sp1, sp2) {
     .Call(`_TreeDist_spr_table_7`, sp1, sp2)
 }
 
+#' Transfer consensus (C++ implementation)
+#'
+#' @param splits_list List of raw matrices (one per tree), each from as.Splits().
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer distance?
+#' @param greedy_best Logical: TRUE for "best", FALSE for "first".
+#' @param init_majority Logical: TRUE to start from majority-rule splits.
+#'
+#' @return A LogicalVector of length n_splits indicating which pooled splits
+#'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
+#'   of all unique splits) and "light_side" (integer vector).
+#' @keywords internal
+cpp_transfer_consensus <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority) {
+    .Call(`_TreeDist_cpp_transfer_consensus`, splits_list, n_tip, scale, greedy_best_flag, init_majority)
+}
+
+#' @keywords internal
+cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter) {
+    .Call(`_TreeDist_cpp_tc_profile`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter)
+}
+
 cpp_robinson_foulds_distance <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
 }
diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R
index bd1d9f65e..c710d3c35 100644
--- a/R/transfer_consensus.R
+++ b/R/transfer_consensus.R
@@ -64,52 +64,28 @@ TransferConsensus <- function(trees,
   tipLabels <- TipLabels(trees[[1]])
   nTip <- length(tipLabels)
 
-  splitInfo <- .PoolSplits(trees, tipLabels)
-  splitMat   <- splitInfo$splits     # integer matrix: nSplits x nTip (logical)
-  counts     <- splitInfo$counts
-  lightSide  <- splitInfo$lightSide
-  treeMembers <- splitInfo$treeMembers
-  nSplits <- nrow(splitMat)
-
-  # Pairwise transfer distance matrix (integer, nSplits x nSplits)
-  DIST <- .TransferDistMat(splitMat, nTip)
-
-  # Augment with sentinel column/row (distance to leaf bipartitions)
-  # sentinel index = nSplits + 1
-  sentDist <- lightSide - 1L  # length nSplits
-
-  # Transfer dissimilarity cost (FP cost of including each split)
-  TD <- .ComputeTD(DIST, sentDist, treeMembers, lightSide, nTree, scale)
-
-  # Compatibility matrix
-  compat <- .CompatMat(splitMat, nTip)
-
-  # Mutable state
-  st <- new.env(parent = emptyenv())
-  st$MATCH  <- rep(NA_integer_, nSplits)  # NA = sentinel
-
-  st$MATCH2 <- rep(NA_integer_, nSplits)
-  st$incl   <- logical(nSplits)
-
-  if (init == "majority") {
-    majIdx <- which(counts > nTree / 2)
-    if (length(majIdx)) {
-      for (idx in majIdx) st$incl[idx] <- TRUE
-      .InitMatches(st, DIST, sentDist, lightSide, scale)
-    }
-  }
-
-  sortOrd <- order(counts, decreasing = TRUE)
+  # Convert each tree to a raw split matrix (TreeTools C++ internally)
+  splitsList <- lapply(trees, function(tr) {
+    sp <- as.Splits(tr, tipLabels)
+    unclass(sp)
+  })
+
+  # Delegate all work to C++
+  res <- cpp_transfer_consensus(
+    splitsList, nTip, scale,
+    greedy_best_flag = (greedy == "best"),
+    init_majority = (init == "majority")
+  )
 
-  if (greedy == "best") {
-    .GreedyBest(st, DIST, sentDist, TD, counts, lightSide, compat,
-                sortOrd, scale, nSplits, nTip)
-  } else {
-    .GreedyFirst(st, DIST, sentDist, TD, counts, lightSide, compat,
-                 sortOrd, scale, nSplits, nTip)
+  included <- res$included
+  if (!any(included)) {
+    return(StarTree(tipLabels))
   }
 
-  .SplitsToPhylo(splitInfo$rawSplits, st$incl, tipLabels, nTip)
+  rawSplits <- res$raw_splits[included, , drop = FALSE]
+  sp <- structure(rawSplits, nTip = nTip, tip.label = tipLabels,
+                  class = "Splits")
+  as.phylo(sp)
 }
 
 
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 9cb926b7d..f2f400a3f 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -9,11 +9,11 @@ @article{Lemoine2018
   doi = {10.1038/s41586-018-0043-0}
 }
 
-@article{Takazawa2025,
+@article{Takazawa2026,
   author = {Takazawa, Yuuki and Takeda, Akiko and Hayamizu, Momoko and Gascuel, Olivier},
   title = {{Outperforming the majority-rule consensus tree using fine-grained dissimilarity measures}},
   journal = {bioRxiv},
-  year = {2025},
+  year = {2026},
   doi = {10.64898/2026.03.16.712085}
 }
 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 443df8c17..90346a5e5 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -522,6 +522,37 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// cpp_transfer_consensus
+List cpp_transfer_consensus(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority);
+RcppExport SEXP _TreeDist_cpp_transfer_consensus(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const List& >::type splits_list(splits_listSEXP);
+    Rcpp::traits::input_parameter< const int >::type n_tip(n_tipSEXP);
+    Rcpp::traits::input_parameter< const bool >::type scale(scaleSEXP);
+    Rcpp::traits::input_parameter< const bool >::type greedy_best_flag(greedy_best_flagSEXP);
+    Rcpp::traits::input_parameter< const bool >::type init_majority(init_majoritySEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_consensus(splits_list, n_tip, scale, greedy_best_flag, init_majority));
+    return rcpp_result_gen;
+END_RCPP
+}
+// cpp_tc_profile
+Rcpp::NumericVector cpp_tc_profile(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority, const int n_iter);
+RcppExport SEXP _TreeDist_cpp_tc_profile(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP, SEXP n_iterSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const List& >::type splits_list(splits_listSEXP);
+    Rcpp::traits::input_parameter< const int >::type n_tip(n_tipSEXP);
+    Rcpp::traits::input_parameter< const bool >::type scale(scaleSEXP);
+    Rcpp::traits::input_parameter< const bool >::type greedy_best_flag(greedy_best_flagSEXP);
+    Rcpp::traits::input_parameter< const bool >::type init_majority(init_majoritySEXP);
+    Rcpp::traits::input_parameter< const int >::type n_iter(n_iterSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_tc_profile(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter));
+    return rcpp_result_gen;
+END_RCPP
+}
 // cpp_robinson_foulds_distance
 List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
 RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
@@ -658,6 +689,8 @@ static const R_CallMethodDef CallEntries[] = {
     {"_TreeDist_keep_and_reroot", (DL_FUNC) &_TreeDist_keep_and_reroot, 3},
     {"_TreeDist_keep_and_reduce", (DL_FUNC) &_TreeDist_keep_and_reduce, 3},
     {"_TreeDist_spr_table_7", (DL_FUNC) &_TreeDist_spr_table_7, 2},
+    {"_TreeDist_cpp_transfer_consensus", (DL_FUNC) &_TreeDist_cpp_transfer_consensus, 5},
+    {"_TreeDist_cpp_tc_profile", (DL_FUNC) &_TreeDist_cpp_tc_profile, 6},
     {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3},
     {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3},
     {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3},
diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
new file mode 100644
index 000000000..03f4f6bfe
--- /dev/null
+++ b/src/transfer_consensus.cpp
@@ -0,0 +1,840 @@
+/* transfer_consensus.cpp
+ *
+ * C++ implementation of the Transfer Consensus algorithm (Takazawa 2025).
+ * Replaces the R-level .PoolSplits, .TransferDistMat, .ComputeTD,
+ * .CompatMat, and .GreedyBest/.GreedyFirst helpers.
+ *
+ * Exported function: cpp_transfer_consensus()
+ */
+
+#include <TreeTools/SplitList.h>
+#include <Rcpp/Lightest>
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+#include <unordered_map>
+#include <vector>
+
+using Rcpp::IntegerVector;
+using Rcpp::List;
+using Rcpp::LogicalVector;
+using Rcpp::RawMatrix;
+using TreeTools::count_bits;
+using TreeTools::SplitList;
+
+
+// ============================================================================
+// Types
+// ============================================================================
+
+using int16 = int_fast16_t;
+using int32 = int_fast32_t;
+
+// ============================================================================
+// Pooled split representation
+// ============================================================================
+
+struct PooledSplits {
+  int n_splits;
+  int n_bins;
+  int n_tips;
+  splitbit last_mask;
+
+  // Flat array of canonical split data: n_splits * n_bins elements.
+  // Split i occupies [i*n_bins .. (i+1)*n_bins).
+  std::vector<splitbit> data;
+
+  // Per-split metadata
+  std::vector<int> count;       // how many trees contain this split
+  std::vector<int> light_side;  // min(popcount, n_tips - popcount)
+
+  // Tree membership: tree_members[t] = list of unique-split indices in tree t
+  std::vector<std::vector<int>> tree_members;
+
+  // Access helpers
+  const splitbit* split(int i) const { return &data[i * n_bins]; }
+  splitbit*       split(int i)       { return &data[i * n_bins]; }
+};
+
+
+// ============================================================================
+// FNV-1a hash for canonical split arrays
+// ============================================================================
+
+struct SplitHash {
+  int n_bins;
+  explicit SplitHash(int nb) : n_bins(nb) {}
+  SplitHash() : n_bins(0) {}
+
+  std::size_t operator()(const splitbit* sp) const {
+    std::size_t h = 14695981039346656037ULL;
+    const unsigned char* p = reinterpret_cast<const unsigned char*>(sp);
+    const int bytes = n_bins * static_cast<int>(sizeof(splitbit));
+    for (int i = 0; i < bytes; ++i) {
+      h ^= static_cast<std::size_t>(p[i]);
+      h *= 1099511628211ULL;
+    }
+    return h;
+  }
+};
+
+struct SplitEqual {
+  int n_bins;
+  explicit SplitEqual(int nb) : n_bins(nb) {}
+  SplitEqual() : n_bins(0) {}
+
+  bool operator()(const splitbit* a, const splitbit* b) const {
+    for (int bin = 0; bin < n_bins; ++bin) {
+      if (a[bin] != b[bin]) return false;
+    }
+    return true;
+  }
+};
+
+// ============================================================================
+// pool_splits: deduplicate and canonicalise all splits from all trees
+// ============================================================================
+
+static PooledSplits pool_splits(const List& splits_list, int n_tips) {
+  const int n_tree = splits_list.size();
+
+  // Determine n_bins from first tree
+  const RawMatrix first_mat = Rcpp::as<RawMatrix>(splits_list[0]);
+  const int n_bytes = first_mat.ncol();
+  const int n_bins = (n_bytes + static_cast<int>(sizeof(splitbit)) - 1)
+    / static_cast<int>(sizeof(splitbit));
+
+  const splitbit last_mask = (n_tips % SL_BIN_SIZE == 0)
+    ? ~splitbit(0)
+    : (splitbit(1) << (n_tips % SL_BIN_SIZE)) - 1;
+
+  // Hash map from canonical split → unique index
+  SplitHash hasher(n_bins);
+  SplitEqual eq(n_bins);
+  std::unordered_map<const splitbit*, int, SplitHash, SplitEqual>
+    split_map(64, hasher, eq);
+
+  // Temp buffer for canonicalisation
+  std::vector<splitbit> canon_buf(n_bins);
+
+  PooledSplits pool;
+  pool.n_tips = n_tips;
+  pool.n_bins = n_bins;
+  pool.last_mask = last_mask;
+  pool.n_splits = 0;
+  pool.tree_members.resize(n_tree);
+
+  for (int t = 0; t < n_tree; ++t) {
+    SplitList sl(Rcpp::as<RawMatrix>(splits_list[t]));
+    std::vector<int>& members = pool.tree_members[t];
+    members.reserve(sl.n_splits);
+
+    for (int16 s = 0; s < sl.n_splits; ++s) {
+      // Copy split bits into canon_buf
+      for (int bin = 0; bin < n_bins; ++bin) {
+        canon_buf[bin] = sl.state[s][bin];
+      }
+      // Mask last bin
+      canon_buf[n_bins - 1] &= last_mask;
+
+      // Canonicalise: if bit 0 is set, flip
+      if (canon_buf[0] & splitbit(1)) {
+        for (int bin = 0; bin < n_bins; ++bin) {
+          canon_buf[bin] = ~canon_buf[bin];
+        }
+        canon_buf[n_bins - 1] &= last_mask;
+      }
+
+      // Look up in map
+      auto it = split_map.find(canon_buf.data());
+      int idx;
+      if (it != split_map.end()) {
+        idx = it->second;
+        pool.count[idx]++;
+      } else {
+        idx = pool.n_splits++;
+        // Append canonical data
+        const size_t old_sz = pool.data.size();
+        pool.data.resize(old_sz + n_bins);
+        std::copy(canon_buf.begin(), canon_buf.end(),
+                  pool.data.begin() + old_sz);
+        pool.count.push_back(1);
+
+        // Compute light_side = min(popcount, n_tips - popcount)
+        int pc = 0;
+        for (int bin = 0; bin < n_bins; ++bin) {
+          pc += count_bits(canon_buf[bin]);
+        }
+        pool.light_side.push_back(std::min(pc, n_tips - pc));
+
+        // Insert pointer into map (points into pool.data)
+        split_map[pool.split(idx)] = idx;
+      }
+
+      // Only record unique splits per tree
+      if (members.empty() || members.back() != idx) {
+        // Check if already present (small set, linear scan ok)
+        bool found = false;
+        for (int m : members) {
+          if (m == idx) { found = true; break; }
+        }
+        if (!found) members.push_back(idx);
+      }
+    }
+  }
+
+  return pool;
+}
+
+
+// ============================================================================
+// transfer_dist_mat: pairwise transfer distance between all unique splits
+// ============================================================================
+
+// Returns a flat n_splits x n_splits matrix (row-major).
+// DIST[i * n + j] = transfer distance between split i and split j.
+static std::vector<int> transfer_dist_mat(const PooledSplits& pool) {
+  const int M = pool.n_splits;
+  const int nb = pool.n_bins;
+  const int nt = pool.n_tips;
+
+  std::vector<int> dist(M * M, 0);
+
+  for (int i = 0; i < M; ++i) {
+    const splitbit* a = pool.split(i);
+    for (int j = i + 1; j < M; ++j) {
+      const splitbit* b = pool.split(j);
+      int hamming = 0;
+      for (int bin = 0; bin < nb; ++bin) {
+        hamming += count_bits(a[bin] ^ b[bin]);
+      }
+      int td = std::min(hamming, nt - hamming);
+      dist[i * M + j] = td;
+      dist[j * M + i] = td;
+    }
+  }
+  return dist;
+}
+
+
+// ============================================================================
+// compute_td: transfer dissimilarity cost for each split
+// ============================================================================
+
+static std::vector<double> compute_td(
+    const std::vector<int>& dist,
+    const PooledSplits& pool,
+    bool scale
+) {
+  const int M = pool.n_splits;
+  const int n_tree = static_cast<int>(pool.tree_members.size());
+
+  std::vector<double> td(M, 0.0);
+
+  for (int t = 0; t < n_tree; ++t) {
+    const auto& members = pool.tree_members[t];
+    const int n_mem = static_cast<int>(members.size());
+
+    for (int b = 0; b < M; ++b) {
+      // Min distance from split b to any split in tree t
+      int min_d = pool.light_side[b] - 1; // sentinel distance
+      for (int k = 0; k < n_mem; ++k) {
+        int d = dist[b * M + members[k]];
+        if (d < min_d) min_d = d;
+      }
+      const int p_minus_1 = pool.light_side[b] - 1;
+      if (p_minus_1 <= 0) continue;
+      if (scale) {
+        double contrib = static_cast<double>(min_d) / p_minus_1;
+        td[b] += (contrib < 1.0) ? contrib : 1.0;
+      } else {
+        td[b] += (min_d < p_minus_1) ? min_d : p_minus_1;
+      }
+    }
+  }
+  return td;
+}
+
+
+// ============================================================================
+// compat_mat: pairwise compatibility between all unique splits
+// ============================================================================
+
+// Returns flat M x M bool matrix.
+// compat[i * M + j] = true iff splits i and j are compatible.
+static std::vector<uint8_t> compat_mat(const PooledSplits& pool) {
+  const int M = pool.n_splits;
+  const int nb = pool.n_bins;
+  const int last_bin = nb - 1;
+  const splitbit lm = pool.last_mask;
+
+  std::vector<uint8_t> compat(M * M, 1);
+
+  for (int i = 0; i < M; ++i) {
+    const splitbit* a = pool.split(i);
+    for (int j = i + 1; j < M; ++j) {
+      const splitbit* b = pool.split(j);
+      bool ab = false, anb = false, nab = false, nanb = false;
+      for (int bin = 0; bin < nb; ++bin) {
+        splitbit mask = (bin == last_bin) ? lm : ~splitbit(0);
+        splitbit a_bin = a[bin] & mask;
+        splitbit b_bin = b[bin] & mask;
+        if (!ab)   ab   = (a_bin & b_bin) != 0;
+        if (!anb)  anb  = (a_bin & ~b_bin & mask) != 0;
+        if (!nab)  nab  = (~a_bin & b_bin & mask) != 0;
+        if (!nanb) nanb = (~a_bin & ~b_bin & mask) != 0;
+        if (ab && anb && nab && nanb) break;
+      }
+      bool comp = !ab || !anb || !nab || !nanb;
+      compat[i * M + j] = comp ? 1 : 0;
+      compat[j * M + i] = comp ? 1 : 0;
+    }
+  }
+  return compat;
+}
+
+
+// ============================================================================
+// Greedy loop helpers
+// ============================================================================
+
+// Sentinel distance for split b (distance to a leaf bipartition)
+inline int sent_dist(int b, const PooledSplits& pool) {
+  return pool.light_side[b] - 1;
+}
+
+// "Effective distance" from split b to its match (or sentinel if match == -1)
+inline int eff_dist(int b, int match_idx, const std::vector<int>& dist,
+                    int M, const PooledSplits& pool) {
+  return (match_idx < 0) ? sent_dist(b, pool) : dist[b * M + match_idx];
+}
+
+// Find second-closest included split to split b (excluding matchIdx).
+// Returns {index, distance} as a pair; index = -1 means sentinel.
+static std::pair<int, int> find_second(
+    int b, int matchIdx,
+    const std::vector<uint8_t>& incl,
+    const std::vector<int>& dist,
+    int M, const PooledSplits& pool,
+    bool scale
+) {
+  int p_minus_1 = sent_dist(b, pool);
+  int best = -1;
+  int best_d = (p_minus_1 > 0) ? p_minus_1 : 0;
+
+  for (int c = 0; c < M; ++c) {
+    if (!incl[c] || c == matchIdx) continue;
+    int d = dist[b * M + c];
+    if (d < best_d) {
+      best_d = d;
+      best = c;
+    }
+  }
+  if (best >= 0) {
+    if (scale) {
+      if (p_minus_1 <= 0 ||
+          static_cast<double>(best_d) / p_minus_1 >= 1.0)
+        return {-1, p_minus_1};
+    } else {
+      if (best_d >= p_minus_1)
+        return {-1, p_minus_1};
+    }
+  }
+  return {best, (best >= 0) ? best_d : p_minus_1};
+}
+
+
+// ============================================================================
+// GreedyState: persistent state for the greedy loop.
+//
+// Maintains match_dist[], match2_dist[], n_incompat[], weight[] to avoid
+// redundant O(M) scans in benefit calculations and compat checks.
+// ============================================================================
+
+struct GreedyState {
+  int M;
+  int n_tip;
+  int n_incl;
+  bool scale;
+
+  std::vector<int>&       match;
+  std::vector<int>&       match2;
+  std::vector<uint8_t>&   incl;
+  const std::vector<int>& dist;
+  const std::vector<double>& td;
+  const PooledSplits&     pool;
+  const std::vector<uint8_t>& compat;
+
+  // Cached per-split arrays
+  std::vector<int>    match_dist;   // dist to match[b], or sentinel
+  std::vector<int>    match2_dist;  // dist to match2[b], or sentinel
+  std::vector<int>    n_incompat;   // # included splits incompatible with b
+  std::vector<double> weight;       // count[b] / (light_side[b] - 1) or count[b]
+
+  GreedyState(
+      std::vector<int>& match_, std::vector<int>& match2_,
+      std::vector<uint8_t>& incl_,
+      const std::vector<int>& dist_, const std::vector<double>& td_,
+      const PooledSplits& pool_, const std::vector<uint8_t>& compat_,
+      bool scale_, int M_, int n_tip_
+  ) : M(M_), n_tip(n_tip_), n_incl(0), scale(scale_),
+      match(match_), match2(match2_), incl(incl_),
+      dist(dist_), td(td_), pool(pool_), compat(compat_),
+      match_dist(M_), match2_dist(M_), n_incompat(M_, 0), weight(M_)
+  {
+    // Precompute weight = count[b] / (light_side[b] - 1) for scaled,
+    //                     count[b] for unscaled
+    for (int b = 0; b < M; ++b) {
+      int p1 = sent_dist(b, pool);
+      if (p1 <= 0) { weight[b] = 0.0; continue; }
+      weight[b] = scale
+        ? static_cast<double>(pool.count[b]) / p1
+        : static_cast<double>(pool.count[b]);
+    }
+
+    // Initialize match_dist / match2_dist from current match/match2
+    for (int b = 0; b < M; ++b) {
+      match_dist[b]  = eff_dist(b, match[b],  dist, M, pool);
+      match2_dist[b] = eff_dist(b, match2[b], dist, M, pool);
+    }
+
+    // Initialize n_incompat from current incl
+    for (int i = 0; i < M; ++i) {
+      if (!incl[i]) continue;
+      n_incl++;
+      for (int j = 0; j < M; ++j) {
+        if (!compat[j * M + i]) n_incompat[j]++;
+      }
+    }
+  }
+
+  // O(1) compat check
+  bool is_compatible(int idx) const {
+    return n_incompat[idx] == 0 && n_incl < n_tip - 3;
+  }
+
+  // Add-benefit using precomputed match_dist[] and weight[]
+  double add_benefit(int c) const {
+    double ben = -td[c];
+    for (int b = 0; b < M; ++b) {
+      int diff = match_dist[b] - dist[b * M + c];
+      if (diff <= 0) continue;
+      ben += diff * weight[b];
+    }
+    return ben;
+  }
+
+  // Remove-benefit using precomputed match2_dist[] and weight[]
+  double remove_benefit(int c) const {
+    double ben = td[c];
+    for (int b = 0; b < M; ++b) {
+      if (match[b] != c) continue;
+      int diff = dist[b * M + c] - match2_dist[b];
+      if (diff >= 0) continue;
+      ben += diff * weight[b];
+    }
+    return ben;
+  }
+
+  // Execute add: update incl, match, match2, match_dist, match2_dist,
+  //              n_incompat, n_incl
+  void do_add(int idx) {
+    incl[idx] = 1;
+    n_incl++;
+
+    // Update n_incompat for all splits
+    for (int j = 0; j < M; ++j) {
+      if (!compat[j * M + idx]) n_incompat[j]++;
+    }
+
+    // Update match/match2 and their cached distances
+    for (int b = 0; b < M; ++b) {
+      int new_d = dist[b * M + idx];
+      if (new_d < match_dist[b]) {
+        match2[b]      = match[b];
+        match2_dist[b] = match_dist[b];
+        match[b]       = idx;
+        match_dist[b]  = new_d;
+      } else if (new_d < match2_dist[b]) {
+        match2[b]      = idx;
+        match2_dist[b] = new_d;
+      }
+    }
+  }
+
+  // Execute remove: update incl, match, match2, match_dist, match2_dist,
+  //                 n_incompat, n_incl
+  void do_remove(int idx) {
+    incl[idx] = 0;
+    n_incl--;
+
+    // Update n_incompat
+    for (int j = 0; j < M; ++j) {
+      if (!compat[j * M + idx]) n_incompat[j]--;
+    }
+
+    // Update match/match2 for affected splits
+    for (int b = 0; b < M; ++b) {
+      if (match[b] == idx) {
+        // Promote match2 → match, find new match2
+        match[b]      = match2[b];
+        match_dist[b] = match2_dist[b];
+        auto [sec, sec_d] = find_second(b, match[b], incl, dist, M, pool, scale);
+        match2[b]      = sec;
+        match2_dist[b] = sec_d;
+      } else if (match2[b] == idx) {
+        auto [sec, sec_d] = find_second(b, match[b], incl, dist, M, pool, scale);
+        match2[b]      = sec;
+        match2_dist[b] = sec_d;
+      }
+    }
+  }
+};
+
+
+// ============================================================================
+// Greedy "best" strategy
+// ============================================================================
+
+static void greedy_best(
+    std::vector<int>& match,
+    std::vector<int>& match2,
+    std::vector<uint8_t>& incl,
+    const std::vector<int>& dist,
+    const std::vector<double>& td,
+    const PooledSplits& pool,
+    const std::vector<uint8_t>& compat,
+    const std::vector<int>& sort_ord,
+    bool scale,
+    int M, int n_tip
+) {
+  GreedyState st(match, match2, incl, dist, td, pool, compat, scale, M, n_tip);
+
+  while (true) {
+    double best_ben = 0.0;
+    int best_idx = -1;
+    bool best_is_add = false;
+
+    for (int si = 0; si < M; ++si) {
+      int idx = sort_ord[si];
+      if (incl[idx]) {
+        double ben = st.remove_benefit(idx);
+        if (ben > best_ben) {
+          best_ben = ben;
+          best_idx = idx;
+          best_is_add = false;
+        }
+      } else {
+        if (!st.is_compatible(idx)) continue;
+        double ben = st.add_benefit(idx);
+        if (ben > best_ben) {
+          best_ben = ben;
+          best_idx = idx;
+          best_is_add = true;
+        }
+      }
+    }
+
+    if (best_ben <= 0.0 || best_idx < 0) break;
+
+    if (best_is_add) {
+      st.do_add(best_idx);
+    } else {
+      st.do_remove(best_idx);
+    }
+  }
+}
+
+
+// ============================================================================
+// Greedy "first" strategy
+// ============================================================================
+
+static void greedy_first(
+    std::vector<int>& match,
+    std::vector<int>& match2,
+    std::vector<uint8_t>& incl,
+    const std::vector<int>& dist,
+    const std::vector<double>& td,
+    const PooledSplits& pool,
+    const std::vector<uint8_t>& compat,
+    const std::vector<int>& sort_ord,
+    bool scale,
+    int M, int n_tip
+) {
+  GreedyState st(match, match2, incl, dist, td, pool, compat, scale, M, n_tip);
+
+  bool improving = true;
+  while (improving) {
+    improving = false;
+    for (int si = 0; si < M; ++si) {
+      int idx = sort_ord[si];
+      if (incl[idx]) {
+        if (st.remove_benefit(idx) > 0.0) {
+          st.do_remove(idx);
+          improving = true;
+          break;
+        }
+      } else {
+        if (!st.is_compatible(idx)) continue;
+        if (st.add_benefit(idx) > 0.0) {
+          st.do_add(idx);
+          improving = true;
+          break;
+        }
+      }
+    }
+  }
+}
+
+
+// ============================================================================
+// Init matches (for init = "majority")
+// ============================================================================
+
+static void init_matches(
+    std::vector<int>& match,
+    std::vector<int>& match2,
+    const std::vector<uint8_t>& incl,
+    const std::vector<int>& dist,
+    int M, const PooledSplits& pool,
+    bool scale
+) {
+  // Collect included indices
+  std::vector<int> inc_idx;
+  for (int i = 0; i < M; ++i) {
+    if (incl[i]) inc_idx.push_back(i);
+  }
+  if (inc_idx.empty()) return;
+
+  for (int b = 0; b < M; ++b) {
+    int p_minus_1 = pool.light_side[b] - 1;
+    if (p_minus_1 <= 0) continue;
+
+    // Find closest and second-closest among included
+    int best = -1, second = -1;
+    int best_d = p_minus_1, second_d = p_minus_1; // sentinel threshold
+
+    for (int c : inc_idx) {
+      int d = dist[b * M + c];
+      if (d < best_d) {
+        second = best;
+        second_d = best_d;
+        best = c;
+        best_d = d;
+      } else if (d < second_d) {
+        second = c;
+        second_d = d;
+      }
+    }
+
+    if (best >= 0) {
+      if (scale) {
+        if (static_cast<double>(best_d) / p_minus_1 >= 1.0) continue;
+      } else {
+        if (best_d >= p_minus_1) continue;
+      }
+      match[b] = best;
+
+      if (second >= 0) {
+        if (scale) {
+          if (static_cast<double>(second_d) / p_minus_1 < 1.0)
+            match2[b] = second;
+        } else {
+          if (second_d < p_minus_1)
+            match2[b] = second;
+        }
+      }
+    }
+  }
+}
+
+
+// ============================================================================
+// Main exported function
+// ============================================================================
+
+//' Transfer consensus (C++ implementation)
+//'
+//' @param splits_list List of raw matrices (one per tree), each from as.Splits().
+//' @param n_tip Number of tips.
+//' @param scale Logical: use scaled transfer distance?
+//' @param greedy_best Logical: TRUE for "best", FALSE for "first".
+//' @param init_majority Logical: TRUE to start from majority-rule splits.
+//'
+//' @return A LogicalVector of length n_splits indicating which pooled splits
+//'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
+//'   of all unique splits) and "light_side" (integer vector).
+//' @keywords internal
+// [[Rcpp::export]]
+List cpp_transfer_consensus(
+    const List& splits_list,
+    const int n_tip,
+    const bool scale,
+    const bool greedy_best_flag,
+    const bool init_majority
+) {
+  const int n_tree = splits_list.size();
+
+  // ---- Pool unique splits ----
+  PooledSplits pool = pool_splits(splits_list, n_tip);
+  const int M = pool.n_splits;
+
+  if (M == 0) {
+    return List::create(
+      Rcpp::Named("included") = LogicalVector(0),
+      Rcpp::Named("raw_splits") = RawMatrix(0, 0),
+      Rcpp::Named("light_side") = IntegerVector(0)
+    );
+  }
+
+  // ---- Pairwise transfer distance matrix ----
+  std::vector<int> dist = transfer_dist_mat(pool);
+
+  // ---- Transfer dissimilarity cost ----
+  std::vector<double> td = compute_td(dist, pool, scale);
+
+  // ---- Compatibility matrix ----
+  std::vector<uint8_t> compat = compat_mat(pool);
+
+  // ---- Sort order (by count, descending) ----
+  std::vector<int> sort_ord(M);
+  std::iota(sort_ord.begin(), sort_ord.end(), 0);
+  std::sort(sort_ord.begin(), sort_ord.end(),
+            [&](int a, int b) { return pool.count[a] > pool.count[b]; });
+
+  // ---- Mutable state ----
+  std::vector<int> match(M, -1);   // -1 = sentinel
+  std::vector<int> match2(M, -1);
+  std::vector<uint8_t> incl(M, 0);
+
+  // ---- Init from majority if requested ----
+  if (init_majority) {
+    double half = n_tree / 2.0;
+    for (int i = 0; i < M; ++i) {
+      if (pool.count[i] > half) {
+        incl[i] = 1;
+      }
+    }
+    init_matches(match, match2, incl, dist, M, pool, scale);
+  }
+
+  // ---- Greedy loop ----
+  if (greedy_best_flag) {
+    greedy_best(match, match2, incl, dist, td, pool, compat, sort_ord,
+                scale, M, n_tip);
+  } else {
+    greedy_first(match, match2, incl, dist, td, pool, compat, sort_ord,
+                 scale, M, n_tip);
+  }
+
+  // ---- Build output ----
+  // Return raw split data for included splits so R can convert to phylo
+  LogicalVector incl_r(M);
+  for (int i = 0; i < M; ++i) incl_r[i] = incl[i] != 0;
+
+  // Build raw matrix of ALL unique splits (n_splits x n_bytes)
+  // The canonical form may differ from the original raw form: we need to
+  // return the canonical splitbit data as raw bytes for as.phylo().
+  const int n_bytes = pool.n_bins * static_cast<int>(sizeof(splitbit));
+  RawMatrix raw_splits(M, n_bytes);
+  for (int i = 0; i < M; ++i) {
+    const unsigned char* src =
+      reinterpret_cast<const unsigned char*>(pool.split(i));
+    for (int j = 0; j < n_bytes; ++j) {
+      raw_splits(i, j) = Rbyte(src[j]);
+    }
+  }
+
+  IntegerVector light_side(M);
+  for (int i = 0; i < M; ++i) light_side[i] = pool.light_side[i];
+
+  return List::create(
+    Rcpp::Named("included") = incl_r,
+    Rcpp::Named("raw_splits") = raw_splits,
+    Rcpp::Named("light_side") = light_side
+  );
+}
+
+
+// ============================================================================
+// Diagnostic: per-stage timing (for profiling without VTune)
+// ============================================================================
+
+//' @keywords internal
+// [[Rcpp::export]]
+Rcpp::NumericVector cpp_tc_profile(
+    const List& splits_list,
+    const int n_tip,
+    const bool scale,
+    const bool greedy_best_flag,
+    const bool init_majority,
+    const int n_iter
+) {
+  using clk = std::chrono::high_resolution_clock;
+  const int n_tree = splits_list.size();
+
+  double t_pool = 0, t_dist = 0, t_td = 0, t_compat = 0, t_greedy = 0;
+
+  for (int iter = 0; iter < n_iter; ++iter) {
+    auto t0 = clk::now();
+    PooledSplits pool = pool_splits(splits_list, n_tip);
+    auto t1 = clk::now();
+    const int M = pool.n_splits;
+
+    std::vector<int> dist = transfer_dist_mat(pool);
+    auto t2 = clk::now();
+
+    std::vector<double> td = compute_td(dist, pool, scale);
+    auto t3 = clk::now();
+
+    std::vector<uint8_t> compat_v = compat_mat(pool);
+    auto t4 = clk::now();
+
+    std::vector<int> sort_ord(M);
+    std::iota(sort_ord.begin(), sort_ord.end(), 0);
+    std::sort(sort_ord.begin(), sort_ord.end(),
+              [&](int a, int b) { return pool.count[a] > pool.count[b]; });
+
+    std::vector<int> match(M, -1);
+    std::vector<int> match2(M, -1);
+    std::vector<uint8_t> incl(M, 0);
+
+    if (init_majority) {
+      double half = n_tree / 2.0;
+      for (int i = 0; i < M; ++i) {
+        if (pool.count[i] > half) incl[i] = 1;
+      }
+      init_matches(match, match2, incl, dist, M, pool, scale);
+    }
+
+    if (greedy_best_flag) {
+      greedy_best(match, match2, incl, dist, td, pool, compat_v, sort_ord,
+                  scale, M, n_tip);
+    } else {
+      greedy_first(match, match2, incl, dist, td, pool, compat_v, sort_ord,
+                   scale, M, n_tip);
+    }
+    auto t5 = clk::now();
+
+    auto us = [](auto a, auto b) {
+      return std::chrono::duration<double, std::micro>(b - a).count();
+    };
+    t_pool   += us(t0, t1);
+    t_dist   += us(t1, t2);
+    t_td     += us(t2, t3);
+    t_compat += us(t3, t4);
+    t_greedy += us(t4, t5);
+  }
+
+  double inv = 1.0 / n_iter;
+  Rcpp::NumericVector result = {
+    t_pool * inv, t_dist * inv, t_td * inv, t_compat * inv, t_greedy * inv
+  };
+  result.attr("names") = Rcpp::CharacterVector(
+    {"pool_splits", "transfer_dist_mat", "compute_td", "compat_mat", "greedy"}
+  );
+  return result;
+}

From 3eff668fc5bc356167773accc453f96bc44661ac Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 07:23:42 +0000
Subject: [PATCH 03/17] TransferDist()

---
 NAMESPACE                                    |   3 +
 R/RcppExports.R                              |  48 ++-
 R/transfer_consensus.R                       |   9 +-
 R/tree_distance_spr.R                        |   2 +-
 R/tree_distance_transfer.R                   | 258 +++++++++++++
 man/HierarchicalMutualInfo.Rd                |   1 +
 man/JaccardRobinsonFoulds.Rd                 |   1 +
 man/KendallColijn.Rd                         |   1 +
 man/MASTSize.Rd                              |   1 +
 man/MatchingSplitDistance.Rd                 |   1 +
 man/NNIDist.Rd                               |   1 +
 man/NyeSimilarity.Rd                         |   1 +
 man/PathDist.Rd                              |   1 +
 man/Robinson-Foulds.Rd                       |   1 +
 man/SPRDist.Rd                               |   1 +
 man/TransferConsensus.Rd                     |   2 +-
 man/TransferDist.Rd                          | 118 ++++++
 man/TreeDistance.Rd                          |   3 +-
 man/cpp_transfer_consensus.Rd                |  34 ++
 man/cpp_transfer_dist.Rd                     |  26 ++
 man/cpp_transfer_dist_all_pairs.Rd           |  24 ++
 man/cpp_transfer_dist_cross_pairs.Rd         |  24 ++
 man/dot-SPRPairDeO.Rd                        |  22 ++
 src/RcppExports.cpp                          |  60 +++
 src/transfer_consensus.cpp                   |  43 ++-
 src/transfer_distance.cpp                    | 363 +++++++++++++++++++
 tests/testthat/test-tree_distance_transfer.R | 174 +++++++++
 27 files changed, 1208 insertions(+), 15 deletions(-)
 create mode 100644 R/tree_distance_transfer.R
 create mode 100644 man/TransferDist.Rd
 create mode 100644 man/cpp_transfer_consensus.Rd
 create mode 100644 man/cpp_transfer_dist.Rd
 create mode 100644 man/cpp_transfer_dist_all_pairs.Rd
 create mode 100644 man/cpp_transfer_dist_cross_pairs.Rd
 create mode 100644 man/dot-SPRPairDeO.Rd
 create mode 100644 src/transfer_distance.cpp
 create mode 100644 tests/testthat/test-tree_distance_transfer.R

diff --git a/NAMESPACE b/NAMESPACE
index 90a22ab81..23334f6e7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -141,6 +141,9 @@ export(SumOfRanges)
 export(SumOfVariances)
 export(SumOfVars)
 export(TransferConsensus)
+export(TransferDist)
+export(TransferDistSplits)
+export(TransferDistance)
 export(TreeDistPlot)
 export(TreeDistance)
 export(TreesConsistentWithTwoSplits)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index da2b7b5ee..f3b5fae83 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -205,7 +205,7 @@ spr_table_7 <- function(sp1, sp2) {
 #' @param splits_list List of raw matrices (one per tree), each from as.Splits().
 #' @param n_tip Number of tips.
 #' @param scale Logical: use scaled transfer distance?
-#' @param greedy_best Logical: TRUE for "best", FALSE for "first".
+#' @param greedy_best_flag Logical: TRUE for "best", FALSE for "first".
 #' @param init_majority Logical: TRUE to start from majority-rule splits.
 #'
 #' @return A LogicalVector of length n_splits indicating which pooled splits
@@ -221,6 +221,52 @@ cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_maj
     .Call(`_TreeDist_cpp_tc_profile`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter)
 }
 
+#' Per-pair transfer dissimilarity
+#'
+#' @param x,y Raw matrices representing splits (from as.Splits()).
+#' @param nTip Integer: number of tips.
+#'
+#' @return A list with components:
+#'   - score_scaled: scaled transfer dissimilarity (double)
+#'   - score_unscaled: unscaled transfer dissimilarity (double)
+#'   - matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+#'   - matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+#' @keywords internal
+cpp_transfer_dist <- function(x, y, nTip) {
+    .Call(`_TreeDist_cpp_transfer_dist`, x, y, nTip)
+}
+
+#' @keywords internal
+cpp_transfer_dist_scored <- function(x, y, nTip, scale) {
+    .Call(`_TreeDist_cpp_transfer_dist_scored`, x, y, nTip, scale)
+}
+
+#' All-pairs transfer dissimilarity (OpenMP)
+#'
+#' @param splits_list List of raw matrices (one per tree).
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer dissimilarity?
+#' @param n_threads Number of OpenMP threads.
+#'
+#' @return Numeric vector of length choose(N,2) in dist order.
+#' @keywords internal
+cpp_transfer_dist_all_pairs <- function(splits_list, n_tip, scale, n_threads) {
+    .Call(`_TreeDist_cpp_transfer_dist_all_pairs`, splits_list, n_tip, scale, n_threads)
+}
+
+#' Cross-pairs transfer dissimilarity (OpenMP)
+#'
+#' @param splits_a,splits_b Lists of raw matrices.
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer dissimilarity?
+#' @param n_threads Number of OpenMP threads.
+#'
+#' @return Numeric matrix of dimension nA x nB.
+#' @keywords internal
+cpp_transfer_dist_cross_pairs <- function(splits_a, splits_b, n_tip, scale, n_threads) {
+    .Call(`_TreeDist_cpp_transfer_dist_cross_pairs`, splits_a, splits_b, n_tip, scale, n_threads)
+}
+
 cpp_robinson_foulds_distance <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
 }
diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R
index c710d3c35..f1232a7a8 100644
--- a/R/transfer_consensus.R
+++ b/R/transfer_consensus.R
@@ -12,7 +12,7 @@
 #' The algorithm pools all splits observed across input trees, computes
 #' pairwise transfer distances between them, and greedily adds or removes
 #' splits to minimize total transfer dissimilarity cost.  The approach
-#' follows \insertCite{Takazawa2025;textual}{TreeDist}, reimplemented for
+#' follows \insertCite{Takazawa2026;textual}{TreeDist}, reimplemented for
 #' TreeDist's infrastructure.
 #'
 #' @param trees An object of class `multiPhylo`: the input trees.
@@ -396,8 +396,13 @@ TransferConsensus <- function(trees,
   affected1 <- which(st$MATCH == branchIdx & !is.na(st$MATCH))
   if (length(affected1)) {
     st$MATCH[affected1] <- st$MATCH2[affected1]
-    # Find new second match for these
     for (b in affected1) {
+      if (is.na(st$MATCH[b])) {
+        # Promoted value was sentinel — rescan for actual closest
+        st$MATCH[b] <- .FindSecond(b, NA_integer_, curInc, DIST,
+                                    pMinus1, scale)
+      }
+      # Find new second match
       st$MATCH2[b] <- .FindSecond(b, st$MATCH[b], curInc, DIST,
                                   pMinus1, scale)
     }
diff --git a/R/tree_distance_spr.R b/R/tree_distance_spr.R
index 844d054b0..c3be63625 100644
--- a/R/tree_distance_spr.R
+++ b/R/tree_distance_spr.R
@@ -407,7 +407,7 @@ SPRDist.multiPhylo <- SPRDist.list
 # \insertCite{deOliveira2008;textual}{TreeDist}
 # An exact match is unlikely due to the arbitrary breaking of ties when leaves
 # are selected for removal.
-# SPR approximation via Oliveira Martins _et al._ (2008)
+#' SPR approximation via Oliveira Martins _et al._ (2008)
 #' @examples
 #' # de Oliveira Martins et al 2008, fig. 7
 #' tree1 <- ape::read.tree(text = "((1, 2), ((a, b), (c, d)), (3, (4, (5, (6, 7)))));")
diff --git a/R/tree_distance_transfer.R b/R/tree_distance_transfer.R
new file mode 100644
index 000000000..43202d45d
--- /dev/null
+++ b/R/tree_distance_transfer.R
@@ -0,0 +1,258 @@
+#' Transfer dissimilarity between phylogenetic trees
+#'
+#' Compute the transfer dissimilarity between phylogenetic trees, as defined
+#' by \insertCite{Takazawa2026;textual}{TreeDist}.
+#' The transfer dissimilarity uses the transfer distance
+#' \insertCite{Lemoine2018}{TreeDist} to compare bipartitions, providing a
+#' finer-grained measure than the Robinson--Foulds distance.  Each branch in
+#' each tree is scored by how many taxa must be moved to match its closest
+#' counterpart in the other tree, and these scores are summed.
+#'
+#' The `scaled` variant divides each branch's contribution by its depth minus
+#' one, giving equal weight to all branches regardless of their depth (analogous
+#' to the Robinson--Foulds distance).  The `unscaled` variant uses raw transfer
+#' distances, giving more weight to deep branches.  Neither variant satisfies
+#' the triangle inequality for trees with six or more tips.
+#'
+#' @inheritParams TreeDistance
+#' @param scale Logical; if `TRUE` (default), use the scaled transfer
+#'   dissimilarity.  If `FALSE`, use the unscaled transfer dissimilarity.
+#'
+#' @return `TransferDist()` returns an object of class `dist` (if `tree2` is
+#'   `NULL`), a numeric matrix (if both `tree1` and `tree2` are lists), or a
+#'   numeric value (for a single pair).  If `reportMatching = TRUE`, the
+#'   return value carries `matching` and `pairScores` attributes.
+#'
+#' @section Normalization:
+#'
+#' When `normalize = TRUE`, the scaled transfer dissimilarity is divided by
+#' `2 * (n - 3)`, placing it in the range \[0, 1\].  The unscaled version is
+#' divided by the maximum possible unscaled dissimilarity
+#' (following \insertCite{Takazawa2026;textual}{TreeDist}).
+#'
+#' @examples
+#' library(TreeTools)
+#' TransferDist(BalancedTree(8), PectinateTree(8))
+#' TransferDist(BalancedTree(8), PectinateTree(8), scale = FALSE)
+#'
+#' # All-pairs
+#' TransferDist(as.phylo(0:5, 8))
+#'
+#' @references
+#' \insertAllCited{}
+#'
+#' @family tree distances
+#'
+#' @importFrom TreeTools as.Splits TipLabels NSplits
+#' @export
+TransferDist <- function(tree1, tree2 = NULL, scale = TRUE,
+                         normalize = FALSE, reportMatching = FALSE) {
+  
+  # --- Fast path: all-pairs (tree2 = NULL) ---
+  if (is.null(tree2) && !reportMatching) {
+    fast <- .TransferDistAllPairs(tree1, scale)
+    if (!is.null(fast)) {
+      if (!isFALSE(normalize)) {
+        nTip <- length(TipLabels(tree1[[1]]))
+        denom <- .TransferNormDenom(nTip, scale)
+        fast <- fast / denom
+      }
+      return(fast)
+    }
+  }
+  
+  # --- Fast path: cross-pairs ---
+  if (!is.null(tree2) && !reportMatching) {
+    fast <- .TransferDistCrossPairs(tree1, tree2, scale)
+    if (!is.null(fast)) {
+      if (!isFALSE(normalize)) {
+        nTip <- length(TipLabels(
+          if (inherits(tree1, c("phylo", "Splits"))) tree1 else tree1[[1]]))
+        denom <- .TransferNormDenom(nTip, scale)
+        fast <- fast / denom
+      }
+      return(fast)
+    }
+  }
+  
+  # --- Generic fallback via CalculateTreeDistance ---
+  # Capture `scale` in the closure for the Splits-level function
+  Func <- function(splits1, splits2, nTip, reportMatching = FALSE) {
+    TransferDistSplits(splits1, splits2, nTip = nTip,
+                       scale = scale, reportMatching = reportMatching)
+  }
+  unnormalized <- CalculateTreeDistance(Func, tree1, tree2, reportMatching)
+  
+  if (!isFALSE(normalize)) {
+    nTip <- length(TipLabels(
+      if (inherits(tree1, c("phylo", "Splits"))) tree1 else tree1[[1]]))
+    denom <- .TransferNormDenom(nTip, scale)
+    unnormalized <- unnormalized / denom
+  }
+  
+  unnormalized
+}
+
+#' @rdname TransferDist
+#' @export
+TransferDistance <- TransferDist
+
+#' @rdname TransferDist
+#' @inheritParams SharedPhylogeneticInfoSplits
+#' @export
+TransferDistSplits <- function(splits1, splits2,
+                                nTip = attr(splits1, "nTip"),
+                                scale = TRUE,
+                                reportMatching = FALSE) {
+  
+  solution <- cpp_transfer_dist_scored(splits1, splits2,
+                                        nTip = as.integer(nTip),
+                                        scale = scale)
+  ret <- solution[["score"]]
+  
+  if (reportMatching) {
+    nSplits1 <- nrow(splits1)
+    nSplits2 <- nrow(splits2)
+    matching <- solution[["matching"]]
+    matching[matching > nSplits2 | matching == 0L] <- NA
+    if (nSplits1 < nSplits2) {
+      matching <- matching[seq_len(nSplits1)]
+    }
+    attr(ret, "matching") <- matching
+    
+    # Compute full pairwise score matrix for reportMatching
+    pairScores <- matrix(0, nSplits1, nSplits2)
+    for (i in seq_len(nSplits1)) {
+      for (j in seq_len(nSplits2)) {
+        # Per-pair: the transfer distance contribution
+        # This is the individual δ(b_i, b*_j) / (depth(b_i) - 1) for scaled
+        # or min(δ(b_i, b*_j), depth(b_i) - 1) for unscaled
+        pair_res <- cpp_transfer_dist_scored(
+          splits1[i, , drop = FALSE],
+          splits2[j, , drop = FALSE],
+          nTip = as.integer(nTip),
+          scale = scale
+        )
+        pairScores[i, j] <- pair_res[["score"]]
+      }
+    }
+    
+    if (!is.null(attr(splits1, "tip.label"))) {
+      matched1 <- !is.na(matching)
+      matched2 <- matching[matched1]
+      matched1 <- which(matched1)
+      attr(ret, "matchedSplits") <-
+        ReportMatching(splits1[[matched1]], splits2[[matched2]],
+                       realMatch = rep(TRUE, length(matched1)))
+    }
+    
+    attr(ret, "matchedScores") <- pairScores[
+      matrix(c(seq_along(matching), matching), ncol = 2L)]
+    attr(ret, "pairScores") <- pairScores
+  }
+  
+  ret
+}
+
+
+# ============================================================================
+# Internal helpers
+# ============================================================================
+
+# All-pairs fast path
+.TransferDistAllPairs <- function(tree1, scale) {
+  if (inherits(tree1, c("phylo", "Splits"))) return(NULL)
+  if (length(tree1) < 2L) return(NULL)
+  
+  tipLabels <- TipLabels(tree1[[1]])
+  if (is.null(tipLabels)) return(NULL)
+  nTip <- length(tipLabels)
+  if (nTip < 4L) return(NULL)
+  
+  # Check all trees share same tip set
+  allLabels <- TipLabels(tree1)
+  if (is.list(allLabels)) {
+    if (!all(vapply(allLabels[-1], setequal, logical(1), tipLabels))) {
+      return(NULL)
+    }
+  }
+  
+  splitsList <- lapply(tree1, function(tr) {
+    unclass(as.Splits(tr, tipLabels))
+  })
+  
+  nThreads <- max(1L, getOption("TreeDist.threads",
+                                 getOption("mc.cores", 1L)))
+  
+  dists <- cpp_transfer_dist_all_pairs(splitsList, nTip, scale, nThreads)
+  
+  N <- length(tree1)
+  attr(dists, "Size") <- N
+  attr(dists, "Diag") <- FALSE
+  attr(dists, "Upper") <- FALSE
+  nms <- names(tree1)
+  if (!is.null(nms)) attr(dists, "Labels") <- nms
+  class(dists) <- "dist"
+  dists
+}
+
+
+# Cross-pairs fast path
+.TransferDistCrossPairs <- function(tree1, tree2, scale) {
+  single1 <- inherits(tree1, c("phylo", "Splits"))
+  single2 <- inherits(tree2, c("phylo", "Splits"))
+  if (single1 && single2) return(NULL) # use generic path for single pair
+  
+  trees1 <- if (single1) list(tree1) else tree1
+  trees2 <- if (single2) list(tree2) else tree2
+  
+  tipLabels <- TipLabels(trees1[[1]])
+  if (is.null(tipLabels)) return(NULL)
+  nTip <- length(tipLabels)
+  if (nTip < 4L) return(NULL)
+  
+  # Check all trees share same tip set
+  allLabels1 <- TipLabels(trees1)
+  allLabels2 <- TipLabels(trees2)
+  if (is.list(allLabels1)) {
+    if (!all(vapply(allLabels1, setequal, logical(1), tipLabels))) return(NULL)
+  }
+  if (is.list(allLabels2)) {
+    if (!all(vapply(allLabels2, setequal, logical(1), tipLabels))) return(NULL)
+  } else {
+    if (!setequal(allLabels2, tipLabels)) return(NULL)
+  }
+  
+  splits1 <- lapply(trees1, function(tr) unclass(as.Splits(tr, tipLabels)))
+  splits2 <- lapply(trees2, function(tr) unclass(as.Splits(tr, tipLabels)))
+  
+  nThreads <- max(1L, getOption("TreeDist.threads",
+                                 getOption("mc.cores", 1L)))
+  
+  mat <- cpp_transfer_dist_cross_pairs(splits1, splits2, nTip, scale, nThreads)
+  
+  rownames(mat) <- names(trees1)
+  colnames(mat) <- names(trees2)
+  
+  # If one input was a single tree, simplify to vector
+  if (single1) return(mat[1, ])
+  if (single2) return(mat[, 1])
+  mat
+}
+
+
+# Normalization denominator
+.TransferNormDenom <- function(nTip, scale) {
+  if (scale) {
+    # Scaled: each tree contributes at most (n-3) branches × 1.0
+    2.0 * (nTip - 3L)
+  } else {
+    # Unscaled: maximum dissimilarity between two caterpillar trees
+    # Takazawa 2026: (n^2 - 2n + 4)/4 for even n, (n^2 - 2n + 5)/4 for odd n
+    if (nTip %% 2L == 0L) {
+      (nTip^2 - 2 * nTip + 4) / 4
+    } else {
+      (nTip^2 - 2 * nTip + 5) / 4
+    }
+  }
+}
diff --git a/man/HierarchicalMutualInfo.Rd b/man/HierarchicalMutualInfo.Rd
index 9e4788d7b..193b18ba8 100644
--- a/man/HierarchicalMutualInfo.Rd
+++ b/man/HierarchicalMutualInfo.Rd
@@ -134,6 +134,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \concept{tree distances}
diff --git a/man/JaccardRobinsonFoulds.Rd b/man/JaccardRobinsonFoulds.Rd
index 5df329143..7e515be7f 100644
--- a/man/JaccardRobinsonFoulds.Rd
+++ b/man/JaccardRobinsonFoulds.Rd
@@ -136,6 +136,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/KendallColijn.Rd b/man/KendallColijn.Rd
index cfd506b54..57d017720 100644
--- a/man/KendallColijn.Rd
+++ b/man/KendallColijn.Rd
@@ -135,6 +135,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/MASTSize.Rd b/man/MASTSize.Rd
index 5ecdd2711..db028b0a0 100644
--- a/man/MASTSize.Rd
+++ b/man/MASTSize.Rd
@@ -72,6 +72,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/MatchingSplitDistance.Rd b/man/MatchingSplitDistance.Rd
index 83af446fe..b5d64b270 100644
--- a/man/MatchingSplitDistance.Rd
+++ b/man/MatchingSplitDistance.Rd
@@ -94,6 +94,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/NNIDist.Rd b/man/NNIDist.Rd
index 70895e041..5e24301bb 100644
--- a/man/NNIDist.Rd
+++ b/man/NNIDist.Rd
@@ -111,6 +111,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/NyeSimilarity.Rd b/man/NyeSimilarity.Rd
index 892ed1853..d984b129a 100644
--- a/man/NyeSimilarity.Rd
+++ b/man/NyeSimilarity.Rd
@@ -132,6 +132,7 @@ Other tree distances:
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/PathDist.Rd b/man/PathDist.Rd
index 5cbfe90cb..1192d4ce0 100644
--- a/man/PathDist.Rd
+++ b/man/PathDist.Rd
@@ -78,6 +78,7 @@ Other tree distances:
 \code{\link{NyeSimilarity}()},
 \code{\link{Robinson-Foulds}},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/Robinson-Foulds.Rd b/man/Robinson-Foulds.Rd
index a425e8d1b..7510e0945 100644
--- a/man/Robinson-Foulds.Rd
+++ b/man/Robinson-Foulds.Rd
@@ -154,6 +154,7 @@ Other tree distances:
 \code{\link{NyeSimilarity}()},
 \code{\link{PathDist}()},
 \code{\link{SPRDist}()},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/SPRDist.Rd b/man/SPRDist.Rd
index 42fc9b2c0..5075abda8 100644
--- a/man/SPRDist.Rd
+++ b/man/SPRDist.Rd
@@ -84,6 +84,7 @@ Other tree distances:
 \code{\link{NyeSimilarity}()},
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
+\code{\link{TransferDist}()},
 \code{\link{TreeDistance}()}
 }
 \author{
diff --git a/man/TransferConsensus.Rd b/man/TransferConsensus.Rd
index 9f69a55da..615e27941 100644
--- a/man/TransferConsensus.Rd
+++ b/man/TransferConsensus.Rd
@@ -45,7 +45,7 @@ tree.
 The algorithm pools all splits observed across input trees, computes
 pairwise transfer distances between them, and greedily adds or removes
 splits to minimize total transfer dissimilarity cost.  The approach
-follows \insertCite{Takazawa2025;textual}{TreeDist}, reimplemented for
+follows \insertCite{Takazawa2026;textual}{TreeDist}, reimplemented for
 TreeDist's infrastructure.
 }
 \examples{
diff --git a/man/TransferDist.Rd b/man/TransferDist.Rd
new file mode 100644
index 000000000..2439130f7
--- /dev/null
+++ b/man/TransferDist.Rd
@@ -0,0 +1,118 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tree_distance_transfer.R
+\name{TransferDist}
+\alias{TransferDist}
+\alias{TransferDistance}
+\alias{TransferDistSplits}
+\title{Transfer dissimilarity between phylogenetic trees}
+\usage{
+TransferDist(
+  tree1,
+  tree2 = NULL,
+  scale = TRUE,
+  normalize = FALSE,
+  reportMatching = FALSE
+)
+
+TransferDistance(
+  tree1,
+  tree2 = NULL,
+  scale = TRUE,
+  normalize = FALSE,
+  reportMatching = FALSE
+)
+
+TransferDistSplits(
+  splits1,
+  splits2,
+  nTip = attr(splits1, "nTip"),
+  scale = TRUE,
+  reportMatching = FALSE
+)
+}
+\arguments{
+\item{tree1, tree2}{Trees of class \code{phylo}, with leaves labelled identically,
+or lists of such trees to undergo pairwise comparison.  Where implemented,
+\code{tree2 = NULL} will compute distances between each pair of trees in the list
+\code{tree1} using a fast algorithm based on
+\insertCite{Day1985;textual}{TreeDist}.}
+
+\item{scale}{Logical; if \code{TRUE} (default), use the scaled transfer
+dissimilarity.  If \code{FALSE}, use the unscaled transfer dissimilarity.}
+
+\item{normalize}{If a numeric value is provided, this will be used as a
+maximum value against which to rescale results.
+If \code{TRUE}, results will be rescaled against a maximum value calculated from
+the specified tree sizes and topology, as specified in the "Normalization"
+section below.
+If \code{FALSE}, results will not be rescaled.}
+
+\item{reportMatching}{Logical specifying whether to return the clade
+matchings as an attribute of the score.}
+
+\item{splits1, splits2}{Logical matrices where each row corresponds to a leaf,
+either listed in the same order or bearing identical names (in any sequence),
+and each column corresponds to a split, such that each leaf is identified as
+a member of the ingroup (\code{TRUE}) or outgroup (\code{FALSE}) of the respective
+split.}
+
+\item{nTip}{(Optional) Integer specifying the number of leaves in each split.}
+}
+\value{
+\code{TransferDist()} returns an object of class \code{dist} (if \code{tree2} is
+\code{NULL}), a numeric matrix (if both \code{tree1} and \code{tree2} are lists), or a
+numeric value (for a single pair).  If \code{reportMatching = TRUE}, the
+return value carries \code{matching} and \code{pairScores} attributes.
+}
+\description{
+Compute the transfer dissimilarity between phylogenetic trees, as defined
+by \insertCite{Takazawa2026;textual}{TreeDist}.
+The transfer dissimilarity uses the transfer distance
+\insertCite{Lemoine2018}{TreeDist} to compare bipartitions, providing a
+finer-grained measure than the Robinson--Foulds distance.  Each branch in
+each tree is scored by how many taxa must be moved to match its closest
+counterpart in the other tree, and these scores are summed.
+}
+\details{
+The \code{scaled} variant divides each branch's contribution by its depth minus
+one, giving equal weight to all branches regardless of their depth (analogous
+to the Robinson--Foulds distance).  The \code{unscaled} variant uses raw transfer
+distances, giving more weight to deep branches.  Neither variant satisfies
+the triangle inequality for trees with six or more tips.
+}
+\section{Normalization}{
+
+
+When \code{normalize = TRUE}, the scaled transfer dissimilarity is divided by
+\code{2 * (n - 3)}, placing it in the range [0, 1].  The unscaled version is
+divided by the maximum possible unscaled dissimilarity
+(following \insertCite{Takazawa2026;textual}{TreeDist}).
+}
+
+\examples{
+library(TreeTools)
+TransferDist(BalancedTree(8), PectinateTree(8))
+TransferDist(BalancedTree(8), PectinateTree(8), scale = FALSE)
+
+# All-pairs
+TransferDist(as.phylo(0:5, 8))
+
+}
+\references{
+\insertAllCited{}
+}
+\seealso{
+Other tree distances: 
+\code{\link{HierarchicalMutualInfo}()},
+\code{\link{JaccardRobinsonFoulds}()},
+\code{\link{KendallColijn}()},
+\code{\link{MASTSize}()},
+\code{\link{MatchingSplitDistance}()},
+\code{\link{NNIDist}()},
+\code{\link{NyeSimilarity}()},
+\code{\link{PathDist}()},
+\code{\link{Robinson-Foulds}},
+\code{\link{SPRDist}()},
+\code{\link{TreeDistance}()}
+}
+\concept{tree distances}
diff --git a/man/TreeDistance.Rd b/man/TreeDistance.Rd
index 352d169c4..f081b6d3a 100644
--- a/man/TreeDistance.Rd
+++ b/man/TreeDistance.Rd
@@ -328,7 +328,8 @@ Other tree distances:
 \code{\link{NyeSimilarity}()},
 \code{\link{PathDist}()},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()}
+\code{\link{SPRDist}()},
+\code{\link{TransferDist}()}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/cpp_transfer_consensus.Rd b/man/cpp_transfer_consensus.Rd
new file mode 100644
index 000000000..59a4e0a91
--- /dev/null
+++ b/man/cpp_transfer_consensus.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{cpp_transfer_consensus}
+\alias{cpp_transfer_consensus}
+\title{Transfer consensus (C++ implementation)}
+\usage{
+cpp_transfer_consensus(
+  splits_list,
+  n_tip,
+  scale,
+  greedy_best_flag,
+  init_majority
+)
+}
+\arguments{
+\item{splits_list}{List of raw matrices (one per tree), each from as.Splits().}
+
+\item{n_tip}{Number of tips.}
+
+\item{scale}{Logical: use scaled transfer distance?}
+
+\item{greedy_best_flag}{Logical: TRUE for "best", FALSE for "first".}
+
+\item{init_majority}{Logical: TRUE to start from majority-rule splits.}
+}
+\value{
+A LogicalVector of length n_splits indicating which pooled splits
+are included in the consensus, plus attributes "raw_splits" (a raw matrix
+of all unique splits) and "light_side" (integer vector).
+}
+\description{
+Transfer consensus (C++ implementation)
+}
+\keyword{internal}
diff --git a/man/cpp_transfer_dist.Rd b/man/cpp_transfer_dist.Rd
new file mode 100644
index 000000000..2ec68871c
--- /dev/null
+++ b/man/cpp_transfer_dist.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{cpp_transfer_dist}
+\alias{cpp_transfer_dist}
+\title{Per-pair transfer dissimilarity}
+\usage{
+cpp_transfer_dist(x, y, nTip)
+}
+\arguments{
+\item{x, y}{Raw matrices representing splits (from as.Splits()).}
+
+\item{nTip}{Integer: number of tips.}
+}
+\value{
+A list with components:
+\itemize{
+\item score_scaled: scaled transfer dissimilarity (double)
+\item score_unscaled: unscaled transfer dissimilarity (double)
+\item matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+\item matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+}
+}
+\description{
+Per-pair transfer dissimilarity
+}
+\keyword{internal}
diff --git a/man/cpp_transfer_dist_all_pairs.Rd b/man/cpp_transfer_dist_all_pairs.Rd
new file mode 100644
index 000000000..92af73d4a
--- /dev/null
+++ b/man/cpp_transfer_dist_all_pairs.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{cpp_transfer_dist_all_pairs}
+\alias{cpp_transfer_dist_all_pairs}
+\title{All-pairs transfer dissimilarity (OpenMP)}
+\usage{
+cpp_transfer_dist_all_pairs(splits_list, n_tip, scale, n_threads)
+}
+\arguments{
+\item{splits_list}{List of raw matrices (one per tree).}
+
+\item{n_tip}{Number of tips.}
+
+\item{scale}{Logical: use scaled transfer dissimilarity?}
+
+\item{n_threads}{Number of OpenMP threads.}
+}
+\value{
+Numeric vector of length choose(N,2) in dist order.
+}
+\description{
+All-pairs transfer dissimilarity (OpenMP)
+}
+\keyword{internal}
diff --git a/man/cpp_transfer_dist_cross_pairs.Rd b/man/cpp_transfer_dist_cross_pairs.Rd
new file mode 100644
index 000000000..0758544b8
--- /dev/null
+++ b/man/cpp_transfer_dist_cross_pairs.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{cpp_transfer_dist_cross_pairs}
+\alias{cpp_transfer_dist_cross_pairs}
+\title{Cross-pairs transfer dissimilarity (OpenMP)}
+\usage{
+cpp_transfer_dist_cross_pairs(splits_a, splits_b, n_tip, scale, n_threads)
+}
+\arguments{
+\item{splits_a, splits_b}{Lists of raw matrices.}
+
+\item{n_tip}{Number of tips.}
+
+\item{scale}{Logical: use scaled transfer dissimilarity?}
+
+\item{n_threads}{Number of OpenMP threads.}
+}
+\value{
+Numeric matrix of dimension nA x nB.
+}
+\description{
+Cross-pairs transfer dissimilarity (OpenMP)
+}
+\keyword{internal}
diff --git a/man/dot-SPRPairDeO.Rd b/man/dot-SPRPairDeO.Rd
new file mode 100644
index 000000000..c846b761c
--- /dev/null
+++ b/man/dot-SPRPairDeO.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tree_distance_spr.R
+\name{.SPRPairDeO}
+\alias{.SPRPairDeO}
+\title{SPR approximation via Oliveira Martins \emph{et al.} (2008)}
+\usage{
+.SPRPairDeO(tree1, tree2, check = TRUE)
+}
+\description{
+SPR approximation via Oliveira Martins \emph{et al.} (2008)
+}
+\examples{
+# de Oliveira Martins et al 2008, fig. 7
+tree1 <- ape::read.tree(text = "((1, 2), ((a, b), (c, d)), (3, (4, (5, (6, 7)))));")
+tree2 <- ape::read.tree(text = "((1, 2), 3, (4, (5, (((a, b), (c, d)), (6, 7)))));")
+oPar <- par(mfrow =c(2, 1), mar = rep(0, 4))
+plot(tree1)
+plot(tree2)
+par(oPar)
+SPRDist(tree1, tree2, method = "deO")
+}
+\keyword{internal}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 90346a5e5..a8e03bc8f 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -553,6 +553,62 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// cpp_transfer_dist
+List cpp_transfer_dist(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
+RcppExport SEXP _TreeDist_cpp_transfer_dist(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const RawMatrix& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< const RawMatrix& >::type y(ySEXP);
+    Rcpp::traits::input_parameter< const IntegerVector& >::type nTip(nTipSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_dist(x, y, nTip));
+    return rcpp_result_gen;
+END_RCPP
+}
+// cpp_transfer_dist_scored
+List cpp_transfer_dist_scored(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip, bool scale);
+RcppExport SEXP _TreeDist_cpp_transfer_dist_scored(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP, SEXP scaleSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const RawMatrix& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< const RawMatrix& >::type y(ySEXP);
+    Rcpp::traits::input_parameter< const IntegerVector& >::type nTip(nTipSEXP);
+    Rcpp::traits::input_parameter< bool >::type scale(scaleSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_dist_scored(x, y, nTip, scale));
+    return rcpp_result_gen;
+END_RCPP
+}
+// cpp_transfer_dist_all_pairs
+NumericVector cpp_transfer_dist_all_pairs(const List& splits_list, int n_tip, bool scale, int n_threads);
+RcppExport SEXP _TreeDist_cpp_transfer_dist_all_pairs(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const List& >::type splits_list(splits_listSEXP);
+    Rcpp::traits::input_parameter< int >::type n_tip(n_tipSEXP);
+    Rcpp::traits::input_parameter< bool >::type scale(scaleSEXP);
+    Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_dist_all_pairs(splits_list, n_tip, scale, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
+// cpp_transfer_dist_cross_pairs
+NumericMatrix cpp_transfer_dist_cross_pairs(const List& splits_a, const List& splits_b, int n_tip, bool scale, int n_threads);
+RcppExport SEXP _TreeDist_cpp_transfer_dist_cross_pairs(SEXP splits_aSEXP, SEXP splits_bSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const List& >::type splits_a(splits_aSEXP);
+    Rcpp::traits::input_parameter< const List& >::type splits_b(splits_bSEXP);
+    Rcpp::traits::input_parameter< int >::type n_tip(n_tipSEXP);
+    Rcpp::traits::input_parameter< bool >::type scale(scaleSEXP);
+    Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_dist_cross_pairs(splits_a, splits_b, n_tip, scale, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
 // cpp_robinson_foulds_distance
 List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
 RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
@@ -691,6 +747,10 @@ static const R_CallMethodDef CallEntries[] = {
     {"_TreeDist_spr_table_7", (DL_FUNC) &_TreeDist_spr_table_7, 2},
     {"_TreeDist_cpp_transfer_consensus", (DL_FUNC) &_TreeDist_cpp_transfer_consensus, 5},
     {"_TreeDist_cpp_tc_profile", (DL_FUNC) &_TreeDist_cpp_tc_profile, 6},
+    {"_TreeDist_cpp_transfer_dist", (DL_FUNC) &_TreeDist_cpp_transfer_dist, 3},
+    {"_TreeDist_cpp_transfer_dist_scored", (DL_FUNC) &_TreeDist_cpp_transfer_dist_scored, 4},
+    {"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4},
+    {"_TreeDist_cpp_transfer_dist_cross_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_cross_pairs, 5},
     {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3},
     {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3},
     {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3},
diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
index 03f4f6bfe..dc7b2868d 100644
--- a/src/transfer_consensus.cpp
+++ b/src/transfer_consensus.cpp
@@ -110,6 +110,15 @@ static PooledSplits pool_splits(const List& splits_list, int n_tips) {
     ? ~splitbit(0)
     : (splitbit(1) << (n_tips % SL_BIN_SIZE)) - 1;
 
+  // Count total splits across all trees to pre-allocate pool.data.
+  // This avoids reallocation, which would invalidate the pointers stored
+  // as keys in split_map.
+  int total_splits = 0;
+  for (int t = 0; t < n_tree; ++t) {
+    const RawMatrix mat = Rcpp::as<RawMatrix>(splits_list[t]);
+    total_splits += mat.nrow();
+  }
+
   // Hash map from canonical split → unique index
   SplitHash hasher(n_bins);
   SplitEqual eq(n_bins);
@@ -126,6 +135,10 @@ static PooledSplits pool_splits(const List& splits_list, int n_tips) {
   pool.n_splits = 0;
   pool.tree_members.resize(n_tree);
 
+  // Reserve capacity for the maximum possible unique splits (= total splits).
+  // This ensures pool.data never reallocates, keeping split_map keys valid.
+  pool.data.reserve(static_cast<size_t>(total_splits) * n_bins);
+
   for (int t = 0; t < n_tree; ++t) {
     SplitList sl(Rcpp::as<RawMatrix>(splits_list[t]));
     std::vector<int>& members = pool.tree_members[t];
@@ -155,7 +168,7 @@ static PooledSplits pool_splits(const List& splits_list, int n_tips) {
         pool.count[idx]++;
       } else {
         idx = pool.n_splits++;
-        // Append canonical data
+        // Append canonical data (no reallocation due to reserve above)
         const size_t old_sz = pool.data.size();
         pool.data.resize(old_sz + n_bins);
         std::copy(canon_buf.begin(), canon_buf.end(),
@@ -169,7 +182,7 @@ static PooledSplits pool_splits(const List& splits_list, int n_tips) {
         }
         pool.light_side.push_back(std::min(pc, n_tips - pc));
 
-        // Insert pointer into map (points into pool.data)
+        // Insert pointer into map (points into pool.data; stable due to reserve)
         split_map[pool.split(idx)] = idx;
       }
 
@@ -478,9 +491,20 @@ struct GreedyState {
     // Update match/match2 for affected splits
     for (int b = 0; b < M; ++b) {
       if (match[b] == idx) {
-        // Promote match2 → match, find new match2
+        // Promote match2 → match
         match[b]      = match2[b];
         match_dist[b] = match2_dist[b];
+
+        if (match[b] < 0) {
+          // Promoted value was sentinel — rescan from scratch to find
+          // the actual closest included split (find_second with
+          // matchIdx = -1 searches all included).
+          auto [first, first_d] = find_second(b, -1, incl, dist, M, pool, scale);
+          match[b]      = first;
+          match_dist[b] = first_d;
+        }
+
+        // Find new second-closest
         auto [sec, sec_d] = find_second(b, match[b], incl, dist, M, pool, scale);
         match2[b]      = sec;
         match2_dist[b] = sec_d;
@@ -661,7 +685,7 @@ static void init_matches(
 //' @param splits_list List of raw matrices (one per tree), each from as.Splits().
 //' @param n_tip Number of tips.
 //' @param scale Logical: use scaled transfer distance?
-//' @param greedy_best Logical: TRUE for "best", FALSE for "first".
+//' @param greedy_best_flag Logical: TRUE for "best", FALSE for "first".
 //' @param init_majority Logical: TRUE to start from majority-rule splits.
 //'
 //' @return A LogicalVector of length n_splits indicating which pooled splits
@@ -699,11 +723,12 @@ List cpp_transfer_consensus(
   // ---- Compatibility matrix ----
   std::vector<uint8_t> compat = compat_mat(pool);
 
-  // ---- Sort order (by count, descending) ----
+  // ---- Sort order (by count descending, then index ascending for ties) ----
+  // Must match R's order(-counts, seq_along(counts)) for reproducibility.
   std::vector<int> sort_ord(M);
   std::iota(sort_ord.begin(), sort_ord.end(), 0);
-  std::sort(sort_ord.begin(), sort_ord.end(),
-            [&](int a, int b) { return pool.count[a] > pool.count[b]; });
+  std::stable_sort(sort_ord.begin(), sort_ord.end(),
+                   [&](int a, int b) { return pool.count[a] > pool.count[b]; });
 
   // ---- Mutable state ----
   std::vector<int> match(M, -1);   // -1 = sentinel
@@ -795,8 +820,8 @@ Rcpp::NumericVector cpp_tc_profile(
 
     std::vector<int> sort_ord(M);
     std::iota(sort_ord.begin(), sort_ord.end(), 0);
-    std::sort(sort_ord.begin(), sort_ord.end(),
-              [&](int a, int b) { return pool.count[a] > pool.count[b]; });
+    std::stable_sort(sort_ord.begin(), sort_ord.end(),
+                     [&](int a, int b) { return pool.count[a] > pool.count[b]; });
 
     std::vector<int> match(M, -1);
     std::vector<int> match2(M, -1);
diff --git a/src/transfer_distance.cpp b/src/transfer_distance.cpp
new file mode 100644
index 000000000..fb026b426
--- /dev/null
+++ b/src/transfer_distance.cpp
@@ -0,0 +1,363 @@
+/* transfer_distance.cpp
+ *
+ * Per-pair and batch computation of the transfer dissimilarity between
+ * phylogenetic trees (Takazawa et al. 2026; Lemoine et al. 2018).
+ *
+ * The transfer distance between two bipartitions is the minimum number of
+ * taxa that must be moved to transform one bipartition into the other:
+ *   delta(b, b*) = min(Hamming(b, b*), n - Hamming(b, b*))
+ *
+ * The transfer dissimilarity between two trees sums, for each branch in
+ * each tree, the minimum transfer distance to any branch in the other tree.
+ */
+
+#include <TreeTools/SplitList.h>
+#include <Rcpp/Lightest>
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+using Rcpp::IntegerVector;
+using Rcpp::List;
+using Rcpp::LogicalVector;
+using Rcpp::Named;
+using Rcpp::NumericMatrix;
+using Rcpp::NumericVector;
+using Rcpp::RawMatrix;
+using TreeTools::count_bits;
+using TreeTools::SplitList;
+
+using int16 = int_fast16_t;
+using int32 = int_fast32_t;
+
+
+// ============================================================================
+// Per-pair transfer distance computation
+// ============================================================================
+
+// Compute transfer distance between two bipartitions (split bitsets).
+// Returns min(hamming, n_tips - hamming).
+static inline int transfer_dist_splits(
+    const splitbit* a, const splitbit* b, int n_bins, int n_tips
+) {
+  int hamming = 0;
+  for (int bin = 0; bin < n_bins; ++bin) {
+    hamming += count_bits(a[bin] ^ b[bin]);
+  }
+  return std::min(hamming, n_tips - hamming);
+}
+
+
+// For each split in `from`, find the minimum transfer distance to any split
+// in `to`.  Returns a vector of length from.n_splits with the min distances,
+// and a matching vector with the index of the best match in `to` (-1 if no
+// improvement over sentinel).
+static void find_min_transfer(
+    const SplitList& from,
+    const SplitList& to,
+    int n_tips,
+    int n_bins,
+    std::vector<int>& min_dist,     // out: [from.n_splits]
+    std::vector<int>& best_match    // out: [from.n_splits]
+) {
+  for (int16 i = 0; i < from.n_splits; ++i) {
+    // Sentinel distance: depth(b) - 1
+    int pc = 0;
+    for (int bin = 0; bin < n_bins; ++bin) {
+      pc += count_bits(from.state[i][bin]);
+    }
+    int depth = std::min(pc, n_tips - pc);
+    int sentinel = depth - 1;
+    if (sentinel <= 0) {
+      min_dist[i] = 0;
+      best_match[i] = -1;
+      continue;
+    }
+
+    int best_d = sentinel;
+    int best_j = -1;
+
+    for (int16 j = 0; j < to.n_splits; ++j) {
+      int d = transfer_dist_splits(from.state[i], to.state[j], n_bins, n_tips);
+      if (d < best_d) {
+        best_d = d;
+        best_j = j;
+        if (d == 0) break; // can't do better
+      }
+    }
+
+    min_dist[i] = best_d;
+    best_match[i] = best_j;
+  }
+}
+
+
+// Accumulate the transfer dissimilarity contribution from one direction.
+// For scaled: sum of min(min_dist[i] / (depth[i] - 1), 1)
+// For unscaled: sum of min(min_dist[i], depth[i] - 1)
+static double accumulate_transfer(
+    const SplitList& sl,
+    const std::vector<int>& min_dist,
+    int n_tips, int n_bins,
+    bool scale
+) {
+  double total = 0.0;
+  for (int16 i = 0; i < sl.n_splits; ++i) {
+    int pc = 0;
+    for (int bin = 0; bin < n_bins; ++bin) {
+      pc += count_bits(sl.state[i][bin]);
+    }
+    int depth = std::min(pc, n_tips - pc);
+    int p_minus_1 = depth - 1;
+    if (p_minus_1 <= 0) continue;
+
+    int d = min_dist[i];
+    if (scale) {
+      double contrib = static_cast<double>(d) / p_minus_1;
+      total += (contrib < 1.0) ? contrib : 1.0;
+    } else {
+      total += (d < p_minus_1) ? d : p_minus_1;
+    }
+  }
+  return total;
+}
+
+
+// ============================================================================
+// Exported: per-pair transfer distance (returns both scaled and unscaled)
+// ============================================================================
+
+//' Per-pair transfer dissimilarity
+//'
+//' @param x,y Raw matrices representing splits (from as.Splits()).
+//' @param nTip Integer: number of tips.
+//'
+//' @return A list with components:
+//'   - score_scaled: scaled transfer dissimilarity (double)
+//'   - score_unscaled: unscaled transfer dissimilarity (double)
+//'   - matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+//'   - matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+//' @keywords internal
+// [[Rcpp::export]]
+List cpp_transfer_dist(
+    const RawMatrix& x, const RawMatrix& y,
+    const IntegerVector& nTip
+) {
+  if (x.cols() != y.cols()) {
+    Rcpp::stop("Input splits must address same number of tips.");
+  }
+
+  const int n_tip = nTip[0];
+  SplitList sl_x(x);
+  SplitList sl_y(y);
+  const int n_bins = sl_x.n_bins;
+
+  // Direction x → y
+  std::vector<int> min_dist_xy(sl_x.n_splits);
+  std::vector<int> match_xy(sl_x.n_splits);
+  find_min_transfer(sl_x, sl_y, n_tip, n_bins, min_dist_xy, match_xy);
+
+  // Direction y → x
+  std::vector<int> min_dist_yx(sl_y.n_splits);
+  std::vector<int> match_yx(sl_y.n_splits);
+  find_min_transfer(sl_y, sl_x, n_tip, n_bins, min_dist_yx, match_yx);
+
+  // Accumulate both directions for scaled and unscaled
+  double score_scaled =
+      accumulate_transfer(sl_x, min_dist_xy, n_tip, n_bins, true) +
+      accumulate_transfer(sl_y, min_dist_yx, n_tip, n_bins, true);
+  double score_unscaled =
+      accumulate_transfer(sl_x, min_dist_xy, n_tip, n_bins, false) +
+      accumulate_transfer(sl_y, min_dist_yx, n_tip, n_bins, false);
+
+  // Build R matching vectors (1-based, NA for sentinel)
+  IntegerVector r_match_xy(sl_x.n_splits);
+  for (int16 i = 0; i < sl_x.n_splits; ++i) {
+    r_match_xy[i] = (match_xy[i] >= 0) ? match_xy[i] + 1 : NA_INTEGER;
+  }
+  IntegerVector r_match_yx(sl_y.n_splits);
+  for (int16 i = 0; i < sl_y.n_splits; ++i) {
+    r_match_yx[i] = (match_yx[i] >= 0) ? match_yx[i] + 1 : NA_INTEGER;
+  }
+
+  return List::create(
+    Named("score_scaled") = score_scaled,
+    Named("score_unscaled") = score_unscaled,
+    Named("matching_xy") = r_match_xy,
+    Named("matching_yx") = r_match_yx
+  );
+}
+
+
+// ============================================================================
+// Exported: per-pair score-only (for GeneralizedRF compatibility)
+// ============================================================================
+
+// Returns a list with "score" and "matching" to be compatible with
+// GeneralizedRF / CalculateTreeDistance dispatch.
+
+//' @keywords internal
+// [[Rcpp::export]]
+List cpp_transfer_dist_scored(
+    const RawMatrix& x, const RawMatrix& y,
+    const IntegerVector& nTip, bool scale
+) {
+  if (x.cols() != y.cols()) {
+    Rcpp::stop("Input splits must address same number of tips.");
+  }
+
+  const int n_tip = nTip[0];
+  SplitList sl_x(x);
+  SplitList sl_y(y);
+  const int n_bins = sl_x.n_bins;
+
+  std::vector<int> min_dist_xy(sl_x.n_splits);
+  std::vector<int> match_xy(sl_x.n_splits);
+  find_min_transfer(sl_x, sl_y, n_tip, n_bins, min_dist_xy, match_xy);
+
+  std::vector<int> min_dist_yx(sl_y.n_splits);
+  std::vector<int> match_yx(sl_y.n_splits);
+  find_min_transfer(sl_y, sl_x, n_tip, n_bins, min_dist_yx, match_yx);
+
+  double score = scale
+    ? (accumulate_transfer(sl_x, min_dist_xy, n_tip, n_bins, true) +
+       accumulate_transfer(sl_y, min_dist_yx, n_tip, n_bins, true))
+    : (accumulate_transfer(sl_x, min_dist_xy, n_tip, n_bins, false) +
+       accumulate_transfer(sl_y, min_dist_yx, n_tip, n_bins, false));
+
+  // Build matching vector (1-based) — just the x→y direction
+  const int n_out = std::max(static_cast<int>(sl_x.n_splits),
+                             static_cast<int>(sl_y.n_splits));
+  IntegerVector matching(n_out, NA_INTEGER);
+  for (int16 i = 0; i < sl_x.n_splits; ++i) {
+    matching[i] = (match_xy[i] >= 0) ? match_xy[i] + 1 : NA_INTEGER;
+  }
+
+  return List::create(Named("score") = score, Named("matching") = matching);
+}
+
+
+// ============================================================================
+// Batch: all-pairs transfer dissimilarity (OpenMP parallel)
+// ============================================================================
+
+// Static helper for score-only per-pair computation.
+static double transfer_dissimilarity_score(
+    const SplitList& sl_x, const SplitList& sl_y,
+    int n_tip, int n_bins, bool scale
+) {
+  // Avoid allocation when trees have 0 splits (star trees)
+  if (sl_x.n_splits == 0 && sl_y.n_splits == 0) return 0.0;
+
+  std::vector<int> min_dist_xy(sl_x.n_splits);
+  std::vector<int> dummy_match_xy(sl_x.n_splits);
+  find_min_transfer(sl_x, sl_y, n_tip, n_bins, min_dist_xy, dummy_match_xy);
+
+  std::vector<int> min_dist_yx(sl_y.n_splits);
+  std::vector<int> dummy_match_yx(sl_y.n_splits);
+  find_min_transfer(sl_y, sl_x, n_tip, n_bins, min_dist_yx, dummy_match_yx);
+
+  return accumulate_transfer(sl_x, min_dist_xy, n_tip, n_bins, scale) +
+         accumulate_transfer(sl_y, min_dist_yx, n_tip, n_bins, scale);
+}
+
+
+//' All-pairs transfer dissimilarity (OpenMP)
+//'
+//' @param splits_list List of raw matrices (one per tree).
+//' @param n_tip Number of tips.
+//' @param scale Logical: use scaled transfer dissimilarity?
+//' @param n_threads Number of OpenMP threads.
+//'
+//' @return Numeric vector of length choose(N,2) in dist order.
+//' @keywords internal
+// [[Rcpp::export]]
+NumericVector cpp_transfer_dist_all_pairs(
+    const List& splits_list,
+    int n_tip, bool scale, int n_threads
+) {
+  const int N = splits_list.size();
+  const int n_pairs = N * (N - 1) / 2;
+
+  // Pre-convert all trees to SplitLists
+  std::vector<std::unique_ptr<SplitList>> sls(N);
+  int n_bins = 0;
+  for (int i = 0; i < N; ++i) {
+    sls[i] = std::make_unique<SplitList>(Rcpp::as<RawMatrix>(splits_list[i]));
+    if (i == 0) n_bins = sls[0]->n_bins;
+  }
+
+  NumericVector result(n_pairs);
+
+  // R dist format: column-major lower triangle
+  // pos k corresponds to (row, col) where row > col, iterating col first:
+  // (1,0), (2,0), ..., (N-1,0), (2,1), (3,1), ..., (N-1,1), ...
+  // k = col * (N - 1) - col * (col - 1) / 2 + (row - col - 1)  (0-based)
+  #ifdef _OPENMP
+  #pragma omp parallel for schedule(dynamic) num_threads(n_threads)
+  #endif
+  for (int k = 0; k < n_pairs; ++k) {
+    // Invert k to (col, row) in R dist column-major order
+    // col = largest c such that c*(2N-c-1)/2 <= k
+    int col = 0;
+    int remaining = k;
+    while (remaining >= N - 1 - col) {
+      remaining -= (N - 1 - col);
+      ++col;
+    }
+    int row = col + 1 + remaining;
+
+    result[k] = transfer_dissimilarity_score(
+        *sls[col], *sls[row], n_tip, n_bins, scale);
+  }
+
+  return result;
+}
+
+
+//' Cross-pairs transfer dissimilarity (OpenMP)
+//'
+//' @param splits_a,splits_b Lists of raw matrices.
+//' @param n_tip Number of tips.
+//' @param scale Logical: use scaled transfer dissimilarity?
+//' @param n_threads Number of OpenMP threads.
+//'
+//' @return Numeric matrix of dimension nA x nB.
+//' @keywords internal
+// [[Rcpp::export]]
+NumericMatrix cpp_transfer_dist_cross_pairs(
+    const List& splits_a, const List& splits_b,
+    int n_tip, bool scale, int n_threads
+) {
+  const int nA = splits_a.size();
+  const int nB = splits_b.size();
+
+  std::vector<std::unique_ptr<SplitList>> sls_a(nA);
+  std::vector<std::unique_ptr<SplitList>> sls_b(nB);
+  int n_bins = 0;
+
+  for (int i = 0; i < nA; ++i) {
+    sls_a[i] = std::make_unique<SplitList>(Rcpp::as<RawMatrix>(splits_a[i]));
+    if (i == 0) n_bins = sls_a[0]->n_bins;
+  }
+  for (int j = 0; j < nB; ++j) {
+    sls_b[j] = std::make_unique<SplitList>(Rcpp::as<RawMatrix>(splits_b[j]));
+  }
+
+  NumericMatrix result(nA, nB);
+  const int total = nA * nB;
+
+  #ifdef _OPENMP
+  #pragma omp parallel for schedule(dynamic) num_threads(n_threads)
+  #endif
+  for (int k = 0; k < total; ++k) {
+    int i = k / nB;
+    int j = k % nB;
+    result(i, j) = transfer_dissimilarity_score(
+        *sls_a[i], *sls_b[j], n_tip, n_bins, scale);
+  }
+
+  return result;
+}
diff --git a/tests/testthat/test-tree_distance_transfer.R b/tests/testthat/test-tree_distance_transfer.R
new file mode 100644
index 000000000..649192c11
--- /dev/null
+++ b/tests/testthat/test-tree_distance_transfer.R
@@ -0,0 +1,174 @@
+test_that("TransferDist() returns 0 for identical trees", {
+  tr <- TreeTools::BalancedTree(8)
+  expect_equal(0, TransferDist(tr, tr))
+  expect_equal(0, TransferDist(tr, tr, scale = FALSE))
+})
+
+
+test_that("TransferDist() is symmetric", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  expect_equal(TransferDist(bal8, pec8), TransferDist(pec8, bal8))
+  expect_equal(TransferDist(bal8, pec8, scale = FALSE),
+               TransferDist(pec8, bal8, scale = FALSE))
+})
+
+
+test_that("TransferDist() scaled is bounded by RF", {
+  # The scaled transfer dissimilarity is bounded above by the RF distance
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  td_scaled <- TransferDist(bal8, pec8)
+  rf <- RobinsonFoulds(bal8, pec8)
+  expect_lte(td_scaled, rf)
+})
+
+
+test_that("TransferDist() normalization", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  
+  # Scaled normalization denominator = 2 * (n - 3) = 10
+  td_raw <- TransferDist(bal8, pec8)
+  td_norm <- TransferDist(bal8, pec8, normalize = TRUE)
+  expect_equal(td_norm, td_raw / (2 * (8 - 3)))
+  expect_true(td_norm >= 0)
+  expect_true(td_norm <= 1)
+  
+  # Unscaled normalization
+  td_raw_u <- TransferDist(bal8, pec8, scale = FALSE)
+  td_norm_u <- TransferDist(bal8, pec8, scale = FALSE, normalize = TRUE)
+  # Even n: denom = (n^2 - 2n + 4) / 4 = (64 - 16 + 4) / 4 = 13
+  expect_equal(td_norm_u, td_raw_u / 13)
+})
+
+
+test_that("TransferDist() all-pairs", {
+  trees <- as.phylo(0:5, 8)
+  
+  # All-pairs
+  d <- TransferDist(trees)
+  expect_s3_class(d, "dist")
+  expect_equal(attr(d, "Size"), 6L)
+  
+  # Check diagonal = 0 implicitly (dist objects don't store diagonal)
+  # Check that batch matches pairwise
+  dm <- as.matrix(d)
+  for (i in 1:5) {
+    for (j in (i + 1):6) {
+      expected <- TransferDist(trees[[i]], trees[[j]])
+      expect_equal(dm[i, j], expected, tolerance = 1e-12,
+                   info = paste("pair", i, j))
+    }
+  }
+})
+
+
+test_that("TransferDist() cross-pairs", {
+  trees1 <- as.phylo(0:2, 8)
+  trees2 <- as.phylo(3:5, 8)
+  
+  mat <- TransferDist(trees1, trees2)
+  expect_equal(dim(mat), c(3, 3))
+  
+  # Check matches pairwise
+  for (i in 1:3) {
+    for (j in 1:3) {
+      expected <- TransferDist(trees1[[i]], trees2[[j]])
+      expect_equal(mat[i, j], expected, tolerance = 1e-12,
+                   info = paste("cross", i, j))
+    }
+  }
+})
+
+
+test_that("TransferDist() single tree vs list", {
+  tr <- TreeTools::BalancedTree(8)
+  trees <- as.phylo(0:3, 8)
+  
+  d1 <- TransferDist(tr, trees)
+  d2 <- TransferDist(trees, tr)
+  
+  expect_length(d1, 4)
+  expect_length(d2, 4)
+  
+  for (i in seq_along(trees)) {
+    expected <- TransferDist(tr, trees[[i]])
+    expect_equal(d1[i], expected, tolerance = 1e-12)
+    expect_equal(d2[i], expected, tolerance = 1e-12)
+  }
+})
+
+
+test_that("TransferDist() scaled vs unscaled relationship", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  
+  # Both should be non-negative
+  td_s <- TransferDist(bal8, pec8, scale = TRUE)
+  td_u <- TransferDist(bal8, pec8, scale = FALSE)
+  expect_true(td_s >= 0)
+  expect_true(td_u >= 0)
+  
+  # For identical trees, both = 0
+  expect_equal(0, TransferDist(bal8, bal8, scale = TRUE))
+  expect_equal(0, TransferDist(bal8, bal8, scale = FALSE))
+})
+
+
+test_that("TransferDist() reportMatching", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  
+  res <- TransferDist(bal8, pec8, reportMatching = TRUE)
+  expect_equal(res[[1]], TransferDist(bal8, pec8))
+  
+  matching <- attr(res, "matching")
+  expect_true(!is.null(matching))
+  
+  pairScores <- attr(res, "pairScores")
+  expect_true(!is.null(pairScores))
+  expect_equal(nrow(pairScores), TreeTools::NSplits(bal8))
+  expect_equal(ncol(pairScores), TreeTools::NSplits(pec8))
+})
+
+
+test_that("TransferDist() handles star trees", {
+  star <- TreeTools::StarTree(8)
+  bal8 <- TreeTools::BalancedTree(8)
+  
+  # Star tree has no internal splits; dissimilarity = sum over binary tree only
+  td <- TransferDist(bal8, star)
+  expect_true(td >= 0)
+  
+  # Star vs star = 0
+  expect_equal(0, TransferDist(star, star))
+})
+
+
+test_that("TransferDist() handles small trees", {
+  # 4 tips: minimal non-trivial case
+  tr1 <- TreeTools::BalancedTree(4)
+  tr2 <- TreeTools::PectinateTree(4)
+  
+  td <- TransferDist(tr1, tr2)
+  expect_true(is.finite(td))
+  expect_true(td >= 0)
+})
+
+
+test_that("TransferDist() consistent with TransferConsensus internals", {
+  skip_if_not_installed("TreeTools")
+  
+  # The transfer dissimilarity for a single pair should be consistent with
+  # how TransferConsensus computes distances internally
+  set.seed(6419)
+  trees <- structure(lapply(1:10, function(i) ape::rtree(12)),
+                     class = "multiPhylo")
+  
+  # All-pairs: check symmetry and non-negativity
+  d <- TransferDist(trees)
+  dm <- as.matrix(d)
+  expect_true(all(dm >= -1e-12))
+  expect_true(isSymmetric(dm, tol = 1e-12))
+})

From 025255ea459dbc3c66a229d0b1078144e8643904 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 09:41:08 +0000
Subject: [PATCH 04/17] Multithreadable

---
 R/RcppExports.R            |  8 ++---
 R/transfer_consensus.R     |  5 ++-
 src/RcppExports.cpp        | 18 +++++-----
 src/transfer_consensus.cpp | 69 +++++++++++++++++++++++++-------------
 4 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/R/RcppExports.R b/R/RcppExports.R
index f3b5fae83..82b2838b3 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -212,13 +212,13 @@ spr_table_7 <- function(sp1, sp2) {
 #'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
 #'   of all unique splits) and "light_side" (integer vector).
 #' @keywords internal
-cpp_transfer_consensus <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority) {
-    .Call(`_TreeDist_cpp_transfer_consensus`, splits_list, n_tip, scale, greedy_best_flag, init_majority)
+cpp_transfer_consensus <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_threads = 1L) {
+    .Call(`_TreeDist_cpp_transfer_consensus`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_threads)
 }
 
 #' @keywords internal
-cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter) {
-    .Call(`_TreeDist_cpp_tc_profile`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter)
+cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter, n_threads = 1L) {
+    .Call(`_TreeDist_cpp_tc_profile`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter, n_threads)
 }
 
 #' Per-pair transfer dissimilarity
diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R
index f1232a7a8..34076f033 100644
--- a/R/transfer_consensus.R
+++ b/R/transfer_consensus.R
@@ -71,10 +71,13 @@ TransferConsensus <- function(trees,
   })
 
   # Delegate all work to C++
+  nThreads <- max(1L, getOption("TreeDist.threads",
+                                 getOption("mc.cores", 1L)))
   res <- cpp_transfer_consensus(
     splitsList, nTip, scale,
     greedy_best_flag = (greedy == "best"),
-    init_majority = (init == "majority")
+    init_majority = (init == "majority"),
+    n_threads = nThreads
   )
 
   included <- res$included
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index a8e03bc8f..b3d661be0 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -523,8 +523,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // cpp_transfer_consensus
-List cpp_transfer_consensus(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority);
-RcppExport SEXP _TreeDist_cpp_transfer_consensus(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP) {
+List cpp_transfer_consensus(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority, const int n_threads);
+RcppExport SEXP _TreeDist_cpp_transfer_consensus(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP, SEXP n_threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -533,13 +533,14 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const bool >::type scale(scaleSEXP);
     Rcpp::traits::input_parameter< const bool >::type greedy_best_flag(greedy_best_flagSEXP);
     Rcpp::traits::input_parameter< const bool >::type init_majority(init_majoritySEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_transfer_consensus(splits_list, n_tip, scale, greedy_best_flag, init_majority));
+    Rcpp::traits::input_parameter< const int >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_transfer_consensus(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_threads));
     return rcpp_result_gen;
 END_RCPP
 }
 // cpp_tc_profile
-Rcpp::NumericVector cpp_tc_profile(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority, const int n_iter);
-RcppExport SEXP _TreeDist_cpp_tc_profile(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP, SEXP n_iterSEXP) {
+Rcpp::NumericVector cpp_tc_profile(const List& splits_list, const int n_tip, const bool scale, const bool greedy_best_flag, const bool init_majority, const int n_iter, const int n_threads);
+RcppExport SEXP _TreeDist_cpp_tc_profile(SEXP splits_listSEXP, SEXP n_tipSEXP, SEXP scaleSEXP, SEXP greedy_best_flagSEXP, SEXP init_majoritySEXP, SEXP n_iterSEXP, SEXP n_threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -549,7 +550,8 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const bool >::type greedy_best_flag(greedy_best_flagSEXP);
     Rcpp::traits::input_parameter< const bool >::type init_majority(init_majoritySEXP);
     Rcpp::traits::input_parameter< const int >::type n_iter(n_iterSEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_tc_profile(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter));
+    Rcpp::traits::input_parameter< const int >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_tc_profile(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter, n_threads));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -745,8 +747,8 @@ static const R_CallMethodDef CallEntries[] = {
     {"_TreeDist_keep_and_reroot", (DL_FUNC) &_TreeDist_keep_and_reroot, 3},
     {"_TreeDist_keep_and_reduce", (DL_FUNC) &_TreeDist_keep_and_reduce, 3},
     {"_TreeDist_spr_table_7", (DL_FUNC) &_TreeDist_spr_table_7, 2},
-    {"_TreeDist_cpp_transfer_consensus", (DL_FUNC) &_TreeDist_cpp_transfer_consensus, 5},
-    {"_TreeDist_cpp_tc_profile", (DL_FUNC) &_TreeDist_cpp_tc_profile, 6},
+    {"_TreeDist_cpp_transfer_consensus", (DL_FUNC) &_TreeDist_cpp_transfer_consensus, 6},
+    {"_TreeDist_cpp_tc_profile", (DL_FUNC) &_TreeDist_cpp_tc_profile, 7},
     {"_TreeDist_cpp_transfer_dist", (DL_FUNC) &_TreeDist_cpp_transfer_dist, 3},
     {"_TreeDist_cpp_transfer_dist_scored", (DL_FUNC) &_TreeDist_cpp_transfer_dist_scored, 4},
     {"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4},
diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
index dc7b2868d..332b1df0c 100644
--- a/src/transfer_consensus.cpp
+++ b/src/transfer_consensus.cpp
@@ -7,6 +7,10 @@
  * Exported function: cpp_transfer_consensus()
  */
 
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
 #include <TreeTools/SplitList.h>
 #include <Rcpp/Lightest>
 #include <algorithm>
@@ -208,13 +212,17 @@ static PooledSplits pool_splits(const List& splits_list, int n_tips) {
 
 // Returns a flat n_splits x n_splits matrix (row-major).
 // DIST[i * n + j] = transfer distance between split i and split j.
-static std::vector<int> transfer_dist_mat(const PooledSplits& pool) {
+static std::vector<int> transfer_dist_mat(const PooledSplits& pool,
+                                          int n_threads) {
   const int M = pool.n_splits;
   const int nb = pool.n_bins;
   const int nt = pool.n_tips;
 
   std::vector<int> dist(M * M, 0);
 
+  #ifdef _OPENMP
+  #pragma omp parallel for schedule(dynamic) num_threads(n_threads)
+  #endif
   for (int i = 0; i < M; ++i) {
     const splitbit* a = pool.split(i);
     for (int j = i + 1; j < M; ++j) {
@@ -239,33 +247,40 @@ static std::vector<int> transfer_dist_mat(const PooledSplits& pool) {
 static std::vector<double> compute_td(
     const std::vector<int>& dist,
     const PooledSplits& pool,
-    bool scale
+    bool scale,
+    int n_threads
 ) {
   const int M = pool.n_splits;
   const int n_tree = static_cast<int>(pool.tree_members.size());
 
   std::vector<double> td(M, 0.0);
 
-  for (int t = 0; t < n_tree; ++t) {
-    const auto& members = pool.tree_members[t];
-    const int n_mem = static_cast<int>(members.size());
+  // Parallelise over splits (each td[b] is independent)
+  #ifdef _OPENMP
+  #pragma omp parallel for schedule(static) num_threads(n_threads)
+  #endif
+  for (int b = 0; b < M; ++b) {
+    const int p_minus_1 = pool.light_side[b] - 1;
+    if (p_minus_1 <= 0) continue;
 
-    for (int b = 0; b < M; ++b) {
-      // Min distance from split b to any split in tree t
-      int min_d = pool.light_side[b] - 1; // sentinel distance
+    double acc = 0.0;
+    for (int t = 0; t < n_tree; ++t) {
+      const auto& members = pool.tree_members[t];
+      const int n_mem = static_cast<int>(members.size());
+
+      int min_d = p_minus_1; // sentinel distance
       for (int k = 0; k < n_mem; ++k) {
         int d = dist[b * M + members[k]];
         if (d < min_d) min_d = d;
       }
-      const int p_minus_1 = pool.light_side[b] - 1;
-      if (p_minus_1 <= 0) continue;
       if (scale) {
         double contrib = static_cast<double>(min_d) / p_minus_1;
-        td[b] += (contrib < 1.0) ? contrib : 1.0;
+        acc += (contrib < 1.0) ? contrib : 1.0;
       } else {
-        td[b] += (min_d < p_minus_1) ? min_d : p_minus_1;
+        acc += (min_d < p_minus_1) ? min_d : p_minus_1;
       }
     }
+    td[b] = acc;
   }
   return td;
 }
@@ -277,7 +292,8 @@ static std::vector<double> compute_td(
 
 // Returns flat M x M bool matrix.
 // compat[i * M + j] = true iff splits i and j are compatible.
-static std::vector<uint8_t> compat_mat(const PooledSplits& pool) {
+static std::vector<uint8_t> compat_mat(const PooledSplits& pool,
+                                       int n_threads) {
   const int M = pool.n_splits;
   const int nb = pool.n_bins;
   const int last_bin = nb - 1;
@@ -285,6 +301,9 @@ static std::vector<uint8_t> compat_mat(const PooledSplits& pool) {
 
   std::vector<uint8_t> compat(M * M, 1);
 
+  #ifdef _OPENMP
+  #pragma omp parallel for schedule(dynamic) num_threads(n_threads)
+  #endif
   for (int i = 0; i < M; ++i) {
     const splitbit* a = pool.split(i);
     for (int j = i + 1; j < M; ++j) {
@@ -300,9 +319,9 @@ static std::vector<uint8_t> compat_mat(const PooledSplits& pool) {
         if (!nanb) nanb = (~a_bin & ~b_bin & mask) != 0;
         if (ab && anb && nab && nanb) break;
       }
-      bool comp = !ab || !anb || !nab || !nanb;
-      compat[i * M + j] = comp ? 1 : 0;
-      compat[j * M + i] = comp ? 1 : 0;
+      uint8_t comp = (!ab || !anb || !nab || !nanb) ? 1 : 0;
+      compat[i * M + j] = comp;
+      compat[j * M + i] = comp;
     }
   }
   return compat;
@@ -698,7 +717,8 @@ List cpp_transfer_consensus(
     const int n_tip,
     const bool scale,
     const bool greedy_best_flag,
-    const bool init_majority
+    const bool init_majority,
+    const int n_threads = 1
 ) {
   const int n_tree = splits_list.size();
 
@@ -715,13 +735,13 @@ List cpp_transfer_consensus(
   }
 
   // ---- Pairwise transfer distance matrix ----
-  std::vector<int> dist = transfer_dist_mat(pool);
+  std::vector<int> dist = transfer_dist_mat(pool, n_threads);
 
   // ---- Transfer dissimilarity cost ----
-  std::vector<double> td = compute_td(dist, pool, scale);
+  std::vector<double> td = compute_td(dist, pool, scale, n_threads);
 
   // ---- Compatibility matrix ----
-  std::vector<uint8_t> compat = compat_mat(pool);
+  std::vector<uint8_t> compat = compat_mat(pool, n_threads);
 
   // ---- Sort order (by count descending, then index ascending for ties) ----
   // Must match R's order(-counts, seq_along(counts)) for reproducibility.
@@ -796,7 +816,8 @@ Rcpp::NumericVector cpp_tc_profile(
     const bool scale,
     const bool greedy_best_flag,
     const bool init_majority,
-    const int n_iter
+    const int n_iter,
+    const int n_threads = 1
 ) {
   using clk = std::chrono::high_resolution_clock;
   const int n_tree = splits_list.size();
@@ -809,13 +830,13 @@ Rcpp::NumericVector cpp_tc_profile(
     auto t1 = clk::now();
     const int M = pool.n_splits;
 
-    std::vector<int> dist = transfer_dist_mat(pool);
+    std::vector<int> dist = transfer_dist_mat(pool, n_threads);
     auto t2 = clk::now();
 
-    std::vector<double> td = compute_td(dist, pool, scale);
+    std::vector<double> td = compute_td(dist, pool, scale, n_threads);
     auto t3 = clk::now();
 
-    std::vector<uint8_t> compat_v = compat_mat(pool);
+    std::vector<uint8_t> compat_v = compat_mat(pool, n_threads);
     auto t4 = clk::now();
 
     std::vector<int> sort_ord(M);

From 2caf908dee87069d568ff163f2f942ece64a658e Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 09:50:40 +0000
Subject: [PATCH 05/17] Spelling

---
 AGENTS.md                     | 14 ++++++++++++++
 NEWS.md                       |  2 +-
 R/RcppExports.R               |  8 ++++----
 R/transfer_consensus.R        |  2 +-
 inst/WORDLIST                 |  3 +++
 man/cpp_transfer_consensus.Rd |  3 ++-
 src/transfer_consensus.cpp    |  2 +-
 src/transfer_distance.cpp     |  6 +++---
 8 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 6208160a7..237aa42bf 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -257,6 +257,20 @@ source("benchmark/bench-tree-distances.R")
 C++ compilation flags are controlled by `src/Makevars.win` (Windows) / `src/Makevars`.
 The package requires **C++17** (`CXX_STD = CXX17`).
 
+### Documentation and spelling checks
+
+After any work that adds or modifies roxygen comments, Rd files, NEWS.md, or
+vignettes, run:
+
+```r
+devtools::check_man()                # regenerates Rd files and checks for issues
+spelling::spell_check_package()      # flags potential misspellings
+```
+
+Legitimate technical terms, proper nouns, and code identifiers flagged by the
+spell checker should be added to `inst/WORDLIST` (one word per line,
+alphabetically sorted).  Only fix actual typos in the source.
+
 ### Code coverage
 
 Check that existing tests cover all new code.  The GHA test suite uses codecov.
diff --git a/NEWS.md b/NEWS.md
index 9bdf1aa93..d1b783d23 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -62,7 +62,7 @@ Typical speedups over v2.12.0 for tree sets where most splits are shared
 - Cross-pairs computations (`tree1` vs `tree2` where both are lists) now
   use the same optimized batch path as all-pairs computations.
 
-### KendallColijn distance
+### Kendall & Colijn distance
 
 - `KCVector()` reimplemented in C++, giving ~220× speedup per tree.
 
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 82b2838b3..a907e27f0 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -208,7 +208,7 @@ spr_table_7 <- function(sp1, sp2) {
 #' @param greedy_best_flag Logical: TRUE for "best", FALSE for "first".
 #' @param init_majority Logical: TRUE to start from majority-rule splits.
 #'
-#' @return A LogicalVector of length n_splits indicating which pooled splits
+#' @return A `LogicalVector` of length n_splits indicating which pooled splits
 #'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
 #'   of all unique splits) and "light_side" (integer vector).
 #' @keywords internal
@@ -229,8 +229,8 @@ cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_maj
 #' @return A list with components:
 #'   - score_scaled: scaled transfer dissimilarity (double)
 #'   - score_unscaled: unscaled transfer dissimilarity (double)
-#'   - matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
-#'   - matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+#'   - `matching_xy`: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+#'   - `matching_yx`: integer vector, best match in x for each split in y (1-based, NA if sentinel)
 #' @keywords internal
 cpp_transfer_dist <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_transfer_dist`, x, y, nTip)
@@ -261,7 +261,7 @@ cpp_transfer_dist_all_pairs <- function(splits_list, n_tip, scale, n_threads) {
 #' @param scale Logical: use scaled transfer dissimilarity?
 #' @param n_threads Number of OpenMP threads.
 #'
-#' @return Numeric matrix of dimension nA x nB.
+#' @return Numeric matrix of dimension `nA` x `nB`.
 #' @keywords internal
 cpp_transfer_dist_cross_pairs <- function(splits_a, splits_b, n_tip, scale, n_threads) {
     .Call(`_TreeDist_cpp_transfer_dist_cross_pairs`, splits_a, splits_b, n_tip, scale, n_threads)
diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R
index 34076f033..384cff0d2 100644
--- a/R/transfer_consensus.R
+++ b/R/transfer_consensus.R
@@ -13,7 +13,7 @@
 #' pairwise transfer distances between them, and greedily adds or removes
 #' splits to minimize total transfer dissimilarity cost.  The approach
 #' follows \insertCite{Takazawa2026;textual}{TreeDist}, reimplemented for
-#' TreeDist's infrastructure.
+#' 'TreeDist' infrastructure.
 #'
 #' @param trees An object of class `multiPhylo`: the input trees.
 #'   All trees must share the same tip labels.
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 7803ed403..29a67fb44 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,6 +1,7 @@
 ABCD
 Bocker
 Bogdanowicz
+branchless
 Böcker
 Cai
 Colijn
@@ -116,6 +117,8 @@ roy
 scic
 sensu
 shinyjs
+speedup
+speedups
 splitwise
 syab
 sysbio
diff --git a/man/cpp_transfer_consensus.Rd b/man/cpp_transfer_consensus.Rd
index 59a4e0a91..99cca0a86 100644
--- a/man/cpp_transfer_consensus.Rd
+++ b/man/cpp_transfer_consensus.Rd
@@ -9,7 +9,8 @@ cpp_transfer_consensus(
   n_tip,
   scale,
   greedy_best_flag,
-  init_majority
+  init_majority,
+  n_threads = 1L
 )
 }
 \arguments{
diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
index 332b1df0c..299b54a71 100644
--- a/src/transfer_consensus.cpp
+++ b/src/transfer_consensus.cpp
@@ -707,7 +707,7 @@ static void init_matches(
 //' @param greedy_best_flag Logical: TRUE for "best", FALSE for "first".
 //' @param init_majority Logical: TRUE to start from majority-rule splits.
 //'
-//' @return A LogicalVector of length n_splits indicating which pooled splits
+//' @return A `LogicalVector` of length n_splits indicating which pooled splits
 //'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
 //'   of all unique splits) and "light_side" (integer vector).
 //' @keywords internal
diff --git a/src/transfer_distance.cpp b/src/transfer_distance.cpp
index fb026b426..c2cf8f04a 100644
--- a/src/transfer_distance.cpp
+++ b/src/transfer_distance.cpp
@@ -137,8 +137,8 @@ static double accumulate_transfer(
 //' @return A list with components:
 //'   - score_scaled: scaled transfer dissimilarity (double)
 //'   - score_unscaled: unscaled transfer dissimilarity (double)
-//'   - matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
-//'   - matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+//'   - `matching_xy`: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+//'   - `matching_yx`: integer vector, best match in x for each split in y (1-based, NA if sentinel)
 //' @keywords internal
 // [[Rcpp::export]]
 List cpp_transfer_dist(
@@ -324,7 +324,7 @@ NumericVector cpp_transfer_dist_all_pairs(
 //' @param scale Logical: use scaled transfer dissimilarity?
 //' @param n_threads Number of OpenMP threads.
 //'
-//' @return Numeric matrix of dimension nA x nB.
+//' @return Numeric matrix of dimension `nA` x `nB`.
 //' @keywords internal
 // [[Rcpp::export]]
 NumericMatrix cpp_transfer_dist_cross_pairs(

From 0ec20064319ea1c3106634ba8cec5b6f1adbfb4b Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 09:50:52 +0000
Subject: [PATCH 06/17] redoc

---
 man/TransferConsensus.Rd             | 2 +-
 man/cpp_transfer_consensus.Rd        | 2 +-
 man/cpp_transfer_dist.Rd             | 4 ++--
 man/cpp_transfer_dist_cross_pairs.Rd | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/man/TransferConsensus.Rd b/man/TransferConsensus.Rd
index 615e27941..afd610b75 100644
--- a/man/TransferConsensus.Rd
+++ b/man/TransferConsensus.Rd
@@ -46,7 +46,7 @@ The algorithm pools all splits observed across input trees, computes
 pairwise transfer distances between them, and greedily adds or removes
 splits to minimize total transfer dissimilarity cost.  The approach
 follows \insertCite{Takazawa2026;textual}{TreeDist}, reimplemented for
-TreeDist's infrastructure.
+'TreeDist' infrastructure.
 }
 \examples{
 library(TreeTools)
diff --git a/man/cpp_transfer_consensus.Rd b/man/cpp_transfer_consensus.Rd
index 99cca0a86..a8ebc915a 100644
--- a/man/cpp_transfer_consensus.Rd
+++ b/man/cpp_transfer_consensus.Rd
@@ -25,7 +25,7 @@ cpp_transfer_consensus(
 \item{init_majority}{Logical: TRUE to start from majority-rule splits.}
 }
 \value{
-A LogicalVector of length n_splits indicating which pooled splits
+A \code{LogicalVector} of length n_splits indicating which pooled splits
 are included in the consensus, plus attributes "raw_splits" (a raw matrix
 of all unique splits) and "light_side" (integer vector).
 }
diff --git a/man/cpp_transfer_dist.Rd b/man/cpp_transfer_dist.Rd
index 2ec68871c..204f1a012 100644
--- a/man/cpp_transfer_dist.Rd
+++ b/man/cpp_transfer_dist.Rd
@@ -16,8 +16,8 @@ A list with components:
 \itemize{
 \item score_scaled: scaled transfer dissimilarity (double)
 \item score_unscaled: unscaled transfer dissimilarity (double)
-\item matching_xy: integer vector, best match in y for each split in x (1-based, NA if sentinel)
-\item matching_yx: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+\item \code{matching_xy}: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+\item \code{matching_yx}: integer vector, best match in x for each split in y (1-based, NA if sentinel)
 }
 }
 \description{
diff --git a/man/cpp_transfer_dist_cross_pairs.Rd b/man/cpp_transfer_dist_cross_pairs.Rd
index 0758544b8..60c551061 100644
--- a/man/cpp_transfer_dist_cross_pairs.Rd
+++ b/man/cpp_transfer_dist_cross_pairs.Rd
@@ -16,7 +16,7 @@ cpp_transfer_dist_cross_pairs(splits_a, splits_b, n_tip, scale, n_threads)
 \item{n_threads}{Number of OpenMP threads.}
 }
 \value{
-Numeric matrix of dimension nA x nB.
+Numeric matrix of dimension \code{nA} x \code{nB}.
 }
 \description{
 Cross-pairs transfer dissimilarity (OpenMP)

From e3a05aa98c92f605d661da8b50bddf37c740f701 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 12:07:57 +0000
Subject: [PATCH 07/17] test cov

---
 tests/testthat/test-transfer_consensus.R     | 573 ++++++++++++++++++-
 tests/testthat/test-tree_distance_transfer.R | 358 +++++++++++-
 2 files changed, 908 insertions(+), 23 deletions(-)

diff --git a/tests/testthat/test-transfer_consensus.R b/tests/testthat/test-transfer_consensus.R
index 9e3d8e353..f1605a415 100644
--- a/tests/testthat/test-transfer_consensus.R
+++ b/tests/testthat/test-transfer_consensus.R
@@ -12,7 +12,7 @@ test_that("Identical trees return fully resolved consensus", {
 test_that("Star tree returned when no signal", {
   set.seed(6129)
   # Fully random trees with many tips — negligible split overlap
-  trees <- structure(lapply(1:30, function(i) rtree(50)),
+  trees <- structure(lapply(1:30, function(i) TreeTools::RandomTree(50, root = TRUE)),
                      class = "multiPhylo")
   tc <- TransferConsensus(trees, greedy = "first")
 
@@ -67,9 +67,10 @@ test_that("init = 'majority' works", {
 })
 
 test_that("Error on bad input", {
-  expect_error(TransferConsensus(list(rtree(5))), "multiPhylo")
-  expect_error(TransferConsensus(structure(list(rtree(5)), class = "multiPhylo")),
-               "at least 2")
+  expect_error(TransferConsensus(list(TreeTools::BalancedTree(5))), "multiPhylo")
+  expect_error(TransferConsensus(
+    structure(list(TreeTools::BalancedTree(5)), class = "multiPhylo")),
+    "at least 2")
 })
 
 test_that("Two-tree consensus returns a valid tree", {
@@ -78,3 +79,567 @@ test_that("Two-tree consensus returns a valid tree", {
   expect_s3_class(tc, "phylo")
   expect_equal(length(TipLabels(tc)), 8)
 })
+
+
+# =========================================================================
+# C++ edge cases (transfer_consensus.cpp coverage)
+# =========================================================================
+
+test_that("TransferConsensus returns star tree for all-star input", {
+  star <- StarTree(8)
+  trees <- structure(rep(list(star), 3), class = "multiPhylo")
+  tc <- TransferConsensus(trees)
+  expect_s3_class(tc, "phylo")
+  expect_equal(NSplits(tc), 0)
+})
+
+test_that("TransferConsensus covers all parameter combinations", {
+  trees <- as.phylo(0:9, nTip = 10)
+
+  # greedy="first", init="majority", scale=FALSE
+  tc <- TransferConsensus(trees, greedy = "first", init = "majority",
+                          scale = FALSE)
+  expect_s3_class(tc, "phylo")
+
+  # greedy="best", init="majority", scale=FALSE
+  tc2 <- TransferConsensus(trees, greedy = "best", init = "majority",
+                           scale = FALSE)
+  expect_s3_class(tc2, "phylo")
+
+  # greedy="first", init="empty", scale=FALSE
+  tc3 <- TransferConsensus(trees, greedy = "first", init = "empty",
+                           scale = FALSE)
+  expect_s3_class(tc3, "phylo")
+})
+
+test_that("cpp_tc_profile() runs without error", {
+  trees <- as.phylo(0:4, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  splitsList <- lapply(trees, function(tr) unclass(as.Splits(tr, tipLabels)))
+
+  # Greedy best, empty init
+  res <- TreeDist:::cpp_tc_profile(splitsList, 8L, TRUE, TRUE, FALSE, 1L, 1L)
+  expect_length(res, 5)
+  expect_true(all(res >= 0))
+  expect_equal(names(res),
+               c("pool_splits", "transfer_dist_mat", "compute_td",
+                 "compat_mat", "greedy"))
+
+  # Greedy first, majority init, unscaled
+  res2 <- TreeDist:::cpp_tc_profile(splitsList, 8L, FALSE, FALSE, TRUE, 1L, 1L)
+  expect_length(res2, 5)
+  expect_true(all(res2 >= 0))
+
+  # Multiple iterations
+  res3 <- TreeDist:::cpp_tc_profile(splitsList, 8L, TRUE, TRUE, FALSE, 3L, 1L)
+  expect_length(res3, 5)
+
+  # Greedy best, majority init
+  res4 <- TreeDist:::cpp_tc_profile(splitsList, 8L, TRUE, TRUE, TRUE, 1L, 1L)
+  expect_length(res4, 5)
+})
+
+
+# =========================================================================
+# R-level internal helper functions (transfer_consensus.R lines 94-551)
+# =========================================================================
+
+test_that(".FlipRaw() flips bits and masks correctly", {
+  # 8 tips (1 byte, all bits used, mask = 0xFF)
+  expect_equal(TreeDist:::.FlipRaw(as.raw(0x07), 8), as.raw(0xf8))
+
+  # 5 tips (1 byte, 5 bits, mask = 0x1F)
+  expect_equal(TreeDist:::.FlipRaw(as.raw(0x07), 5), as.raw(0x18))
+
+  # Multi-byte: 10 tips (2 bytes, last byte 2 bits, mask = 0x03)
+  result <- TreeDist:::.FlipRaw(as.raw(c(0x00, 0x01)), 10)
+  expect_equal(result, as.raw(c(0xff, 0x02)))
+})
+
+test_that(".TransferDistMat() computes pairwise transfer distances", {
+  splitMat <- matrix(c(
+    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
+    TRUE, TRUE, TRUE, FALSE, FALSE, FALSE,
+    FALSE, FALSE, FALSE, TRUE, TRUE, TRUE
+  ), nrow = 3, byrow = TRUE)
+
+  DIST <- TreeDist:::.TransferDistMat(splitMat, 6)
+  expect_equal(dim(DIST), c(3, 3))
+  expect_equal(diag(DIST), c(0, 0, 0))
+  expect_true(isSymmetric(DIST))
+  # {1,2} vs {1,2,3}: hamming = 1
+  expect_equal(DIST[1, 2], 1)
+  # {1,2,3} vs {4,5,6}: complements → transfer = 0
+  expect_equal(DIST[2, 3], 0)
+
+  # Single split
+  DIST1 <- TreeDist:::.TransferDistMat(
+    matrix(c(TRUE, TRUE, FALSE, FALSE), nrow = 1), 4)
+  expect_equal(dim(DIST1), c(1, 1))
+  expect_equal(DIST1[1, 1], 0)
+})
+
+test_that(".CompatMat() checks bipartition compatibility", {
+  splitMat <- matrix(c(
+    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
+    FALSE, FALSE, TRUE, TRUE, FALSE, FALSE,
+    TRUE, FALSE, TRUE, FALSE, FALSE, FALSE
+  ), nrow = 3, byrow = TRUE)
+
+  compat <- TreeDist:::.CompatMat(splitMat, 6)
+  expect_equal(dim(compat), c(3, 3))
+  expect_true(compat[1, 2])    # {1,2} and {3,4}: disjoint → compatible
+  expect_false(compat[1, 3])   # {1,2} and {1,3}: all 4 intersections → incompatible
+  expect_true(isSymmetric(compat))
+})
+
+test_that(".ComputeTD() works for both scale modes", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  lightSide <- c(3, 2, 3)
+  sentDist <- lightSide - 1L
+  treeMembers <- list(c(1, 2), c(2, 3))
+
+  td_s <- TreeDist:::.ComputeTD(DIST, sentDist, treeMembers, lightSide, 2, TRUE)
+  td_u <- TreeDist:::.ComputeTD(DIST, sentDist, treeMembers, lightSide, 2, FALSE)
+  expect_length(td_s, 3)
+  expect_length(td_u, 3)
+  expect_true(all(td_s >= 0))
+  expect_true(all(td_u >= 0))
+
+  # Single member per tree
+  td_single <- TreeDist:::.ComputeTD(
+    DIST, sentDist, list(c(1), c(3)), lightSide, 2, TRUE)
+  expect_length(td_single, 3)
+})
+
+test_that(".IsCompat() handles edge cases", {
+  compat <- matrix(TRUE, 3, 3)
+  compat[1, 3] <- compat[3, 1] <- FALSE
+
+  # Empty included → always compatible
+  expect_true(TreeDist:::.IsCompat(1, c(FALSE, FALSE, FALSE), compat, 10))
+  # Too many included (>= nTip - 3) → never compatible
+  expect_false(TreeDist:::.IsCompat(1, c(TRUE, TRUE, TRUE), compat, 6))
+  # Compatible with included set
+  expect_true(TreeDist:::.IsCompat(1, c(FALSE, TRUE, FALSE), compat, 10))
+  # Incompatible with an included split
+  expect_false(TreeDist:::.IsCompat(1, c(FALSE, FALSE, TRUE), compat, 10))
+})
+
+test_that(".Dist() returns correct distance", {
+  DIST <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), 3, 3)
+  sentDist <- c(5, 6, 7)
+
+  expect_equal(TreeDist:::.Dist(1, 2, DIST, sentDist), 2)
+  expect_equal(TreeDist:::.Dist(1, NA, DIST, sentDist), 5)
+  expect_equal(TreeDist:::.Dist(3, NA, DIST, sentDist), 7)
+})
+
+test_that(".DiagDist() vectorizes correctly", {
+  DIST <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), 3, 3)
+  sentDist <- c(5, 6, 7)
+
+  expect_equal(
+    TreeDist:::.DiagDist(c(2, NA, 1), DIST, sentDist),
+    c(DIST[1, 2], 6, DIST[3, 1])
+  )
+  expect_equal(TreeDist:::.DiagDist(c(NA, NA, NA), DIST, sentDist), sentDist)
+  expect_equal(
+    TreeDist:::.DiagDist(c(2, 3, 1), DIST, sentDist),
+    c(DIST[1, 2], DIST[2, 3], DIST[3, 1])
+  )
+})
+
+test_that(".FindSecond() works for both scale modes and edge cases", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  pMinus1 <- c(4, 4, 4)
+
+  # Unscaled: exclude matchIdx, find best
+  expect_equal(
+    TreeDist:::.FindSecond(1, 2, c(1, 2, 3), DIST, pMinus1, FALSE), 1)
+
+  # matchIdx = NA → search all
+  expect_equal(
+    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST, pMinus1, FALSE), 2)
+
+  # No candidates
+  expect_true(is.na(
+    TreeDist:::.FindSecond(1, 2, c(2), DIST, pMinus1, FALSE)))
+
+  # All exceed threshold (unscaled)
+  DIST_far <- matrix(c(0, 5, 5, 5, 0, 5, 5, 5, 0), 3, 3)
+  expect_true(is.na(
+    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST_far, c(2, 2, 2), FALSE)
+  ))
+
+  # Scaled: normal case
+  expect_equal(
+    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST, pMinus1, TRUE), 2)
+
+  # Scaled: exceeds threshold
+  expect_true(is.na(
+    TreeDist:::.FindSecond(1, NA_integer_, c(2), DIST_far, c(2, 2, 2), TRUE)
+  ))
+})
+
+test_that(".DoAdd() updates match state correctly", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  sentDist <- c(4, 4, 4)
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, 3)
+  st$MATCH2 <- rep(NA_integer_, 3)
+  st$incl <- rep(FALSE, 3)
+
+  # First add: all splits match the newly added split
+  TreeDist:::.DoAdd(2, st, DIST, sentDist)
+  expect_true(st$incl[2])
+  expect_equal(st$MATCH[1], 2)
+  expect_equal(st$MATCH[2], 2)
+  expect_equal(st$MATCH[3], 2)
+
+  # Second add: closer splits update, old match demotes to second
+  TreeDist:::.DoAdd(1, st, DIST, sentDist)
+  expect_equal(st$MATCH[1], 1)    # self-match closer
+  expect_equal(st$MATCH2[1], 2)   # old match becomes second
+  expect_equal(st$MATCH[2], 2)    # split 2 still closest to itself
+  expect_equal(st$MATCH2[2], 1)   # split 1 becomes second
+})
+
+test_that(".DoRemove() handles sentinel promotion and affected2", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  sentDist <- c(4, 4, 4)
+  lightSide <- c(5, 5, 5)
+
+  # Case 1: sentinel promotion (MATCH2 = NA, remove MATCH, no other included)
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- c(2L, NA_integer_, 2L)
+  st$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
+  st$incl <- c(FALSE, TRUE, FALSE)
+
+  TreeDist:::.DoRemove(2, st, DIST, sentDist, lightSide, FALSE)
+  expect_true(is.na(st$MATCH[1]))
+  expect_true(is.na(st$MATCH[3]))
+
+  # Case 2: sentinel promotion with rescan finding another match
+  st2 <- new.env(parent = emptyenv())
+  st2$MATCH <- c(2L, 2L, NA_integer_)
+  st2$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
+  st2$incl <- c(TRUE, TRUE, FALSE)
+
+  TreeDist:::.DoRemove(2, st2, DIST, sentDist, lightSide, FALSE)
+  expect_equal(st2$MATCH[1], 1L)  # rescanned, found split 1
+
+  # Case 3: affected2 path (MATCH2 == removed, MATCH != removed)
+  st3 <- new.env(parent = emptyenv())
+  st3$MATCH <- c(1L, 1L, 1L)
+  st3$MATCH2 <- c(2L, 2L, NA_integer_)
+  st3$incl <- c(TRUE, TRUE, FALSE)
+
+  TreeDist:::.DoRemove(2, st3, DIST, sentDist, lightSide, FALSE)
+  expect_equal(st3$MATCH, c(1L, 1L, 1L))  # unchanged
+  expect_true(is.na(st3$MATCH2[1]))
+  expect_true(is.na(st3$MATCH2[2]))
+
+  # Case 4: scaled mode
+  st4 <- new.env(parent = emptyenv())
+  st4$MATCH <- c(2L, NA_integer_, NA_integer_)
+  st4$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
+  st4$incl <- c(TRUE, TRUE, FALSE)
+
+  TreeDist:::.DoRemove(2, st4, DIST, sentDist, lightSide, TRUE)
+  expect_equal(st4$MATCH[1], 1L)
+})
+
+test_that(".AddBenefitVec() computes add benefits", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  lightSide <- c(3, 3, 3)
+  sentDist <- lightSide - 1L
+  counts <- c(2, 3, 1)
+  TD <- c(0.5, 0.3, 0.8)
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, 3)
+  st$MATCH2 <- rep(NA_integer_, 3)
+  st$incl <- rep(FALSE, 3)
+
+  ben_s <- TreeDist:::.AddBenefitVec(
+    c(1, 2, 3), st, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
+  expect_length(ben_s, 3)
+
+  ben_u <- TreeDist:::.AddBenefitVec(
+    c(1, 2, 3), st, DIST, sentDist, TD, counts, lightSide, scale = FALSE)
+  expect_length(ben_u, 3)
+})
+
+test_that(".RemoveBenefitVec() computes remove benefits", {
+  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
+  lightSide <- c(3, 3, 3)
+  sentDist <- lightSide - 1L
+  counts <- c(2, 3, 1)
+  TD <- c(0.5, 0.3, 0.8)
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- c(2L, 1L, 2L)
+  st$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
+  st$incl <- c(TRUE, TRUE, FALSE)
+
+  ben_s <- TreeDist:::.RemoveBenefitVec(
+    c(1, 2), st, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
+  expect_length(ben_s, 2)
+
+  ben_u <- TreeDist:::.RemoveBenefitVec(
+    c(1, 2), st, DIST, sentDist, TD, counts, lightSide, scale = FALSE)
+  expect_length(ben_u, 2)
+
+  # No affected splits (MATCH doesn't reference candidate)
+  st2 <- new.env(parent = emptyenv())
+  st2$MATCH <- c(1L, 1L, 1L)
+  st2$MATCH2 <- rep(NA_integer_, 3)
+  st2$incl <- c(TRUE, TRUE, FALSE)
+
+  ben_none <- TreeDist:::.RemoveBenefitVec(
+    c(2), st2, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
+  # Only TD contribution, no fn_cost (no affected splits)
+  expect_equal(ben_none, TD[2])
+})
+
+test_that(".PoolSplits() pools and deduplicates splits", {
+  trees <- as.phylo(0:4, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+
+  expect_true(is.matrix(pool$splits))
+  expect_true(is.matrix(pool$rawSplits))
+  expect_equal(ncol(pool$splits), length(tipLabels))
+  expect_equal(length(pool$counts), nrow(pool$splits))
+  expect_equal(length(pool$lightSide), nrow(pool$splits))
+  expect_equal(length(pool$treeMembers), length(trees))
+  expect_true(all(pool$counts >= 1))
+  expect_true(all(pool$lightSide >= 1))
+})
+
+test_that(".SplitsToPhylo() converts splits to tree or star", {
+  trees <- as.phylo(0:2, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+
+  # Some splits included
+  included <- rep(FALSE, nrow(pool$rawSplits))
+  included[1:min(2, nrow(pool$rawSplits))] <- TRUE
+  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, included, tipLabels, 8)
+  expect_s3_class(tr, "phylo")
+  expect_equal(sort(TipLabels(tr)), sort(tipLabels))
+
+  # No splits → star tree
+  tr_star <- TreeDist:::.SplitsToPhylo(
+    pool$rawSplits, rep(FALSE, nrow(pool$rawSplits)), tipLabels, 8)
+  expect_s3_class(tr_star, "phylo")
+  expect_equal(NSplits(tr_star), 0)
+})
+
+test_that(".InitMatches() initializes match state from included splits", {
+  # DIST: included = {1,2}; split 3 close to split 2 (dist 1 < thresh 2)
+  DIST <- matrix(c(
+    0, 1, 3, 5,
+    1, 0, 1, 4,
+    3, 1, 0, 1,
+    5, 4, 1, 0
+  ), 4, 4, byrow = TRUE)
+  lightSide <- c(3, 3, 3, 3)
+  sentDist <- lightSide - 1L  # all 2
+
+  # Scaled mode
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, 4)
+  st$MATCH2 <- rep(NA_integer_, 4)
+  st$incl <- c(TRUE, TRUE, FALSE, FALSE)
+
+  TreeDist:::.InitMatches(st, DIST, sentDist, lightSide, scale = TRUE)
+  # Split 1: self-match (dist 0), second = split 2 (dist 1)
+  expect_equal(st$MATCH[1], 1)
+  expect_equal(st$MATCH2[1], 2)
+  # Split 3: best among included is split 2 (dist 1), 1/2 < 1 → matched
+  expect_equal(st$MATCH[3], 2)
+  # Split 4: best among included is split 2 (dist 4), 4/2 >= 1 → skipped
+  expect_true(is.na(st$MATCH[4]))
+
+  # Unscaled mode
+  st2 <- new.env(parent = emptyenv())
+  st2$MATCH <- rep(NA_integer_, 4)
+  st2$MATCH2 <- rep(NA_integer_, 4)
+  st2$incl <- c(TRUE, TRUE, FALSE, FALSE)
+
+  TreeDist:::.InitMatches(st2, DIST, sentDist, lightSide, scale = FALSE)
+  expect_equal(st2$MATCH[1], 1)
+  expect_equal(st2$MATCH[3], 2)
+
+  # Empty included set → no-op
+  st3 <- new.env(parent = emptyenv())
+  st3$MATCH <- rep(NA_integer_, 4)
+  st3$MATCH2 <- rep(NA_integer_, 4)
+  st3$incl <- rep(FALSE, 4)
+
+  TreeDist:::.InitMatches(st3, DIST, sentDist, lightSide, scale = TRUE)
+  expect_true(all(is.na(st3$MATCH)))
+})
+
+
+# =========================================================================
+# Full R-level greedy pipeline integration tests
+# =========================================================================
+
+test_that("R-level GreedyBest pipeline produces valid consensus (scaled)", {
+  trees <- as.phylo(0:9, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+  nSplits <- nrow(pool$splits)
+  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
+  sentDist <- pool$lightSide - 1L
+  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
+                               nTree, scale = TRUE)
+  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
+  sortOrd <- order(-pool$counts, seq_len(nSplits))
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, nSplits)
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+  st$incl <- rep(FALSE, nSplits)
+
+  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
+                          compat, sortOrd, scale = TRUE, nSplits, nTip)
+  expect_true(any(st$incl))
+
+  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
+  expect_s3_class(tr, "phylo")
+})
+
+test_that("R-level GreedyFirst pipeline produces valid consensus (unscaled)", {
+  trees <- as.phylo(0:9, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+  nSplits <- nrow(pool$splits)
+  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
+  sentDist <- pool$lightSide - 1L
+  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
+                               nTree, scale = FALSE)
+  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
+  sortOrd <- order(-pool$counts, seq_len(nSplits))
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, nSplits)
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+  st$incl <- rep(FALSE, nSplits)
+
+  TreeDist:::.GreedyFirst(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
+                           compat, sortOrd, scale = FALSE, nSplits, nTip)
+  expect_true(any(st$incl))
+
+  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
+  expect_s3_class(tr, "phylo")
+})
+
+test_that("R-level GreedyBest with majority init exercises InitMatches", {
+  trees <- as.phylo(0:14, nTip = 10)
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+  nSplits <- nrow(pool$splits)
+  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
+  sentDist <- pool$lightSide - 1L
+  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
+                               nTree, scale = TRUE)
+  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
+  sortOrd <- order(-pool$counts, seq_len(nSplits))
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, nSplits)
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+
+  # Majority init: include splits present in > half of trees
+  st$incl <- pool$counts > (nTree / 2)
+  if (any(st$incl)) {
+    TreeDist:::.InitMatches(st, DIST, sentDist, pool$lightSide, scale = TRUE)
+  }
+
+  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
+                          compat, sortOrd, scale = TRUE, nSplits, nTip)
+
+  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
+  expect_s3_class(tr, "phylo")
+})
+
+test_that("R-level GreedyFirst with majority init (unscaled)", {
+  trees <- as.phylo(0:14, nTip = 10)
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+  nSplits <- nrow(pool$splits)
+  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
+  sentDist <- pool$lightSide - 1L
+  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
+                               nTree, scale = FALSE)
+  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
+  sortOrd <- order(-pool$counts, seq_len(nSplits))
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, nSplits)
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+  st$incl <- pool$counts > (nTree / 2)
+  if (any(st$incl)) {
+    TreeDist:::.InitMatches(st, DIST, sentDist, pool$lightSide, scale = FALSE)
+  }
+
+  TreeDist:::.GreedyFirst(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
+                           compat, sortOrd, scale = FALSE, nSplits, nTip)
+
+  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
+  expect_s3_class(tr, "phylo")
+})
+
+test_that("R greedy pipeline matches C++ consensus", {
+  trees <- as.phylo(0:9, nTip = 8)
+  tipLabels <- TipLabels(trees[[1]])
+  nTip <- length(tipLabels)
+  nTree <- length(trees)
+
+  # C++ result
+  tc_cpp <- TransferConsensus(trees, greedy = "best", scale = TRUE)
+
+  # R-level result
+  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
+  nSplits <- nrow(pool$splits)
+  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
+  sentDist <- pool$lightSide - 1L
+  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
+                               nTree, scale = TRUE)
+  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
+  sortOrd <- order(-pool$counts, seq_len(nSplits))
+
+  st <- new.env(parent = emptyenv())
+  st$MATCH <- rep(NA_integer_, nSplits)
+  st$MATCH2 <- rep(NA_integer_, nSplits)
+  st$incl <- rep(FALSE, nSplits)
+
+  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
+                          compat, sortOrd, scale = TRUE, nSplits, nTip)
+
+  tc_r <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
+
+  # Both should produce resolved trees with same tip labels
+  expect_s3_class(tc_r, "phylo")
+  expect_equal(sort(TipLabels(tc_r)), sort(TipLabels(tc_cpp)))
+  # Resolution should be similar (not necessarily identical due to
+  # possible differences in canonicalization/sort-order between R and C++)
+  expect_gte(NSplits(tc_r), 1)
+})
diff --git a/tests/testthat/test-tree_distance_transfer.R b/tests/testthat/test-tree_distance_transfer.R
index 649192c11..7186f665e 100644
--- a/tests/testthat/test-tree_distance_transfer.R
+++ b/tests/testthat/test-tree_distance_transfer.R
@@ -27,14 +27,14 @@ test_that("TransferDist() scaled is bounded by RF", {
 test_that("TransferDist() normalization", {
   bal8 <- TreeTools::BalancedTree(8)
   pec8 <- TreeTools::PectinateTree(8)
-  
+
   # Scaled normalization denominator = 2 * (n - 3) = 10
   td_raw <- TransferDist(bal8, pec8)
   td_norm <- TransferDist(bal8, pec8, normalize = TRUE)
   expect_equal(td_norm, td_raw / (2 * (8 - 3)))
   expect_true(td_norm >= 0)
   expect_true(td_norm <= 1)
-  
+
   # Unscaled normalization
   td_raw_u <- TransferDist(bal8, pec8, scale = FALSE)
   td_norm_u <- TransferDist(bal8, pec8, scale = FALSE, normalize = TRUE)
@@ -45,12 +45,12 @@ test_that("TransferDist() normalization", {
 
 test_that("TransferDist() all-pairs", {
   trees <- as.phylo(0:5, 8)
-  
+
   # All-pairs
   d <- TransferDist(trees)
   expect_s3_class(d, "dist")
   expect_equal(attr(d, "Size"), 6L)
-  
+
   # Check diagonal = 0 implicitly (dist objects don't store diagonal)
   # Check that batch matches pairwise
   dm <- as.matrix(d)
@@ -67,10 +67,10 @@ test_that("TransferDist() all-pairs", {
 test_that("TransferDist() cross-pairs", {
   trees1 <- as.phylo(0:2, 8)
   trees2 <- as.phylo(3:5, 8)
-  
+
   mat <- TransferDist(trees1, trees2)
   expect_equal(dim(mat), c(3, 3))
-  
+
   # Check matches pairwise
   for (i in 1:3) {
     for (j in 1:3) {
@@ -85,13 +85,13 @@ test_that("TransferDist() cross-pairs", {
 test_that("TransferDist() single tree vs list", {
   tr <- TreeTools::BalancedTree(8)
   trees <- as.phylo(0:3, 8)
-  
+
   d1 <- TransferDist(tr, trees)
   d2 <- TransferDist(trees, tr)
-  
+
   expect_length(d1, 4)
   expect_length(d2, 4)
-  
+
   for (i in seq_along(trees)) {
     expected <- TransferDist(tr, trees[[i]])
     expect_equal(d1[i], expected, tolerance = 1e-12)
@@ -103,13 +103,13 @@ test_that("TransferDist() single tree vs list", {
 test_that("TransferDist() scaled vs unscaled relationship", {
   bal8 <- TreeTools::BalancedTree(8)
   pec8 <- TreeTools::PectinateTree(8)
-  
+
   # Both should be non-negative
   td_s <- TransferDist(bal8, pec8, scale = TRUE)
   td_u <- TransferDist(bal8, pec8, scale = FALSE)
   expect_true(td_s >= 0)
   expect_true(td_u >= 0)
-  
+
   # For identical trees, both = 0
   expect_equal(0, TransferDist(bal8, bal8, scale = TRUE))
   expect_equal(0, TransferDist(bal8, bal8, scale = FALSE))
@@ -119,13 +119,13 @@ test_that("TransferDist() scaled vs unscaled relationship", {
 test_that("TransferDist() reportMatching", {
   bal8 <- TreeTools::BalancedTree(8)
   pec8 <- TreeTools::PectinateTree(8)
-  
+
   res <- TransferDist(bal8, pec8, reportMatching = TRUE)
   expect_equal(res[[1]], TransferDist(bal8, pec8))
-  
+
   matching <- attr(res, "matching")
   expect_true(!is.null(matching))
-  
+
   pairScores <- attr(res, "pairScores")
   expect_true(!is.null(pairScores))
   expect_equal(nrow(pairScores), TreeTools::NSplits(bal8))
@@ -136,11 +136,11 @@ test_that("TransferDist() reportMatching", {
 test_that("TransferDist() handles star trees", {
   star <- TreeTools::StarTree(8)
   bal8 <- TreeTools::BalancedTree(8)
-  
+
   # Star tree has no internal splits; dissimilarity = sum over binary tree only
   td <- TransferDist(bal8, star)
   expect_true(td >= 0)
-  
+
   # Star vs star = 0
   expect_equal(0, TransferDist(star, star))
 })
@@ -150,7 +150,7 @@ test_that("TransferDist() handles small trees", {
   # 4 tips: minimal non-trivial case
   tr1 <- TreeTools::BalancedTree(4)
   tr2 <- TreeTools::PectinateTree(4)
-  
+
   td <- TransferDist(tr1, tr2)
   expect_true(is.finite(td))
   expect_true(td >= 0)
@@ -159,16 +159,336 @@ test_that("TransferDist() handles small trees", {
 
 test_that("TransferDist() consistent with TransferConsensus internals", {
   skip_if_not_installed("TreeTools")
-  
+
   # The transfer dissimilarity for a single pair should be consistent with
   # how TransferConsensus computes distances internally
   set.seed(6419)
   trees <- structure(lapply(1:10, function(i) ape::rtree(12)),
                      class = "multiPhylo")
-  
+
   # All-pairs: check symmetry and non-negativity
   d <- TransferDist(trees)
   dm <- as.matrix(d)
   expect_true(all(dm >= -1e-12))
   expect_true(isSymmetric(dm, tol = 1e-12))
 })
+
+
+# =========================================================================
+# Additional coverage: TransferDistance alias
+# =========================================================================
+
+test_that("TransferDistance() is an alias for TransferDist()", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  expect_equal(TransferDistance(bal8, pec8), TransferDist(bal8, pec8))
+  expect_equal(TransferDistance(bal8, pec8, scale = FALSE),
+               TransferDist(bal8, pec8, scale = FALSE))
+})
+
+
+# =========================================================================
+# cpp_transfer_dist() direct (both scaled + unscaled + matching)
+# =========================================================================
+
+test_that("cpp_transfer_dist() returns both scores and matchings", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+
+  sp1 <- unclass(as.Splits(bal8, tipLabels))
+  sp2 <- unclass(as.Splits(pec8, tipLabels))
+
+  res <- TreeDist:::cpp_transfer_dist(sp1, sp2, 8L)
+
+  expect_true(res$score_scaled >= 0)
+  expect_true(res$score_unscaled >= 0)
+  expect_length(res$matching_xy, nrow(sp1))
+  expect_length(res$matching_yx, nrow(sp2))
+
+  # Consistent with TransferDist
+  expect_equal(res$score_scaled,
+               TransferDist(bal8, pec8, scale = TRUE), tolerance = 1e-12)
+  expect_equal(res$score_unscaled,
+               TransferDist(bal8, pec8, scale = FALSE), tolerance = 1e-12)
+
+  # Identical trees → distance 0, all matchings defined
+  res_id <- TreeDist:::cpp_transfer_dist(sp1, sp1, 8L)
+  expect_equal(res_id$score_scaled, 0)
+  expect_equal(res_id$score_unscaled, 0)
+})
+
+test_that("cpp_transfer_dist() handles star trees", {
+  star <- TreeTools::StarTree(8)
+  bal8 <- TreeTools::BalancedTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+
+  sp_star <- unclass(as.Splits(star, tipLabels))
+  sp_bal <- unclass(as.Splits(bal8, tipLabels))
+
+  # Star vs binary: only one direction contributes
+  res <- TreeDist:::cpp_transfer_dist(sp_bal, sp_star, 8L)
+  expect_true(res$score_scaled >= 0)
+  expect_true(res$score_unscaled >= 0)
+  expect_length(res$matching_xy, nrow(sp_bal))
+  # matching_yx has 0 elements for star tree
+  expect_length(res$matching_yx, 0)
+
+  # Star vs star: both scores = 0
+  res_ss <- TreeDist:::cpp_transfer_dist(sp_star, sp_star, 8L)
+  expect_equal(res_ss$score_scaled, 0)
+  expect_equal(res_ss$score_unscaled, 0)
+})
+
+test_that("cpp_transfer_dist() errors on column mismatch", {
+  sp1 <- matrix(as.raw(0), nrow = 1, ncol = 1)
+  sp2 <- matrix(as.raw(0), nrow = 1, ncol = 2)
+  expect_error(
+    TreeDist:::cpp_transfer_dist(sp1, sp2, 8L),
+    "same number of tips"
+  )
+})
+
+
+# =========================================================================
+# cpp_transfer_dist_scored() edge cases
+# =========================================================================
+
+test_that("cpp_transfer_dist_scored() errors on column mismatch", {
+  sp1 <- matrix(as.raw(0), nrow = 1, ncol = 1)
+  sp2 <- matrix(as.raw(0), nrow = 1, ncol = 2)
+  expect_error(
+    TreeDist:::cpp_transfer_dist_scored(sp1, sp2, 8L, TRUE),
+    "same number of tips"
+  )
+})
+
+test_that("cpp_transfer_dist_scored() with asymmetric split counts", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+
+  sp_bal <- unclass(as.Splits(bal8, tipLabels))
+  sp_pec <- unclass(as.Splits(pec8, tipLabels))
+
+  # BalancedTree(8) has fewer splits than PectinateTree(8)
+  res <- TreeDist:::cpp_transfer_dist_scored(sp_bal, sp_pec, 8L, TRUE)
+  expect_true(res$score >= 0)
+  # matching length = max(nSplits1, nSplits2)
+  expect_equal(length(res$matching),
+               max(nrow(sp_bal), nrow(sp_pec)))
+})
+
+
+# =========================================================================
+# Normalization edge cases
+# =========================================================================
+
+test_that(".TransferNormDenom() for odd and even nTip", {
+  # Even nTip = 8: (64 - 16 + 4) / 4 = 13
+  expect_equal(TreeDist:::.TransferNormDenom(8, FALSE), 13)
+  # Odd nTip = 9: (81 - 18 + 5) / 4 = 17
+  expect_equal(TreeDist:::.TransferNormDenom(9, FALSE), 17)
+  # Scaled: 2 * (n - 3)
+  expect_equal(TreeDist:::.TransferNormDenom(8, TRUE), 10)
+  expect_equal(TreeDist:::.TransferNormDenom(9, TRUE), 12)
+})
+
+test_that("TransferDist() normalize with odd nTip", {
+  trees <- as.phylo(0:3, nTip = 9)
+  # Scaled normalization
+  d <- TransferDist(trees, normalize = TRUE)
+  d_raw <- TransferDist(trees, normalize = FALSE)
+  denom <- 2 * (9 - 3)
+  expect_equal(as.numeric(d), as.numeric(d_raw) / denom, tolerance = 1e-12)
+
+  # Unscaled normalization with odd nTip
+  d_u <- TransferDist(trees, scale = FALSE, normalize = TRUE)
+  d_u_raw <- TransferDist(trees, scale = FALSE, normalize = FALSE)
+  denom_u <- (81 - 18 + 5) / 4
+  expect_equal(as.numeric(d_u), as.numeric(d_u_raw) / denom_u, tolerance = 1e-12)
+})
+
+test_that("TransferDist() normalize with cross-pairs", {
+  trees1 <- as.phylo(0:2, 8)
+  trees2 <- as.phylo(3:5, 8)
+
+  mat_raw <- TransferDist(trees1, trees2)
+  mat_norm <- TransferDist(trees1, trees2, normalize = TRUE)
+  denom <- 2 * (8 - 3)
+  expect_equal(mat_norm, mat_raw / denom, tolerance = 1e-12)
+
+  # Unscaled cross-pairs normalize
+  mat_raw_u <- TransferDist(trees1, trees2, scale = FALSE)
+  mat_norm_u <- TransferDist(trees1, trees2, scale = FALSE, normalize = TRUE)
+  denom_u <- 13
+  expect_equal(mat_norm_u, mat_raw_u / denom_u, tolerance = 1e-12)
+})
+
+test_that("TransferDist() normalize via generic fallback (reportMatching)", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+
+  # reportMatching forces generic path; normalize divides after
+  res <- TransferDist(bal8, pec8, normalize = TRUE, reportMatching = TRUE)
+  raw <- TransferDist(bal8, pec8, reportMatching = TRUE)
+  denom <- 2 * (8 - 3)
+  expect_equal(res[[1]], raw[[1]] / denom, tolerance = 1e-12)
+
+  # Unscaled + normalize + reportMatching
+  res_u <- TransferDist(bal8, pec8, scale = FALSE,
+                         normalize = TRUE, reportMatching = TRUE)
+  raw_u <- TransferDist(bal8, pec8, scale = FALSE, reportMatching = TRUE)
+  expect_equal(res_u[[1]], raw_u[[1]] / 13, tolerance = 1e-12)
+})
+
+
+# =========================================================================
+# Fast path edge cases
+# =========================================================================
+
+test_that(".TransferDistAllPairs() returns NULL for edge cases", {
+  # Single phylo (not multiPhylo)
+  tr <- TreeTools::BalancedTree(8)
+  expect_null(TreeDist:::.TransferDistAllPairs(tr, TRUE))
+
+  # < 2 trees
+  expect_null(TreeDist:::.TransferDistAllPairs(
+    structure(list(tr), class = "multiPhylo"), TRUE))
+
+  # < 4 tips
+  small <- as.phylo(0:2, nTip = 3)
+  expect_null(TreeDist:::.TransferDistAllPairs(small, TRUE))
+
+  # Mismatched tip labels
+  t1 <- TreeTools::BalancedTree(paste0("a", 1:6))
+  t2 <- TreeTools::BalancedTree(paste0("b", 1:6))
+  mixed <- structure(list(t1, t2), class = "multiPhylo")
+  expect_null(TreeDist:::.TransferDistAllPairs(mixed, TRUE))
+})
+
+test_that(".TransferDistCrossPairs() returns NULL for edge cases", {
+  tr1 <- TreeTools::BalancedTree(8)
+  tr2 <- TreeTools::PectinateTree(8)
+
+  # Both single → delegate to generic
+  expect_null(TreeDist:::.TransferDistCrossPairs(tr1, tr2, TRUE))
+
+  # < 4 tips
+  small1 <- as.phylo(0:1, nTip = 3)
+  small2 <- as.phylo(2:3, nTip = 3)
+  expect_null(TreeDist:::.TransferDistCrossPairs(small1, small2, TRUE))
+
+  # Mismatched tip labels
+  t1 <- structure(list(TreeTools::BalancedTree(paste0("a", 1:6))),
+                  class = "multiPhylo")
+  t2 <- structure(list(TreeTools::BalancedTree(paste0("b", 1:6))),
+                  class = "multiPhylo")
+  expect_null(TreeDist:::.TransferDistCrossPairs(t1, t2, TRUE))
+})
+
+test_that("TransferDist() all-pairs with star trees in mix", {
+  star <- TreeTools::StarTree(8)
+  bal <- TreeTools::BalancedTree(8)
+  pec <- TreeTools::PectinateTree(8)
+  trees <- structure(list(star, bal, pec), class = "multiPhylo")
+
+  d <- TransferDist(trees)
+  dm <- as.matrix(d)
+  expect_equal(dm[1, 1], 0)  # star self
+  expect_true(dm[1, 2] >= 0) # star vs binary
+  expect_equal(dm[2, 3], TransferDist(bal, pec), tolerance = 1e-12)
+})
+
+test_that("TransferDist() all-pairs with exactly 2 trees", {
+  trees <- as.phylo(0:1, nTip = 8)
+  d <- TransferDist(trees)
+  expect_s3_class(d, "dist")
+  expect_equal(attr(d, "Size"), 2L)
+  expect_equal(as.numeric(d),
+               TransferDist(trees[[1]], trees[[2]]), tolerance = 1e-12)
+})
+
+test_that("TransferDist() all-pairs with exactly 4 tips", {
+  trees <- as.phylo(0:3, nTip = 4)
+  d <- TransferDist(trees)
+  expect_s3_class(d, "dist")
+  dm <- as.matrix(d)
+  expect_true(all(dm >= -1e-12))
+})
+
+
+# =========================================================================
+# TransferDistSplits edge cases
+# =========================================================================
+
+test_that("TransferDistSplits() reportMatching with various configurations", {
+  bal8 <- TreeTools::BalancedTree(8)
+  pec8 <- TreeTools::PectinateTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+
+  sp_bal <- as.Splits(bal8, tipLabels)
+  sp_pec <- as.Splits(pec8, tipLabels)
+
+  # Unscaled reportMatching
+  res <- TransferDistSplits(sp_bal, sp_pec, scale = FALSE,
+                             reportMatching = TRUE)
+  expect_true(is.numeric(res))
+  expect_true(!is.null(attr(res, "matching")))
+  expect_true(!is.null(attr(res, "pairScores")))
+  expect_true(!is.null(attr(res, "matchedScores")))
+
+  # Scaled reportMatching (already tested above, but confirm matchedSplits)
+  res_s <- TransferDistSplits(sp_bal, sp_pec, scale = TRUE,
+                               reportMatching = TRUE)
+  expect_true(!is.null(attr(res_s, "matchedSplits")))
+})
+
+test_that("TransferDistSplits() with asymmetric nSplits (nSplits1 < nSplits2)", {
+  # Use majority-rule consensus (fewer splits) vs fully resolved tree
+  trees <- as.phylo(0:9, 8)
+  mr <- Consensus(trees, p = 0.5)
+  bal8 <- TreeTools::BalancedTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+
+  sp_mr <- as.Splits(mr, tipLabels)
+  sp_bal <- as.Splits(bal8, tipLabels)
+
+  # Confirm asymmetry
+  expect_lt(nrow(sp_mr), nrow(sp_bal))
+
+  res <- TransferDistSplits(sp_mr, sp_bal, reportMatching = TRUE)
+  # Matching should be truncated to nSplits1
+  matching <- attr(res, "matching")
+  expect_equal(length(matching), nrow(sp_mr))
+  expect_true(!is.null(attr(res, "pairScores")))
+})
+
+test_that("TransferDist() for 3-tip trees uses fallback path", {
+  # 3 tips: fast path returns NULL, falls through to generic
+  tr1 <- TreeTools::BalancedTree(3)
+  tr2 <- TreeTools::PectinateTree(3)
+  td <- TransferDist(tr1, tr2)
+  expect_true(is.finite(td))
+  expect_true(td >= 0)
+})
+
+test_that("TransferDist() with named tree list preserves labels", {
+  trees <- as.phylo(0:3, nTip = 8)
+  names(trees) <- paste0("tree_", 1:4)
+
+  d <- TransferDist(trees)
+  expect_equal(attr(d, "Labels"), paste0("tree_", 1:4))
+})
+
+test_that("TransferDist() cross-pairs with single Splits input", {
+  bal8 <- TreeTools::BalancedTree(8)
+  tipLabels <- TreeTools::TipLabels(bal8)
+  sp <- as.Splits(bal8, tipLabels)
+  trees <- as.phylo(0:3, nTip = 8)
+
+  # Single Splits object vs list of trees
+  result <- TransferDist(sp, trees)
+  expect_length(result, 4)
+  expect_true(all(result >= 0))
+})

From de1b1028f4f737b02c3a26e00b0c91463dd135f2 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 14:03:07 +0000
Subject: [PATCH 08/17] Guard errors for ARM

---
 R/hierarchical_mutual_information.R      |  4 ++
 R/tree_distance.R                        |  3 ++
 R/tree_distance_mast.R                   |  6 +++
 R/tree_distance_nni.R                    |  7 +++
 R/tree_distance_spr.R                    |  8 ++++
 R/tree_distance_transfer.R               |  4 +-
 R/tree_distance_utilities.R              | 19 +++++++-
 src/day_1985.cpp                         |  8 ++--
 src/hmi.cpp                              |  6 +--
 src/hpart.cpp                            |  4 +-
 src/hpart_relabel.cpp                    |  2 +-
 src/information.h                        |  4 +-
 src/mast.cpp                             |  4 +-
 src/nni_distance.cpp                     | 13 +++---
 src/spr.cpp                              | 18 ++++----
 src/spr_lookup.cpp                       |  2 +-
 src/transfer_consensus.cpp               | 19 +++-----
 src/transfer_distance.cpp                |  4 +-
 src/tree_distances.cpp                   | 19 ++++----
 tests/testthat/test-transfer_consensus.R | 55 ++++++++++++++++++++++++
 20 files changed, 153 insertions(+), 56 deletions(-)

diff --git a/R/hierarchical_mutual_information.R b/R/hierarchical_mutual_information.R
index 06948acd3..7e74ddd75 100644
--- a/R/hierarchical_mutual_information.R
+++ b/R/hierarchical_mutual_information.R
@@ -132,6 +132,8 @@ HH <- SelfHMI
 #' @rdname HierarchicalMutualInfo
 #' @export
 EHMI <- function(tree1, tree2, precision = 0.01, minResample = 36) {
+  if (minResample < 2L) stop("Must perform at least one resampling")
+  if (precision < 1e-8) stop("Tolerance too low")
   EHMI_xptr(as.HPart(tree1), as.HPart(tree2), as.numeric(precision),
                 as.integer(minResample)) / log(2)
 }
@@ -162,6 +164,8 @@ EHMI <- function(tree1, tree2, precision = 0.01, minResample = 36) {
 #' @rdname HierarchicalMutualInfo
 #' @export
 AHMI <- function(tree1, tree2, Mean = max, precision = 0.01, minResample = 36) {
+  if (minResample < 2L) stop("Must perform at least one resampling")
+  if (precision < 1e-8) stop("Tolerance too low")
   hp1 <- as.HPart(tree1)
   hp2 <- as.HPart(tree2, hp1)
   
diff --git a/R/tree_distance.R b/R/tree_distance.R
index 466de37eb..11bf9e913 100644
--- a/R/tree_distance.R
+++ b/R/tree_distance.R
@@ -35,6 +35,7 @@
 #' @export
 GeneralizedRF <- function(splits1, splits2, nTip, PairScorer, 
                            maximize, reportMatching, ...) {
+  .ValidateSplitArgs(splits1, splits2, nTip)
   nSplits1 <- dim(splits1)[[1]]
   nSplits2 <- dim(splits2)[[1]]
   
@@ -148,6 +149,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,
   }
   nTip <- length(tipLabels)
   if (nTip < 4) return(NULL) # nocov
+  if (nTip > 32767L) stop("This many tips are not (yet) supported.")
   
   splits_list <- as.Splits(tree1, tipLabels = tipLabels)
   n_threads <- as.integer(getOption("mc.cores", 1L))
@@ -201,6 +203,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,
   
   nTip <- length(tipLabels1)
   if (nTip < 4) return(NULL)
+  if (nTip > 32767L) stop("This many tips are not (yet) supported.")
   
   splits1 <- as.Splits(tree1, tipLabels = tipLabels1)
   splits2 <- as.Splits(tree2, tipLabels = tipLabels1)  # Use tipLabels1 to ensure order consistency
diff --git a/R/tree_distance_mast.R b/R/tree_distance_mast.R
index 4cdd06e01..5c0698417 100644
--- a/R/tree_distance_mast.R
+++ b/R/tree_distance_mast.R
@@ -93,6 +93,12 @@ MASTSize <- function(tree1, tree2 = tree1, rooted = TRUE) {
 #' @param nTip Integer specifying the number of leaves in each split.
 #' @keywords internal
 .MASTSizeEdges <- function(edge1, edge2, nTip) {
+  if (nrow(edge1) != nrow(edge2)) {
+    stop("Both trees must contain the same number of edges.")
+  }
+  if (nTip > 4096L) {
+    stop("Tree too large; please contact maintainer for advice.")
+  }
   cpp_mast(edge1 - 1L, Postorder(edge2) - 1L, nTip)
 }
 
diff --git a/R/tree_distance_nni.R b/R/tree_distance_nni.R
index 7cce0a83c..458547647 100644
--- a/R/tree_distance_nni.R
+++ b/R/tree_distance_nni.R
@@ -73,6 +73,13 @@ NNIDist <- function(tree1, tree2 = tree1) {
 #' @importFrom TreeTools Postorder RenumberTips
 #' @importFrom ape Nnode.phylo
 .NNIDistSingle <- function(tree1, tree2, nTip, ...) {
+  if (nTip > 32768L) {
+    stop("Cannot calculate NNI distance for trees with so many tips.")
+  }
+  if (nrow(tree1[["edge"]]) != nrow(tree2[["edge"]])) {
+    stop("Both trees must have the same number of edges. ",
+         "Is one rooted and the other unrooted?")
+  }
   tree2 <- RenumberTips(tree2, as.character(tree1[["tip.label"]]))
   
   edge1 <- Postorder(tree1[["edge"]])
diff --git a/R/tree_distance_spr.R b/R/tree_distance_spr.R
index c3be63625..fa480542b 100644
--- a/R/tree_distance_spr.R
+++ b/R/tree_distance_spr.R
@@ -251,6 +251,9 @@ SPRDist.multiPhylo <- SPRDist.list
 }
 
 .SPRExact7 <- function(sp1, sp2) {
+  if (length(sp1) != 4L || length(sp2) != 4L) {
+    stop("Expected a length-4 raw vector of splits") # nocov
+  }
   spr_table_7(sp1, sp2)
 }
 
@@ -421,6 +424,11 @@ SPRDist.multiPhylo <- SPRDist.list
 #' @importFrom TreeTools DropTip TipsInSplits KeepTipPostorder
 #' @importFrom TreeTools edge_to_splits
 .SPRPairDeO <- function(tree1, tree2, check = TRUE) {
+  if (check) {
+    if (length(tree1[["tip.label"]]) != length(tree2[["tip.label"]])) {
+      stop("`tree1` and `tree2` must have the same number of tips.")
+    }
+  }
   moves <- 0
   
   # Reduce trees (Fig. 7A in deOliveira2008)
diff --git a/R/tree_distance_transfer.R b/R/tree_distance_transfer.R
index 43202d45d..cd0942fd5 100644
--- a/R/tree_distance_transfer.R
+++ b/R/tree_distance_transfer.R
@@ -104,7 +104,7 @@ TransferDistSplits <- function(splits1, splits2,
                                 nTip = attr(splits1, "nTip"),
                                 scale = TRUE,
                                 reportMatching = FALSE) {
-  
+  .ValidateSplitArgs(splits1, splits2, nTip)
   solution <- cpp_transfer_dist_scored(splits1, splits2,
                                         nTip = as.integer(nTip),
                                         scale = scale)
@@ -168,6 +168,7 @@ TransferDistSplits <- function(splits1, splits2,
   if (is.null(tipLabels)) return(NULL)
   nTip <- length(tipLabels)
   if (nTip < 4L) return(NULL)
+  if (nTip > 32767L) stop("This many tips are not (yet) supported.")
   
   # Check all trees share same tip set
   allLabels <- TipLabels(tree1)
@@ -210,6 +211,7 @@ TransferDistSplits <- function(splits1, splits2,
   if (is.null(tipLabels)) return(NULL)
   nTip <- length(tipLabels)
   if (nTip < 4L) return(NULL)
+  if (nTip > 32767L) stop("This many tips are not (yet) supported.")
   
   # Check all trees share same tip set
   allLabels1 <- TipLabels(trees1)
diff --git a/R/tree_distance_utilities.R b/R/tree_distance_utilities.R
index 0ad954def..561174bfe 100644
--- a/R/tree_distance_utilities.R
+++ b/R/tree_distance_utilities.R
@@ -132,6 +132,9 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
   # Fast paths: use OpenMP batch functions when all trees share the same tip
   # set and no R-level cluster has been configured.  Each branch mirrors the
   # generic path exactly but avoids per-pair R overhead.
+  if (!is.na(nTip) && nTip > 32767L) {
+    stop("This many tips are not (yet) supported.")
+  }
   if (!is.na(nTip) && is.null(cluster)) {
     .n_threads <- as.integer(getOption("mc.cores", 1L))
     .batch_result <- if (identical(Func, MutualClusteringInfoSplits)) {
@@ -232,7 +235,9 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
 #' @importFrom stats setNames
 .SplitDistanceManyMany <- function(Func, splits1, splits2, 
                                    tipLabels, nTip = length(tipLabels), ...) {
-  
+  if (!is.na(nTip) && nTip > 32767L) {
+    stop("This many tips are not (yet) supported.")
+  }
   if (is.na(nTip)) {
     tipLabels <- union(unlist(tipLabels, use.names = FALSE),
                        unlist(TipLabels(splits2), use.names = FALSE))
@@ -396,6 +401,18 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
   }
 }
 
+# Validate split-matrix arguments before passing to C++.
+# On ARM, Rcpp::stop() can call std::terminate() instead of returning
+# a proper R error, so all expected-input validation must happen in R.
+.ValidateSplitArgs <- function(x, y, nTip) {
+  if (ncol(x) != ncol(y)) {
+    stop("Input splits must address same number of tips.")
+  }
+  if (nTip > 32767L) {
+    stop("This many tips are not (yet) supported.")
+  }
+}
+
 .CheckLabelsSame <- function(labelList) {
   nTip <- unique(lengths(labelList))
   if (length(nTip) != 1) {
diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index 2f449aaef..d192fbe31 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -96,7 +96,7 @@ double calc_consensus_info(const List &trees, const LogicalVector &phylo,
 
   std::vector<ClusterTable> tables;
   if (std::size_t(n_trees) > tables.max_size()) {
-    Rcpp::stop("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
+    Rf_error("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
   }
 
   tables.reserve(n_trees);
@@ -322,9 +322,9 @@ IntegerVector robinson_foulds_all_pairs(const List& tables) {
 double consensus_info(const List trees, const LogicalVector phylo,
                       const NumericVector p) {
   if (p[0] > 1 + 1e-15) { // epsilon catches floating point error
-    Rcpp::stop("p must be <= 1.0 in consensus_info()");
+    Rf_error("p must be <= 1.0 in consensus_info()");
   } else if (p[0] < 0.5) {
-    Rcpp::stop("p must be >= 0.5 in consensus_info()");
+    Rf_error("p must be >= 0.5 in consensus_info()");
   }
 
   // First, peek at the tree size to determine allocation strategy
@@ -343,7 +343,7 @@ double consensus_info(const List trees, const LogicalVector phylo,
       return calc_consensus_info(trees, phylo, p, S);
     }
   } catch(const std::exception& e) {
-    Rcpp::stop(e.what());
+    Rf_error("%s", e.what());
   }
   
   ASSERT(false && "Unreachable code in consensus_tree");
diff --git a/src/hmi.cpp b/src/hmi.cpp
index bb594a7f6..6c2dfbe96 100644
--- a/src/hmi.cpp
+++ b/src/hmi.cpp
@@ -140,7 +140,7 @@ double HMI_xptr(SEXP ptr1, SEXP ptr2) {
   Rcpp::XPtr<TreeDist::HPart> hp1(ptr1);
   Rcpp::XPtr<TreeDist::HPart> hp2(ptr2);
   if (hp1->nodes[hp1->root].n_tip != hp2->nodes[hp2->root].n_tip) {
-    Rcpp::stop("Trees must have the same number of leaves");
+    Rf_error("Trees must have the same number of leaves");
   }
   return TreeDist::hierarchical_mutual_info(hp1->nodes, hp1->root,
                                             hp2->nodes, hp2->root).second;
@@ -171,10 +171,10 @@ Rcpp::NumericVector EHMI_xptr(SEXP hp1_ptr, SEXP hp2_ptr,
                               int minResample = 36) {
   
   if (minResample < 2) {
-    Rcpp::stop("Must perform at least one resampling");
+    Rf_error("Must perform at least one resampling");
   }
   if (tolerance < 1e-8) {
-    Rcpp::stop("Tolerance too low");
+    Rf_error("Tolerance too low");
   }
   
   Rcpp::XPtr<TreeDist::HPart> hp1(hp1_ptr);
diff --git a/src/hpart.cpp b/src/hpart.cpp
index ffbfcaea3..909e1c60c 100644
--- a/src/hpart.cpp
+++ b/src/hpart.cpp
@@ -126,7 +126,7 @@ size_t build_node_from_list(const RObject& node,
   if (Rf_isInteger(node) || Rf_isReal(node)) {
     const IntegerVector leaf_vec(node);
     if (leaf_vec.size() != 1) {
-      Rcpp::stop("List must only contain integers, not vectors of integers");   // #nocov
+      Rf_error("List must only contain integers, not vectors of integers");   // #nocov
     }
     const int leaf_label = leaf_vec[0];         // 1-based R leaf label
     const size_t leaf_idx = leaf_label - 1;     // 0-based label for HNode
@@ -172,7 +172,7 @@ size_t build_node_from_list(const RObject& node,
   }
   
   // Invalid node type
-  Rcpp::stop("Invalid node type");                                              // #nocov
+  Rf_error("Invalid node type");                                              // #nocov
 }
 
 
diff --git a/src/hpart_relabel.cpp b/src/hpart_relabel.cpp
index 6f7906ef9..d90f8b86a 100644
--- a/src/hpart_relabel.cpp
+++ b/src/hpart_relabel.cpp
@@ -13,7 +13,7 @@ void recompute_bitsets_postorder(TreeDist::HPart &hpart, const size_t node_idx,
   if (node.children.empty()) {
     // Leaf node
     if (node.leaf_count != 1) {
-      Rcpp::stop("Leaf node has leaf_count != 1");                              // #nocov
+      Rf_error("Leaf node has leaf_count != 1");                              // #nocov
     }
     int new_index = mapping[node.label]; // mapping is 0-based
     node.label = new_index;
diff --git a/src/information.h b/src/information.h
index 2db2340b8..0ed328c83 100644
--- a/src/information.h
+++ b/src/information.h
@@ -49,7 +49,7 @@ __attribute__((constructor))
 inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rcpp::stop("This many leaves are not yet supported.");
+    Rf_error("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
@@ -75,7 +75,7 @@ inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
 inline double split_clust_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rcpp::stop("This many leaves are not yet supported.");
+    Rf_error("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
diff --git a/src/mast.cpp b/src/mast.cpp
index 23fb961ea..fac07db85 100644
--- a/src/mast.cpp
+++ b/src/mast.cpp
@@ -64,10 +64,10 @@ int cpp_mast (IntegerMatrix edge1, IntegerMatrix edge2, IntegerVector nTip) {
               n_all_nodes = n_tip + n_internal,
               n_edge = edge1.nrow();
   if (edge2.nrow() != n_edge) {
-    Rcpp::stop("Both trees must contain the same number of edges.");
+    Rf_error("Both trees must contain the same number of edges.");
   }
   if (n_tip > MAST_MAX_TIP) {
-    Rcpp::stop("Tree too large; please contact maintainer for advice.");
+    Rf_error("Tree too large; please contact maintainer for advice.");
   }
   
   int16
diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp
index ac5c17dcc..93866b0bc 100644
--- a/src/nni_distance.cpp
+++ b/src/nni_distance.cpp
@@ -227,13 +227,13 @@ grf_match nni_rf_matching (
     ASSERT(n_splits > 0);
     ASSERT(n_tips > 3);
     if (n_tips > NNI_MAX_TIPS) {
-      Rcpp::stop("Cannot calculate NNI distance for trees with so many tips.");
+      Rf_error("Cannot calculate NNI distance for trees with so many tips.");
     }
     
     // #nocov start
     if (static_cast<int64_t>(n_splits) * static_cast<int64_t>(n_bins) >
           static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
-      Rcpp::stop("Cannot calculate NNI distance for trees with so many splits.");
+      Rf_error("Cannot calculate NNI distance for trees with so many splits.");
     }
     // #nocov end
     
@@ -299,8 +299,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
                                const IntegerVector& nTip) {
   
   if (nTip[0] > NNI_MAX_TIPS) {
-    Rcpp::stop("Cannot calculate NNI distance for trees with "
-                            "so many tips.");
+    Rf_error("Cannot calculate NNI distance for trees with so many tips.");
   }
   const int32_t n_tip = static_cast<int32_t>(nTip[0]);
   const int32_t node_0 = n_tip;
@@ -317,8 +316,8 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   int32_t li_score_bound = 0;
   
   if (n_edge != int32_t(edge2.nrow())) {
-    Rcpp::stop("Both trees must have the same number of edges. "
-                            "Is one rooted and the other unrooted?");
+    Rf_error("Both trees must have the same number of edges. "
+             "Is one rooted and the other unrooted?");
   }
 
   if (n_tip < 4) {
@@ -349,7 +348,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   const int32_t n_splits = n_distinct_edge - n_tip;
   
   if (n_splits < 1) {
-    Rcpp::stop("NNI distance is undefined for trees with no splits"); // #nocov
+    Rf_error("NNI distance is undefined for trees with no splits"); // #nocov
   }
   
   std::unique_ptr<uint64_t[]> splits1(new uint64_t[n_splits * n_bin]);
diff --git a/src/spr.cpp b/src/spr.cpp
index 7eee66d6f..1c4177ec6 100644
--- a/src/spr.cpp
+++ b/src/spr.cpp
@@ -48,16 +48,16 @@ IntegerVector calc_mismatch_size(const RawMatrix& x, const RawMatrix& y) {
 // [[Rcpp::export]]
 IntegerVector mismatch_size (const RawMatrix& x, const RawMatrix& y) {
   if (x.rows() != y.rows()) {
-    throw std::invalid_argument("`x` and `y` differ in number of splits.");
+    Rf_error("`x` and `y` differ in number of splits.");
   }
   if (!x.hasAttribute("nTip")) {
-    Rcpp::stop("`x` lacks nTip attribute");
+    Rf_error("`x` lacks nTip attribute");
   }
   if (!y.hasAttribute("nTip")) {
-    Rcpp::stop("`y` lacks nTip attribute");
+    Rf_error("`y` lacks nTip attribute");
   }
   if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rcpp::stop("`x` and `y` differ in `nTip`");
+    Rf_error("`x` and `y` differ in `nTip`");
   }
   return calc_mismatch_size(x, y);
 }
@@ -103,23 +103,23 @@ IntegerVector calc_confusion(const RawMatrix &x, const RawMatrix &y) {
 // [[Rcpp::export]]
 IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) {
   if (x.rows() != y.rows()) {
-    throw std::invalid_argument("Input splits contain same number of splits.");
+    Rf_error("Input splits must contain same number of splits.");
   }
   if (!x.hasAttribute("nTip")) {
-    Rcpp::stop("`x` lacks nTip attribute");
+    Rf_error("`x` lacks nTip attribute");
   }
   if (!y.hasAttribute("nTip")) {
-    Rcpp::stop("`y` lacks nTip attribute");
+    Rf_error("`y` lacks nTip attribute");
   }
   if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rcpp::stop("`x` and `y` differ in `nTip`");
+    Rf_error("`x` and `y` differ in `nTip`");
   }
   return calc_confusion(x, y);
 }
 
 IntegerMatrix reverse (const IntegerMatrix x) {
   if (double(x.nrow()) > double(std::numeric_limits<intx>::max())) {
-    Rcpp::stop("This many edges are not (yet) supported.");
+    Rf_error("This many edges are not (yet) supported.");
   }
   const intx n_edge = intx(x.nrow());
   ASSERT(n_edge % 2 == 0); // Tree is binary
diff --git a/src/spr_lookup.cpp b/src/spr_lookup.cpp
index 9794f2489..013f55e3c 100644
--- a/src/spr_lookup.cpp
+++ b/src/spr_lookup.cpp
@@ -223,7 +223,7 @@ int lookup_7(const SplitSet7& sp1, const SplitSet7& sp2) {
 
 inline SplitSet7 read_splits(const Rcpp::RawVector& r) {
   if (r.size() != 4)
-    Rcpp::stop("Expected a length-4 raw vector of splits");
+    Rf_error("Expected a length-4 raw vector of splits");
   
   SplitSet7 sp{};
   for (int i = 0; i < 4; ++i) {
diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
index 299b54a71..b9dac5b75 100644
--- a/src/transfer_consensus.cpp
+++ b/src/transfer_consensus.cpp
@@ -364,16 +364,6 @@ static std::pair<int, int> find_second(
       best = c;
     }
   }
-  if (best >= 0) {
-    if (scale) {
-      if (p_minus_1 <= 0 ||
-          static_cast<double>(best_d) / p_minus_1 >= 1.0)
-        return {-1, p_minus_1};
-    } else {
-      if (best_d >= p_minus_1)
-        return {-1, p_minus_1};
-    }
-  }
   return {best, (best >= 0) ? best_d : p_minus_1};
 }
 
@@ -515,9 +505,12 @@ struct GreedyState {
         match_dist[b] = match2_dist[b];
 
         if (match[b] < 0) {
-          // Promoted value was sentinel — rescan from scratch to find
-          // the actual closest included split (find_second with
-          // matchIdx = -1 searches all included).
+          // Defensive guard: promoted match2 was the sentinel (no real
+          // second-closest split was known).  Rescan from scratch to find
+          // the actual closest included split.  This requires a topology
+          // where match2[b] stayed at -1 while another split's removal
+          // makes b's greedy match the removed split — theoretically
+          // reachable but extremely unlikely in practice.
           auto [first, first_d] = find_second(b, -1, incl, dist, M, pool, scale);
           match[b]      = first;
           match_dist[b] = first_d;
diff --git a/src/transfer_distance.cpp b/src/transfer_distance.cpp
index c2cf8f04a..23f0d9a46 100644
--- a/src/transfer_distance.cpp
+++ b/src/transfer_distance.cpp
@@ -146,7 +146,7 @@ List cpp_transfer_dist(
     const IntegerVector& nTip
 ) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
 
   const int n_tip = nTip[0];
@@ -205,7 +205,7 @@ List cpp_transfer_dist_scored(
     const IntegerVector& nTip, bool scale
 ) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
 
   const int n_tip = nTip[0];
diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp
index 6aaae498b..b40c1a40d 100644
--- a/src/tree_distances.cpp
+++ b/src/tree_distances.cpp
@@ -29,7 +29,7 @@ namespace TreeDist {
 
   void check_ntip(const double n) {
     if (n > static_cast<double>(std::numeric_limits<int16>::max())) {
-      Rcpp::stop("This many tips are not (yet) supported.");
+      Rf_error("This many tips are not (yet) supported."); // not Rcpp::stop: ARM safety
     }
   }
 
@@ -602,11 +602,14 @@ inline List shared_phylo (const RawMatrix &x, const RawMatrix &y,
                       _["matching"] = final_matching);
 }
 
+// Input validation: Rf_error (longjmp) instead of Rcpp::stop (C++ exception)
+// for ARM compatibility.  R callers should validate before reaching C++.
+
 // [[Rcpp::export]]
 List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y,
                                   const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   return robinson_foulds_distance(x, y, static_cast<int32>(nTip[0]));
 }
@@ -615,7 +618,7 @@ List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y,
 List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
                               const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -626,7 +629,7 @@ List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
 List cpp_matching_split_distance(const RawMatrix &x, const RawMatrix &y,
                                  const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -638,7 +641,7 @@ List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
                             const IntegerVector &nTip, const NumericVector &k,
                             const LogicalVector &allowConflict) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -649,7 +652,7 @@ List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
 List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -660,7 +663,7 @@ List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
 List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
                            const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -671,7 +674,7 @@ List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
 List cpp_shared_phylo(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
+    Rf_error("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
diff --git a/tests/testthat/test-transfer_consensus.R b/tests/testthat/test-transfer_consensus.R
index f1605a415..e038e0955 100644
--- a/tests/testthat/test-transfer_consensus.R
+++ b/tests/testthat/test-transfer_consensus.R
@@ -112,6 +112,61 @@ test_that("TransferConsensus covers all parameter combinations", {
   expect_s3_class(tc3, "phylo")
 })
 
+test_that("Greedy remove path fires with conflicting majority splits", {
+  # Two groups of trees with very different topologies: majority-init seeds
+  # splits from the larger group, some of which the greedy then removes.
+  t1 <- as.phylo(0, nTip = 12)
+  t2 <- as.phylo(100000, nTip = 12)
+  trees <- structure(c(rep(list(t1), 6), rep(list(t2), 5)),
+                     class = "multiPhylo")
+
+  # greedy="best" exercises do_remove + find_second in GreedyState
+  tc_best_s <- TransferConsensus(trees, greedy = "best", init = "majority",
+                                 scale = TRUE)
+  expect_s3_class(tc_best_s, "phylo")
+
+  tc_best_u <- TransferConsensus(trees, greedy = "best", init = "majority",
+                                 scale = FALSE)
+  expect_s3_class(tc_best_u, "phylo")
+
+  # greedy="first" exercises the same remove path via greedy_first
+
+  tc_first_s <- TransferConsensus(trees, greedy = "first", init = "majority",
+                                  scale = TRUE)
+  expect_s3_class(tc_first_s, "phylo")
+
+  tc_first_u <- TransferConsensus(trees, greedy = "first", init = "majority",
+                                  scale = FALSE)
+  expect_s3_class(tc_first_u, "phylo")
+})
+
+test_that("Greedy exercises diverse topologies (extra C++ path coverage)", {
+  # Random trees with many tips — sparse split overlap
+  set.seed(5123)
+  rand_trees <- structure(
+    lapply(1:30, function(i) TreeTools::RandomTree(10, root = TRUE)),
+    class = "multiPhylo")
+  # Diverse indexed trees
+  diverse_trees <- as.phylo(seq(0, 500, by = 25), nTip = 20)
+  # Three conflicting groups
+  mixed_trees <- structure(c(
+    rep(list(as.phylo(0, 8)), 5),
+    rep(list(as.phylo(300, 8)), 5),
+    rep(list(as.phylo(9999, 8)), 3)
+  ), class = "multiPhylo")
+
+  for (trees in list(rand_trees, diverse_trees, mixed_trees)) {
+    for (gr in c("best", "first")) {
+      for (init in c("empty", "majority")) {
+        for (sc in c(TRUE, FALSE)) {
+          tc <- TransferConsensus(trees, greedy = gr, init = init, scale = sc)
+          expect_s3_class(tc, "phylo")
+        }
+      }
+    }
+  }
+})
+
 test_that("cpp_tc_profile() runs without error", {
   trees <- as.phylo(0:4, nTip = 8)
   tipLabels <- TipLabels(trees[[1]])

From bf6eec7d66fe2e7f7a1980762fe36dc080953c29 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 15:08:20 +0000
Subject: [PATCH 09/17] #nocov

---
 src/transfer_consensus.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/transfer_consensus.cpp b/src/transfer_consensus.cpp
index b9dac5b75..514c64304 100644
--- a/src/transfer_consensus.cpp
+++ b/src/transfer_consensus.cpp
@@ -511,9 +511,9 @@ struct GreedyState {
           // where match2[b] stayed at -1 while another split's removal
           // makes b's greedy match the removed split — theoretically
           // reachable but extremely unlikely in practice.
-          auto [first, first_d] = find_second(b, -1, incl, dist, M, pool, scale);
-          match[b]      = first;
-          match_dist[b] = first_d;
+          auto [first, first_d] = find_second(b, -1, incl, dist, M, pool, scale); // #nocov
+          match[b]      = first;   // #nocov
+          match_dist[b] = first_d; // #nocov
         }
 
         // Find new second-closest

From b20e03e8ed60cbe2ee9db3d15116718eeb5416dd Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 16:07:13 +0000
Subject: [PATCH 10/17] =?UTF-8?q?Rf=5Ferror=20=E2=86=92Rcpp:stop()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/day_1985.cpp          | 36 +++++++++++++-----------------------
 src/hmi.cpp               |  6 +++---
 src/hpart.cpp             |  4 ++--
 src/hpart_relabel.cpp     |  2 +-
 src/information.h         |  4 ++--
 src/mast.cpp              |  4 ++--
 src/nni_distance.cpp      | 10 +++++-----
 src/spr.cpp               | 18 +++++++++---------
 src/spr_lookup.cpp        |  2 +-
 src/transfer_distance.cpp |  4 ++--
 10 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index d192fbe31..d4a6a76b9 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -96,7 +96,7 @@ double calc_consensus_info(const List &trees, const LogicalVector &phylo,
 
   std::vector<ClusterTable> tables;
   if (std::size_t(n_trees) > tables.max_size()) {
-    Rf_error("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
+    Rcpp::stop("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
   }
 
   tables.reserve(n_trees);
@@ -322,30 +322,20 @@ IntegerVector robinson_foulds_all_pairs(const List& tables) {
 double consensus_info(const List trees, const LogicalVector phylo,
                       const NumericVector p) {
   if (p[0] > 1 + 1e-15) { // epsilon catches floating point error
-    Rf_error("p must be <= 1.0 in consensus_info()");
+    Rcpp::stop("p must be <= 1.0 in consensus_info()");
   } else if (p[0] < 0.5) {
-    Rf_error("p must be >= 0.5 in consensus_info()");
+    Rcpp::stop("p must be >= 0.5 in consensus_info()");
   }
 
-  // First, peek at the tree size to determine allocation strategy
-  // We'll create a temporary ClusterTable just to check the size
-  try {
-    ClusterTable temp_table(Rcpp::List(trees(0)));
-    const int32 n_tip = temp_table.N();
-    
-    if (n_tip <= ct_stack_threshold) {
-      // Small tree: use stack-allocated array
-      std::array<StackEntry, ct_stack_threshold> S;
-      return calc_consensus_info(trees, phylo, p, S);
-    } else {
-      // Large tree: use heap-allocated vector
-      std::vector<StackEntry> S(n_tip);
-      return calc_consensus_info(trees, phylo, p, S);
-    }
-  } catch(const std::exception& e) {
-    Rf_error("%s", e.what());
+  // Peek at tree size to choose stack vs heap allocation for the work buffer
+  ClusterTable temp_table(Rcpp::List(trees(0)));
+  const int32 n_tip = temp_table.N();
+
+  if (n_tip <= ct_stack_threshold) {
+    std::array<StackEntry, ct_stack_threshold> S;
+    return calc_consensus_info(trees, phylo, p, S);
+  } else {
+    std::vector<StackEntry> S(n_tip);
+    return calc_consensus_info(trees, phylo, p, S);
   }
-  
-  ASSERT(false && "Unreachable code in consensus_tree");
-  return 0.0;
 }
diff --git a/src/hmi.cpp b/src/hmi.cpp
index 6c2dfbe96..bb594a7f6 100644
--- a/src/hmi.cpp
+++ b/src/hmi.cpp
@@ -140,7 +140,7 @@ double HMI_xptr(SEXP ptr1, SEXP ptr2) {
   Rcpp::XPtr<TreeDist::HPart> hp1(ptr1);
   Rcpp::XPtr<TreeDist::HPart> hp2(ptr2);
   if (hp1->nodes[hp1->root].n_tip != hp2->nodes[hp2->root].n_tip) {
-    Rf_error("Trees must have the same number of leaves");
+    Rcpp::stop("Trees must have the same number of leaves");
   }
   return TreeDist::hierarchical_mutual_info(hp1->nodes, hp1->root,
                                             hp2->nodes, hp2->root).second;
@@ -171,10 +171,10 @@ Rcpp::NumericVector EHMI_xptr(SEXP hp1_ptr, SEXP hp2_ptr,
                               int minResample = 36) {
   
   if (minResample < 2) {
-    Rf_error("Must perform at least one resampling");
+    Rcpp::stop("Must perform at least one resampling");
   }
   if (tolerance < 1e-8) {
-    Rf_error("Tolerance too low");
+    Rcpp::stop("Tolerance too low");
   }
   
   Rcpp::XPtr<TreeDist::HPart> hp1(hp1_ptr);
diff --git a/src/hpart.cpp b/src/hpart.cpp
index 909e1c60c..ffbfcaea3 100644
--- a/src/hpart.cpp
+++ b/src/hpart.cpp
@@ -126,7 +126,7 @@ size_t build_node_from_list(const RObject& node,
   if (Rf_isInteger(node) || Rf_isReal(node)) {
     const IntegerVector leaf_vec(node);
     if (leaf_vec.size() != 1) {
-      Rf_error("List must only contain integers, not vectors of integers");   // #nocov
+      Rcpp::stop("List must only contain integers, not vectors of integers");   // #nocov
     }
     const int leaf_label = leaf_vec[0];         // 1-based R leaf label
     const size_t leaf_idx = leaf_label - 1;     // 0-based label for HNode
@@ -172,7 +172,7 @@ size_t build_node_from_list(const RObject& node,
   }
   
   // Invalid node type
-  Rf_error("Invalid node type");                                              // #nocov
+  Rcpp::stop("Invalid node type");                                              // #nocov
 }
 
 
diff --git a/src/hpart_relabel.cpp b/src/hpart_relabel.cpp
index d90f8b86a..6f7906ef9 100644
--- a/src/hpart_relabel.cpp
+++ b/src/hpart_relabel.cpp
@@ -13,7 +13,7 @@ void recompute_bitsets_postorder(TreeDist::HPart &hpart, const size_t node_idx,
   if (node.children.empty()) {
     // Leaf node
     if (node.leaf_count != 1) {
-      Rf_error("Leaf node has leaf_count != 1");                              // #nocov
+      Rcpp::stop("Leaf node has leaf_count != 1");                              // #nocov
     }
     int new_index = mapping[node.label]; // mapping is 0-based
     node.label = new_index;
diff --git a/src/information.h b/src/information.h
index 0ed328c83..2db2340b8 100644
--- a/src/information.h
+++ b/src/information.h
@@ -49,7 +49,7 @@ __attribute__((constructor))
 inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rf_error("This many leaves are not yet supported.");
+    Rcpp::stop("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
@@ -75,7 +75,7 @@ inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
 inline double split_clust_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rf_error("This many leaves are not yet supported.");
+    Rcpp::stop("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
diff --git a/src/mast.cpp b/src/mast.cpp
index fac07db85..23fb961ea 100644
--- a/src/mast.cpp
+++ b/src/mast.cpp
@@ -64,10 +64,10 @@ int cpp_mast (IntegerMatrix edge1, IntegerMatrix edge2, IntegerVector nTip) {
               n_all_nodes = n_tip + n_internal,
               n_edge = edge1.nrow();
   if (edge2.nrow() != n_edge) {
-    Rf_error("Both trees must contain the same number of edges.");
+    Rcpp::stop("Both trees must contain the same number of edges.");
   }
   if (n_tip > MAST_MAX_TIP) {
-    Rf_error("Tree too large; please contact maintainer for advice.");
+    Rcpp::stop("Tree too large; please contact maintainer for advice.");
   }
   
   int16
diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp
index 93866b0bc..031dd5325 100644
--- a/src/nni_distance.cpp
+++ b/src/nni_distance.cpp
@@ -227,13 +227,13 @@ grf_match nni_rf_matching (
     ASSERT(n_splits > 0);
     ASSERT(n_tips > 3);
     if (n_tips > NNI_MAX_TIPS) {
-      Rf_error("Cannot calculate NNI distance for trees with so many tips.");
+      Rcpp::stop("Cannot calculate NNI distance for trees with so many tips.");
     }
     
     // #nocov start
     if (static_cast<int64_t>(n_splits) * static_cast<int64_t>(n_bins) >
           static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
-      Rf_error("Cannot calculate NNI distance for trees with so many splits.");
+      Rcpp::stop("Cannot calculate NNI distance for trees with so many splits.");
     }
     // #nocov end
     
@@ -299,7 +299,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
                                const IntegerVector& nTip) {
   
   if (nTip[0] > NNI_MAX_TIPS) {
-    Rf_error("Cannot calculate NNI distance for trees with so many tips.");
+    Rcpp::stop("Cannot calculate NNI distance for trees with so many tips.");
   }
   const int32_t n_tip = static_cast<int32_t>(nTip[0]);
   const int32_t node_0 = n_tip;
@@ -316,7 +316,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   int32_t li_score_bound = 0;
   
   if (n_edge != int32_t(edge2.nrow())) {
-    Rf_error("Both trees must have the same number of edges. "
+    Rcpp::stop("Both trees must have the same number of edges. "
              "Is one rooted and the other unrooted?");
   }
 
@@ -348,7 +348,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   const int32_t n_splits = n_distinct_edge - n_tip;
   
   if (n_splits < 1) {
-    Rf_error("NNI distance is undefined for trees with no splits"); // #nocov
+    Rcpp::stop("NNI distance is undefined for trees with no splits"); // #nocov
   }
   
   std::unique_ptr<uint64_t[]> splits1(new uint64_t[n_splits * n_bin]);
diff --git a/src/spr.cpp b/src/spr.cpp
index 1c4177ec6..766c7d66f 100644
--- a/src/spr.cpp
+++ b/src/spr.cpp
@@ -48,16 +48,16 @@ IntegerVector calc_mismatch_size(const RawMatrix& x, const RawMatrix& y) {
 // [[Rcpp::export]]
 IntegerVector mismatch_size (const RawMatrix& x, const RawMatrix& y) {
   if (x.rows() != y.rows()) {
-    Rf_error("`x` and `y` differ in number of splits.");
+    Rcpp::stop("`x` and `y` differ in number of splits.");
   }
   if (!x.hasAttribute("nTip")) {
-    Rf_error("`x` lacks nTip attribute");
+    Rcpp::stop("`x` lacks nTip attribute");
   }
   if (!y.hasAttribute("nTip")) {
-    Rf_error("`y` lacks nTip attribute");
+    Rcpp::stop("`y` lacks nTip attribute");
   }
   if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rf_error("`x` and `y` differ in `nTip`");
+    Rcpp::stop("`x` and `y` differ in `nTip`");
   }
   return calc_mismatch_size(x, y);
 }
@@ -103,23 +103,23 @@ IntegerVector calc_confusion(const RawMatrix &x, const RawMatrix &y) {
 // [[Rcpp::export]]
 IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) {
   if (x.rows() != y.rows()) {
-    Rf_error("Input splits must contain same number of splits.");
+    Rcpp::stop("Input splits must contain same number of splits.");
   }
   if (!x.hasAttribute("nTip")) {
-    Rf_error("`x` lacks nTip attribute");
+    Rcpp::stop("`x` lacks nTip attribute");
   }
   if (!y.hasAttribute("nTip")) {
-    Rf_error("`y` lacks nTip attribute");
+    Rcpp::stop("`y` lacks nTip attribute");
   }
   if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rf_error("`x` and `y` differ in `nTip`");
+    Rcpp::stop("`x` and `y` differ in `nTip`");
   }
   return calc_confusion(x, y);
 }
 
 IntegerMatrix reverse (const IntegerMatrix x) {
   if (double(x.nrow()) > double(std::numeric_limits<intx>::max())) {
-    Rf_error("This many edges are not (yet) supported.");
+    Rcpp::stop("This many edges are not (yet) supported.");
   }
   const intx n_edge = intx(x.nrow());
   ASSERT(n_edge % 2 == 0); // Tree is binary
diff --git a/src/spr_lookup.cpp b/src/spr_lookup.cpp
index 013f55e3c..9794f2489 100644
--- a/src/spr_lookup.cpp
+++ b/src/spr_lookup.cpp
@@ -223,7 +223,7 @@ int lookup_7(const SplitSet7& sp1, const SplitSet7& sp2) {
 
 inline SplitSet7 read_splits(const Rcpp::RawVector& r) {
   if (r.size() != 4)
-    Rf_error("Expected a length-4 raw vector of splits");
+    Rcpp::stop("Expected a length-4 raw vector of splits");
   
   SplitSet7 sp{};
   for (int i = 0; i < 4; ++i) {
diff --git a/src/transfer_distance.cpp b/src/transfer_distance.cpp
index 23f0d9a46..c2cf8f04a 100644
--- a/src/transfer_distance.cpp
+++ b/src/transfer_distance.cpp
@@ -146,7 +146,7 @@ List cpp_transfer_dist(
     const IntegerVector& nTip
 ) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
 
   const int n_tip = nTip[0];
@@ -205,7 +205,7 @@ List cpp_transfer_dist_scored(
     const IntegerVector& nTip, bool scale
 ) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
 
   const int n_tip = nTip[0];

From 6e6ad56f83b0c6ee6efb1cd5a82d57d8f09edbd3 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 16:24:19 +0000
Subject: [PATCH 11/17] Rcpp::stop() in exported wrappers only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exported functions (thin wrappers) use Rcpp::stop() for input
validation — the exception is caught immediately by BEGIN_RCPP/END_RCPP
in the generated RcppExports wrapper.

Implementation functions retain Rf_error() for defensive guards that
should never be reached if wrappers validate correctly (LCOV_EXCL,

This pattern avoids C++ exception unwinding through complex stack frames,
which can cause std::terminate() on ARM.

Revert "Rf_error →Rcpp:stop()"

This reverts commit b20e03e8ed60cbe2ee9db3d15116718eeb5416dd.

Test R-level guards instead of C++ error paths (ARM safety)
---
 src/day_1985.cpp       |  2 +-
 src/hpart.cpp          |  4 ++--
 src/hpart_relabel.cpp  |  2 +-
 src/information.h      |  4 ++--
 src/nni_distance.cpp   | 13 ++-----------
 src/spr.cpp            |  2 +-
 src/tree_distances.cpp | 19 ++++++++-----------
 7 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index d4a6a76b9..fac8d35f1 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -96,7 +96,7 @@ double calc_consensus_info(const List &trees, const LogicalVector &phylo,
 
   std::vector<ClusterTable> tables;
   if (std::size_t(n_trees) > tables.max_size()) {
-    Rcpp::stop("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
+    Rf_error("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
   }
 
   tables.reserve(n_trees);
diff --git a/src/hpart.cpp b/src/hpart.cpp
index ffbfcaea3..909e1c60c 100644
--- a/src/hpart.cpp
+++ b/src/hpart.cpp
@@ -126,7 +126,7 @@ size_t build_node_from_list(const RObject& node,
   if (Rf_isInteger(node) || Rf_isReal(node)) {
     const IntegerVector leaf_vec(node);
     if (leaf_vec.size() != 1) {
-      Rcpp::stop("List must only contain integers, not vectors of integers");   // #nocov
+      Rf_error("List must only contain integers, not vectors of integers");   // #nocov
     }
     const int leaf_label = leaf_vec[0];         // 1-based R leaf label
     const size_t leaf_idx = leaf_label - 1;     // 0-based label for HNode
@@ -172,7 +172,7 @@ size_t build_node_from_list(const RObject& node,
   }
   
   // Invalid node type
-  Rcpp::stop("Invalid node type");                                              // #nocov
+  Rf_error("Invalid node type");                                              // #nocov
 }
 
 
diff --git a/src/hpart_relabel.cpp b/src/hpart_relabel.cpp
index 6f7906ef9..d90f8b86a 100644
--- a/src/hpart_relabel.cpp
+++ b/src/hpart_relabel.cpp
@@ -13,7 +13,7 @@ void recompute_bitsets_postorder(TreeDist::HPart &hpart, const size_t node_idx,
   if (node.children.empty()) {
     // Leaf node
     if (node.leaf_count != 1) {
-      Rcpp::stop("Leaf node has leaf_count != 1");                              // #nocov
+      Rf_error("Leaf node has leaf_count != 1");                              // #nocov
     }
     int new_index = mapping[node.label]; // mapping is 0-based
     node.label = new_index;
diff --git a/src/information.h b/src/information.h
index 2db2340b8..0ed328c83 100644
--- a/src/information.h
+++ b/src/information.h
@@ -49,7 +49,7 @@ __attribute__((constructor))
 inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rcpp::stop("This many leaves are not yet supported.");
+    Rf_error("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
@@ -75,7 +75,7 @@ inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
 inline double split_clust_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
   if (*n_tip > CT_MAX_LEAVES) {
-    Rcpp::stop("This many leaves are not yet supported.");
+    Rf_error("This many leaves are not yet supported.");
   }
   const int32 n_out = *n_tip - n_in;
   assert(p > 0);
diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp
index 031dd5325..48341f152 100644
--- a/src/nni_distance.cpp
+++ b/src/nni_distance.cpp
@@ -226,16 +226,7 @@ grf_match nni_rf_matching (
   
     ASSERT(n_splits > 0);
     ASSERT(n_tips > 3);
-    if (n_tips > NNI_MAX_TIPS) {
-      Rcpp::stop("Cannot calculate NNI distance for trees with so many tips.");
-    }
-    
-    // #nocov start
-    if (static_cast<int64_t>(n_splits) * static_cast<int64_t>(n_bins) >
-          static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
-      Rcpp::stop("Cannot calculate NNI distance for trees with so many splits.");
-    }
-    // #nocov end
+    ASSERT(n_tips <= NNI_MAX_TIPS); // Validated by exported wrapper
     
     const int32_t last_bin = n_bins - 1;
     const int32_t unset_tips = (n_tips % SL_BIN_SIZE) ? 
@@ -317,7 +308,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   
   if (n_edge != int32_t(edge2.nrow())) {
     Rcpp::stop("Both trees must have the same number of edges. "
-             "Is one rooted and the other unrooted?");
+               "Is one rooted and the other unrooted?");
   }
 
   if (n_tip < 4) {
diff --git a/src/spr.cpp b/src/spr.cpp
index 766c7d66f..281a2fb60 100644
--- a/src/spr.cpp
+++ b/src/spr.cpp
@@ -119,7 +119,7 @@ IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) {
 
 IntegerMatrix reverse (const IntegerMatrix x) {
   if (double(x.nrow()) > double(std::numeric_limits<intx>::max())) {
-    Rcpp::stop("This many edges are not (yet) supported.");
+    Rf_error("This many edges are not (yet) supported."); // impl guard; wrapper validates
   }
   const intx n_edge = intx(x.nrow());
   ASSERT(n_edge % 2 == 0); // Tree is binary
diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp
index b40c1a40d..6aaae498b 100644
--- a/src/tree_distances.cpp
+++ b/src/tree_distances.cpp
@@ -29,7 +29,7 @@ namespace TreeDist {
 
   void check_ntip(const double n) {
     if (n > static_cast<double>(std::numeric_limits<int16>::max())) {
-      Rf_error("This many tips are not (yet) supported."); // not Rcpp::stop: ARM safety
+      Rcpp::stop("This many tips are not (yet) supported.");
     }
   }
 
@@ -602,14 +602,11 @@ inline List shared_phylo (const RawMatrix &x, const RawMatrix &y,
                       _["matching"] = final_matching);
 }
 
-// Input validation: Rf_error (longjmp) instead of Rcpp::stop (C++ exception)
-// for ARM compatibility.  R callers should validate before reaching C++.
-
 // [[Rcpp::export]]
 List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y,
                                   const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   return robinson_foulds_distance(x, y, static_cast<int32>(nTip[0]));
 }
@@ -618,7 +615,7 @@ List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y,
 List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
                               const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -629,7 +626,7 @@ List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
 List cpp_matching_split_distance(const RawMatrix &x, const RawMatrix &y,
                                  const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -641,7 +638,7 @@ List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
                             const IntegerVector &nTip, const NumericVector &k,
                             const LogicalVector &allowConflict) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -652,7 +649,7 @@ List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
 List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -663,7 +660,7 @@ List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
 List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
                            const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
@@ -674,7 +671,7 @@ List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
 List cpp_shared_phylo(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
   if (x.cols() != y.cols()) {
-    Rf_error("Input splits must address same number of tips.");
+    Rcpp::stop("Input splits must address same number of tips.");
   }
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);

From 5cd45c20a49de510c789130ce37ee0454d157fcb Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 16:47:35 +0000
Subject: [PATCH 12/17] Replace defensive Rf_error() with ASSERT() in impl code

These guards are unreachable when exported wrappers validate correctly.
ASSERT() is active in debug builds and compiles to nothing in release.
---
 src/day_1985.cpp      |  4 +++-
 src/hpart.cpp         |  4 ++--
 src/hpart_relabel.cpp |  2 +-
 src/information.h     | 26 +++++++++++---------------
 src/spr.cpp           |  2 +-
 5 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index fac8d35f1..c3e5aaa3e 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -3,6 +3,8 @@ using namespace Rcpp;
 
 #include "tree_distances.h" /* includes <TreeTools/SplitList.h> */
 #include "information.h"
+#include <cassert>
+#include <TreeTools/assert.h>
 
 #include <TreeTools.h> /* for root_on_node() */
 #include <TreeTools/root_tree.h> /* for root_on_node() */
@@ -96,7 +98,7 @@ double calc_consensus_info(const List &trees, const LogicalVector &phylo,
 
   std::vector<ClusterTable> tables;
   if (std::size_t(n_trees) > tables.max_size()) {
-    Rf_error("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
+    ASSERT(false && "Not enough memory for consensus of so many trees"); // LCOV_EXCL_LINE
   }
 
   tables.reserve(n_trees);
diff --git a/src/hpart.cpp b/src/hpart.cpp
index 909e1c60c..9bd47dd95 100644
--- a/src/hpart.cpp
+++ b/src/hpart.cpp
@@ -126,7 +126,7 @@ size_t build_node_from_list(const RObject& node,
   if (Rf_isInteger(node) || Rf_isReal(node)) {
     const IntegerVector leaf_vec(node);
     if (leaf_vec.size() != 1) {
-      Rf_error("List must only contain integers, not vectors of integers");   // #nocov
+      ASSERT(false && "List must only contain integers, not vectors of integers"); // #nocov
     }
     const int leaf_label = leaf_vec[0];         // 1-based R leaf label
     const size_t leaf_idx = leaf_label - 1;     // 0-based label for HNode
@@ -172,7 +172,7 @@ size_t build_node_from_list(const RObject& node,
   }
   
   // Invalid node type
-  Rf_error("Invalid node type");                                              // #nocov
+  ASSERT(false && "Invalid node type");                                        // #nocov
 }
 
 
diff --git a/src/hpart_relabel.cpp b/src/hpart_relabel.cpp
index d90f8b86a..1337c91d9 100644
--- a/src/hpart_relabel.cpp
+++ b/src/hpart_relabel.cpp
@@ -13,7 +13,7 @@ void recompute_bitsets_postorder(TreeDist::HPart &hpart, const size_t node_idx,
   if (node.children.empty()) {
     // Leaf node
     if (node.leaf_count != 1) {
-      Rf_error("Leaf node has leaf_count != 1");                              // #nocov
+      ASSERT(node.leaf_count == 1 && "Leaf node has leaf_count != 1");         // #nocov
     }
     int new_index = mapping[node.label]; // mapping is 0-based
     node.label = new_index;
diff --git a/src/information.h b/src/information.h
index 0ed328c83..cb2ce58b7 100644
--- a/src/information.h
+++ b/src/information.h
@@ -2,7 +2,7 @@
 #define _TREEDIST_INFO_H
 
 #include <Rcpp/Lightest>
-#include <cassert> /* for log2() */
+#include <TreeTools/assert.h>
 #include <cmath> /* for log2() */
 #include "ints.h" /* for int16 */
 
@@ -48,14 +48,12 @@ __attribute__((constructor))
 
 inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
-  if (*n_tip > CT_MAX_LEAVES) {
-    Rf_error("This many leaves are not yet supported.");
-  }
+  ASSERT(*n_tip <= CT_MAX_LEAVES && "This many leaves are not yet supported.");
   const int32 n_out = *n_tip - n_in;
-  assert(p > 0);
-  assert(p <= 1);
-  assert(n_in > 1);
-  assert(n_out > 1);
+  ASSERT(p > 0);
+  ASSERT(p <= 1);
+  ASSERT(n_in > 1);
+  ASSERT(n_out > 1);
   if (p == 1) {
     return (l2unrooted[*n_tip] - l2rooted[n_in] - l2rooted[n_out]);
   } else {
@@ -74,14 +72,12 @@ inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
 
 inline double split_clust_info(const int32 n_in, const int32 *n_tip,
                                const double p) {
-  if (*n_tip > CT_MAX_LEAVES) {
-    Rf_error("This many leaves are not yet supported.");
-  }
+  ASSERT(*n_tip <= CT_MAX_LEAVES && "This many leaves are not yet supported.");
   const int32 n_out = *n_tip - n_in;
-  assert(p > 0);
-  assert(p <= 1);
-  assert(n_in > 1);
-  assert(n_out > 1);
+  ASSERT(p > 0);
+  ASSERT(p <= 1);
+  ASSERT(n_in > 1);
+  ASSERT(n_out > 1);
   return -p * (
       (((n_in * l2[n_in]) + (n_out * l2[n_out])) / *n_tip) - l2[*n_tip]
   );
diff --git a/src/spr.cpp b/src/spr.cpp
index 281a2fb60..ac83be137 100644
--- a/src/spr.cpp
+++ b/src/spr.cpp
@@ -119,7 +119,7 @@ IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) {
 
 IntegerMatrix reverse (const IntegerMatrix x) {
   if (double(x.nrow()) > double(std::numeric_limits<intx>::max())) {
-    Rf_error("This many edges are not (yet) supported."); // impl guard; wrapper validates
+    ASSERT(false && "This many edges are not (yet) supported."); // wrapper validates
   }
   const intx n_edge = intx(x.nrow());
   ASSERT(n_edge % 2 == 0); // Tree is binary

From 87e6cd819efef372076c720f632c4eb336a82ccf Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 20 Mar 2026 17:04:57 +0000
Subject: [PATCH 13/17] Remove all Rcpp::stop() from C++; validate in R only

Rcpp::stop() causes std::terminate() on ARM regardless of call depth.
All input validation now happens in R wrappers before entering C++.
C++ functions use ASSERT() for debug-only invariant checks.

Tests updated to exercise R-level guards instead of calling C++ with
deliberately invalid input.
---
 src/day_1985.cpp                             |  8 ++--
 src/hmi.cpp                                  | 13 ++-----
 src/mast.cpp                                 |  9 ++---
 src/nni_distance.cpp                         | 12 ++----
 src/spr.cpp                                  | 39 ++++++--------------
 src/spr_lookup.cpp                           |  2 +-
 src/transfer_distance.cpp                    | 11 +++---
 src/tree_distances.cpp                       | 35 ++++++------------
 tests/testthat/test-mast.R                   |  4 +-
 tests/testthat/test-tree_distance_nni.R      |  5 +--
 tests/testthat/test-tree_distance_transfer.R |  8 ++--
 11 files changed, 52 insertions(+), 94 deletions(-)

diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index c3e5aaa3e..6773fdea3 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -323,11 +323,9 @@ IntegerVector robinson_foulds_all_pairs(const List& tables) {
 // [[Rcpp::export]]
 double consensus_info(const List trees, const LogicalVector phylo,
                       const NumericVector p) {
-  if (p[0] > 1 + 1e-15) { // epsilon catches floating point error
-    Rcpp::stop("p must be <= 1.0 in consensus_info()");
-  } else if (p[0] < 0.5) {
-    Rcpp::stop("p must be >= 0.5 in consensus_info()");
-  }
+  // Validated by R caller (ConsensusInfo checks p range)
+  ASSERT(p[0] <= 1 + 1e-15 && "p must be <= 1.0 in consensus_info()");
+  ASSERT(p[0] >= 0.5 && "p must be >= 0.5 in consensus_info()");
 
   // Peek at tree size to choose stack vs heap allocation for the work buffer
   ClusterTable temp_table(Rcpp::List(trees(0)));
diff --git a/src/hmi.cpp b/src/hmi.cpp
index bb594a7f6..bcbe7c6a8 100644
--- a/src/hmi.cpp
+++ b/src/hmi.cpp
@@ -139,9 +139,8 @@ double hierarchical_self_info(const std::vector<TreeDist::HNode>& nodes, size_t
 double HMI_xptr(SEXP ptr1, SEXP ptr2) {
   Rcpp::XPtr<TreeDist::HPart> hp1(ptr1);
   Rcpp::XPtr<TreeDist::HPart> hp2(ptr2);
-  if (hp1->nodes[hp1->root].n_tip != hp2->nodes[hp2->root].n_tip) {
-    Rcpp::stop("Trees must have the same number of leaves");
-  }
+  ASSERT(hp1->nodes[hp1->root].n_tip == hp2->nodes[hp2->root].n_tip
+         && "Trees must have the same number of leaves");
   return TreeDist::hierarchical_mutual_info(hp1->nodes, hp1->root,
                                             hp2->nodes, hp2->root).second;
 }
@@ -170,12 +169,8 @@ Rcpp::NumericVector EHMI_xptr(SEXP hp1_ptr, SEXP hp2_ptr,
                               double tolerance = 0.01,
                               int minResample = 36) {
   
-  if (minResample < 2) {
-    Rcpp::stop("Must perform at least one resampling");
-  }
-  if (tolerance < 1e-8) {
-    Rcpp::stop("Tolerance too low");
-  }
+  ASSERT(minResample >= 2 && "Must perform at least one resampling");
+  ASSERT(tolerance >= 1e-8 && "Tolerance too low");
   
   Rcpp::XPtr<TreeDist::HPart> hp1(hp1_ptr);
   Rcpp::XPtr<TreeDist::HPart> hp2(hp2_ptr);
diff --git a/src/mast.cpp b/src/mast.cpp
index 23fb961ea..c26af5ff8 100644
--- a/src/mast.cpp
+++ b/src/mast.cpp
@@ -1,5 +1,6 @@
 #include <cstdlib>
 #include <Rcpp/Lightest>
+#include <TreeTools/assert.h>
 #include "ints.h"
 using namespace Rcpp;
 
@@ -63,12 +64,8 @@ int cpp_mast (IntegerMatrix edge1, IntegerMatrix edge2, IntegerVector nTip) {
               n_internal = n_tip - 1,
               n_all_nodes = n_tip + n_internal,
               n_edge = edge1.nrow();
-  if (edge2.nrow() != n_edge) {
-    Rcpp::stop("Both trees must contain the same number of edges.");
-  }
-  if (n_tip > MAST_MAX_TIP) {
-    Rcpp::stop("Tree too large; please contact maintainer for advice.");
-  }
+  ASSERT(edge2.nrow() == n_edge && "Both trees must contain the same number of edges.");
+  ASSERT(n_tip <= MAST_MAX_TIP && "Tree too large; please contact maintainer for advice.");
   
   int16
     t1_left[MAST_MAX_NODE] = {}, t1_right[MAST_MAX_NODE] = {},
diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp
index 48341f152..db3a5b3c6 100644
--- a/src/nni_distance.cpp
+++ b/src/nni_distance.cpp
@@ -289,9 +289,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
                                const IntegerMatrix& edge2,
                                const IntegerVector& nTip) {
   
-  if (nTip[0] > NNI_MAX_TIPS) {
-    Rcpp::stop("Cannot calculate NNI distance for trees with so many tips.");
-  }
+  ASSERT(nTip[0] <= NNI_MAX_TIPS && "Cannot calculate NNI distance for trees with so many tips.");
   const int32_t n_tip = static_cast<int32_t>(nTip[0]);
   const int32_t node_0 = n_tip;
   const int32_t node_0_r = n_tip + 1;
@@ -306,10 +304,8 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   int32_t fack_score_bound = 0;
   int32_t li_score_bound = 0;
   
-  if (n_edge != int32_t(edge2.nrow())) {
-    Rcpp::stop("Both trees must have the same number of edges. "
-               "Is one rooted and the other unrooted?");
-  }
+  ASSERT(n_edge == int32_t(edge2.nrow())
+         && "Both trees must have the same number of edges.");
 
   if (n_tip < 4) {
     return(IntegerVector::create(Named("lower") = 0,
@@ -339,7 +335,7 @@ IntegerVector cpp_nni_distance(const IntegerMatrix& edge1,
   const int32_t n_splits = n_distinct_edge - n_tip;
   
   if (n_splits < 1) {
-    Rcpp::stop("NNI distance is undefined for trees with no splits"); // #nocov
+    ASSERT(false && "NNI distance is undefined for trees with no splits"); // #nocov
   }
   
   std::unique_ptr<uint64_t[]> splits1(new uint64_t[n_splits * n_bin]);
diff --git a/src/spr.cpp b/src/spr.cpp
index ac83be137..c9dd456ce 100644
--- a/src/spr.cpp
+++ b/src/spr.cpp
@@ -47,18 +47,11 @@ IntegerVector calc_mismatch_size(const RawMatrix& x, const RawMatrix& y) {
 
 // [[Rcpp::export]]
 IntegerVector mismatch_size (const RawMatrix& x, const RawMatrix& y) {
-  if (x.rows() != y.rows()) {
-    Rcpp::stop("`x` and `y` differ in number of splits.");
-  }
-  if (!x.hasAttribute("nTip")) {
-    Rcpp::stop("`x` lacks nTip attribute");
-  }
-  if (!y.hasAttribute("nTip")) {
-    Rcpp::stop("`y` lacks nTip attribute");
-  }
-  if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rcpp::stop("`x` and `y` differ in `nTip`");
-  }
+  ASSERT(x.rows() == y.rows() && "`x` and `y` differ in number of splits.");
+  ASSERT(x.hasAttribute("nTip") && "`x` lacks nTip attribute");
+  ASSERT(y.hasAttribute("nTip") && "`y` lacks nTip attribute");
+  ASSERT(static_cast<int>(x.attr("nTip")) == static_cast<int>(y.attr("nTip"))
+         && "`x` and `y` differ in `nTip`");
   return calc_mismatch_size(x, y);
 }
 
@@ -102,25 +95,17 @@ IntegerVector calc_confusion(const RawMatrix &x, const RawMatrix &y) {
 
 // [[Rcpp::export]]
 IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) {
-  if (x.rows() != y.rows()) {
-    Rcpp::stop("Input splits must contain same number of splits.");
-  }
-  if (!x.hasAttribute("nTip")) {
-    Rcpp::stop("`x` lacks nTip attribute");
-  }
-  if (!y.hasAttribute("nTip")) {
-    Rcpp::stop("`y` lacks nTip attribute");
-  }
-  if (static_cast<int>(x.attr("nTip")) != static_cast<int>(y.attr("nTip"))) {
-    Rcpp::stop("`x` and `y` differ in `nTip`");
-  }
+  ASSERT(x.rows() == y.rows() && "Input splits must contain same number of splits.");
+  ASSERT(x.hasAttribute("nTip") && "`x` lacks nTip attribute");
+  ASSERT(y.hasAttribute("nTip") && "`y` lacks nTip attribute");
+  ASSERT(static_cast<int>(x.attr("nTip")) == static_cast<int>(y.attr("nTip"))
+         && "`x` and `y` differ in `nTip`");
   return calc_confusion(x, y);
 }
 
 IntegerMatrix reverse (const IntegerMatrix x) {
-  if (double(x.nrow()) > double(std::numeric_limits<intx>::max())) {
-    ASSERT(false && "This many edges are not (yet) supported."); // wrapper validates
-  }
+  ASSERT(double(x.nrow()) <= double(std::numeric_limits<intx>::max())
+         && "This many edges are not (yet) supported.");
   const intx n_edge = intx(x.nrow());
   ASSERT(n_edge % 2 == 0); // Tree is binary
   IntegerMatrix ret(n_edge, 2);
diff --git a/src/spr_lookup.cpp b/src/spr_lookup.cpp
index 9794f2489..1ac2281cb 100644
--- a/src/spr_lookup.cpp
+++ b/src/spr_lookup.cpp
@@ -223,7 +223,7 @@ int lookup_7(const SplitSet7& sp1, const SplitSet7& sp2) {
 
 inline SplitSet7 read_splits(const Rcpp::RawVector& r) {
   if (r.size() != 4)
-    Rcpp::stop("Expected a length-4 raw vector of splits");
+    ASSERT(false && "Expected a length-4 raw vector of splits");
   
   SplitSet7 sp{};
   for (int i = 0; i < 4; ++i) {
diff --git a/src/transfer_distance.cpp b/src/transfer_distance.cpp
index c2cf8f04a..a2ff23e84 100644
--- a/src/transfer_distance.cpp
+++ b/src/transfer_distance.cpp
@@ -12,6 +12,7 @@
  */
 
 #include <TreeTools/SplitList.h>
+#include <TreeTools/assert.h>
 #include <Rcpp/Lightest>
 #include <algorithm>
 #include <cstdint>
@@ -145,9 +146,8 @@ List cpp_transfer_dist(
     const RawMatrix& x, const RawMatrix& y,
     const IntegerVector& nTip
 ) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  // Validated by R caller (.ValidateSplitArgs or as.Splits with shared tipLabels)
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
 
   const int n_tip = nTip[0];
   SplitList sl_x(x);
@@ -204,9 +204,8 @@ List cpp_transfer_dist_scored(
     const RawMatrix& x, const RawMatrix& y,
     const IntegerVector& nTip, bool scale
 ) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  // Validated by R caller (.ValidateSplitArgs or as.Splits with shared tipLabels)
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
 
   const int n_tip = nTip[0];
   SplitList sl_x(x);
diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp
index 6aaae498b..f4b4ce859 100644
--- a/src/tree_distances.cpp
+++ b/src/tree_distances.cpp
@@ -1,4 +1,5 @@
 #include <TreeTools/SplitList.h>
+#include <TreeTools/assert.h>
 #include <cmath>
 #include <memory> /* for unique_ptr, make_unique */
 #include <Rcpp/Lightest>
@@ -28,9 +29,9 @@ namespace TreeDist {
   }
 
   void check_ntip(const double n) {
-    if (n > static_cast<double>(std::numeric_limits<int16>::max())) {
-      Rcpp::stop("This many tips are not (yet) supported.");
-    }
+    // Validated by R caller (nTip > 32767 guard in CalculateTreeDistance et al.)
+    ASSERT(n <= static_cast<double>(std::numeric_limits<int16>::max())
+           && "This many tips are not (yet) supported.");
   }
 
 
@@ -605,18 +606,14 @@ inline List shared_phylo (const RawMatrix &x, const RawMatrix &y,
 // [[Rcpp::export]]
 List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y,
                                   const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   return robinson_foulds_distance(x, y, static_cast<int32>(nTip[0]));
 }
 
 // [[Rcpp::export]]
 List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
                               const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return robinson_foulds_info(x, y, n_tip);
@@ -625,9 +622,7 @@ List cpp_robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
 // [[Rcpp::export]]
 List cpp_matching_split_distance(const RawMatrix &x, const RawMatrix &y,
                                  const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return matching_split_distance(x, y, n_tip);
@@ -637,9 +632,7 @@ List cpp_matching_split_distance(const RawMatrix &x, const RawMatrix &y,
 List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
                             const IntegerVector &nTip, const NumericVector &k,
                             const LogicalVector &allowConflict) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return jaccard_similarity(x, y, n_tip, k, allowConflict);
@@ -648,9 +641,7 @@ List cpp_jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
 // [[Rcpp::export]]
 List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return msi_distance(x, y, n_tip);
@@ -659,9 +650,7 @@ List cpp_msi_distance(const RawMatrix &x, const RawMatrix &y,
 // [[Rcpp::export]]
 List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
                            const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return mutual_clustering(x, y, n_tip);
@@ -670,9 +659,7 @@ List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
 // [[Rcpp::export]]
 List cpp_shared_phylo(const RawMatrix &x, const RawMatrix &y,
                       const IntegerVector &nTip) {
-  if (x.cols() != y.cols()) {
-    Rcpp::stop("Input splits must address same number of tips.");
-  }
+  ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
   return shared_phylo(x, y, n_tip);
diff --git a/tests/testthat/test-mast.R b/tests/testthat/test-mast.R
index 8a6235ce2..7d3e9289b 100644
--- a/tests/testthat/test-mast.R
+++ b/tests/testthat/test-mast.R
@@ -1,7 +1,9 @@
 library("TreeTools")
 
 test_that("MAST fails gracefully", {
-  expect_error(cpp_mast(BalancedTree(7)$edge, BalancedTree(8)$edge, 7)) # Different sizes
+  # R-level guard catches edge count mismatch
+  expect_error(.MASTSizeEdges(BalancedTree(7)$edge, BalancedTree(8)$edge, 7),
+               "same number of edges")
   expect_error(MASTSize(BalancedTree(8), UnrootTree(BalancedTree(8))))
   expect_error(MASTSize(BalancedTree(10000), PectinateTree(10000))) # Too large
   
diff --git a/tests/testthat/test-tree_distance_nni.R b/tests/testthat/test-tree_distance_nni.R
index 8569231fb..20bcda232 100644
--- a/tests/testthat/test-tree_distance_nni.R
+++ b/tests/testthat/test-tree_distance_nni.R
@@ -8,9 +8,8 @@ test_that("NNIDist() handles exceptions", {
   expect_error(NNIDist(list(PectinateTree(1:8), 
                             PectinateTree(as.character(1:8)))),
                "trees must bear identical labels")
-  expect_error(cpp_nni_distance(
-    PectinateTree(40000)$edge, # Will fail before not being postorder is problem
-    BalancedTree(40000)$edge, 40000), "so many tips")
+  # R-level guard catches too-many-tips
+  expect_error(NNIDist(PectinateTree(40000), BalancedTree(40000)), "so many tips")
   
   expect_error(NNIDist(BalancedTree(5), RootOnNode(BalancedTree(5), 1)))
   
diff --git a/tests/testthat/test-tree_distance_transfer.R b/tests/testthat/test-tree_distance_transfer.R
index 7186f665e..f3882a737 100644
--- a/tests/testthat/test-tree_distance_transfer.R
+++ b/tests/testthat/test-tree_distance_transfer.R
@@ -240,11 +240,11 @@ test_that("cpp_transfer_dist() handles star trees", {
   expect_equal(res_ss$score_unscaled, 0)
 })
 
-test_that("cpp_transfer_dist() errors on column mismatch", {
+test_that("column mismatch caught by R-level guard", {
   sp1 <- matrix(as.raw(0), nrow = 1, ncol = 1)
   sp2 <- matrix(as.raw(0), nrow = 1, ncol = 2)
   expect_error(
-    TreeDist:::cpp_transfer_dist(sp1, sp2, 8L),
+    TreeDist:::.ValidateSplitArgs(sp1, sp2, 8L),
     "same number of tips"
   )
 })
@@ -254,11 +254,11 @@ test_that("cpp_transfer_dist() errors on column mismatch", {
 # cpp_transfer_dist_scored() edge cases
 # =========================================================================
 
-test_that("cpp_transfer_dist_scored() errors on column mismatch", {
+test_that("column mismatch caught by R-level guard (scored)", {
   sp1 <- matrix(as.raw(0), nrow = 1, ncol = 1)
   sp2 <- matrix(as.raw(0), nrow = 1, ncol = 2)
   expect_error(
-    TreeDist:::cpp_transfer_dist_scored(sp1, sp2, 8L, TRUE),
+    TreeDist:::.ValidateSplitArgs(sp1, sp2, 8L),
     "same number of tips"
   )
 })

From 00ba7cb4dab0658f4a32f85df71819cbc1dc83af Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Sun, 22 Mar 2026 06:43:38 +0000
Subject: [PATCH 14/17] NEWS.md: AI-generated branch summary for
 transfer-consensus

---
 NEWS.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index d1b783d23..4bd545426 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,14 @@
+<!-- AI-generated branch summary (2026-03-22) -->
+# Branch: `transfer-consensus` → merge target: `master`
+
+This branch implements transfer consensus trees (`TransferConsensus()`) and
+a corresponding transfer distance metric (`TransferDist()`), providing a
+consensus method based on the transfer distance rather than the majority-rule
+or strict approaches.  Error handling in C++ code is tightened to validate
+inputs at the R level rather than using `Rcpp::stop()` or `Rf_error()` in
+implementation code.
+<!-- end AI-generated summary -->
+
 # TreeDist 2.13.0 (2026-03-17)
 
 ## Performance

From 1980da156dfb0250ff6816feda9b3c48e0bb71ac Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Sun, 22 Mar 2026 12:47:36 +0000
Subject: [PATCH 15/17] Add R-level validation for functions that lost
 Rcpp::stop() guards

Commit 87e6cd81 removed Rcpp::stop() from C++ and replaced with
ASSERT() (compiled out in release). Tests still expected errors from
C++ calls. This commit:

- mismatch_size(), confusion(): Add R validation wrappers (nTip,
  nrow checks) shadowing RcppExports versions
- consensus_info(): Add R wrapper checking p range and CT_MAX_LEAVES
  (16383) limit
- HierarchicalMutualInfo(): Check tip count mismatch for phylo inputs
- test-hmi.cpp.R: Test EHMI() R guards instead of EHMI_xptr()
- test-tree_distance.R: Replace direct cpp_* error tests with
  .ValidateSplitArgs test (R-level validation path)

Fixes all 19 test failures from GHA.
---
 R/hierarchical_mutual_information.R |  4 ++++
 R/tree_distance_spr.R               | 34 +++++++++++++++++++++++++++++
 R/tree_information.R                | 17 +++++++++++++++
 tests/testthat/test-hmi.cpp.R       |  7 +++---
 tests/testthat/test-tree_distance.R | 14 ++++--------
 5 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/R/hierarchical_mutual_information.R b/R/hierarchical_mutual_information.R
index 7e74ddd75..862d5c936 100644
--- a/R/hierarchical_mutual_information.R
+++ b/R/hierarchical_mutual_information.R
@@ -66,6 +66,10 @@ HierarchicalMutualInfo <- function(tree1, tree2 = NULL, normalize = FALSE) {
       1
     }
   } else {
+    if (inherits(tree1, "phylo") && inherits(tree2, "phylo") &&
+        NTip(tree1) != NTip(tree2)) {
+      stop("Trees must have the same number of leaves")
+    }
     hp2 <- as.HPart(tree2, tree1)
     hmi <- HMI_xptr(hp1, hp2)
     if (isFALSE(normalize)) {
diff --git a/R/tree_distance_spr.R b/R/tree_distance_spr.R
index fa480542b..85657a375 100644
--- a/R/tree_distance_spr.R
+++ b/R/tree_distance_spr.R
@@ -489,3 +489,37 @@ SPRDist.multiPhylo <- SPRDist.list
   # Return:
   moves
 }
+
+# R-level validation wrappers for internal C++ functions.
+# C++ uses ASSERT() (compiled out in release); these ensure user-visible errors.
+mismatch_size <- function(x, y) {
+  if (!inherits(x, "Splits") || is.null(attr(x, "nTip"))) {
+    stop("`x` lacks nTip attribute")
+  }
+  if (!inherits(y, "Splits") || is.null(attr(y, "nTip"))) {
+    stop("`y` lacks nTip attribute")
+  }
+  if (attr(x, "nTip") != attr(y, "nTip")) {
+    stop("`x` and `y` differ in `nTip`")
+  }
+  if (nrow(x) != nrow(y)) {
+    stop("`x` and `y` differ in number of splits.")
+  }
+  .Call(`_TreeDist_mismatch_size`, x, y)
+}
+
+confusion <- function(x, y) {
+  if (!inherits(x, "Splits") || is.null(attr(x, "nTip"))) {
+    stop("`x` lacks nTip attribute")
+  }
+  if (!inherits(y, "Splits") || is.null(attr(y, "nTip"))) {
+    stop("`y` lacks nTip attribute")
+  }
+  if (attr(x, "nTip") != attr(y, "nTip")) {
+    stop("`x` and `y` differ in `nTip`")
+  }
+  if (nrow(x) != nrow(y)) {
+    stop("Input splits must contain same number of splits.")
+  }
+  .Call(`_TreeDist_confusion`, x, y)
+}
diff --git a/R/tree_information.R b/R/tree_information.R
index 767c918d3..580298c88 100644
--- a/R/tree_information.R
+++ b/R/tree_information.R
@@ -380,6 +380,23 @@ ConsensusInfo <- function(trees, info = "phylogenetic", p = 0.5,
   consensus_info(trees, mode == 1L, p = safeP)
 }
 
+# R-level validation wrapper; shadows RcppExports version.
+# C++ ASSERT() is compiled out in release builds.
+consensus_info <- function(trees, phylo, p) {
+  if (p > 1 + 1e-15) {
+    stop("p must be <= 1.0 in consensus_info()")
+  }
+  if (p < 0.5) {
+    stop("p must be >= 0.5 in consensus_info()")
+  }
+  nTip <- NTip(trees[[1]])
+  # CT_MAX_LEAVES = 16383 in information.h (lookup table size limit)
+  if (nTip > 16383L) {
+    stop("This many leaves are not yet supported")
+  }
+  .Call(`_TreeDist_consensus_info`, trees, phylo, p)
+}
+
 #' Maximum Clade Information Tree
 #'
 #' Analogous to the Maximum Clade Credibility tree:
diff --git a/tests/testthat/test-hmi.cpp.R b/tests/testthat/test-hmi.cpp.R
index 78ad0f221..cc883e4a5 100644
--- a/tests/testthat/test-hmi.cpp.R
+++ b/tests/testthat/test-hmi.cpp.R
@@ -6,9 +6,10 @@ test_that("HMI fails nicely", {
   expect_error(HierarchicalMutualInfo(bal5, pec5, normalize = "Error"),
                "`normalize` must be logical, or a function")
   
-  expect_error(EHMI_xptr(bal5, pec5, tolerance = 1e-16),
-               ".olerance too low")
-  expect_error(EHMI_xptr(bal5, pec5, minResample = 1),
+  # R-level guards in EHMI(); C++ uses ASSERT only
+  expect_error(EHMI(BalancedTree(5), PectinateTree(5), precision = 1e-16),
+               "Tolerance too low")
+  expect_error(EHMI(BalancedTree(5), PectinateTree(5), minResample = 1),
                "Must perform at least one resampl")
 })
 
diff --git a/tests/testthat/test-tree_distance.R b/tests/testthat/test-tree_distance.R
index cd96e3e93..67319da54 100644
--- a/tests/testthat/test-tree_distance.R
+++ b/tests/testthat/test-tree_distance.R
@@ -76,16 +76,10 @@ test_that("Size mismatch causes error", {
   expect_error(MeilaVariationOfInformation(splits7, splits8),
                "Split lengths differ")
   
-  Test <- function(Func) {
-    expect_error(Func(splits8, as.Splits(BalancedTree(9)), 8))
-  }
-  Test(cpp_robinson_foulds_distance)
-  Test(cpp_robinson_foulds_info)
-  Test(cpp_matching_split_distance)
-  Test(cpp_jaccard_similarity)
-  Test(cpp_msi_distance)
-  Test(cpp_mutual_clustering)
-  Test(cpp_shared_phylo)
+  # Validation is now in R (.ValidateSplitArgs), not C++
+  splits9 <- as.Splits(BalancedTree(9))
+  expect_error(.ValidateSplitArgs(splits8, splits9, 8L),
+               "same number of tips")
 })
 
 test_that("Metrics handle polytomies", {

From 4a60427435def72afd791e3789cd57fdb42fc773 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Sun, 22 Mar 2026 13:49:12 +0000
Subject: [PATCH 16/17] Return 0

---
 src/hpart.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/hpart.cpp b/src/hpart.cpp
index 9bd47dd95..22bab059c 100644
--- a/src/hpart.cpp
+++ b/src/hpart.cpp
@@ -171,8 +171,9 @@ size_t build_node_from_list(const RObject& node,
     return my_idx;
   }
   
-  // Invalid node type
+  // Invalid node type — unreachable when R caller validates input
   ASSERT(false && "Invalid node type");                                        // #nocov
+  return 0;                                                                    // #nocov
 }
 
 

From 9e01c6e3091643000c4a8bd8c638932fc664db32 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Mon, 23 Mar 2026 08:04:45 +0000
Subject: [PATCH 17/17] Surface headers for Linking To (#190)

* Surface headers for Linking To
---
 .github/workflows/ASan.yml                    |   5 +-
 .positai/settings.json                        |   7 +
 AGENTS.md                                     |  46 ++
 NEWS.md                                       |  11 +-
 R/RcppExports.R                               |   4 +
 inst/include/TreeDist/cost_matrix.h           | 276 ++++++++++
 inst/include/TreeDist/lap.h                   |  35 ++
 inst/include/TreeDist/lap_impl.h              | 254 ++++++++++
 inst/include/TreeDist/lap_scratch.h           |  44 ++
 inst/include/TreeDist/mutual_clustering.h     |  87 ++++
 .../include/TreeDist/mutual_clustering_impl.h | 290 +++++++++++
 inst/include/TreeDist/types.h                 |  32 ++
 src/Makevars                                  |   1 +
 src/Makevars.win                              |   1 +
 src/RcppExports.cpp                           |  14 +
 src/ints.h                                    |   9 +-
 src/lap.cpp                                   |  58 ++-
 src/lap.h                                     | 471 +-----------------
 src/tree_distance_functions.cpp               |  66 ++-
 src/tree_distances.h                          |  25 +-
 tests/testthat/test-mci_impl.R                |  53 ++
 21 files changed, 1282 insertions(+), 507 deletions(-)
 create mode 100644 inst/include/TreeDist/cost_matrix.h
 create mode 100644 inst/include/TreeDist/lap.h
 create mode 100644 inst/include/TreeDist/lap_impl.h
 create mode 100644 inst/include/TreeDist/lap_scratch.h
 create mode 100644 inst/include/TreeDist/mutual_clustering.h
 create mode 100644 inst/include/TreeDist/mutual_clustering_impl.h
 create mode 100644 inst/include/TreeDist/types.h
 create mode 100644 tests/testthat/test-mci_impl.R

diff --git a/.github/workflows/ASan.yml b/.github/workflows/ASan.yml
index 90a74a663..7c0b0a952 100644
--- a/.github/workflows/ASan.yml
+++ b/.github/workflows/ASan.yml
@@ -55,8 +55,9 @@ jobs:
         run: |
           export LD_PRELOAD=$(gcc -print-file-name=libasan.so)
 
-          echo "PKG_CFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" > src/Makevars
-          echo "PKG_CXXFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> src/Makevars
+          # Replace PKG_CXXFLAGS in-place; preserve PKG_CPPFLAGS and PKG_LIBS
+          sed -i 's/^PKG_CXXFLAGS.*/PKG_CXXFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer/' src/Makevars
+          echo "PKG_CFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> src/Makevars
 
           mkdir ~/.R
           echo "LDFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> ~/.R/Makevars
diff --git a/.positai/settings.json b/.positai/settings.json
index 627e8f7f2..acec4998f 100644
--- a/.positai/settings.json
+++ b/.positai/settings.json
@@ -30,8 +30,15 @@
       "wc *": "allow"
     },
     "external_directory": {
+      "C:/Users/pjjg18/GitHub/TreeSearch-a/*": "allow",
+      "C:/Users/pjjg18/.positai/skills/r-package-profiling/references/*": "allow",
       "../Quartet/*": "allow",
       "../TreeTools/*": "allow"
+    },
+    "webfetch": {
+      "https://github.com/*": "allow",
+      "https://api.github.com/*": "allow",
+      "https://raw.githubusercontent.com/*": "allow"
     }
   },
   "model": {
diff --git a/AGENTS.md b/AGENTS.md
index 237aa42bf..6d81e8830 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1177,6 +1177,52 @@ for the cross-pairs case.
 
 ---
 
+## LinkingTo Header Exposure (`expose-lapjv` branch)
+
+Extracted LAP and MCI C++ APIs into `inst/include/TreeDist/` headers so that
+downstream packages (e.g., TreeSearch) can use `LinkingTo: TreeDist`:
+
+| Header | Content |
+|--------|---------|
+| `types.h` | `cost`, `lap_dim`, `lap_row`, `lap_col`, constants |
+| `cost_matrix.h` | `CostMatrix` class (Rcpp-free) |
+| `lap_scratch.h` | `LapScratch` struct |
+| `lap.h` | `lap()` declarations |
+| `lap_impl.h` | `lap()` implementation (include in exactly one TU) |
+| `mutual_clustering.h` | MCI declarations |
+| `mutual_clustering_impl.h` | MCI implementation (include in exactly one TU) |
+
+`src/lap.h` is now a thin wrapper that includes `<TreeDist/lap.h>` and
+re-exports types to global scope.
+
+### LAPJV codegen regression (diagnosed, partially mitigated)
+
+Including `lap_impl.h` in `lap.cpp` changed the TU context enough for GCC 14's
+register allocator to produce ~8% more instructions in the Dijkstra hot loop,
+causing a consistent 20–25% regression on standalone LAPJV (n ≥ 400).
+
+**Root cause:** the installed-header version of `CostMatrix` (in
+`inst/include/TreeDist/cost_matrix.h`) has a different method set than main's
+monolithic `src/lap.h` (extra methods like `setWithTranspose()`, `dim8()`;
+missing test variants like `findRowSubminNaive`).  This changes GCC's
+optimization heuristics for the entire TU, even though `lap()` never calls
+the differing methods.
+
+**Fix:** define `lap()` directly in `lap.cpp` (not via `#include <TreeDist/lap_impl.h>`)
+with `__attribute__((optimize("align-functions=64", "align-loops=16")))`.
+The `lapjv()` wrapper fills the transposed buffer first (matching R's
+column-major storage) then untransposes — restoring the cache-friendly pattern.
+
+**Residual:** ~5–9% on LAPJV 1999×1999 vs main (from the CostMatrix class
+definition difference).  Tree distance metrics are unaffected — they call
+`lap()` from `pairwise_distances.cpp`, whose TU context is unchanged.
+
+**Maintenance note:** if the `lap()` algorithm changes, update BOTH `src/lap.cpp`
+and `inst/include/TreeDist/lap_impl.h`.  The duplication is intentional — it
+preserves the TU context that was profiled and tuned.
+
+---
+
 ## Remaining Optimization Opportunities
 
 - Sort+merge pre-scan for `rf_info_score`: **DONE** — replaced O(n²) loop with
diff --git a/NEWS.md b/NEWS.md
index 17785cba9..082fec343 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,14 @@
 <!-- AI-generated branch summary (2026-03-22) -->
-# Branch: `transfer-consensus` → merge target: `master`
+# Branch: `expose-lapjv` → merge target: `master`
+
+This branch exposes the LAPJV (Jonker–Volgenant) linear assignment solver
+via C++ headers so that downstream packages can link to it (`LinkingTo`),
+and implements a transfer consensus method (`TransferConsensus()`,
+`TransferDist()`).  It also tightens error handling by moving all
+`Rcpp::stop()` / `Rf_error()` calls out of C++ implementation code
+into R-level input validation.
+
+# TreeDist 2.13.0.9000
 
 This branch implements transfer consensus trees (`TransferConsensus()`) and
 a corresponding transfer distance metric (`TransferDist()`), providing a
diff --git a/R/RcppExports.R b/R/RcppExports.R
index a907e27f0..9e4d02b24 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -267,6 +267,10 @@ cpp_transfer_dist_cross_pairs <- function(splits_a, splits_b, n_tip, scale, n_th
     .Call(`_TreeDist_cpp_transfer_dist_cross_pairs`, splits_a, splits_b, n_tip, scale, n_threads)
 }
 
+cpp_mci_impl_score <- function(x, y, n_tips) {
+    .Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips)
+}
+
 cpp_robinson_foulds_distance <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
 }
diff --git a/inst/include/TreeDist/cost_matrix.h b/inst/include/TreeDist/cost_matrix.h
new file mode 100644
index 000000000..fe4dddcf2
--- /dev/null
+++ b/inst/include/TreeDist/cost_matrix.h
@@ -0,0 +1,276 @@
+#ifndef TREEDIST_COST_MATRIX_H_
+#define TREEDIST_COST_MATRIX_H_
+
+// TreeDist CostMatrix — cache-aligned, block-padded cost matrix for the LAP.
+//
+// Rcpp-free public API.  The Rcpp-dependent constructor
+// (CostMatrix(Rcpp::NumericMatrix)) lives in TreeDist's own src/lap.h.
+
+#include "types.h"
+
+#include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+namespace TreeDist {
+
+class CostMatrix {
+private:
+  std::size_t dim_;
+  std::size_t dim8_;
+  alignas(64) std::vector<cost> data_;
+  alignas(64) std::vector<cost> t_data_;
+  bool transposed_;
+
+  static std::size_t block_containing(const std::size_t x) {
+    return ((x + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
+  }
+
+public:
+  // Default constructor for pooled instances (zero-size, no allocation).
+  CostMatrix() : dim_(0), dim8_(0), transposed_(false) {}
+
+  explicit CostMatrix(std::size_t dim)
+    : dim_(dim),
+      dim8_(block_containing(dim_)),
+      data_(std::vector<cost>(dim8_ * dim8_)),
+      t_data_(std::vector<cost>(dim8_ * dim8_)),
+      transposed_(false) {}
+
+  // Resize for reuse.  Only reallocates when the new dimension exceeds the
+  // current buffer capacity; otherwise just updates dim_/dim8_ and marks
+  // the transpose as stale.
+  void resize(std::size_t new_dim) {
+    dim_ = new_dim;
+    dim8_ = block_containing(new_dim);
+    const std::size_t needed = dim8_ * dim8_;
+    if (data_.size() < needed) {
+      data_.resize(needed);
+      t_data_.resize(needed);
+    }
+    transposed_ = false;
+  }
+
+  void reset() noexcept { transposed_ = false; }
+
+  [[nodiscard]] std::size_t dim() const noexcept { return dim_; }
+  [[nodiscard]] std::size_t dim8() const noexcept { return dim8_; }
+
+  // ---- Element access ----
+
+  cost& operator()(lap_row i, lap_col j) {
+    return data_[static_cast<std::size_t>(i) * dim8_ + j];
+  }
+
+  [[nodiscard]] const cost& operator()(lap_row i, lap_col j) const {
+    return data_[static_cast<std::size_t>(i) * dim8_ + j];
+  }
+
+  [[nodiscard]] const cost& row0(lap_col j) const {
+    return data_[j];
+  }
+
+  [[nodiscard]] const cost& entry0(lap_row i) const {
+    return data_[static_cast<std::size_t>(i) * dim8_];
+  }
+
+  [[nodiscard]] cost* row(lap_row i) {
+    return &data_[static_cast<std::size_t>(i) * dim8_];
+  }
+
+  [[nodiscard]] const cost* row(lap_row i) const {
+    return &data_[static_cast<std::size_t>(i) * dim8_];
+  }
+
+  [[nodiscard]] cost* col(lap_col i) {
+    return &t_data_[static_cast<std::size_t>(i) * dim8_];
+  }
+
+  [[nodiscard]] const cost* col(lap_col i) const {
+    return &t_data_[static_cast<std::size_t>(i) * dim8_];
+  }
+
+  // Write a value to both the data and transposed-data arrays.
+  // After filling the entire matrix this way, call markTransposed()
+  // to skip the lazy transpose in makeTranspose().
+  void setWithTranspose(lap_row i, lap_col j, cost value) {
+    data_[static_cast<std::size_t>(i) * dim8_ + j] = value;
+    t_data_[static_cast<std::size_t>(j) * dim8_ + i] = value;
+  }
+
+  void markTransposed() noexcept { transposed_ = true; }
+
+  // ---- Transpose ----
+
+  void makeTranspose() noexcept {
+    if (transposed_) return;
+
+    const cost* __restrict__ data_ptr = data_.data();
+    cost* __restrict__ t_data_ptr = t_data_.data();
+
+#if defined(__GNUC__) || defined(__clang__)
+    data_ptr =
+      static_cast<const cost*>(__builtin_assume_aligned(data_ptr, 64));
+    t_data_ptr =
+      static_cast<cost*>(__builtin_assume_aligned(t_data_ptr, 64));
+#endif
+    for (std::size_t i = 0; i < dim_; i += BLOCK_SIZE) {
+      for (std::size_t j = 0; j < dim_; j += BLOCK_SIZE) {
+        for (std::size_t r = i;
+             r < std::min(i + BLOCK_SIZE, dim_); ++r) {
+          for (std::size_t c = j;
+               c < std::min(j + BLOCK_SIZE, dim_); ++c) {
+            t_data_ptr[c * dim8_ + r] = data_ptr[r * dim8_ + c];
+          }
+        }
+      }
+    }
+    transposed_ = true;
+  }
+
+  void makeUntranspose() noexcept {
+    const cost* __restrict__ data_ptr = t_data_.data();
+    cost* __restrict__ t_data_ptr = data_.data();
+
+#if defined(__GNUC__) || defined(__clang__)
+    data_ptr =
+      static_cast<const cost*>(__builtin_assume_aligned(data_ptr, 64));
+    t_data_ptr =
+      static_cast<cost*>(__builtin_assume_aligned(t_data_ptr, 64));
+#endif
+    for (std::size_t i = 0; i < dim_; i += BLOCK_SIZE) {
+      for (std::size_t j = 0; j < dim_; j += BLOCK_SIZE) {
+        for (std::size_t r = i;
+             r < std::min(i + BLOCK_SIZE, dim_); ++r) {
+          for (std::size_t c = j;
+               c < std::min(j + BLOCK_SIZE, dim_); ++c) {
+            t_data_ptr[c * dim8_ + r] = data_ptr[r * dim8_ + c];
+          }
+        }
+      }
+    }
+  }
+
+  // ---- Padding ----
+
+  void padTrAfterCol(lap_row start_row, cost value) {
+    std::size_t start_index = static_cast<std::size_t>(start_row) * dim8_;
+    std::size_t end_index   = dim_ * dim8_;
+    std::fill(t_data_.begin() + start_index,
+              t_data_.begin() + end_index, value);
+  }
+
+  void padAfterRow(lap_row start_row, cost value) {
+    std::size_t start_index = static_cast<std::size_t>(start_row) * dim8_;
+    std::size_t end_index   = dim_ * dim8_;
+    std::fill(data_.begin() + start_index,
+              data_.begin() + end_index, value);
+  }
+
+  void padTrColAfterRow(const lap_row r, const lap_col start_col,
+                        const cost value) {
+    std::size_t r_offset = r * dim8_;
+    std::size_t actual_start_col = static_cast<std::size_t>(start_col);
+    std::size_t start_index = r_offset + actual_start_col;
+    std::size_t end_index = start_index + dim_ - actual_start_col;
+    std::fill(t_data_.begin() + start_index,
+              t_data_.begin() + end_index, value);
+  }
+
+  void padRowAfterCol(const lap_row r, const lap_col start_col,
+                      const cost value) {
+    std::size_t r_offset = r * dim8_;
+    std::size_t actual_start_col = static_cast<std::size_t>(start_col);
+    std::size_t start_index = r_offset + actual_start_col;
+    std::size_t end_index = start_index + dim_ - actual_start_col;
+    std::fill(data_.begin() + start_index,
+              data_.begin() + end_index, value);
+  }
+
+  // ---- Search ----
+
+  std::pair<cost, lap_row> findColMin(lap_col j,
+                                      lap_row search_dim = -1) {
+    makeTranspose();
+    const cost* col_data = col(j);
+    const lap_row n =
+      (search_dim < 0) ? static_cast<lap_row>(dim_) : search_dim;
+    const auto min_ptr = std::min_element(col_data, col_data + n);
+    return {*min_ptr,
+            static_cast<lap_row>(std::distance(col_data, min_ptr))};
+  }
+
+  std::tuple<cost, cost, lap_col, lap_col> findRowSubmin(
+      const lap_row* i, const std::vector<cost>& v
+  ) const {
+    assert(dim_ > 1);
+
+    const cost* __restrict__ row_i = row(*i);
+    const lap_col dim = static_cast<lap_col>(dim_);
+    const cost* __restrict__ v_ptr = v.data();
+
+    const cost h0 = row_i[0] - v_ptr[0];
+    const cost h1 = row_i[1] - v_ptr[1];
+
+    cost min_val, submin_val;
+    lap_col min_idx, submin_idx;
+
+    if (h0 > h1) {
+      min_val = h1; submin_val = h0;
+      min_idx = 1;  submin_idx = 0;
+    } else {
+      min_val = h0; submin_val = h1;
+      min_idx = 0;  submin_idx = 1;
+    }
+
+    const lap_col j_limit =
+      (dim < 4 ? 0 : static_cast<lap_col>(dim - 3));
+
+    for (lap_col j = 2; j < j_limit; j += 4) {
+      assert(BLOCK_SIZE >= 4);
+      const cost h0 = row_i[j]     - v_ptr[j];
+      const cost h1 = row_i[j + 1] - v_ptr[j + 1];
+      const cost h2 = row_i[j + 2] - v_ptr[j + 2];
+      const cost h3 = row_i[j + 3] - v_ptr[j + 3];
+      if (h0 < submin_val) {
+        if (h0 < min_val) {
+          submin_val = min_val; min_val = h0;
+          submin_idx = min_idx; min_idx = j;
+        } else { submin_val = h0; submin_idx = j; }
+      }
+      if (h1 < submin_val) {
+        if (h1 < min_val) {
+          submin_val = min_val; min_val = h1;
+          submin_idx = min_idx; min_idx = j + 1;
+        } else { submin_val = h1; submin_idx = j + 1; }
+      }
+      if (h2 < submin_val) {
+        if (h2 < min_val) {
+          submin_val = min_val; min_val = h2;
+          submin_idx = min_idx; min_idx = j + 2;
+        } else { submin_val = h2; submin_idx = j + 2; }
+      }
+      if (h3 < submin_val) {
+        if (h3 < min_val) {
+          submin_val = min_val; min_val = h3;
+          submin_idx = min_idx; min_idx = j + 3;
+        } else { submin_val = h3; submin_idx = j + 3; }
+      }
+    }
+    for (lap_col j = j_limit; j < dim; ++j) {
+      const cost h = row_i[j] - v_ptr[j];
+      if (h < submin_val) {
+        if (h < min_val) {
+          submin_val = min_val; min_val = h;
+          submin_idx = min_idx; min_idx = j;
+        } else { submin_val = h; submin_idx = j; }
+      }
+    }
+    return {min_val, submin_val, min_idx, submin_idx};
+  }
+};
+
+} // namespace TreeDist
+
+#endif // TREEDIST_COST_MATRIX_H_
diff --git a/inst/include/TreeDist/lap.h b/inst/include/TreeDist/lap.h
new file mode 100644
index 000000000..673cfa22b
--- /dev/null
+++ b/inst/include/TreeDist/lap.h
@@ -0,0 +1,35 @@
+#ifndef TREEDIST_LAP_H_
+#define TREEDIST_LAP_H_
+
+// LAP (Linear Assignment Problem) — Jonker–Volgenant declarations.
+//
+// Provides the lap() function signature.  The implementation lives in
+// lap_impl.h, guarded by TREEDIST_LAP_IMPLEMENTATION.  Downstream
+// packages should include lap_impl.h in exactly one translation unit
+// with the guard defined.
+
+#include "lap_scratch.h"
+
+namespace TreeDist {
+
+  // Primary overload: caller supplies pre-allocated scratch.
+  extern cost lap(lap_row dim,
+                  CostMatrix& input_cost,
+                  std::vector<lap_col>& rowsol,
+                  std::vector<lap_row>& colsol,
+                  bool allow_interrupt,
+                  LapScratch& scratch);
+
+  // Convenience overload: creates a temporary scratch.
+  inline cost lap(lap_row dim,
+                  CostMatrix& input_cost,
+                  std::vector<lap_col>& rowsol,
+                  std::vector<lap_row>& colsol,
+                  bool allow_interrupt = true) {
+    LapScratch scratch;
+    return lap(dim, input_cost, rowsol, colsol, allow_interrupt, scratch);
+  }
+
+} // namespace TreeDist
+
+#endif // TREEDIST_LAP_H_
diff --git a/inst/include/TreeDist/lap_impl.h b/inst/include/TreeDist/lap_impl.h
new file mode 100644
index 000000000..196c37f6e
--- /dev/null
+++ b/inst/include/TreeDist/lap_impl.h
@@ -0,0 +1,254 @@
+#ifndef TREEDIST_LAP_IMPL_H_
+#define TREEDIST_LAP_IMPL_H_
+
+// LAP (Linear Assignment Problem) — Jonker–Volgenant implementation.
+//
+// Guard this with #define TREEDIST_LAP_IMPLEMENTATION before including.
+// Include in exactly one translation unit per package.
+//
+// Interrupt handling:  define TREEDIST_CHECK_INTERRUPT() before including
+// to enable user-interrupt checks (e.g., Rcpp::checkUserInterrupt()).
+// If not defined, interrupt checks are silently skipped.
+//
+// Original algorithm:
+//   R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm
+//   for Dense and Sparse Linear Assignment Problems," Computing 38,
+//   325-340, 1987.
+
+#ifdef TREEDIST_LAP_IMPLEMENTATION
+
+#include "lap.h"
+
+#ifndef TREEDIST_CHECK_INTERRUPT
+#define TREEDIST_CHECK_INTERRUPT() ((void)0)
+#define TREEDIST_CHECK_INTERRUPT_DEFINED_HERE_
+#endif
+
+namespace TreeDist {
+
+namespace detail {
+  inline bool nontrivially_less_than(cost a, cost b) noexcept {
+    return a + ((a > ROUND_PRECISION) ? 8 : 0) < b;
+  }
+} // namespace detail
+
+// The LAP hot loops are sensitive to instruction alignment — even unrelated
+// changes to other code in the same translation unit can shift layout enough
+// to cause measurable regressions.  Force 64-byte function alignment and
+// 16-byte loop alignment to stabilise performance across builds.
+#if defined(__GNUC__) && !defined(__clang__)
+__attribute__((optimize("align-functions=64", "align-loops=16")))
+#endif
+cost lap(const lap_row dim,
+         CostMatrix& input_cost,
+         std::vector<lap_col>& rowsol,
+         std::vector<lap_row>& colsol,
+         const bool allow_interrupt,
+         LapScratch& scratch)
+{
+  lap_row num_free = 0;
+  scratch.ensure(dim);
+  auto& v       = scratch.v;
+  auto& matches = scratch.matches;
+  std::fill(matches.begin(), matches.begin() + dim, 0);
+  const cost* __restrict__ v_ptr = v.data();
+
+  // COLUMN REDUCTION
+  for (lap_col j = dim; j--; ) {
+    const auto [min, imin] = input_cost.findColMin(j);
+    v[j] = min;
+    ++matches[imin];
+
+    if (matches[imin] == 1) {
+      rowsol[imin] = j;
+      colsol[j] = imin;
+    } else if (v_ptr[j] < v_ptr[rowsol[imin]]) {
+      const lap_col j1 = rowsol[imin];
+      rowsol[imin] = j;
+      colsol[j] = imin;
+      colsol[j1] = -1;
+    } else {
+      colsol[j] = -1;
+    }
+  }
+
+  // REDUCTION TRANSFER
+  auto& freeunassigned = scratch.freeunassigned;
+
+  for (lap_row i = 0; i < dim; ++i) {
+    if (matches[i] == 0) {
+      freeunassigned[num_free++] = i;
+    } else if (matches[i] == 1) {
+      const lap_col j1 = rowsol[i];
+      const cost* row_i = input_cost.row(i);
+      cost min_cost;
+      if (j1 == 0) {
+        min_cost = row_i[1] - v_ptr[1];
+        for (lap_col j = 2; j < dim; ++j) {
+          const cost reduced_cost = row_i[j] - v_ptr[j];
+          if (reduced_cost < min_cost) {
+            min_cost = reduced_cost;
+          }
+        }
+      } else {
+        min_cost = row_i[0] - v_ptr[0];
+        for (lap_col j = 1; j < dim; ++j) {
+          if (j == j1) continue;
+          const cost reduced_cost = row_i[j] - v_ptr[j];
+          if (reduced_cost < min_cost) {
+            min_cost = reduced_cost;
+          }
+        }
+      }
+      v[j1] -= min_cost;
+    }
+  }
+
+  // AUGMENTING ROW REDUCTION
+  auto& col_list = scratch.col_list;
+  int loopcnt = 0;
+
+  do {
+    ++loopcnt;
+
+    lap_row previous_num_free = num_free;
+    num_free = 0;
+    lap_row k = 0;
+    while (k < previous_num_free) {
+      const lap_row i = freeunassigned[k++];
+      const auto [umin, usubmin, min_idx, j2] =
+        input_cost.findRowSubmin(&i, v);
+      lap_col j1 = min_idx;
+
+      lap_row i0 = colsol[j1];
+      const bool strictly_less =
+        detail::nontrivially_less_than(umin, usubmin);
+      if (strictly_less) {
+        v[j1] -= (usubmin - umin);
+      } else if (i0 > -1) {
+        j1 = j2;
+        i0 = colsol[j2];
+      }
+
+      rowsol[i] = j1;
+      colsol[j1] = i;
+
+      if (i0 > -1) {
+        if (strictly_less) {
+          freeunassigned[--k] = i0;
+          if (allow_interrupt) TREEDIST_CHECK_INTERRUPT();
+        } else {
+          freeunassigned[num_free++] = i0;
+        }
+      }
+    }
+  } while (loopcnt < 2);
+
+  // AUGMENT SOLUTION for each free row.
+  auto& d           = scratch.d;
+  auto& predecessor = scratch.predecessor;
+
+  for (lap_row f = 0; f < num_free; ++f) {
+    bool unassignedfound = false;
+    lap_row free_row = freeunassigned[f];
+    const cost* free_row_cost = input_cost.row(free_row);
+    lap_col endofpath = 0;
+    lap_col last = 0;
+    lap_row i;
+    lap_col j1;
+
+    for (lap_col j = 0; j < dim; ++j) {
+      d[j] = free_row_cost[j] - v_ptr[j];
+      predecessor[j] = free_row;
+      col_list[j] = j;
+    }
+
+    cost min = 0;
+    lap_col low = 0;
+    lap_col up = 0;
+
+    do {
+      if (up == low) {
+        last = low - 1;
+        min = d[col_list[up++]];
+
+        for (lap_dim k = up; k < dim; ++k) {
+          const lap_col j = col_list[k];
+          const cost h = d[j];
+          if (h <= min) {
+            if (h < min) {
+              up = low;
+              min = h;
+            }
+            col_list[k] = col_list[up];
+            col_list[up++] = j;
+          }
+        }
+        for (lap_dim k = low; k < up; ++k) {
+          if (colsol[col_list[k]] < 0) {
+            endofpath = col_list[k];
+            unassignedfound = true;
+            break;
+          }
+        }
+      }
+
+      if (!unassignedfound) {
+        j1 = col_list[low++];
+        i = colsol[j1];
+        const cost* row_i = input_cost.row(i);
+        const cost h = row_i[j1] - v_ptr[j1] - min;
+
+        for (lap_dim k = up; k < dim; ++k) {
+          const lap_col j = col_list[k];
+          cost v2 = row_i[j] - v_ptr[j] - h;
+          if (v2 < d[j]) {
+            predecessor[j] = i;
+            if (v2 == min) {
+              if (colsol[j] < 0) {
+                endofpath = j;
+                unassignedfound = true;
+                break;
+              } else {
+                col_list[k] = col_list[up];
+                col_list[up++] = j;
+              }
+            }
+            d[j] = v2;
+          }
+        }
+      }
+    } while (!unassignedfound);
+
+    for (lap_dim k = 0; k <= last; ++k) {
+      j1 = col_list[k];
+      v[j1] += d[j1] - min;
+    }
+
+    do {
+      i = predecessor[endofpath];
+      colsol[endofpath] = i;
+      j1 = endofpath;
+      endofpath = rowsol[i];
+      rowsol[i] = j1;
+    } while (i != free_row);
+  }
+
+  // Calculate optimal cost.
+  cost lapcost = 0;
+  for (lap_dim i = 0; i < dim; ++i) {
+    lapcost += input_cost(i, rowsol[i]);
+  }
+
+  return lapcost;
+}
+
+} // namespace TreeDist
+
+#ifdef TREEDIST_CHECK_INTERRUPT_DEFINED_HERE_
+#undef TREEDIST_CHECK_INTERRUPT
+#undef TREEDIST_CHECK_INTERRUPT_DEFINED_HERE_
+#endif
+
+#endif // TREEDIST_LAP_IMPLEMENTATION
+#endif // TREEDIST_LAP_IMPL_H_
diff --git a/inst/include/TreeDist/lap_scratch.h b/inst/include/TreeDist/lap_scratch.h
new file mode 100644
index 000000000..4a9ca43b3
--- /dev/null
+++ b/inst/include/TreeDist/lap_scratch.h
@@ -0,0 +1,44 @@
+#ifndef TREEDIST_LAP_SCRATCH_H_
+#define TREEDIST_LAP_SCRATCH_H_
+
+// Reusable heap storage for one thread's LAPJV workspace.
+//
+// Pass to the scratch-taking overload of lap() to eliminate per-call
+// std::vector allocations.  In a serial context construct once before the
+// loop; in an OpenMP context allocate one per thread and index by
+// omp_get_thread_num().  All vectors are grown lazily; never shrunk.
+
+#include "cost_matrix.h"
+
+namespace TreeDist {
+
+struct LapScratch {
+  std::vector<cost>    v;
+  std::vector<lap_col> matches;
+  std::vector<lap_row> freeunassigned;
+  std::vector<lap_col> col_list;
+  std::vector<cost>    d;
+  std::vector<lap_row> predecessor;
+  std::vector<lap_col> rowsol;
+  std::vector<lap_row> colsol;
+  CostMatrix score_pool;
+  CostMatrix small_pool;
+
+  void ensure(int dim) noexcept {
+    const int padded = static_cast<int>(
+      ((static_cast<std::size_t>(dim) + BLOCK_SIZE - 1)
+       / BLOCK_SIZE) * BLOCK_SIZE);
+    if (static_cast<int>(v.size())              < padded) v.resize(padded);
+    if (static_cast<int>(matches.size())        < dim)    matches.resize(dim);
+    if (static_cast<int>(freeunassigned.size()) < dim)    freeunassigned.resize(dim);
+    if (static_cast<int>(col_list.size())       < dim)    col_list.resize(dim);
+    if (static_cast<int>(d.size())              < dim)    d.resize(dim);
+    if (static_cast<int>(predecessor.size())    < dim)    predecessor.resize(dim);
+    if (static_cast<int>(rowsol.size())         < dim)    rowsol.resize(dim);
+    if (static_cast<int>(colsol.size())         < dim)    colsol.resize(dim);
+  }
+};
+
+} // namespace TreeDist
+
+#endif // TREEDIST_LAP_SCRATCH_H_
diff --git a/inst/include/TreeDist/mutual_clustering.h b/inst/include/TreeDist/mutual_clustering.h
new file mode 100644
index 000000000..437633f0b
--- /dev/null
+++ b/inst/include/TreeDist/mutual_clustering.h
@@ -0,0 +1,87 @@
+#ifndef TREEDIST_MUTUAL_CLUSTERING_H_
+#define TREEDIST_MUTUAL_CLUSTERING_H_
+
+// Mutual Clustering Information (MCI) — public API.
+//
+// Provides the core MCI score computation between two split sets,
+// clustering entropy, and lookup-table initialization.
+//
+// Implementation lives in mutual_clustering_impl.h, guarded by
+// TREEDIST_MCI_IMPLEMENTATION.
+
+#include "lap.h"
+#include <TreeTools/SplitList.h>   // splitbit, SL_MAX_TIPS, count_bits
+#include <cmath>
+
+namespace TreeDist {
+
+  // ---- Lookup tables (populated by init_lg2_tables) ----
+  //
+  // lg2[i]                = log2(i)  for  0 <= i <= SL_MAX_TIPS
+  // lg2_double_factorial[i] = log2(i!!) for 0 <= i < 2*SL_MAX_TIPS - 2
+  // lg2_unrooted[i]       = log2((2i-5)!!) for i >= 3
+  // lg2_rooted             = &lg2_unrooted[0] + 1  (so lg2_rooted[i] = lg2_unrooted[i+1])
+  //
+  // These are defined in mutual_clustering_impl.h.
+
+  extern double lg2[SL_MAX_TIPS + 1];
+  extern double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
+  extern double lg2_unrooted[SL_MAX_TIPS + 2];
+  extern double* lg2_rooted;
+
+  // Populate lookup tables.  Must be called once before any MCI
+  // computation.  max_tips should be >= the largest tree size used.
+  void init_lg2_tables(int max_tips);
+
+  // ---- Inline helpers ----
+
+  // Information content of a perfectly-matching split pair.
+  // ic_matching(a, b, n) = (a + b) * lg2[n] - a * lg2[a] - b * lg2[b]
+  [[nodiscard]] inline double ic_matching(int16 a, int16 b,
+                                          int16 n) noexcept {
+    const double lg2a = lg2[a];
+    const double lg2b = lg2[b];
+    const double lg2n = lg2[n];
+    return (a + b) * lg2n - a * lg2a - b * lg2b;
+  }
+
+  // Clustering entropy of a split set.
+  // Computes H_clust = -sum_i [ p_i * log2(p_i) + q_i * log2(q_i) ]
+  // where p_i = in_split[i] / n_tips and q_i = 1 - p_i.
+  [[nodiscard]] inline double clustering_entropy(
+      const int* in_split, int n_splits, int n_tips) {
+    if (n_tips <= 1 || n_splits == 0) return 0.0;
+    const double lg2n = std::log2(static_cast<double>(n_tips));
+    double ce = 0.0;
+    for (int i = 0; i < n_splits; ++i) {
+      const int a = in_split[i];
+      const int b = n_tips - a;
+      if (a > 1 && b > 1) {
+        ce += (a * std::log2(static_cast<double>(a)) +
+               b * std::log2(static_cast<double>(b)))
+              / n_tips - lg2n;
+      }
+    }
+    return -ce;
+  }
+
+  // ---- MCI score (declaration only) ----
+  //
+  // Computes the Mutual Clustering Information between two split sets.
+  // Uses sort+merge exact-match detection + LAP on the reduced matrix.
+  //
+  // a_state[i] points to n_bins splitbit words for split i of tree A.
+  // a_in[i]    = popcount of split i (tips in the "1" partition).
+  // Returns the MCI score (higher = more similar).
+  //
+  // Implementation in mutual_clustering_impl.h.
+
+  double mutual_clustering_score(
+      const splitbit* const* a_state, const int16* a_in, int16 a_n_splits,
+      const splitbit* const* b_state, const int16* b_in, int16 b_n_splits,
+      int16 n_bins, int32 n_tips,
+      LapScratch& scratch);
+
+} // namespace TreeDist
+
+#endif // TREEDIST_MUTUAL_CLUSTERING_H_
diff --git a/inst/include/TreeDist/mutual_clustering_impl.h b/inst/include/TreeDist/mutual_clustering_impl.h
new file mode 100644
index 000000000..7f98c8fb9
--- /dev/null
+++ b/inst/include/TreeDist/mutual_clustering_impl.h
@@ -0,0 +1,290 @@
+#ifndef TREEDIST_MUTUAL_CLUSTERING_IMPL_H_
+#define TREEDIST_MUTUAL_CLUSTERING_IMPL_H_
+
+// Mutual Clustering Information — implementation.
+//
+// Guard with #define TREEDIST_MCI_IMPLEMENTATION before including.
+// Include in exactly one translation unit per package (the same TU
+// that defines TREEDIST_LAP_IMPLEMENTATION).
+//
+// Usage:
+//   #define TREEDIST_LAP_IMPLEMENTATION
+//   #define TREEDIST_MCI_IMPLEMENTATION
+//   #include <TreeDist/lap_impl.h>
+//   #include <TreeDist/mutual_clustering_impl.h>
+
+#ifdef TREEDIST_MCI_IMPLEMENTATION
+
+#include "mutual_clustering.h"
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+namespace TreeDist {
+
+// ---- Table definitions ----
+
+double lg2[SL_MAX_TIPS + 1];
+double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
+double lg2_unrooted[SL_MAX_TIPS + 2];
+double* lg2_rooted = &lg2_unrooted[0] + 1;
+
+void init_lg2_tables(int max_tips) {
+  lg2[0] = 0;
+  const int lg2_limit = std::min(max_tips + 1,
+                                 static_cast<int>(SL_MAX_TIPS + 1));
+  for (int i = 1; i < lg2_limit; ++i) {
+    lg2[i] = std::log2(static_cast<double>(i));
+  }
+
+  for (int i = 0; i < 3; ++i) {
+    lg2_double_factorial[i] = 0;
+    lg2_unrooted[i] = 0;
+  }
+
+  const int df_limit = std::min(2 * max_tips,
+                                static_cast<int>(SL_MAX_TIPS + SL_MAX_TIPS - 2));
+  for (int i = 2; i < df_limit; ++i) {
+    lg2_double_factorial[i] =
+      lg2_double_factorial[i - 2] + std::log2(static_cast<double>(i));
+  }
+
+  const int ur_limit = std::min(max_tips + 2,
+                                static_cast<int>(SL_MAX_TIPS + 2));
+  for (int i = 3; i < ur_limit; ++i) {
+    lg2_unrooted[i] = lg2_double_factorial[i + i - 5];
+  }
+}
+
+// ---- Sort+merge exact-match detection (internal) ----
+//
+// Canonicalise each split so bit 0 is always set (flip complement if not),
+// sort by canonical form, merge-scan.  O(n log n).
+//
+// Writes a_match[ai] = bi+1 if matched, else 0.  Likewise b_match.
+// Returns number of exact matches.
+
+namespace detail {
+
+static int16 find_exact_matches_raw(
+    const splitbit* const* a_state, const int16* /*a_in*/, int16 a_n,
+    const splitbit* const* b_state, const int16* /*b_in*/, int16 b_n,
+    int16 n_bins, int32 n_tips,
+    int16* a_match, int16* b_match)
+{
+  std::fill(a_match, a_match + a_n, int16(0));
+  std::fill(b_match, b_match + b_n, int16(0));
+  if (a_n == 0 || b_n == 0) return 0;
+
+  const int16 last_bin = n_bins - 1;
+  const splitbit last_mask = (n_tips % SL_BIN_SIZE == 0)
+    ? ~splitbit(0)
+    : (splitbit(1) << (n_tips % SL_BIN_SIZE)) - 1;
+
+  // Flat buffers for canonical forms
+  std::vector<splitbit> a_canon(static_cast<std::size_t>(a_n) * n_bins);
+  std::vector<splitbit> b_canon(static_cast<std::size_t>(b_n) * n_bins);
+
+  for (int16 i = 0; i < a_n; ++i) {
+    const bool flip = !(a_state[i][0] & 1);
+    for (int16 bin = 0; bin < n_bins; ++bin) {
+      splitbit val = flip ? ~a_state[i][bin] : a_state[i][bin];
+      if (bin == last_bin) val &= last_mask;
+      a_canon[i * n_bins + bin] = val;
+    }
+  }
+  for (int16 i = 0; i < b_n; ++i) {
+    const bool flip = !(b_state[i][0] & 1);
+    for (int16 bin = 0; bin < n_bins; ++bin) {
+      splitbit val = flip ? ~b_state[i][bin] : b_state[i][bin];
+      if (bin == last_bin) val &= last_mask;
+      b_canon[i * n_bins + bin] = val;
+    }
+  }
+
+  // Sort index arrays by canonical form
+  auto canon_less = [&](const splitbit* canon, int16 n_b, int16 i, int16 j) {
+    for (int16 bin = 0; bin < n_b; ++bin) {
+      const splitbit vi = canon[i * n_b + bin];
+      const splitbit vj = canon[j * n_b + bin];
+      if (vi < vj) return true;
+      if (vi > vj) return false;
+    }
+    return false;
+  };
+
+  std::vector<int16> a_order(a_n), b_order(b_n);
+  std::iota(a_order.begin(), a_order.end(), int16(0));
+  std::iota(b_order.begin(), b_order.end(), int16(0));
+
+  std::sort(a_order.begin(), a_order.end(),
+            [&](int16 i, int16 j) {
+              return canon_less(a_canon.data(), n_bins, i, j);
+            });
+  std::sort(b_order.begin(), b_order.end(),
+            [&](int16 i, int16 j) {
+              return canon_less(b_canon.data(), n_bins, i, j);
+            });
+
+  // Merge-scan
+  int16 exact_n = 0;
+  int16 ai_pos = 0, bi_pos = 0;
+  while (ai_pos < a_n && bi_pos < b_n) {
+    const int16 ai = a_order[ai_pos];
+    const int16 bi = b_order[bi_pos];
+
+    int cmp = 0;
+    for (int16 bin = 0; bin < n_bins; ++bin) {
+      const splitbit va = a_canon[ai * n_bins + bin];
+      const splitbit vb = b_canon[bi * n_bins + bin];
+      if (va < vb) { cmp = -1; break; }
+      if (va > vb) { cmp =  1; break; }
+    }
+
+    if (cmp < 0) {
+      ++ai_pos;
+    } else if (cmp > 0) {
+      ++bi_pos;
+    } else {
+      a_match[ai] = bi + 1;
+      b_match[bi] = ai + 1;
+      ++exact_n;
+      ++ai_pos;
+      ++bi_pos;
+    }
+  }
+
+  return exact_n;
+}
+
+} // namespace detail
+
+
+// ---- MCI score implementation ----
+
+double mutual_clustering_score(
+    const splitbit* const* a_state, const int16* a_in, int16 a_n_splits,
+    const splitbit* const* b_state, const int16* b_in, int16 b_n_splits,
+    int16 n_bins, int32 n_tips,
+    LapScratch& scratch)
+{
+  if (a_n_splits == 0 || b_n_splits == 0 || n_tips == 0) return 0.0;
+
+  const int16 most_splits = std::max(a_n_splits, b_n_splits);
+  const double n_tips_rcp = 1.0 / static_cast<double>(n_tips);
+
+  constexpr cost max_score  = BIG;
+  constexpr double over_max = 1.0 / static_cast<double>(BIG);
+  const double max_over_tips = static_cast<double>(BIG) * n_tips_rcp;
+  const double lg2_n = lg2[n_tips];
+
+  // --- Phase 1: O(n log n) exact-match detection ---
+  std::vector<int16> a_match_buf(a_n_splits);
+  std::vector<int16> b_match_buf(b_n_splits);
+
+  const int16 exact_n = detail::find_exact_matches_raw(
+    a_state, a_in, a_n_splits,
+    b_state, b_in, b_n_splits,
+    n_bins, n_tips,
+    a_match_buf.data(), b_match_buf.data());
+
+  const int16* a_match = a_match_buf.data();
+  const int16* b_match = b_match_buf.data();
+
+  // Accumulate exact-match score
+  double exact_score = 0.0;
+  for (int16 ai = 0; ai < a_n_splits; ++ai) {
+    if (a_match[ai]) {
+      const int16 na = a_in[ai];
+      const int16 nA = static_cast<int16>(n_tips - na);
+      exact_score += ic_matching(na, nA, static_cast<int16>(n_tips));
+    }
+  }
+
+  // Early exit when everything matched exactly
+  if (exact_n == b_n_splits || exact_n == a_n_splits) {
+    return exact_score * n_tips_rcp;
+  }
+
+  // --- Phase 2: fill cost matrix for unmatched splits only (O(k²)) ---
+  const int16 lap_n = most_splits - exact_n;
+
+  std::vector<int16> a_unmatch, b_unmatch;
+  a_unmatch.reserve(lap_n);
+  b_unmatch.reserve(lap_n);
+  for (int16 ai = 0; ai < a_n_splits; ++ai) {
+    if (!a_match[ai]) a_unmatch.push_back(ai);
+  }
+  for (int16 bi = 0; bi < b_n_splits; ++bi) {
+    if (!b_match[bi]) b_unmatch.push_back(bi);
+  }
+
+  scratch.score_pool.resize(lap_n);
+  CostMatrix& score = scratch.score_pool;
+
+  const int16 a_unmatched_n = static_cast<int16>(a_unmatch.size());
+  const int16 b_unmatched_n = static_cast<int16>(b_unmatch.size());
+
+  for (int16 a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
+    const int16 ai = a_unmatch[a_pos];
+    const int16 na = a_in[ai];
+    const int16 nA = static_cast<int16>(n_tips - na);
+    const splitbit* a_row = a_state[ai];
+
+    const double offset_a = lg2_n - lg2[na];
+    const double offset_A = lg2_n - lg2[nA];
+
+    for (int16 b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
+      const int16 bi = b_unmatch[b_pos];
+      const splitbit* b_row = b_state[bi];
+      int16 a_and_b = 0;
+      for (int16 bin = 0; bin < n_bins; ++bin) {
+        a_and_b += TreeTools::count_bits(a_row[bin] & b_row[bin]);
+      }
+
+      const int16 nb = b_in[bi];
+      const int16 nB = static_cast<int16>(n_tips - nb);
+      const int16 a_and_B = na - a_and_b;
+      const int16 A_and_b = nb - a_and_b;
+      const int16 A_and_B = nA - A_and_b;
+
+      if (a_and_b == A_and_b && a_and_b == a_and_B
+          && a_and_b == A_and_B) {
+        score(a_pos, b_pos) = max_score;
+      } else {
+        const double lg2_nb = lg2[nb];
+        const double lg2_nB = lg2[nB];
+        const double ic_sum =
+          a_and_b * (lg2[a_and_b] + offset_a - lg2_nb) +
+          a_and_B * (lg2[a_and_B] + offset_a - lg2_nB) +
+          A_and_b * (lg2[A_and_b] + offset_A - lg2_nb) +
+          A_and_B * (lg2[A_and_B] + offset_A - lg2_nB);
+        score(a_pos, b_pos) =
+          max_score - static_cast<cost>(ic_sum * max_over_tips);
+      }
+    }
+    if (b_unmatched_n < lap_n) {
+      score.padRowAfterCol(a_pos, b_unmatched_n, max_score);
+    }
+  }
+  if (a_unmatched_n < lap_n) {
+    score.padAfterRow(a_unmatched_n, max_score);
+  }
+
+  // --- Phase 3: solve LAP on the reduced matrix ---
+  scratch.ensure(lap_n);
+  auto& rowsol = scratch.rowsol;
+  auto& colsol = scratch.colsol;
+
+  const double lap_score =
+    static_cast<double>((max_score * lap_n) -
+      lap(lap_n, score, rowsol, colsol, false, scratch)) * over_max;
+  return lap_score + exact_score * n_tips_rcp;
+}
+
+} // namespace TreeDist
+
+#endif // TREEDIST_MCI_IMPLEMENTATION
+#endif // TREEDIST_MUTUAL_CLUSTERING_IMPL_H_
diff --git a/inst/include/TreeDist/types.h b/inst/include/TreeDist/types.h
new file mode 100644
index 000000000..c953989be
--- /dev/null
+++ b/inst/include/TreeDist/types.h
@@ -0,0 +1,32 @@
+#ifndef TREEDIST_TYPES_H_
+#define TREEDIST_TYPES_H_
+
+// TreeDist public type definitions.
+//
+// Rcpp-free: uses only standard headers plus TreeTools (for SL_MAX_SPLITS).
+// Downstream packages include via:  #include <TreeDist/types.h>
+
+#include <cstdint>
+#include <limits>
+#include <TreeTools/SplitList.h>   // SL_MAX_SPLITS, splitbit
+
+namespace TreeDist {
+
+  using int16 = int_fast16_t;
+  using int32 = int_fast32_t;
+  using cost  = int_fast64_t;
+
+  using lap_dim = int;
+  using lap_row = lap_dim;
+  using lap_col = lap_dim;
+
+  constexpr cost BIG =
+    (std::numeric_limits<cost>::max)() / SL_MAX_SPLITS;
+
+  constexpr cost ROUND_PRECISION = 2048 * 2048;
+
+  constexpr std::size_t BLOCK_SIZE = 16;
+
+} // namespace TreeDist
+
+#endif // TREEDIST_TYPES_H_
diff --git a/src/Makevars b/src/Makevars
index 139b1d17f..d3df534eb 100644
--- a/src/Makevars
+++ b/src/Makevars
@@ -1,2 +1,3 @@
+PKG_CPPFLAGS = -I../inst/include
 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
 PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)
diff --git a/src/Makevars.win b/src/Makevars.win
index 139b1d17f..d3df534eb 100644
--- a/src/Makevars.win
+++ b/src/Makevars.win
@@ -1,2 +1,3 @@
+PKG_CPPFLAGS = -I../inst/include
 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
 PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index b3d661be0..72309c5ba 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -611,6 +611,19 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// cpp_mci_impl_score
+double cpp_mci_impl_score(const Rcpp::RawMatrix& x, const Rcpp::RawMatrix& y, int n_tips);
+RcppExport SEXP _TreeDist_cpp_mci_impl_score(SEXP xSEXP, SEXP ySEXP, SEXP n_tipsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const Rcpp::RawMatrix& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::RawMatrix& >::type y(ySEXP);
+    Rcpp::traits::input_parameter< int >::type n_tips(n_tipsSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_mci_impl_score(x, y, n_tips));
+    return rcpp_result_gen;
+END_RCPP
+}
 // cpp_robinson_foulds_distance
 List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
 RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
@@ -753,6 +766,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_TreeDist_cpp_transfer_dist_scored", (DL_FUNC) &_TreeDist_cpp_transfer_dist_scored, 4},
     {"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4},
     {"_TreeDist_cpp_transfer_dist_cross_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_cross_pairs, 5},
+    {"_TreeDist_cpp_mci_impl_score", (DL_FUNC) &_TreeDist_cpp_mci_impl_score, 3},
     {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3},
     {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3},
     {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3},
diff --git a/src/ints.h b/src/ints.h
index a5af8774f..50fd3cc70 100644
--- a/src/ints.h
+++ b/src/ints.h
@@ -1,10 +1,13 @@
 #ifndef _TREEDIST_INT_H
 #define _TREEDIST_INT_H
 
-#include <cstdint>
+#include <TreeDist/types.h>
 
-using int16 = int_fast16_t;
-using int32 = int_fast32_t;
+// Re-export shared types to global scope for backward compatibility.
+using TreeDist::int16;
+using TreeDist::int32;
+
+// Types used only within TreeDist's own source.
 using uint32 = uint_fast32_t;
 using grf_match = std::vector<int>;
 
diff --git a/src/lap.cpp b/src/lap.cpp
index 4963fa9ec..e77c712f6 100644
--- a/src/lap.cpp
+++ b/src/lap.cpp
@@ -26,6 +26,15 @@
  https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0:
  *
  *************************************************************************/
+
+// NOTE: The LAP hot loops are highly sensitive to instruction alignment and
+// register allocation, which are affected by the TU's full include graph.
+// Do NOT include lap_impl.h here — that header is for downstream LinkingTo
+// consumers only.  TreeDist's own lap() is compiled directly in this file
+// to preserve the codegen context that was profiled and tuned.
+//
+// If the algorithm changes, update BOTH this file and lap_impl.h.
+
 #include "lap.h"
 #include <Rcpp/Lightest>
 
@@ -37,11 +46,42 @@ Rcpp::List lapjv(Rcpp::NumericMatrix &x, Rcpp::NumericVector &maxX) {
   const lap_dim spare_rows = n_row - n_col;
   const cost max_score = cost(BIG / max_dim);
   const double x_max = maxX[0];
+  const double scale_factor = max_score / x_max;
   
   std::vector<lap_col> rowsol(max_dim);
   std::vector<lap_row> colsol(max_dim);
   
-  cost_matrix input(x, x_max);
+  // Build cost matrix.  Fill the transposed buffer first (matching R's
+  // column-major storage for sequential reads) then untranspose.
+  cost_matrix input(max_dim);
+  const double* __restrict__ src_data = REAL(x);
+  cost* __restrict__ t_ptr = input.col(0);
+  const std::size_t dim8 = input.dim8();
+  
+  for (lap_col c = 0; c < n_col; ++c) {
+    const std::size_t t_off = static_cast<std::size_t>(c) * dim8;
+    const std::size_t s_off = static_cast<std::size_t>(c) * n_row;
+    for (lap_row r = 0; r < n_row; ++r) {
+      t_ptr[t_off + r] = static_cast<cost>(src_data[s_off + r] * scale_factor);
+    }
+    // Pad remaining rows in this transposed column
+    for (lap_row r = n_row; r < max_dim; ++r) {
+      t_ptr[t_off + r] = max_score;
+    }
+    for (std::size_t r = max_dim; r < dim8; ++r) {
+      t_ptr[t_off + r] = max_score;
+    }
+  }
+  // Pad remaining transposed columns
+  for (lap_col c = n_col; c < max_dim; ++c) {
+    const std::size_t t_off = static_cast<std::size_t>(c) * dim8;
+    for (std::size_t r = 0; r < dim8; ++r) {
+      t_ptr[t_off + r] = max_score;
+    }
+  }
+  
+  // Untranspose: t_data_ -> data_
+  input.makeUntranspose();
   
   cost score = lap(max_dim, input, rowsol, colsol);
   
@@ -60,27 +100,22 @@ Rcpp::List lapjv(Rcpp::NumericMatrix &x, Rcpp::NumericVector &maxX) {
   );
 }
 
+namespace {
 inline bool nontrivially_less_than(cost a, cost b) noexcept {
   return a + ((a > ROUND_PRECISION) ? 8 : 0) < b;
 }
+} // anonymous namespace
 
 /* This function is the jv shortest augmenting path algorithm to solve the 
    assignment problem */
+namespace TreeDist {
+
 cost lap(const lap_row dim,
-         cost_matrix &input_cost,
+         CostMatrix &input_cost,
          std::vector<lap_col> &rowsol,
          std::vector<lap_row> &colsol,
          const bool allow_interrupt,
          LapScratch &scratch)
-  
-  // input:
-  // dim        - problem size
-  // input_cost - cost matrix
-  
-  // output:
-  // rowsol     - column assigned to row in solution
-  // colsol     - row assigned to column in solution
-  
 {
   lap_row num_free = 0;
   scratch.ensure(dim);
@@ -316,3 +351,4 @@ cost lap(const lap_row dim,
 
   return lapcost;
 }
+} // namespace TreeDist
diff --git a/src/lap.h b/src/lap.h
index b1a225087..592ce9b2c 100644
--- a/src/lap.h
+++ b/src/lap.h
@@ -9,456 +9,27 @@
 // because the LAP hot loops are sensitive to instruction alignment — even
 // unrelated inline functions in the same translation unit can shift code
 // layout enough to cause measurable regressions.
-
-#include <TreeTools/SplitList.h>
-#include <Rcpp/Lightest>
-
-#include <limits> /* for numeric_limits */
-#include <vector> /* for vector */
-
-#include "ints.h"
-
-/*************** TYPES      *******************/
-
-using cost = int_fast64_t;
-using lap_dim = int;
-using lap_row = lap_dim;
-using lap_col = lap_dim;
-
-/* For a reason I've not determined, shrinking BIG is necessary to avoid 
- * an infinite loop in lap. */
-constexpr cost BIG = (std::numeric_limits<cost>::max)() / SL_MAX_SPLITS;
-constexpr cost ROUND_PRECISION = 2048 * 2048;
-constexpr size_t BLOCK_SIZE = 16;
-
-class CostMatrix {
-private:
-  size_t dim_; // Important not to use int16, which will overflow on *
-  size_t dim8_;
-  alignas(64) std::vector<cost> data_;
-  alignas(64) std::vector<cost> t_data_;
-  bool transposed_;
-  
-  static size_t block_containing(const size_t x) {
-    return ((x + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
-  }
-  
-public:
-  // Default constructor for pooled instances (zero-size, no allocation).
-  CostMatrix() : dim_(0), dim8_(0), transposed_(false) {}
-
-  CostMatrix(size_t dim)
-    : dim_(dim),
-      dim8_(block_containing(dim_)),
-      data_(std::vector<cost>(dim8_ * dim8_)),
-      t_data_(std::vector<cost>(dim8_ * dim8_)),
-      transposed_(false) {}
-
-  // Resize for reuse.  Only reallocates when the new dimension exceeds the
-  // current buffer capacity; otherwise just updates dim_/dim8_ and marks
-  // the transpose as stale.  New elements (if any) are value-initialised.
-  void resize(size_t new_dim) {
-    dim_ = new_dim;
-    dim8_ = block_containing(new_dim);
-    const size_t needed = dim8_ * dim8_;
-    if (data_.size() < needed) {
-      data_.resize(needed);
-      t_data_.resize(needed);
-    }
-    transposed_ = false;
-  }
-      
-  CostMatrix(const Rcpp::NumericMatrix& src, const double x_max)
-    : dim_((std::max(src.nrow(), src.ncol()))),  // or pad here as needed
-      dim8_(block_containing(dim_)),
-      data_(std::vector<cost>(dim8_ * dim_)),
-      t_data_(std::vector<cost>(dim8_ * dim_)),
-      transposed_(true)
-  {
-    // Compute scale factor
-    const cost max_score = cost(BIG / dim_);
-    double scale_factor = max_score / x_max;
-    
-    const lap_row n_row = src.nrow();
-    const lap_col n_col = src.ncol();
-    
-    const double* __restrict src_data = REAL(src);
-    cost* __restrict dest_data = t_data_.data();
-    
-    for (lap_col c = 0; c < n_col; ++c) {
-      const size_t data_c = c * dim8_;
-      const size_t src_c = c * n_row;
-      for (lap_row r = 0; r < n_row; ++r) {
-        // Marginally faster than std::transform
-        dest_data[data_c + r] = static_cast<cost>(src_data[src_c + r] * scale_factor);
-      }
-      
-      padTrColAfterRow(c, n_row, max_score);  // padding now goes after row `c`
-    }
-    
-    padTrAfterCol(n_col, max_score);
-    
-    makeUntranspose();
-  }
-  
-  // Reset for reuse at the same dimension — clears the transpose cache so the
-  // next findColMin() re-builds it from fresh data.  Call between pair
-  // computations when reusing a thread-local CostMatrix instance.
-  void reset() noexcept { transposed_ = false; }
-
-  [[nodiscard]] size_t dim() const noexcept { return dim_; }
-
-  void makeTranspose() noexcept {
-    if (transposed_) return;
-    
-    const cost* __restrict data_ptr = data_.data();
-    cost* __restrict t_data_ptr = t_data_.data();
-    
-#if defined(__GNUC__) || defined(__clang__)
-    data_ptr = static_cast<const cost*>(__builtin_assume_aligned(data_ptr, 64));
-    t_data_ptr = static_cast<cost*>(__builtin_assume_aligned(t_data_ptr, 64));
-#endif
-    for (size_t i = 0; i < dim_; i += BLOCK_SIZE) {
-      for (size_t j = 0; j < dim_; j += BLOCK_SIZE) {
-        for (size_t r = i; r < std::min(i + BLOCK_SIZE, dim_); ++r) {
-          for (size_t c = j; c < std::min(j + BLOCK_SIZE, dim_); ++c) {
-            t_data_ptr[c * dim8_ + r] = data_ptr[r * dim8_ + c];
-          }
-        }
-      }
-    }
-    
-    transposed_ = true;
-  }
-  
-  void makeUntranspose() noexcept {
-    const cost* __restrict data_ptr = t_data_.data();
-    cost* __restrict t_data_ptr = data_.data();
-    
-#if defined(__GNUC__) || defined(__clang__)
-    data_ptr = static_cast<const cost*>(__builtin_assume_aligned(data_ptr, 64));
-    t_data_ptr = static_cast<cost*>(__builtin_assume_aligned(t_data_ptr, 64));
-#endif
-    for (size_t i = 0; i < dim_; i += BLOCK_SIZE) {
-      for (size_t j = 0; j < dim_; j += BLOCK_SIZE) {
-        for (size_t r = i; r < std::min(i + BLOCK_SIZE, dim_); ++r) {
-          for (size_t c = j; c < std::min(j + BLOCK_SIZE, dim_); ++c) {
-            t_data_ptr[c * dim8_ + r] = data_ptr[r * dim8_ + c];
-          }
-        }
-      }
-    }
-  }
-  
-  
-  // Access operator for read/write
-  cost& operator()(lap_row i, lap_col j) {
-    return data_[static_cast<size_t>(i) * dim8_ + j];
-  }
-  
-  // Const version for read-only access
-  [[nodiscard]] const cost& operator()(lap_row i, lap_col j) const {
-    return data_[static_cast<size_t>(i) * dim8_ + j];
-  }
-  
-  [[nodiscard]] const cost& row0(lap_col j) const {
-    return data_[j];
-  }
-  
-  [[nodiscard]] const cost& entry0(lap_row i) const {
-    return data_[static_cast<size_t>(i) * dim8_];
-  }
-  
-  [[nodiscard]] cost* row(lap_row i) {
-    return &data_[static_cast<size_t>(i) * dim8_];
-  }
-  
-  [[nodiscard]] const cost* row(lap_row i) const {
-    return &data_[static_cast<size_t>(i) * dim8_];
-  }
-  
-  [[nodiscard]] cost* col(lap_col i) {
-    return &t_data_[static_cast<size_t>(i) * dim8_];
-  }
-  
-  [[nodiscard]] const cost* col(lap_col i) const {
-    return &t_data_[static_cast<size_t>(i) * dim8_];
-  }
-  
-  void padTrAfterCol(lap_row start_row, cost value) {
-    size_t start_index = static_cast<size_t>(start_row) * dim8_;
-    size_t end_index   = dim_ * dim8_;
-    std::fill(t_data_.begin() + start_index, t_data_.begin() + end_index, value);
-  }
-  
-  void padAfterRow(lap_row start_row, cost value) {
-    size_t start_index = static_cast<size_t>(start_row) * dim8_;
-    size_t end_index   = dim_ * dim8_;
-    std::fill(data_.begin() + start_index, data_.begin() + end_index, value);
-  }
-  
-  void padTrColAfterRow(const lap_row r, const lap_col start_col,
-                         const cost value) {
-    size_t r_offset = r * dim8_;
-    size_t actual_start_col = static_cast<size_t>(start_col);
-    size_t start_index = r_offset + actual_start_col;
-    size_t end_index = start_index + dim_ - actual_start_col;
-    std::fill(t_data_.begin() + start_index, t_data_.begin() + end_index, value);
-  }
-  
-  void padRowAfterCol(const lap_row r, const lap_col start_col,
-                      const cost value) {
-    size_t r_offset = r * dim8_;
-    size_t actual_start_col = static_cast<size_t>(start_col);
-    size_t start_index = r_offset + actual_start_col;
-    size_t end_index = start_index + dim_ - actual_start_col;
-    std::fill(data_.begin() + start_index, data_.begin() + end_index, value);
-  }
-  
-  std::pair<cost, lap_row> findColMin(lap_col j,
-                                      lap_row search_dim = -1) {
-    makeTranspose();
-    const cost* col_data = col(j);
-    const lap_row n = (search_dim < 0) ? static_cast<lap_row>(dim_) : search_dim;
-    const auto min_ptr = std::min_element(col_data, col_data + n);
-    return {*min_ptr,
-            static_cast<lap_row>(std::distance(col_data, min_ptr))};
-  }
-  
-  // test2000 = 119 ms (!)
-  //     Find minimum and second minimum reduced cost over columns.
-  std::tuple<cost, cost, lap_col, lap_col> findRowSubmin(
-      const lap_row* i, const std::vector<cost>& v
-  ) const {
-    assert(dim_ > 1);
-    
-    const cost* __restrict row_i = row(*i);
-    const lap_col dim = static_cast<lap_col>(dim_);
-    const cost* __restrict v_ptr = v.data();
-    
-    const cost h0 = row_i[0] - v_ptr[0];
-    const cost h1 = row_i[1] - v_ptr[1];
-    
-    cost min_val, submin_val;
-    lap_col min_idx, submin_idx;
-    
-    if (h0 > h1) {
-      min_val = h1; submin_val = h0;
-      min_idx = 1; submin_idx = 0;
-    } else {
-      min_val = h0; submin_val = h1;
-      min_idx = 0; submin_idx = 1;
-    }
-    
-    const lap_col j_limit = (dim < 4 ? 0 : static_cast<lap_col>(dim - 3));
-    
-    for (lap_col j = 2; j < j_limit; j += 4) {
-      assert(BLOCK_SIZE >= 4);  // Unrolling loop x4 gives ~20% speedup
-      const cost h0 = row_i[j] - v_ptr[j];
-      const cost h1 = row_i[j + 1] - v_ptr[j + 1];
-      const cost h2 = row_i[j + 2] - v_ptr[j + 2];
-      const cost h3 = row_i[j + 3] - v_ptr[j + 3];
-      if (h0 < submin_val) {
-        if (h0 < min_val) {
-          submin_val = min_val;
-          min_val = h0;
-          submin_idx = min_idx;
-          min_idx = j;
-        } else {
-          submin_val = h0;
-          submin_idx = j;
-        }
-      }
-      if (h1 < submin_val) {
-        if (h1 < min_val) {
-          submin_val = min_val;
-          min_val = h1;
-          submin_idx = min_idx;
-          min_idx = j + 1;
-        } else {
-          submin_val = h1;
-          submin_idx = j + 1;
-        }
-      }
-      if (h2 < submin_val) {
-        if (h2 < min_val) {
-          submin_val = min_val;
-          min_val = h2;
-          submin_idx = min_idx;
-          min_idx = j + 2;
-        } else {
-          submin_val = h2;
-          submin_idx = j + 2;
-        }
-      }
-      if (h3 < submin_val) {
-        if (h3 < min_val) {
-          submin_val = min_val;
-          min_val = h3;
-          submin_idx = min_idx;
-          min_idx = j + 3;
-        } else {
-          submin_val = h3;
-          submin_idx = j + 3;
-        }
-      }
-    }
-    for (lap_col j = j_limit; j < dim; ++j) {
-      const cost h = row_i[j] - v_ptr[j];
-      if (h < submin_val) {
-        if (h < min_val) {
-          submin_val = min_val;
-          min_val = h;
-          submin_idx = min_idx;
-          min_idx = j;
-        } else {
-          submin_val = h;
-          submin_idx = j;
-        }
-      }
-    }
-    return {min_val, submin_val, min_idx, submin_idx};
-  }
-  
-  // test2000 = 260 ms
-  std::tuple<cost, cost, lap_col, lap_col> findRowSubminNaive(
-      const lap_row* i,
-      const std::vector<cost>& v_ptr
-  ) {
-    const cost* __restrict row_i = row(*i);
-    const lap_col dim = static_cast<lap_col>(dim_);
-    
-    if (dim < 2) {
-      return {row_i[0] - v_ptr[0], std::numeric_limits<cost>::max(), 0, 0};
-    }
-    
-    // Initialize with first two elements
-    cost h0 = row_i[0] - v_ptr[0];
-    cost h1 = row_i[1] - v_ptr[1];
-    
-    cost min_val, submin_val;
-    lap_col min_idx, submin_idx;
-    
-    if (h0 <= h1) {
-      min_val = h0; submin_val = h1;
-      min_idx = 0; submin_idx = 1;
-    } else {
-      min_val = h1; submin_val = h0;
-      min_idx = 1; submin_idx = 0;
-    }
-    
-    // Process remaining elements
-    for (lap_col j = 2; j < dim; ++j) {
-      const cost h = row_i[j] - v_ptr[j];
-      
-      if (h < min_val) {
-        submin_val = min_val;
-        submin_idx = min_idx;
-        min_val = h;
-        min_idx = j;
-      } else if (h < submin_val) {
-        submin_val = h;
-        submin_idx = j;
-      }
-    }
-    
-    return {min_val, submin_val, min_idx, submin_idx};
-  }
-  
-  // test2000 = 370 ms (!)
-  std::tuple<cost, cost, lap_col, lap_col> findRowSubminTwoPassNaive(
-      const lap_row* i,
-      const std::vector<cost>& v_ptr
-  ) {
-    const cost* __restrict row_i = row(*i);
-    cost min_val = std::numeric_limits<cost>::max();
-    lap_col min_idx = 0;
-    const lap_col dim = static_cast<lap_col>(dim_);
-    
-    for (lap_col j = 0; j < dim; ++j) {
-      const cost h = row_i[j] - v_ptr[j];
-      if (h < min_val) {
-        min_val = h;
-        min_idx = j;
-      }
-    }
-    
-    // Second pass: find subminimum
-    cost submin_val = std::numeric_limits<cost>::max();
-    lap_col submin_idx = (min_idx == 0) ? 1 : 0;
-    
-    for (lap_col j = 0; j < dim; ++j) {
-      if (j != min_idx) {
-        const cost h = row_i[j] - v_ptr[j];
-        if (h < submin_val) {
-          submin_val = h;
-          submin_idx = j;
-        }
-      }
-    }
-    
-    return {min_val, submin_val, min_idx, submin_idx};
-  }
-};
-
-using cost_matrix = CostMatrix;
-
-// ---------------------------------------------------------------------------
-// LapScratch — reusable heap storage for one thread's LAPJV workspace.
 //
-// Pass to the scratch-taking overload of lap() to eliminate the ~6 per-call
-// std::vector allocations.  In a serial context construct once before the
-// loop; in an OpenMP context allocate one per thread and index by
-// omp_get_thread_num().  All vectors are grown lazily; never shrunk.
-// ---------------------------------------------------------------------------
-struct LapScratch {
-  std::vector<cost>    v;               // column dual variables (padded)
-  std::vector<lap_col> matches;         // assignment-count per row
-  std::vector<lap_row> freeunassigned;  // list of unassigned rows
-  std::vector<lap_col> col_list;        // column scan list
-  std::vector<cost>    d;               // Dijkstra distances
-  std::vector<lap_row> predecessor;     // augmenting-path predecessors
-  // rowsol / colsol are included so score functions that don't need the
-  // solution afterwards can avoid a separate allocation.
-  std::vector<lap_col> rowsol;
-  std::vector<lap_row> colsol;
-  // Pooled CostMatrix instances — reused across pairs to avoid per-call
-  // heap allocation.  Grown lazily on first use, never shrunk.
-  CostMatrix score_pool;               // main cost matrix
-  CostMatrix small_pool;               // reduced matrix (exact-match path)
-
-  void ensure(int dim) noexcept {
-    const int padded = static_cast<int>(
-        ((static_cast<size_t>(dim) + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE);
-    if (static_cast<int>(v.size())              < padded) v.resize(padded);
-    if (static_cast<int>(matches.size())        < dim)    matches.resize(dim);
-    if (static_cast<int>(freeunassigned.size()) < dim)    freeunassigned.resize(dim);
-    if (static_cast<int>(col_list.size())       < dim)    col_list.resize(dim);
-    if (static_cast<int>(d.size())              < dim)    d.resize(dim);
-    if (static_cast<int>(predecessor.size())    < dim)    predecessor.resize(dim);
-    if (static_cast<int>(rowsol.size())         < dim)    rowsol.resize(dim);
-    if (static_cast<int>(colsol.size())         < dim)    colsol.resize(dim);
-  }
-};
-
-/*************** FUNCTIONS  *******************/
-
-// Primary overload: caller supplies pre-allocated scratch (zero alloc in hot loop).
-extern cost lap(lap_row dim,
-                cost_matrix &input_cost,
-                std::vector<lap_col> &rowsol,
-                std::vector<lap_row> &colsol,
-                bool allow_interrupt,
-                LapScratch &scratch);
-
-// Convenience overload: creates a temporary scratch (for one-off calls).
-inline cost lap(lap_row dim,
-                cost_matrix &input_cost,
-                std::vector<lap_col> &rowsol,
-                std::vector<lap_row> &colsol,
-                bool allow_interrupt = true) {
-  LapScratch scratch;
-  return lap(dim, input_cost, rowsol, colsol, allow_interrupt, scratch);
-}
+// Public types, CostMatrix, LapScratch, and lap() declarations live in
+// inst/include/TreeDist/ headers (shared with downstream LinkingTo users).
+// This wrapper re-exports them to global scope for backward compatibility.
+
+#include <TreeDist/lap.h>     // types, CostMatrix, LapScratch, lap()
+
+#include "ints.h"             // grf_match, uint32 (TreeDist-internal types)
+
+// Re-export shared types and classes to global scope.
+using TreeDist::cost;
+using TreeDist::lap_dim;
+using TreeDist::lap_row;
+using TreeDist::lap_col;
+using TreeDist::BIG;
+using TreeDist::ROUND_PRECISION;
+using TreeDist::BLOCK_SIZE;
+using TreeDist::CostMatrix;
+using TreeDist::LapScratch;
+using TreeDist::lap;
+
+using cost_matrix = TreeDist::CostMatrix;
 
 #endif
diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp
index 4a3b77e1f..99347300b 100644
--- a/src/tree_distance_functions.cpp
+++ b/src/tree_distance_functions.cpp
@@ -1,35 +1,47 @@
+#include <TreeTools/SplitList.h>
 #include <Rcpp/Lightest>
-#include <TreeTools/SplitList.h> /* for SL_MAX_TIPS */
 
-#include <cmath> /* for log2() */
+// Provide the MCI table definitions and implementation in this TU.
+#define TREEDIST_MCI_IMPLEMENTATION
+#include <TreeDist/mutual_clustering_impl.h>
 
 #include "tree_distances.h"
 
-using namespace Rcpp;
-
-constexpr int32 LG2_SIZE = SL_MAX_TIPS + 1;
-
-double lg2[LG2_SIZE];
-double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
-double lg2_unrooted[SL_MAX_TIPS + 2];
-double *lg2_rooted = &lg2_unrooted[0] + 1;
+// Populate lookup tables at library load time.
 __attribute__((constructor))
   void initialize_ldf() {
-    lg2[0] = 0;
-    for (int32 i = 1; i != LG2_SIZE; ++i) {
-      lg2[i] = log2(i);
-    }
-    for (int16 i = 0; i != 3; ++i) {
-      lg2_double_factorial[i] = 0;
-      lg2_unrooted[i] = 0;
-    }
-    assert(lg2_rooted[0] == 0);
-    assert(lg2_rooted[1] == 0);
-    for (int32 i = 2; i != SL_MAX_TIPS + SL_MAX_TIPS - 2; ++i) {
-      lg2_double_factorial[i] = lg2_double_factorial[i - 2] + log2(i);
-    }
-    for (int32 i = 3; i != SL_MAX_TIPS + 2; ++i) {
-      lg2_unrooted[i] = lg2_double_factorial[i + i - 5];
-      assert(lg2_rooted[i - 1] == lg2_double_factorial[i + i - 5]);
-    }
+    TreeDist::init_lg2_tables(SL_MAX_TIPS);
+  }
+
+// Thin wrapper that exercises the installable-header version of
+// mutual_clustering_score(), for test coverage and downstream validation.
+// [[Rcpp::export]]
+double cpp_mci_impl_score(const Rcpp::RawMatrix& x,
+                          const Rcpp::RawMatrix& y,
+                          int n_tips) {
+  using TreeTools::SplitList;
+
+  const SplitList a(x);
+  const SplitList b(y);
+  TreeDist::LapScratch scratch;
+
+  // Build arrays matching the header's raw-pointer API types.
+  std::vector<const splitbit*> a_ptrs(a.n_splits);
+  std::vector<const splitbit*> b_ptrs(b.n_splits);
+  std::vector<TreeDist::int16> a_in(a.n_splits);
+  std::vector<TreeDist::int16> b_in(b.n_splits);
+  for (TreeDist::int16 i = 0; i < a.n_splits; ++i) {
+    a_ptrs[i] = a.state[i];
+    a_in[i] = static_cast<TreeDist::int16>(a.in_split[i]);
   }
+  for (TreeDist::int16 i = 0; i < b.n_splits; ++i) {
+    b_ptrs[i] = b.state[i];
+    b_in[i] = static_cast<TreeDist::int16>(b.in_split[i]);
+  }
+
+  return TreeDist::mutual_clustering_score(
+    a_ptrs.data(), a_in.data(), a.n_splits,
+    b_ptrs.data(), b_in.data(), b.n_splits,
+    a.n_bins, static_cast<TreeDist::int32>(n_tips),
+    scratch);
+}
diff --git a/src/tree_distances.h b/src/tree_distances.h
index f5ba93e7a..8d4f7b4f7 100644
--- a/src/tree_distances.h
+++ b/src/tree_distances.h
@@ -2,18 +2,25 @@
 #define _TREEDIST_TREE_DISTANCES_H
 
 #include "lap.h"
+#include <TreeDist/mutual_clustering.h>
 #include <TreeTools/SplitList.h>
 
-/***** Constants requiring initialization *****/
+/***** Re-export shared lookup tables to global scope *****/
+
+using TreeDist::lg2;
+using TreeDist::lg2_double_factorial;
+using TreeDist::lg2_unrooted;
+using TreeDist::lg2_rooted;
+
+/***** Constants *****/
 
 constexpr splitbit ALL_ONES = (std::numeric_limits<splitbit>::max)();
-extern double lg2[SL_MAX_TIPS + 1];
-extern double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
-extern double lg2_unrooted[SL_MAX_TIPS + 2];
-extern double *lg2_rooted;
 
 namespace TreeDist {
 
+  // Re-exported from mutual_clustering.h:
+  //   ic_matching(int16 a, int16 b, int16 n)
+
   // See equation 16 in Meila 2007 (k' denoted K).
   // nkK is converted to pkK in the calling function when divided by n.
   inline void add_ic_element(double& ic_sum, const int16 nkK, const int16 nk,
@@ -52,14 +59,6 @@ namespace TreeDist {
   }
 
 
-[[nodiscard]] inline double ic_matching(const int16 a, const int16 b, const int16 n) noexcept {
-    const double lg2a = lg2[a];
-    const double lg2b = lg2[b];
-    const double lg2n = lg2[n];
-    return (a + b) * lg2n - a * lg2a - b * lg2b;
-    //  (a * (lg2n - lg2a)) + (b * (lg2n - lg2b)); is substantially slower
-  }
-
 [[nodiscard]] inline double one_overlap(const int16 a, const int16 b, const int16 n) noexcept {
     assert(SL_MAX_TIPS + 2 <= std::numeric_limits<int16>::max()); // verify int16 ok
     if (a == b) {
diff --git a/tests/testthat/test-mci_impl.R b/tests/testthat/test-mci_impl.R
new file mode 100644
index 000000000..58d72da2a
--- /dev/null
+++ b/tests/testthat/test-mci_impl.R
@@ -0,0 +1,53 @@
+test_that("Header mutual_clustering_score matches MutualClusteringInfo", {
+  skip_if_not_installed("TreeDist")
+  library(TreeTools)
+  library(TreeDist)
+
+  bal8 <- BalancedTree(8)
+  pec8 <- PectinateTree(8)
+  star8 <- StarTree(8)
+
+  tips <- TipLabels(bal8)
+  n_tip <- length(tips)
+  splits_bal <- as.Splits(bal8, tips)
+  splits_pec <- as.Splits(pec8, tips)
+  splits_star <- as.Splits(star8, tips)
+
+  # Score-only from the installable-header implementation
+  impl_score <- TreeDist:::cpp_mci_impl_score
+
+  impl_bal_pec <- impl_score(splits_bal, splits_pec, n_tip)
+  impl_bal_bal <- impl_score(splits_bal, splits_bal, n_tip)
+  impl_star <- impl_score(splits_bal, splits_star, n_tip)
+
+  # Reference from MutualClusteringInfo (unnormalized score)
+  ref_bal_pec <- MutualClusteringInfo(bal8, pec8)
+  ref_bal_bal <- MutualClusteringInfo(bal8, bal8)
+
+  expect_equal(impl_bal_pec, ref_bal_pec, tolerance = 1e-10)
+  expect_equal(impl_bal_bal, ref_bal_bal, tolerance = 1e-10)
+  expect_equal(impl_star, 0)
+})
+
+test_that("Header MCI covers exact-match early exit and partial LAP", {
+  skip_if_not_installed("TreeDist")
+  library(TreeTools)
+  library(TreeDist)
+  impl_score <- TreeDist:::cpp_mci_impl_score
+
+  # Two identical trees → all splits match exactly (early exit path)
+  bal20 <- BalancedTree(20)
+  tips <- TipLabels(bal20)
+  n_tip <- length(tips)
+  splits20 <- as.Splits(bal20, tips)
+
+  result <- impl_score(splits20, splits20, n_tip)
+  expect_equal(result, MutualClusteringInfo(bal20, bal20), tolerance = 1e-10)
+
+  # Trees that share some but not all splits → partial match + LAP
+  pec20 <- PectinateTree(20)
+  splits_pec20 <- as.Splits(pec20, tips)
+
+  result2 <- impl_score(splits20, splits_pec20, n_tip)
+  expect_equal(result2, MutualClusteringInfo(bal20, pec20), tolerance = 1e-10)
+})