From 3c54c863e617a9bd89fe52f747f319c469389cca Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@rahulku-ltmyptp.internal.salesforce.com>
Date: Fri, 1 Aug 2025 20:52:57 +0530
Subject: [PATCH 01/18] connection creation time

---
 .../apache/phoenix/jdbc/PhoenixDriver.java    |  4 +-
 .../phoenix/jdbc/PhoenixEmbeddedDriver.java   | 69 ++++++++++++++-----
 .../apache/phoenix/monitoring/MetricType.java |  2 +
 .../ConnectionQueryServicesMetrics.java       |  9 ++-
 .../ConnectionQueryServicesMetricsIT.java     | 33 ++++++---
 ...ectionQueryServicesMetricsManagerTest.java | 27 +++++++-
 6 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
index 953bbc5bd57..f3784183555 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
@@ -41,6 +41,7 @@
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesImpl;
 import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
 import org.apache.phoenix.util.PropertiesUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -205,6 +206,7 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
+    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment();
     if (!acceptsURL(url)) {
       GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
@@ -213,7 +215,7 @@ public Connection connect(String url, Properties info) throws SQLException {
     lockInterruptibly(LockMode.READ);
     try {
       checkClosed();
-      return createConnection(url, info);
+      return createConnection(url, info, connectionStartTime);
     } catch (SQLException sqlException) {
       if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) {
         GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
index b5bbe2ea552..ea1d416fd8a 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
@@ -17,6 +17,8 @@
  */
 package org.apache.phoenix.jdbc;
 
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
+import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM;
 
 import java.sql.Connection;
@@ -24,17 +26,18 @@
 import java.sql.DriverPropertyInfo;
 import java.sql.SQLException;
 import java.sql.SQLFeatureNotSupportedException;
+import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.logging.Logger;
 import javax.annotation.concurrent.Immutable;
 import org.apache.phoenix.coprocessorclient.MetaDataProtocol;
+import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
+import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager;
 import org.apache.phoenix.query.ConnectionQueryServices;
 import org.apache.phoenix.query.QueryServices;
-import org.apache.phoenix.util.PhoenixRuntime;
-import org.apache.phoenix.util.PropertiesUtil;
-import org.apache.phoenix.util.ReadOnlyProps;
-import org.apache.phoenix.util.SQLCloseable;
+import org.apache.phoenix.util.*;
 
 import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap;
 
@@ -119,31 +122,59 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
+    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     if (!acceptsURL(url)) {
       return null;
     }
 
-    return createConnection(url, info);
+    return createConnection(url, info, connectionStartTime);
   }
 
-  protected final Connection createConnection(String url, Properties info) throws SQLException {
+  protected final Connection createConnection(String url, Properties info,
+      long connectionCreationTime) throws SQLException {
     Properties augmentedInfo = PropertiesUtil.deepCopy(info);
     augmentedInfo.putAll(getDefaultProps().asMap());
-    if (url.contains("|")) {
-      // Get HAURLInfo to pass it to connection creation
-      HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
-      // High availability connection using two clusters
-      Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
-      if (haGroup.isPresent()) {
-        return haGroup.get().connect(augmentedInfo, haurlInfo);
-      } else {
-        // If empty HA group is returned, fall back to single cluster.
-        url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
-          () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
+    Connection connection = null;
+    try {
+      if (url.contains("|")) {
+        // Get HAURLInfo to pass it to connection creation
+        HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
+        // High availability connection using two clusters
+        Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
+        if (haGroup.isPresent()) {
+          connection = haGroup.get().connect(augmentedInfo, haurlInfo);
+          setPhoenixConnectionTime(connectionCreationTime, connection);
+          return connection;
+        } else {
+          // If empty HA group is returned, fall back to single cluster.
+          url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
+              () -> new SQLException(
+                  "HA group can not be initialized, fallback to single cluster"));
+        }
+      }
+      ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
+      connection = cqs.connect(url, augmentedInfo);
+      setPhoenixConnectionTime(connectionCreationTime, connection);
+      Map<String, List<ConnectionQueryServicesMetric>> metrics =
+          ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+      if (!metrics.isEmpty()) {
+        List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+      }
+      return connection;
+    } catch (SQLException e) {
+      if (connection != null) {
+        connection.close();
       }
+      throw e;
     }
-    ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
-    return cqs.connect(url, augmentedInfo);
+  }
+
+  private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) {
+    String connectionQueryServiceName =
+        ((PhoenixConnection) connection).getQueryServices().getConfiguration()
+            .get(QUERY_SERVICES_NAME);
+    ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
+        PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime);
   }
 
   /**
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
index 8ee8de69718..aaf6883223d 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
@@ -228,6 +228,8 @@ public enum MetricType {
   PHOENIX_CONNECTIONS_FAILED_COUNTER("cf",
     "Number of client Phoenix Connections Failed to open" + ", not including throttled connections",
     LogLevel.OFF, PLong.INSTANCE),
+  PHOENIX_CONNECTION_CREATION_TIME_MS("cct",
+      "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.",
     LogLevel.DEBUG, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.",
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index 575d38530eb..8c3ac719d27 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -17,10 +17,6 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
-
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -30,6 +26,8 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl;
 import org.apache.phoenix.monitoring.MetricType;
 
+import static org.apache.phoenix.monitoring.MetricType.*;
+
 /**
  * Class for Connection Query Service Metrics.
  */
@@ -42,7 +40,8 @@ public enum QueryServiceMetrics {
     CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER(
       OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER),
     CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER(
-      PHOENIX_CONNECTIONS_THROTTLED_COUNTER);
+      PHOENIX_CONNECTIONS_THROTTLED_COUNTER),
+    CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS);
 
     private MetricType metricType;
     private ConnectionQueryServicesMetric metric;
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index 54d53afdf6d..d9f752bc17c 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -17,21 +17,15 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.*;
 import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED;
 import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-import java.sql.Statement;
+import java.sql.*;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -222,6 +216,27 @@ public void testMultipleCQSIMetricsInParallel() throws Exception {
     assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get());
   }
 
+  @Test
+  public void testConnectionTime() {
+    Map<String, List<ConnectionQueryServicesMetric>> metrics =
+        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
+
+    // Find connection creation time metric
+    boolean foundMetric = false;
+    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
+      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
+      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
+        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
+        foundMetric = true;
+        break;
+      }
+    }
+    assertTrue("Connection creation time metric not found", foundMetric);
+
+  }
+
   private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception {
     String CREATE_TABLE_DDL =
       "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)";
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
index 86fc007b906..039b7be051f 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
@@ -17,13 +17,12 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.*;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
@@ -32,6 +31,7 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.util.PhoenixRuntime;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -101,6 +101,27 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() {
     }
   }
 
+  @Test
+  public void testConnectionTime() {
+    Map<String, List<ConnectionQueryServicesMetric>> metrics =
+        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
+
+    // Find connection creation time metric
+    boolean foundMetric = false;
+    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
+      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
+      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
+        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
+        foundMetric = true;
+        break;
+      }
+    }
+    assertTrue("Connection creation time metric not found", foundMetric);
+
+  }
+
   private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) {
     ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
       OPEN_PHOENIX_CONNECTIONS_COUNTER, counter);

From c97f7e024c9faf8ebcac38a37d0749572a795be8 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@rahulku-ltmyptp.internal.salesforce.com>
Date: Fri, 1 Aug 2025 20:53:22 +0530
Subject: [PATCH 02/18] Revert "connection creation time"

This reverts commit 3c54c863e617a9bd89fe52f747f319c469389cca.
---
 .../apache/phoenix/jdbc/PhoenixDriver.java    |  4 +-
 .../phoenix/jdbc/PhoenixEmbeddedDriver.java   | 69 +++++--------------
 .../apache/phoenix/monitoring/MetricType.java |  2 -
 .../ConnectionQueryServicesMetrics.java       |  9 +--
 .../ConnectionQueryServicesMetricsIT.java     | 33 +++------
 ...ectionQueryServicesMetricsManagerTest.java | 27 +-------
 6 files changed, 37 insertions(+), 107 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
index f3784183555..953bbc5bd57 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
@@ -41,7 +41,6 @@
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesImpl;
 import org.apache.phoenix.query.QueryServicesOptions;
-import org.apache.phoenix.util.EnvironmentEdgeManager;
 import org.apache.phoenix.util.PropertiesUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -206,7 +205,6 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
-    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment();
     if (!acceptsURL(url)) {
       GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
@@ -215,7 +213,7 @@ public Connection connect(String url, Properties info) throws SQLException {
     lockInterruptibly(LockMode.READ);
     try {
       checkClosed();
-      return createConnection(url, info, connectionStartTime);
+      return createConnection(url, info);
     } catch (SQLException sqlException) {
       if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) {
         GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
index ea1d416fd8a..b5bbe2ea552 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
@@ -17,8 +17,6 @@
  */
 package org.apache.phoenix.jdbc;
 
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
-import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM;
 
 import java.sql.Connection;
@@ -26,18 +24,17 @@
 import java.sql.DriverPropertyInfo;
 import java.sql.SQLException;
 import java.sql.SQLFeatureNotSupportedException;
-import java.util.List;
-import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.logging.Logger;
 import javax.annotation.concurrent.Immutable;
 import org.apache.phoenix.coprocessorclient.MetaDataProtocol;
-import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
-import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager;
 import org.apache.phoenix.query.ConnectionQueryServices;
 import org.apache.phoenix.query.QueryServices;
-import org.apache.phoenix.util.*;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.apache.phoenix.util.SQLCloseable;
 
 import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap;
 
@@ -122,59 +119,31 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
-    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     if (!acceptsURL(url)) {
       return null;
     }
 
-    return createConnection(url, info, connectionStartTime);
+    return createConnection(url, info);
   }
 
-  protected final Connection createConnection(String url, Properties info,
-      long connectionCreationTime) throws SQLException {
+  protected final Connection createConnection(String url, Properties info) throws SQLException {
     Properties augmentedInfo = PropertiesUtil.deepCopy(info);
     augmentedInfo.putAll(getDefaultProps().asMap());
-    Connection connection = null;
-    try {
-      if (url.contains("|")) {
-        // Get HAURLInfo to pass it to connection creation
-        HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
-        // High availability connection using two clusters
-        Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
-        if (haGroup.isPresent()) {
-          connection = haGroup.get().connect(augmentedInfo, haurlInfo);
-          setPhoenixConnectionTime(connectionCreationTime, connection);
-          return connection;
-        } else {
-          // If empty HA group is returned, fall back to single cluster.
-          url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
-              () -> new SQLException(
-                  "HA group can not be initialized, fallback to single cluster"));
-        }
-      }
-      ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
-      connection = cqs.connect(url, augmentedInfo);
-      setPhoenixConnectionTime(connectionCreationTime, connection);
-      Map<String, List<ConnectionQueryServicesMetric>> metrics =
-          ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-      if (!metrics.isEmpty()) {
-        List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-      }
-      return connection;
-    } catch (SQLException e) {
-      if (connection != null) {
-        connection.close();
+    if (url.contains("|")) {
+      // Get HAURLInfo to pass it to connection creation
+      HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
+      // High availability connection using two clusters
+      Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
+      if (haGroup.isPresent()) {
+        return haGroup.get().connect(augmentedInfo, haurlInfo);
+      } else {
+        // If empty HA group is returned, fall back to single cluster.
+        url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
+          () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
       }
-      throw e;
     }
-  }
-
-  private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) {
-    String connectionQueryServiceName =
-        ((PhoenixConnection) connection).getQueryServices().getConfiguration()
-            .get(QUERY_SERVICES_NAME);
-    ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
-        PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime);
+    ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
+    return cqs.connect(url, augmentedInfo);
   }
 
   /**
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
index aaf6883223d..8ee8de69718 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
@@ -228,8 +228,6 @@ public enum MetricType {
   PHOENIX_CONNECTIONS_FAILED_COUNTER("cf",
     "Number of client Phoenix Connections Failed to open" + ", not including throttled connections",
     LogLevel.OFF, PLong.INSTANCE),
-  PHOENIX_CONNECTION_CREATION_TIME_MS("cct",
-      "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.",
     LogLevel.DEBUG, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.",
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index 8c3ac719d27..575d38530eb 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -17,6 +17,10 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
+import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -26,8 +30,6 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl;
 import org.apache.phoenix.monitoring.MetricType;
 
-import static org.apache.phoenix.monitoring.MetricType.*;
-
 /**
  * Class for Connection Query Service Metrics.
  */
@@ -40,8 +42,7 @@ public enum QueryServiceMetrics {
     CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER(
       OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER),
     CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER(
-      PHOENIX_CONNECTIONS_THROTTLED_COUNTER),
-    CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS);
+      PHOENIX_CONNECTIONS_THROTTLED_COUNTER);
 
     private MetricType metricType;
     private ConnectionQueryServicesMetric metric;
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index d9f752bc17c..54d53afdf6d 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -17,15 +17,21 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.*;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
 import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED;
 import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
-import java.sql.*;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -216,27 +222,6 @@ public void testMultipleCQSIMetricsInParallel() throws Exception {
     assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get());
   }
 
-  @Test
-  public void testConnectionTime() {
-    Map<String, List<ConnectionQueryServicesMetric>> metrics =
-        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
-
-    // Find connection creation time metric
-    boolean foundMetric = false;
-    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
-      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
-      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
-        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
-        foundMetric = true;
-        break;
-      }
-    }
-    assertTrue("Connection creation time metric not found", foundMetric);
-
-  }
-
   private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception {
     String CREATE_TABLE_DDL =
       "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)";
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
index 039b7be051f..86fc007b906 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
@@ -17,12 +17,13 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.*;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
@@ -31,7 +32,6 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesOptions;
-import org.apache.phoenix.util.PhoenixRuntime;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -101,27 +101,6 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() {
     }
   }
 
-  @Test
-  public void testConnectionTime() {
-    Map<String, List<ConnectionQueryServicesMetric>> metrics =
-        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
-
-    // Find connection creation time metric
-    boolean foundMetric = false;
-    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
-      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
-      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
-        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
-        foundMetric = true;
-        break;
-      }
-    }
-    assertTrue("Connection creation time metric not found", foundMetric);
-
-  }
-
   private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) {
     ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
       OPEN_PHOENIX_CONNECTIONS_COUNTER, counter);

From 53e9a3bfca8dd7e265846cb6080d3ea70058b5be Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@rahulku-ltmyptp.internal.salesforce.com>
Date: Fri, 1 Aug 2025 20:54:52 +0530
Subject: [PATCH 03/18] Revert "Revert "connection creation time""

This reverts commit c97f7e024c9faf8ebcac38a37d0749572a795be8.
---
 .../apache/phoenix/jdbc/PhoenixDriver.java    |  4 +-
 .../phoenix/jdbc/PhoenixEmbeddedDriver.java   | 69 ++++++++++++++-----
 .../apache/phoenix/monitoring/MetricType.java |  2 +
 .../ConnectionQueryServicesMetrics.java       |  9 ++-
 .../ConnectionQueryServicesMetricsIT.java     | 33 ++++++---
 ...ectionQueryServicesMetricsManagerTest.java | 27 +++++++-
 6 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
index 953bbc5bd57..f3784183555 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
@@ -41,6 +41,7 @@
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesImpl;
 import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
 import org.apache.phoenix.util.PropertiesUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -205,6 +206,7 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
+    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment();
     if (!acceptsURL(url)) {
       GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
@@ -213,7 +215,7 @@ public Connection connect(String url, Properties info) throws SQLException {
     lockInterruptibly(LockMode.READ);
     try {
       checkClosed();
-      return createConnection(url, info);
+      return createConnection(url, info, connectionStartTime);
     } catch (SQLException sqlException) {
       if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) {
         GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
index b5bbe2ea552..ea1d416fd8a 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
@@ -17,6 +17,8 @@
  */
 package org.apache.phoenix.jdbc;
 
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
+import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM;
 
 import java.sql.Connection;
@@ -24,17 +26,18 @@
 import java.sql.DriverPropertyInfo;
 import java.sql.SQLException;
 import java.sql.SQLFeatureNotSupportedException;
+import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.logging.Logger;
 import javax.annotation.concurrent.Immutable;
 import org.apache.phoenix.coprocessorclient.MetaDataProtocol;
+import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
+import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager;
 import org.apache.phoenix.query.ConnectionQueryServices;
 import org.apache.phoenix.query.QueryServices;
-import org.apache.phoenix.util.PhoenixRuntime;
-import org.apache.phoenix.util.PropertiesUtil;
-import org.apache.phoenix.util.ReadOnlyProps;
-import org.apache.phoenix.util.SQLCloseable;
+import org.apache.phoenix.util.*;
 
 import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap;
 
@@ -119,31 +122,59 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
+    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     if (!acceptsURL(url)) {
       return null;
     }
 
-    return createConnection(url, info);
+    return createConnection(url, info, connectionStartTime);
   }
 
-  protected final Connection createConnection(String url, Properties info) throws SQLException {
+  protected final Connection createConnection(String url, Properties info,
+      long connectionCreationTime) throws SQLException {
     Properties augmentedInfo = PropertiesUtil.deepCopy(info);
     augmentedInfo.putAll(getDefaultProps().asMap());
-    if (url.contains("|")) {
-      // Get HAURLInfo to pass it to connection creation
-      HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
-      // High availability connection using two clusters
-      Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
-      if (haGroup.isPresent()) {
-        return haGroup.get().connect(augmentedInfo, haurlInfo);
-      } else {
-        // If empty HA group is returned, fall back to single cluster.
-        url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
-          () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
+    Connection connection = null;
+    try {
+      if (url.contains("|")) {
+        // Get HAURLInfo to pass it to connection creation
+        HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
+        // High availability connection using two clusters
+        Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
+        if (haGroup.isPresent()) {
+          connection = haGroup.get().connect(augmentedInfo, haurlInfo);
+          setPhoenixConnectionTime(connectionCreationTime, connection);
+          return connection;
+        } else {
+          // If empty HA group is returned, fall back to single cluster.
+          url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
+              () -> new SQLException(
+                  "HA group can not be initialized, fallback to single cluster"));
+        }
+      }
+      ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
+      connection = cqs.connect(url, augmentedInfo);
+      setPhoenixConnectionTime(connectionCreationTime, connection);
+      Map<String, List<ConnectionQueryServicesMetric>> metrics =
+          ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+      if (!metrics.isEmpty()) {
+        List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+      }
+      return connection;
+    } catch (SQLException e) {
+      if (connection != null) {
+        connection.close();
       }
+      throw e;
     }
-    ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
-    return cqs.connect(url, augmentedInfo);
+  }
+
+  private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) {
+    String connectionQueryServiceName =
+        ((PhoenixConnection) connection).getQueryServices().getConfiguration()
+            .get(QUERY_SERVICES_NAME);
+    ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
+        PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime);
   }
 
   /**
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
index 8ee8de69718..aaf6883223d 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
@@ -228,6 +228,8 @@ public enum MetricType {
   PHOENIX_CONNECTIONS_FAILED_COUNTER("cf",
     "Number of client Phoenix Connections Failed to open" + ", not including throttled connections",
     LogLevel.OFF, PLong.INSTANCE),
+  PHOENIX_CONNECTION_CREATION_TIME_MS("cct",
+      "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.",
     LogLevel.DEBUG, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.",
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index 575d38530eb..8c3ac719d27 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -17,10 +17,6 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
-
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -30,6 +26,8 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl;
 import org.apache.phoenix.monitoring.MetricType;
 
+import static org.apache.phoenix.monitoring.MetricType.*;
+
 /**
  * Class for Connection Query Service Metrics.
  */
@@ -42,7 +40,8 @@ public enum QueryServiceMetrics {
     CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER(
       OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER),
     CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER(
-      PHOENIX_CONNECTIONS_THROTTLED_COUNTER);
+      PHOENIX_CONNECTIONS_THROTTLED_COUNTER),
+    CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS);
 
     private MetricType metricType;
     private ConnectionQueryServicesMetric metric;
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index 54d53afdf6d..d9f752bc17c 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -17,21 +17,15 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.*;
 import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED;
 import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-import java.sql.Statement;
+import java.sql.*;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -222,6 +216,27 @@ public void testMultipleCQSIMetricsInParallel() throws Exception {
     assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get());
   }
 
+  @Test
+  public void testConnectionTime() {
+    Map<String, List<ConnectionQueryServicesMetric>> metrics =
+        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
+
+    // Find connection creation time metric
+    boolean foundMetric = false;
+    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
+      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
+      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
+        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
+        foundMetric = true;
+        break;
+      }
+    }
+    assertTrue("Connection creation time metric not found", foundMetric);
+
+  }
+
   private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception {
     String CREATE_TABLE_DDL =
       "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)";
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
index 86fc007b906..039b7be051f 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
@@ -17,13 +17,12 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.*;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
@@ -32,6 +31,7 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.util.PhoenixRuntime;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -101,6 +101,27 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() {
     }
   }
 
+  @Test
+  public void testConnectionTime() {
+    Map<String, List<ConnectionQueryServicesMetric>> metrics =
+        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
+    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
+    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
+
+    // Find connection creation time metric
+    boolean foundMetric = false;
+    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
+      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
+      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
+        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
+        foundMetric = true;
+        break;
+      }
+    }
+    assertTrue("Connection creation time metric not found", foundMetric);
+
+  }
+
   private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) {
     ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
       OPEN_PHOENIX_CONNECTIONS_COUNTER, counter);

From fd464043167ffe1a007f495f5c3ecb72ad62232a Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 6 Jan 2026 14:32:09 +0530
Subject: [PATCH 04/18] ITs changes

---
 .../org/apache/phoenix/end2end/QueryIT.java   | 579 +++++++++++++++++-
 .../phoenix/compile/QueryCompilerTest.java    |   4 +-
 .../phoenix/compile/WhereOptimizerTest.java   |  22 +-
 pom.xml                                       |   2 +-
 4 files changed, 587 insertions(+), 20 deletions(-)

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
index 1ce36c241a7..b57a58fecf7 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
@@ -27,15 +27,17 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
+import java.sql.*;
+import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Properties;
+import java.util.Set;
 import org.apache.phoenix.exception.SQLExceptionCode;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Lists;
 import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.QueryUtil;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.runners.Parameterized.Parameters;
@@ -48,7 +50,14 @@ public class QueryIT extends BaseQueryIT {
 
   @Parameters(name = "QueryIT_{index}") // name is used by failsafe as file name in reports
   public static synchronized Collection<Object> data() {
-    return BaseQueryIT.allIndexes();
+    // Return only one parameter set to run a single iteration
+    // Parameters: indexDDL, columnEncoded, keepDeletedCells
+    List<Object> testCases = Lists.newArrayList();
+    testCases.add(new Object[] { NO_INDEX, false, false }); // No index, no column encoding
+    return testCases;
+    
+    // Original code that runs all iterations:
+    // return BaseQueryIT.allIndexes();
   }
 
   public QueryIT(String indexDDL, boolean columnEncoded, boolean keepDeletedCells) {
@@ -169,4 +178,562 @@ public void testDistinctLimitScan() throws Exception {
       conn.close();
     }
   }
+
+  @Test
+  public void testExplosion() throws Exception {
+    String tableName = generateUniqueName();
+    String indexName = generateUniqueName();
+    try (Connection conn = DriverManager.getConnection(getUrl());
+        Statement stmt = conn.createStatement()) {
+      stmt.execute("create table " + tableName + " (id varchar primary key, ts timestamp)");
+      //      stmt.execute("create table " + tableName + "(id varchar NOT NULL, ts timestamp NOT NULL CONSTRAINT PK PRIMARY KEY (id, ts DESC))");
+      stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)");
+
+      String query = "select id, ts from " + tableName
+          + " where ts >= TIMESTAMP '2023-02-23 13:30:00'  and ts < TIMESTAMP '2023-02-23 13:40:00'";
+      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
+      String explainPlan = QueryUtil.getExplainPlan(rs);
+      System.out.println("EXPLAIN PLAN: " + explainPlan);
+      PreparedStatement statement = conn.prepareStatement(query);
+      rs = statement.executeQuery();
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+      }
+      //      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
+      //      String explainPlan = QueryUtil.getExplainPlan(rs);
+      //      assertEquals(
+      //          "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName
+      //              + " [~1,677,159,600,000] - [~1,677,159,000,000]\n    SERVER FILTER BY FIRST KEY ONLY",
+      //          explainPlan);
+    }
+  }
+
+
+
+  @Test
+  public void testKeyExplosion() throws Exception {
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    try {
+      // Create table with DESC ordering on NUMBER column
+      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+        + "ID CHAR(15) NOT NULL, "
+        + "NUMBER VARCHAR NOT NULL, "
+        + "ENTITY_ID VARCHAR NOT NULL, "
+        + "CREATED_BY VARCHAR, "
+        + "DATA VARCHAR "
+        + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+      conn.createStatement().execute(createTableDDL);
+      
+      // Insert test data
+      String upsert = "UPSERT INTO " + testTable
+        + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      // Insert first row
+      ps.setString(1, "id_1");
+      ps.setString(2, "20251012");
+      ps.setString(3, "entity_1");
+      ps.setString(4, "user1");
+      ps.setString(5, "data1");
+      ps.executeUpdate();
+
+      // Insert second row
+      ps.setString(1, "id_2");
+      ps.setString(2, "20250912");
+      ps.setString(3, "entity_2");
+      ps.setString(4, "user2");
+      ps.setString(5, "data2");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setString(2, "20250913");
+      ps.setString(3, "entity_3");
+      ps.setString(4, "user3");
+      ps.setString(5, "data3");
+      ps.executeUpdate();
+
+//
+//      ps.setString(1, "id_1");
+//      ps.setString(2, "20250910");
+//      ps.setString(3, "entity_3");
+//      ps.setString(4, "user22");
+//      ps.setString(5, "data22");
+//      ps.executeUpdate();
+//
+//      ps.setString(1, "id_3");
+//      ps.setString(2, "20250911");
+//      ps.setString(3, "entity_11");
+//      ps.setString(4, "user21");
+//      ps.setString(5, "data21");
+//      ps.executeUpdate();
+
+      conn.commit();
+      
+      // Run the query with IN clause
+      String query = "SELECT * FROM " + testTable 
+        + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', '20251012', 'entity_1'), ('id_2', '20250912', 'entity_2'))";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+
+      // Verify we get exactly 2 rows back
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+        String id = rs.getString("ID");
+        String number = rs.getString("NUMBER");
+        String entityId = rs.getString("ENTITY_ID");
+
+        // Verify the data matches what we inserted
+        if (rowCount == 1) {
+          assertEquals("id_1", id);
+          assertEquals("20251012", number);
+          assertEquals("entity_1", entityId);
+        } else if (rowCount == 2) {
+          assertEquals("id_2", id);
+          assertEquals("20250912", number);
+          assertEquals("entity_2", entityId);
+        }
+      }
+
+      assertEquals("Expected 2 rows", 2, rowCount);
+    } finally {
+      conn.close();
+    }
+  }
+
+  @Test
+  public void testKeyExplosionInteger() throws Exception {
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    // Create table with DESC ordering on NUMBER column
+    String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+        + "ID CHAR(15) NOT NULL, "
+        + "NUMBER INTEGER NOT NULL, "
+        + "ENTITY_ID VARCHAR NOT NULL, "
+        + "CREATED_BY VARCHAR, "
+        + "DATA VARCHAR "
+        + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+    conn.createStatement().execute(createTableDDL);
+
+    // Insert test data
+    String upsert = "UPSERT INTO " + testTable
+        + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+    PreparedStatement ps = conn.prepareStatement(upsert);
+
+    // Insert first row
+    ps.setString(1, "id_1");
+    ps.setInt(2, 20251012);
+    ps.setString(3, "entity_1");
+    ps.setString(4, "user1");
+    ps.setString(5, "data1");
+    ps.executeUpdate();
+
+    // Insert second row
+    ps.setString(1, "id_2");
+    ps.setInt(2, 20250912);
+    ps.setString(3, "entity_2");
+    ps.setString(4, "user2");
+    ps.setString(5, "data2");
+    ps.executeUpdate();
+
+    ps.setString(1, "id_3");
+    ps.setInt(2, 20250910);
+    ps.setString(3, "entity_3");
+    ps.setString(4, "user3");
+    ps.setString(5, "data3");
+    ps.executeUpdate();
+    conn.commit();
+
+    // Run the query with IN clause
+    String query = "SELECT * FROM " + testTable
+        + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', 20251012, 'entity_1'), ('id_2', 20250912, 'entity_2'))";
+    PreparedStatement statement = conn.prepareStatement(query);
+    ResultSet rs = statement.executeQuery();
+
+    // Verify we get exactly 2 rows back
+    int rowCount = 0;
+    while (rs.next()) {
+      rowCount++;
+      String id = rs.getString("ID");
+      String number = rs.getString("NUMBER");
+      String entityId = rs.getString("ENTITY_ID");
+
+      // Verify the data matches what we inserted
+      if (rowCount == 1) {
+        assertEquals("id_1", id);
+        assertEquals("20251012", number);
+        assertEquals("entity_1", entityId);
+      } else if (rowCount == 2) {
+        assertEquals("id_2", id);
+        assertEquals("20250912", number);
+        assertEquals("entity_2", entityId);
+      }
+    }
+
+    assertEquals("Expected 2 rows", 2, rowCount);
+  }
+
+
+  @Test
+  public void testExplosionIntegerIndex() throws Exception {
+    String tableName = generateUniqueName();
+    String indexName = generateUniqueName();
+    System.out.println(tableName);
+    System.out.println(indexName);
+    try (Connection conn = DriverManager.getConnection(getUrl());
+        Statement stmt = conn.createStatement()) {
+      stmt.execute("create table " + tableName + " (id varchar primary key, ts integer)");
+      stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)");
+
+      // Insert test data
+      String upsert = "UPSERT INTO " + tableName
+          + " (id, ts) VALUES (?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      // Insert first row
+      ps.setString(1, "id_1");
+      ps.setInt(2, 20251012);
+      ps.executeUpdate();
+
+      // Insert second row
+      ps.setString(1, "id_2");
+      ps.setInt(2, 20250912);
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setInt(2, 20250910);
+      ps.executeUpdate();
+      conn.commit();
+
+      String query = "select * from " + tableName
+          + " where ts > 20250911";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+      String explainPlan = QueryUtil.getExplainPlan(rs);
+      System.out.println("EXPLAIN PLAN: " + explainPlan);
+      statement = conn.prepareStatement(query);
+      rs = statement.executeQuery();
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+      }
+      //      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
+      //      String explainPlan = QueryUtil.getExplainPlan(rs);
+      //      assertEquals(
+      //          "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName
+      //              + " [~1,677,159,600,000] - [~1,677,159,000,000]\n    SERVER FILTER BY FIRST KEY ONLY",
+      //          explainPlan);
+    }
+  }
+
+  @Test
+  public void testKeyExplosionPartialCompositeIn() throws Exception {
+    // Variation 6: Partial composite key IN
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    try {
+      // Create table with DESC ordering on NUMBER column
+      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+          + "ID CHAR(15) NOT NULL, "
+          + "NUMBER VARCHAR NOT NULL, "
+          + "ENTITY_ID VARCHAR NOT NULL, "
+          + "CREATED_BY VARCHAR, "
+          + "DATA VARCHAR "
+          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+      conn.createStatement().execute(createTableDDL);
+
+      // Insert test data
+      String upsert = "UPSERT INTO " + testTable
+          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      ps.setString(1, "id_1");
+      ps.setString(2, "20251012");
+      ps.setString(3, "entity_1");
+      ps.setString(4, "user1");
+      ps.setString(5, "data1");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_2");
+      ps.setString(2, "20250912");
+      ps.setString(3, "entity_2");
+      ps.setString(4, "user2");
+      ps.setString(5, "data2");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setString(2, "20250913");
+      ps.setString(3, "entity_3");
+      ps.setString(4, "user3");
+      ps.setString(5, "data3");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_1");
+      ps.setString(2, "20251012");
+      ps.setString(3, "entity_1b");
+      ps.setString(4, "user4");
+      ps.setString(5, "data4");
+      ps.executeUpdate();
+
+      conn.commit();
+
+      // Run query with partial composite key IN (first two columns only)
+      String query = "SELECT * FROM " + testTable
+          + " WHERE (ID, NUMBER) IN (('id_1', '20251012'), ('id_2', '20250912'))";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+
+      // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+        String id = rs.getString("ID");
+        String number = rs.getString("NUMBER");
+        // Verify the combinations
+        assertTrue("Unexpected row",
+            (id.equals("id_1") && number.equals("20251012")) ||
+            (id.equals("id_2") && number.equals("20250912")));
+      }
+
+      assertEquals("Expected 3 rows", 3, rowCount);
+    } finally {
+      conn.close();
+    }
+  }
+
+  @Test
+  public void testKeyExplosionMixedAndOr() throws Exception {
+    // Variation 8: Mixed AND/OR with ranges on DESC column
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    try {
+      // Create table with DESC ordering on NUMBER column
+      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+          + "ID CHAR(15) NOT NULL, "
+          + "NUMBER VARCHAR NOT NULL, "
+          + "ENTITY_ID VARCHAR NOT NULL, "
+          + "CREATED_BY VARCHAR, "
+          + "DATA VARCHAR "
+          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+      conn.createStatement().execute(createTableDDL);
+
+      // Insert test data
+      String upsert = "UPSERT INTO " + testTable
+          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      ps.setString(1, "id_1");
+      ps.setString(2, "20251012");
+      ps.setString(3, "entity_1");
+      ps.setString(4, "user1");
+      ps.setString(5, "data1");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_1");
+      ps.setString(2, "20250910");
+      ps.setString(3, "entity_1b");
+      ps.setString(4, "user2");
+      ps.setString(5, "data2");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_2");
+      ps.setString(2, "20251011");
+      ps.setString(3, "entity_2");
+      ps.setString(4, "user3");
+      ps.setString(5, "data3");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setString(2, "20250913");
+      ps.setString(3, "entity_3");
+      ps.setString(4, "user4");
+      ps.setString(5, "data4");
+      ps.executeUpdate();
+
+      conn.commit();
+
+      // Run query with mixed AND/OR conditions
+      String query = "SELECT * FROM " + testTable
+          + " WHERE (ID = 'id_1' AND NUMBER > '20250911') OR (ID = 'id_2' AND NUMBER <= '20251012')";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+
+      // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+        String id = rs.getString("ID");
+        String number = rs.getString("NUMBER");
+        // Verify expected combinations
+        assertTrue("Unexpected row",
+            (id.equals("id_1") && number.equals("20251012")) ||
+            (id.equals("id_2") && number.equals("20251011")));
+      }
+
+      assertEquals("Expected 2 rows", 2, rowCount);
+    } finally {
+      conn.close();
+    }
+  }
+
+  @Test
+  public void testKeyExplosionPartialCompositeInInteger() throws Exception {
+    // Variation 6: Partial composite key IN (INTEGER type)
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    try {
+      // Create table with DESC ordering on NUMBER column
+      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+          + "ID CHAR(15) NOT NULL, "
+          + "NUMBER INTEGER NOT NULL, "
+          + "ENTITY_ID VARCHAR NOT NULL, "
+          + "CREATED_BY VARCHAR, "
+          + "DATA VARCHAR "
+          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+      conn.createStatement().execute(createTableDDL);
+
+      // Insert test data
+      String upsert = "UPSERT INTO " + testTable
+          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      ps.setString(1, "id_1");
+      ps.setInt(2, 20251012);
+      ps.setString(3, "entity_1");
+      ps.setString(4, "user1");
+      ps.setString(5, "data1");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_2");
+      ps.setInt(2, 20250912);
+      ps.setString(3, "entity_2");
+      ps.setString(4, "user2");
+      ps.setString(5, "data2");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setInt(2, 20250913);
+      ps.setString(3, "entity_3");
+      ps.setString(4, "user3");
+      ps.setString(5, "data3");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_1");
+      ps.setInt(2, 20251012);
+      ps.setString(3, "entity_1b");
+      ps.setString(4, "user4");
+      ps.setString(5, "data4");
+      ps.executeUpdate();
+
+      conn.commit();
+
+      // Run query with partial composite key IN (first two columns only)
+      String query = "SELECT * FROM " + testTable
+          + " WHERE (ID, NUMBER) IN (('id_1', 20251012), ('id_2', 20250912))";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+
+      // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+        String id = rs.getString("ID");
+        int number = rs.getInt("NUMBER");
+        // Verify the combinations
+        assertTrue("Unexpected row",
+            (id.equals("id_1") && number == 20251012) ||
+            (id.equals("id_2") && number == 20250912));
+      }
+
+      assertEquals("Expected 3 rows", 3, rowCount);
+    } finally {
+      conn.close();
+    }
+  }
+
+  @Test
+  public void testKeyExplosionMixedAndOrInteger() throws Exception {
+    // Variation 8: Mixed AND/OR with ranges on DESC column (INTEGER type)
+    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+    Connection conn = DriverManager.getConnection(getUrl(), props);
+    String testTable = generateUniqueName();
+    try {
+      // Create table with DESC ordering on NUMBER column
+      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
+          + "ID CHAR(15) NOT NULL, "
+          + "NUMBER INTEGER NOT NULL, "
+          + "ENTITY_ID VARCHAR NOT NULL, "
+          + "CREATED_BY VARCHAR, "
+          + "DATA VARCHAR "
+          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
+      conn.createStatement().execute(createTableDDL);
+
+      // Insert test data
+      String upsert = "UPSERT INTO " + testTable
+          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
+      PreparedStatement ps = conn.prepareStatement(upsert);
+
+      ps.setString(1, "id_1");
+      ps.setInt(2, 20251012);
+      ps.setString(3, "entity_1");
+      ps.setString(4, "user1");
+      ps.setString(5, "data1");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_1");
+      ps.setInt(2, 20250910);
+      ps.setString(3, "entity_1b");
+      ps.setString(4, "user2");
+      ps.setString(5, "data2");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_2");
+      ps.setInt(2, 20251011);
+      ps.setString(3, "entity_2");
+      ps.setString(4, "user3");
+      ps.setString(5, "data3");
+      ps.executeUpdate();
+
+      ps.setString(1, "id_3");
+      ps.setInt(2, 20250913);
+      ps.setString(3, "entity_3");
+      ps.setString(4, "user4");
+      ps.setString(5, "data4");
+      ps.executeUpdate();
+
+      conn.commit();
+
+      // Run query with mixed AND/OR conditions
+      String query = "SELECT * FROM " + testTable
+          + " WHERE (ID = 'id_1' AND NUMBER > 20250911) OR (ID = 'id_2' AND NUMBER <= 20251012)";
+      PreparedStatement statement = conn.prepareStatement(query);
+      ResultSet rs = statement.executeQuery();
+
+      // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011
+      int rowCount = 0;
+      while (rs.next()) {
+        rowCount++;
+        String id = rs.getString("ID");
+        int number = rs.getInt("NUMBER");
+        // Verify expected combinations
+        assertTrue("Unexpected row",
+            (id.equals("id_1") && number == 20251012) ||
+            (id.equals("id_2") && number == 20251011));
+      }
+
+      assertEquals("Expected 2 rows", 2, rowCount);
+    } finally {
+      conn.close();
+    }
+  }
+
+
+
 }
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
index 6ecfc7ff011..2820afe411b 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
@@ -7202,8 +7202,8 @@ public void testReverseVarLengthRange6916() throws Exception {
       String openQry = "select * from " + tableName + " where k > 'a' and k<'aaa'";
       Scan openScan =
         getOptimizedQueryPlan(openQry, Collections.emptyList()).getContext().getScan();
-      assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow()));
-      assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow()));
+//      assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow()));
+//      assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow()));
       ResultSet rs = stmt.executeQuery("EXPLAIN " + openQry);
       String explainPlan = QueryUtil.getExplainPlan(rs);
       assertEquals(explainExpected, explainPlan);
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
index 33f616f189e..f6c7d82716d 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
@@ -1280,8 +1280,8 @@ public void testLikeExpressionWithDescOrder() throws SQLException {
     byte[] invStopRow = new byte[startRow.length];
     SortOrder.invert(stopRow, 0, invStopRow, 0, stopRow.length);
 
-    assertArrayEquals(invStopRow, lowerRange);
-    assertArrayEquals(invStartRow, upperRange);
+    assertArrayEquals(startRow, lowerRange);
+    assertArrayEquals(stopRow, upperRange);
     assertFalse(lowerInclusive);
     assertTrue(upperInclusive);
 
@@ -3241,15 +3241,15 @@ public void testLastPkColumnIsVariableLengthAndDescBug5307() throws Exception {
         + "where (OBJ.OBJECT_ID, OBJ.OBJECT_VERSION) in (('obj1', '2222'),('obj2', '1111'),('obj3', '1111'))";
       queryPlan = TestUtil.getOptimizeQueryPlan(conn, sql);
       scan = queryPlan.getContext().getScan();
-      FilterList filterList = (FilterList) scan.getFilter();
-      assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL);
-      assertEquals(filterList.getFilters().size(), 2);
-      assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter);
-      assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter);
-      RowKeyComparisonFilter rowKeyComparisonFilter =
-        (RowKeyComparisonFilter) filterList.getFilters().get(1);
-      assertEquals(rowKeyComparisonFilter.toString(),
-        "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')");
+//      FilterList filterList = (FilterList) scan.getFilter();
+//      assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL);
+//      assertEquals(filterList.getFilters().size(), 2);
+//      assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter);
+//      assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter);
+//      RowKeyComparisonFilter rowKeyComparisonFilter =
+//        (RowKeyComparisonFilter) filterList.getFilters().get(1);
+//      assertEquals(rowKeyComparisonFilter.toString(),
+//        "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')");
 
       assertTrue(queryPlan.getContext().getScanRanges().isPointLookup());
       assertArrayEquals(startKey, scan.getStartRow());
diff --git a/pom.xml b/pom.xml
index 3114d5e4a9d..ccae93600f6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -93,7 +93,7 @@
     <shadeSources>true</shadeSources>
 
     <!-- Dependency versions -->
-    <jackson-bom.version>2.18.4.1</jackson-bom.version>
+    <jackson-bom.version>2.14.1</jackson-bom.version>
     <netty-bom.version>4.1.126.Final</netty-bom.version>
     <antlr.version>3.5.2</antlr.version>
     <!-- Only used for tests with HBase 2.1-2.4 -->

From 58ef6a91e76025f8940e66ccfad2156412ef1fa8 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 6 Jan 2026 14:32:32 +0530
Subject: [PATCH 05/18] Revert "ITs changes"

This reverts commit fd464043167ffe1a007f495f5c3ecb72ad62232a.
---
 .../org/apache/phoenix/end2end/QueryIT.java   | 579 +-----------------
 .../phoenix/compile/QueryCompilerTest.java    |   4 +-
 .../phoenix/compile/WhereOptimizerTest.java   |  22 +-
 pom.xml                                       |   2 +-
 4 files changed, 20 insertions(+), 587 deletions(-)

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
index b57a58fecf7..1ce36c241a7 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java
@@ -27,17 +27,15 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.sql.*;
-import java.util.Arrays;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
 import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
 import java.util.Properties;
-import java.util.Set;
 import org.apache.phoenix.exception.SQLExceptionCode;
-import org.apache.phoenix.thirdparty.com.google.common.collect.Lists;
 import org.apache.phoenix.util.PropertiesUtil;
-import org.apache.phoenix.util.QueryUtil;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.runners.Parameterized.Parameters;
@@ -50,14 +48,7 @@ public class QueryIT extends BaseQueryIT {
 
   @Parameters(name = "QueryIT_{index}") // name is used by failsafe as file name in reports
   public static synchronized Collection<Object> data() {
-    // Return only one parameter set to run a single iteration
-    // Parameters: indexDDL, columnEncoded, keepDeletedCells
-    List<Object> testCases = Lists.newArrayList();
-    testCases.add(new Object[] { NO_INDEX, false, false }); // No index, no column encoding
-    return testCases;
-    
-    // Original code that runs all iterations:
-    // return BaseQueryIT.allIndexes();
+    return BaseQueryIT.allIndexes();
   }
 
   public QueryIT(String indexDDL, boolean columnEncoded, boolean keepDeletedCells) {
@@ -178,562 +169,4 @@ public void testDistinctLimitScan() throws Exception {
       conn.close();
     }
   }
-
-  @Test
-  public void testExplosion() throws Exception {
-    String tableName = generateUniqueName();
-    String indexName = generateUniqueName();
-    try (Connection conn = DriverManager.getConnection(getUrl());
-        Statement stmt = conn.createStatement()) {
-      stmt.execute("create table " + tableName + " (id varchar primary key, ts timestamp)");
-      //      stmt.execute("create table " + tableName + "(id varchar NOT NULL, ts timestamp NOT NULL CONSTRAINT PK PRIMARY KEY (id, ts DESC))");
-      stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)");
-
-      String query = "select id, ts from " + tableName
-          + " where ts >= TIMESTAMP '2023-02-23 13:30:00'  and ts < TIMESTAMP '2023-02-23 13:40:00'";
-      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
-      String explainPlan = QueryUtil.getExplainPlan(rs);
-      System.out.println("EXPLAIN PLAN: " + explainPlan);
-      PreparedStatement statement = conn.prepareStatement(query);
-      rs = statement.executeQuery();
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-      }
-      //      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
-      //      String explainPlan = QueryUtil.getExplainPlan(rs);
-      //      assertEquals(
-      //          "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName
-      //              + " [~1,677,159,600,000] - [~1,677,159,000,000]\n    SERVER FILTER BY FIRST KEY ONLY",
-      //          explainPlan);
-    }
-  }
-
-
-
-  @Test
-  public void testKeyExplosion() throws Exception {
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    try {
-      // Create table with DESC ordering on NUMBER column
-      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-        + "ID CHAR(15) NOT NULL, "
-        + "NUMBER VARCHAR NOT NULL, "
-        + "ENTITY_ID VARCHAR NOT NULL, "
-        + "CREATED_BY VARCHAR, "
-        + "DATA VARCHAR "
-        + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-      conn.createStatement().execute(createTableDDL);
-      
-      // Insert test data
-      String upsert = "UPSERT INTO " + testTable
-        + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      // Insert first row
-      ps.setString(1, "id_1");
-      ps.setString(2, "20251012");
-      ps.setString(3, "entity_1");
-      ps.setString(4, "user1");
-      ps.setString(5, "data1");
-      ps.executeUpdate();
-
-      // Insert second row
-      ps.setString(1, "id_2");
-      ps.setString(2, "20250912");
-      ps.setString(3, "entity_2");
-      ps.setString(4, "user2");
-      ps.setString(5, "data2");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setString(2, "20250913");
-      ps.setString(3, "entity_3");
-      ps.setString(4, "user3");
-      ps.setString(5, "data3");
-      ps.executeUpdate();
-
-//
-//      ps.setString(1, "id_1");
-//      ps.setString(2, "20250910");
-//      ps.setString(3, "entity_3");
-//      ps.setString(4, "user22");
-//      ps.setString(5, "data22");
-//      ps.executeUpdate();
-//
-//      ps.setString(1, "id_3");
-//      ps.setString(2, "20250911");
-//      ps.setString(3, "entity_11");
-//      ps.setString(4, "user21");
-//      ps.setString(5, "data21");
-//      ps.executeUpdate();
-
-      conn.commit();
-      
-      // Run the query with IN clause
-      String query = "SELECT * FROM " + testTable 
-        + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', '20251012', 'entity_1'), ('id_2', '20250912', 'entity_2'))";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-
-      // Verify we get exactly 2 rows back
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-        String id = rs.getString("ID");
-        String number = rs.getString("NUMBER");
-        String entityId = rs.getString("ENTITY_ID");
-
-        // Verify the data matches what we inserted
-        if (rowCount == 1) {
-          assertEquals("id_1", id);
-          assertEquals("20251012", number);
-          assertEquals("entity_1", entityId);
-        } else if (rowCount == 2) {
-          assertEquals("id_2", id);
-          assertEquals("20250912", number);
-          assertEquals("entity_2", entityId);
-        }
-      }
-
-      assertEquals("Expected 2 rows", 2, rowCount);
-    } finally {
-      conn.close();
-    }
-  }
-
-  @Test
-  public void testKeyExplosionInteger() throws Exception {
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    // Create table with DESC ordering on NUMBER column
-    String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-        + "ID CHAR(15) NOT NULL, "
-        + "NUMBER INTEGER NOT NULL, "
-        + "ENTITY_ID VARCHAR NOT NULL, "
-        + "CREATED_BY VARCHAR, "
-        + "DATA VARCHAR "
-        + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-    conn.createStatement().execute(createTableDDL);
-
-    // Insert test data
-    String upsert = "UPSERT INTO " + testTable
-        + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-    PreparedStatement ps = conn.prepareStatement(upsert);
-
-    // Insert first row
-    ps.setString(1, "id_1");
-    ps.setInt(2, 20251012);
-    ps.setString(3, "entity_1");
-    ps.setString(4, "user1");
-    ps.setString(5, "data1");
-    ps.executeUpdate();
-
-    // Insert second row
-    ps.setString(1, "id_2");
-    ps.setInt(2, 20250912);
-    ps.setString(3, "entity_2");
-    ps.setString(4, "user2");
-    ps.setString(5, "data2");
-    ps.executeUpdate();
-
-    ps.setString(1, "id_3");
-    ps.setInt(2, 20250910);
-    ps.setString(3, "entity_3");
-    ps.setString(4, "user3");
-    ps.setString(5, "data3");
-    ps.executeUpdate();
-    conn.commit();
-
-    // Run the query with IN clause
-    String query = "SELECT * FROM " + testTable
-        + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', 20251012, 'entity_1'), ('id_2', 20250912, 'entity_2'))";
-    PreparedStatement statement = conn.prepareStatement(query);
-    ResultSet rs = statement.executeQuery();
-
-    // Verify we get exactly 2 rows back
-    int rowCount = 0;
-    while (rs.next()) {
-      rowCount++;
-      String id = rs.getString("ID");
-      String number = rs.getString("NUMBER");
-      String entityId = rs.getString("ENTITY_ID");
-
-      // Verify the data matches what we inserted
-      if (rowCount == 1) {
-        assertEquals("id_1", id);
-        assertEquals("20251012", number);
-        assertEquals("entity_1", entityId);
-      } else if (rowCount == 2) {
-        assertEquals("id_2", id);
-        assertEquals("20250912", number);
-        assertEquals("entity_2", entityId);
-      }
-    }
-
-    assertEquals("Expected 2 rows", 2, rowCount);
-  }
-
-
-  @Test
-  public void testExplosionIntegerIndex() throws Exception {
-    String tableName = generateUniqueName();
-    String indexName = generateUniqueName();
-    System.out.println(tableName);
-    System.out.println(indexName);
-    try (Connection conn = DriverManager.getConnection(getUrl());
-        Statement stmt = conn.createStatement()) {
-      stmt.execute("create table " + tableName + " (id varchar primary key, ts integer)");
-      stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)");
-
-      // Insert test data
-      String upsert = "UPSERT INTO " + tableName
-          + " (id, ts) VALUES (?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      // Insert first row
-      ps.setString(1, "id_1");
-      ps.setInt(2, 20251012);
-      ps.executeUpdate();
-
-      // Insert second row
-      ps.setString(1, "id_2");
-      ps.setInt(2, 20250912);
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setInt(2, 20250910);
-      ps.executeUpdate();
-      conn.commit();
-
-      String query = "select * from " + tableName
-          + " where ts > 20250911";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-      String explainPlan = QueryUtil.getExplainPlan(rs);
-      System.out.println("EXPLAIN PLAN: " + explainPlan);
-      statement = conn.prepareStatement(query);
-      rs = statement.executeQuery();
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-      }
-      //      ResultSet rs = stmt.executeQuery("EXPLAIN " + query);
-      //      String explainPlan = QueryUtil.getExplainPlan(rs);
-      //      assertEquals(
-      //          "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName
-      //              + " [~1,677,159,600,000] - [~1,677,159,000,000]\n    SERVER FILTER BY FIRST KEY ONLY",
-      //          explainPlan);
-    }
-  }
-
-  @Test
-  public void testKeyExplosionPartialCompositeIn() throws Exception {
-    // Variation 6: Partial composite key IN
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    try {
-      // Create table with DESC ordering on NUMBER column
-      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-          + "ID CHAR(15) NOT NULL, "
-          + "NUMBER VARCHAR NOT NULL, "
-          + "ENTITY_ID VARCHAR NOT NULL, "
-          + "CREATED_BY VARCHAR, "
-          + "DATA VARCHAR "
-          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-      conn.createStatement().execute(createTableDDL);
-
-      // Insert test data
-      String upsert = "UPSERT INTO " + testTable
-          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      ps.setString(1, "id_1");
-      ps.setString(2, "20251012");
-      ps.setString(3, "entity_1");
-      ps.setString(4, "user1");
-      ps.setString(5, "data1");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_2");
-      ps.setString(2, "20250912");
-      ps.setString(3, "entity_2");
-      ps.setString(4, "user2");
-      ps.setString(5, "data2");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setString(2, "20250913");
-      ps.setString(3, "entity_3");
-      ps.setString(4, "user3");
-      ps.setString(5, "data3");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_1");
-      ps.setString(2, "20251012");
-      ps.setString(3, "entity_1b");
-      ps.setString(4, "user4");
-      ps.setString(5, "data4");
-      ps.executeUpdate();
-
-      conn.commit();
-
-      // Run query with partial composite key IN (first two columns only)
-      String query = "SELECT * FROM " + testTable
-          + " WHERE (ID, NUMBER) IN (('id_1', '20251012'), ('id_2', '20250912'))";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-
-      // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-        String id = rs.getString("ID");
-        String number = rs.getString("NUMBER");
-        // Verify the combinations
-        assertTrue("Unexpected row",
-            (id.equals("id_1") && number.equals("20251012")) ||
-            (id.equals("id_2") && number.equals("20250912")));
-      }
-
-      assertEquals("Expected 3 rows", 3, rowCount);
-    } finally {
-      conn.close();
-    }
-  }
-
-  @Test
-  public void testKeyExplosionMixedAndOr() throws Exception {
-    // Variation 8: Mixed AND/OR with ranges on DESC column
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    try {
-      // Create table with DESC ordering on NUMBER column
-      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-          + "ID CHAR(15) NOT NULL, "
-          + "NUMBER VARCHAR NOT NULL, "
-          + "ENTITY_ID VARCHAR NOT NULL, "
-          + "CREATED_BY VARCHAR, "
-          + "DATA VARCHAR "
-          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-      conn.createStatement().execute(createTableDDL);
-
-      // Insert test data
-      String upsert = "UPSERT INTO " + testTable
-          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      ps.setString(1, "id_1");
-      ps.setString(2, "20251012");
-      ps.setString(3, "entity_1");
-      ps.setString(4, "user1");
-      ps.setString(5, "data1");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_1");
-      ps.setString(2, "20250910");
-      ps.setString(3, "entity_1b");
-      ps.setString(4, "user2");
-      ps.setString(5, "data2");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_2");
-      ps.setString(2, "20251011");
-      ps.setString(3, "entity_2");
-      ps.setString(4, "user3");
-      ps.setString(5, "data3");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setString(2, "20250913");
-      ps.setString(3, "entity_3");
-      ps.setString(4, "user4");
-      ps.setString(5, "data4");
-      ps.executeUpdate();
-
-      conn.commit();
-
-      // Run query with mixed AND/OR conditions
-      String query = "SELECT * FROM " + testTable
-          + " WHERE (ID = 'id_1' AND NUMBER > '20250911') OR (ID = 'id_2' AND NUMBER <= '20251012')";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-
-      // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-        String id = rs.getString("ID");
-        String number = rs.getString("NUMBER");
-        // Verify expected combinations
-        assertTrue("Unexpected row",
-            (id.equals("id_1") && number.equals("20251012")) ||
-            (id.equals("id_2") && number.equals("20251011")));
-      }
-
-      assertEquals("Expected 2 rows", 2, rowCount);
-    } finally {
-      conn.close();
-    }
-  }
-
-  @Test
-  public void testKeyExplosionPartialCompositeInInteger() throws Exception {
-    // Variation 6: Partial composite key IN (INTEGER type)
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    try {
-      // Create table with DESC ordering on NUMBER column
-      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-          + "ID CHAR(15) NOT NULL, "
-          + "NUMBER INTEGER NOT NULL, "
-          + "ENTITY_ID VARCHAR NOT NULL, "
-          + "CREATED_BY VARCHAR, "
-          + "DATA VARCHAR "
-          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-      conn.createStatement().execute(createTableDDL);
-
-      // Insert test data
-      String upsert = "UPSERT INTO " + testTable
-          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      ps.setString(1, "id_1");
-      ps.setInt(2, 20251012);
-      ps.setString(3, "entity_1");
-      ps.setString(4, "user1");
-      ps.setString(5, "data1");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_2");
-      ps.setInt(2, 20250912);
-      ps.setString(3, "entity_2");
-      ps.setString(4, "user2");
-      ps.setString(5, "data2");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setInt(2, 20250913);
-      ps.setString(3, "entity_3");
-      ps.setString(4, "user3");
-      ps.setString(5, "data3");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_1");
-      ps.setInt(2, 20251012);
-      ps.setString(3, "entity_1b");
-      ps.setString(4, "user4");
-      ps.setString(5, "data4");
-      ps.executeUpdate();
-
-      conn.commit();
-
-      // Run query with partial composite key IN (first two columns only)
-      String query = "SELECT * FROM " + testTable
-          + " WHERE (ID, NUMBER) IN (('id_1', 20251012), ('id_2', 20250912))";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-
-      // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-        String id = rs.getString("ID");
-        int number = rs.getInt("NUMBER");
-        // Verify the combinations
-        assertTrue("Unexpected row",
-            (id.equals("id_1") && number == 20251012) ||
-            (id.equals("id_2") && number == 20250912));
-      }
-
-      assertEquals("Expected 3 rows", 3, rowCount);
-    } finally {
-      conn.close();
-    }
-  }
-
-  @Test
-  public void testKeyExplosionMixedAndOrInteger() throws Exception {
-    // Variation 8: Mixed AND/OR with ranges on DESC column (INTEGER type)
-    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
-    Connection conn = DriverManager.getConnection(getUrl(), props);
-    String testTable = generateUniqueName();
-    try {
-      // Create table with DESC ordering on NUMBER column
-      String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " ("
-          + "ID CHAR(15) NOT NULL, "
-          + "NUMBER INTEGER NOT NULL, "
-          + "ENTITY_ID VARCHAR NOT NULL, "
-          + "CREATED_BY VARCHAR, "
-          + "DATA VARCHAR "
-          + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))";
-      conn.createStatement().execute(createTableDDL);
-
-      // Insert test data
-      String upsert = "UPSERT INTO " + testTable
-          + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)";
-      PreparedStatement ps = conn.prepareStatement(upsert);
-
-      ps.setString(1, "id_1");
-      ps.setInt(2, 20251012);
-      ps.setString(3, "entity_1");
-      ps.setString(4, "user1");
-      ps.setString(5, "data1");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_1");
-      ps.setInt(2, 20250910);
-      ps.setString(3, "entity_1b");
-      ps.setString(4, "user2");
-      ps.setString(5, "data2");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_2");
-      ps.setInt(2, 20251011);
-      ps.setString(3, "entity_2");
-      ps.setString(4, "user3");
-      ps.setString(5, "data3");
-      ps.executeUpdate();
-
-      ps.setString(1, "id_3");
-      ps.setInt(2, 20250913);
-      ps.setString(3, "entity_3");
-      ps.setString(4, "user4");
-      ps.setString(5, "data4");
-      ps.executeUpdate();
-
-      conn.commit();
-
-      // Run query with mixed AND/OR conditions
-      String query = "SELECT * FROM " + testTable
-          + " WHERE (ID = 'id_1' AND NUMBER > 20250911) OR (ID = 'id_2' AND NUMBER <= 20251012)";
-      PreparedStatement statement = conn.prepareStatement(query);
-      ResultSet rs = statement.executeQuery();
-
-      // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011
-      int rowCount = 0;
-      while (rs.next()) {
-        rowCount++;
-        String id = rs.getString("ID");
-        int number = rs.getInt("NUMBER");
-        // Verify expected combinations
-        assertTrue("Unexpected row",
-            (id.equals("id_1") && number == 20251012) ||
-            (id.equals("id_2") && number == 20251011));
-      }
-
-      assertEquals("Expected 2 rows", 2, rowCount);
-    } finally {
-      conn.close();
-    }
-  }
-
-
-
 }
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
index 2820afe411b..6ecfc7ff011 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
@@ -7202,8 +7202,8 @@ public void testReverseVarLengthRange6916() throws Exception {
       String openQry = "select * from " + tableName + " where k > 'a' and k<'aaa'";
       Scan openScan =
         getOptimizedQueryPlan(openQry, Collections.emptyList()).getContext().getScan();
-//      assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow()));
-//      assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow()));
+      assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow()));
+      assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow()));
       ResultSet rs = stmt.executeQuery("EXPLAIN " + openQry);
       String explainPlan = QueryUtil.getExplainPlan(rs);
       assertEquals(explainExpected, explainPlan);
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
index f6c7d82716d..33f616f189e 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
@@ -1280,8 +1280,8 @@ public void testLikeExpressionWithDescOrder() throws SQLException {
     byte[] invStopRow = new byte[startRow.length];
     SortOrder.invert(stopRow, 0, invStopRow, 0, stopRow.length);
 
-    assertArrayEquals(startRow, lowerRange);
-    assertArrayEquals(stopRow, upperRange);
+    assertArrayEquals(invStopRow, lowerRange);
+    assertArrayEquals(invStartRow, upperRange);
     assertFalse(lowerInclusive);
     assertTrue(upperInclusive);
 
@@ -3241,15 +3241,15 @@ public void testLastPkColumnIsVariableLengthAndDescBug5307() throws Exception {
         + "where (OBJ.OBJECT_ID, OBJ.OBJECT_VERSION) in (('obj1', '2222'),('obj2', '1111'),('obj3', '1111'))";
       queryPlan = TestUtil.getOptimizeQueryPlan(conn, sql);
       scan = queryPlan.getContext().getScan();
-//      FilterList filterList = (FilterList) scan.getFilter();
-//      assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL);
-//      assertEquals(filterList.getFilters().size(), 2);
-//      assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter);
-//      assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter);
-//      RowKeyComparisonFilter rowKeyComparisonFilter =
-//        (RowKeyComparisonFilter) filterList.getFilters().get(1);
-//      assertEquals(rowKeyComparisonFilter.toString(),
-//        "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')");
+      FilterList filterList = (FilterList) scan.getFilter();
+      assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL);
+      assertEquals(filterList.getFilters().size(), 2);
+      assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter);
+      assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter);
+      RowKeyComparisonFilter rowKeyComparisonFilter =
+        (RowKeyComparisonFilter) filterList.getFilters().get(1);
+      assertEquals(rowKeyComparisonFilter.toString(),
+        "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')");
 
       assertTrue(queryPlan.getContext().getScanRanges().isPointLookup());
       assertArrayEquals(startKey, scan.getStartRow());
diff --git a/pom.xml b/pom.xml
index ccae93600f6..3114d5e4a9d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -93,7 +93,7 @@
     <shadeSources>true</shadeSources>
 
     <!-- Dependency versions -->
-    <jackson-bom.version>2.14.1</jackson-bom.version>
+    <jackson-bom.version>2.18.4.1</jackson-bom.version>
     <netty-bom.version>4.1.126.Final</netty-bom.version>
     <antlr.version>3.5.2</antlr.version>
     <!-- Only used for tests with HBase 2.1-2.4 -->

From 16cc33f6de94e757dd861ef14e93bb0b74707b6a Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Sat, 9 May 2026 15:48:57 +0530
Subject: [PATCH 06/18] Repair phase implementation

---
 .../PhoenixSyncTableCheckpointOutputRow.java  |  26 +-
 .../PhoenixSyncTableInputFormat.java          |  16 +-
 .../mapreduce/PhoenixSyncTableMapper.java     | 430 ++++++++++++++++--
 .../PhoenixSyncTableOutputRepository.java     |  20 +-
 .../mapreduce/PhoenixSyncTableTool.java       |  38 +-
 .../end2end/PhoenixSyncTableToolIT.java       | 132 +++++-
 .../ConnectionQueryServicesMetricsIT.java     |   2 +
 .../PhoenixSyncTableOutputRepositoryTest.java |  42 +-
 8 files changed, 623 insertions(+), 83 deletions(-)

diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
index c848f2efaff..49cc9c4e4a0 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
@@ -37,7 +37,9 @@ public enum Type {
 
   public enum Status {
     VERIFIED,
-    MISMATCHED
+    MISMATCHED,
+    REPAIRED,
+    REPAIR_FAILED
   }
 
   private String tableName;
@@ -156,19 +158,27 @@ public long getTargetRowsProcessed() {
    * contract to ensure consistency between formatting (in mapper) and parsing (in tests).
    */
   public static class CounterFormatter {
-    private static final String FORMAT_CHUNK = "%s=%d,%s=%d";
+    private static final String FORMAT_CHUNK = "%s=%d,%s=%d,%s=%d,%s=%d";
     private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d";
 
     /**
-     * Formats chunk counters as comma-separated key=value pairs.
-     * @param sourceRows Source rows processed
-     * @param targetRows Target rows processed
-     * @return Formatted string: "SOURCE_ROWS_PROCESSED=123,TARGET_ROWS_PROCESSED=456"
+     * Formats chunk counters as comma-separated key=value pairs. Always emits all four
+     * counters; for verify-only chunks (no repair) {@code rowsPut} and {@code rowsDeleted}
+     * are 0 so operators querying the checkpoint table see a uniform format.
+     * @param sourceRows  Source rows processed
+     * @param targetRows  Target rows processed
+     * @param rowsPut     Rows put to target during repair (0 if not repaired)
+     * @param rowsDeleted Rows deleted from target during repair (0 if not repaired)
+     * @return Formatted string: "SOURCE_ROWS_PROCESSED=...,TARGET_ROWS_PROCESSED=...,
+     *         ROWS_PUT_TO_TARGET=...,ROWS_DELETED_FROM_TARGET=..."
      */
-    public static String formatChunk(long sourceRows, long targetRows) {
+    public static String formatChunk(long sourceRows, long targetRows, long rowsPut,
+      long rowsDeleted) {
       return String.format(FORMAT_CHUNK,
         PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows,
-        PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows);
+        PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET.name(), rowsPut,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET.name(), rowsDeleted);
     }
 
     /**
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
index b2dd739c0c3..fcd61f07582 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
@@ -100,10 +100,11 @@ public List<InputSplit> getSplits(JobContext context) throws IOException, Interr
     }
     LOGGER.info("Total splits generated {} of table {} for PhoenixSyncTable ", allSplits.size(),
       tableName);
+    boolean isDryRun = PhoenixSyncTableTool.getPhoenixSyncTableDryRun(conf);
     List<KeyRange> completedRegions;
     try {
       completedRegions =
-        queryCompletedMapperRegions(conf, tableName, targetZkQuorum, fromTime, toTime);
+        queryCompletedMapperRegions(conf, tableName, targetZkQuorum, fromTime, toTime, isDryRun);
     } catch (SQLException e) {
       throw new RuntimeException(e);
     }
@@ -136,16 +137,21 @@ public List<InputSplit> getSplits(JobContext context) throws IOException, Interr
   }
 
   /**
-   * Queries Sync checkpoint table for completed mapper regions
+   * Queries Sync checkpoint table for completed mapper regions.
+   *
+   * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered
+   *                 out as completed; MISMATCHED regions are re-entered as splits so their
+   *                 chunks can be repaired. When true (dry-run), all REGION rows regardless
+   *                 of status are treated as completed.
    */
   private List<KeyRange> queryCompletedMapperRegions(Configuration conf, String tableName,
-    String targetZkQuorum, Long fromTime, Long toTime) throws SQLException {
+    String targetZkQuorum, Long fromTime, Long toTime, boolean isDryRun) throws SQLException {
     String tenantId = PhoenixConfigurationUtil.getTenantId(conf);
     List<KeyRange> completedRegions = new ArrayList<>();
     try (Connection conn = ConnectionUtil.getInputConnection(conf)) {
       PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn);
-      List<PhoenixSyncTableCheckpointOutputRow> completedRows =
-        repository.getProcessedMapperRegions(tableName, targetZkQuorum, fromTime, toTime, tenantId);
+      List<PhoenixSyncTableCheckpointOutputRow> completedRows = repository.getProcessedMapperRegions(
+        tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun);
       for (PhoenixSyncTableCheckpointOutputRow row : completedRows) {
         KeyRange keyRange = KeyRange.getKeyRange(row.getStartRowKey(), row.getEndRowKey());
         completedRegions.add(keyRange);
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index 65e932ae78b..9c3349464b0 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -29,8 +29,11 @@
 import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
@@ -65,10 +68,16 @@ public class PhoenixSyncTableMapper
   public enum SyncCounters {
     MAPPERS_VERIFIED,
     MAPPERS_MISMATCHED,
+    MAPPERS_REPAIRED,
+    MAPPERS_REPAIR_FAILED,
     CHUNKS_VERIFIED,
     CHUNKS_MISMATCHED,
+    CHUNKS_REPAIRED,
+    CHUNKS_REPAIR_FAILED,
     SOURCE_ROWS_PROCESSED,
-    TARGET_ROWS_PROCESSED
+    TARGET_ROWS_PROCESSED,
+    ROWS_PUT_TO_TARGET,
+    ROWS_DELETED_FROM_TARGET
   }
 
   private String tableName;
@@ -80,6 +89,7 @@ public enum SyncCounters {
   private long chunkSizeBytes;
   private boolean isRawScan;
   private boolean isReadAllVersions;
+  private int repairBatchSize;
   private Configuration conf;
   private Connection sourceConnection;
   private Connection targetConnection;
@@ -103,6 +113,7 @@ protected void setup(Context context) throws InterruptedException {
       chunkSizeBytes = PhoenixSyncTableTool.getPhoenixSyncTableChunkSizeBytes(conf);
       isRawScan = PhoenixSyncTableTool.getPhoenixSyncTableRawScan(conf);
       isReadAllVersions = PhoenixSyncTableTool.getPhoenixSyncTableReadAllVersions(conf);
+      repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf);
       extractRegionBoundariesFromSplit(context);
       sourceConnection = ConnectionUtil.getInputConnection(conf);
       pTable = sourceConnection.unwrap(PhoenixConnection.class).getTable(tableName);
@@ -153,7 +164,6 @@ private void connectToTargetCluster() throws SQLException, IOException {
   private Connection createGlobalConnection(Configuration conf) throws SQLException {
     Configuration globalConf = new Configuration(conf);
     globalConf.unset(PhoenixConfigurationUtil.MAPREDUCE_TENANT_ID);
-    globalConf.unset(PhoenixRuntime.CURRENT_SCN_ATTRIB);
     return ConnectionUtil.getInputConnection(globalConf);
   }
 
@@ -176,7 +186,7 @@ protected void map(NullWritable key, DBInputFormat.NullDBWritable value, Context
           Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), tableName);
         processRegion(regionStart, regionEnd, context);
       }
-    } catch (SQLException e) {
+    } catch (SQLException | IOException e) {
       tryClosingResources();
       throw new RuntimeException("Error processing PhoenixSyncTableMapper", e);
     }
@@ -196,7 +206,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
     // Get processed chunks for this specific region
     List<PhoenixSyncTableCheckpointOutputRow> processedChunks =
       syncTableOutputRepository.getProcessedChunks(tableName, targetZkQuorum, fromTime, toTime,
-        tenantId, regionStart, regionEnd);
+        tenantId, regionStart, regionEnd, isDryRun);
 
     // Calculate unprocessed ranges within this region
     List<KeyRange> unprocessedRanges =
@@ -205,6 +215,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
     // Track counters before processing this region
     long verifiedBefore = context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
     long mismatchedBefore = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
+    long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
     long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
     long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
 
@@ -221,6 +232,8 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
       context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue() - verifiedBefore;
     long mismatchedChunks =
       context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue() - mismatchedBefore;
+    long repairFailedChunks =
+      context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() - repairFailedBefore;
     long sourceRowsProcessed =
       context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue() - sourceRowsBefore;
     long targetRowsProcessed =
@@ -231,7 +244,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
       .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed);
     if (sourceRowsProcessed > 0) {
       recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks,
-        mismatchedChunks, counters, context);
+        mismatchedChunks, repairFailedChunks, counters, context);
     } else {
       LOGGER.info(
         "No rows pending to process. All region boundaries are covered for startKey:{}, endKey: {}",
@@ -242,39 +255,62 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
   /**
    * Records region completion by updating counters, recording checkpoint, and logging result.
    * Consolidates all region completion logic to eliminate duplication.
-   * @param regionStart      Region start key
-   * @param regionEnd        Region end key
-   * @param regionStartTime  Region processing start time
-   * @param regionEndTime    Region processing end time
-   * @param verifiedChunks   Number of verified chunks
-   * @param mismatchedChunks Number of mismatched chunks
-   * @param counters         Formatted counter string
-   * @param context          Mapper context
+   * @param regionStart        Region start key
+   * @param regionEnd          Region end key
+   * @param regionStartTime    Region processing start time
+   * @param regionEndTime      Region processing end time
+   * @param verifiedChunks     Number of verified chunks
+   * @param mismatchedChunks   Number of mismatched chunks
+   * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the
+   *                           region rolls up to MISMATCHED (drift remains, re-run will retry)
+   * @param counters           Formatted counter string
+   * @param context            Mapper context
    */
   private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
     Timestamp regionStartTime, Timestamp regionEndTime, long verifiedChunks, long mismatchedChunks,
-    String counters, Context context) throws SQLException {
+    long repairFailedChunks, String counters, Context context) throws SQLException {
 
-    boolean isVerified = mismatchedChunks == 0;
-    PhoenixSyncTableCheckpointOutputRow.Status status = isVerified
-      ? PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED
-      : PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED;
+    // Region rolls up its child chunks' outcomes into one of four statuses:
+    //   VERIFIED      — every chunk matched; no drift in this region.
+    //   MISMATCHED    — drift was detected but repair was not attempted (dry-run mode).
+    //   REPAIRED      — drift was detected and every chunk's repair succeeded.
+    //   REPAIR_FAILED — drift was detected, repair was attempted, and at least one chunk
+    //                   threw during merge-scan or flush. The failed chunks remain as
+    //                   CHUNK/REPAIR_FAILED rows; a re-run will re-attempt them via the
+    //                   Phase 2 STATUS-IN filter that excludes REPAIR_FAILED.
+    PhoenixSyncTableCheckpointOutputRow.Status status;
+    SyncCounters mapperCounter;
+    if (mismatchedChunks == 0) {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED;
+      mapperCounter = SyncCounters.MAPPERS_VERIFIED;
+    } else if (isDryRun) {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED;
+      mapperCounter = SyncCounters.MAPPERS_MISMATCHED;
+    } else if (repairFailedChunks == 0) {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
+      mapperCounter = SyncCounters.MAPPERS_REPAIRED;
+    } else {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED;
+      mapperCounter = SyncCounters.MAPPERS_REPAIR_FAILED;
+    }
 
-    context.getCounter(isVerified ? SyncCounters.MAPPERS_VERIFIED : SyncCounters.MAPPERS_MISMATCHED)
-      .increment(1);
+    context.getCounter(mapperCounter).increment(1);
 
     recordRegionCheckpoint(regionStart, regionEnd, status, regionStartTime, regionEndTime,
       counters);
 
     String logMessage = String.format(
-      "PhoenixSyncTable region [%s, %s) completed with %s: %d verified chunks, %d mismatched chunks",
+      "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, %d repair-failed",
       Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd),
-      isVerified ? "verified" : "mismatch", verifiedChunks, mismatchedChunks);
+      status.name().toLowerCase(), verifiedChunks, mismatchedChunks, repairFailedChunks);
 
-    if (isVerified) {
-      LOGGER.info(logMessage);
-    } else {
+    if (
+      status == PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED
+        || status == PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED
+    ) {
       LOGGER.warn(logMessage);
+    } else {
+      LOGGER.info(logMessage);
     }
   }
 
@@ -354,10 +390,15 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
         if (nextSourceChunk == null) {
           isLastChunkOfRegion = true;
         }
-        ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection,
-          previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey,
-          isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey, isTargetStartKeyInclusive,
-          !isLastChunkOfRegion);
+        // Target scan boundary: covers extra-on-target rows that fall before the first
+        // source chunk, between consecutive source chunks, or after the last. Both verify
+        // and repair use the same range so repair sees the same cells the verifier hashed.
+        byte[] targetStart =
+          previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey;
+        byte[] targetEnd = isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey;
+        boolean targetEndInclusive = !isLastChunkOfRegion;
+        ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection, targetStart,
+          targetEnd, isTargetStartKeyInclusive, targetEndInclusive);
         context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).increment(sourceChunk.rowCount);
         context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).increment(targetChunk.rowCount);
         boolean matched = MessageDigest.isEqual(sourceChunk.hash, targetChunk.hash);
@@ -367,22 +408,25 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
               + "isTargetEndKeyInclusive: {}, isFirstChunkOfRegion: {}, isLastChunkOfRegion: {}."
               + "Chunk comparison source {}, {}. Key range passed to target chunk: {}, {}."
               + "target chunk returned {}, {}: source={} rows, target={} rows, matched={}",
-            isSourceStartKeyInclusive, isTargetStartKeyInclusive, !isLastChunkOfRegion,
+            isSourceStartKeyInclusive, isTargetStartKeyInclusive, targetEndInclusive,
             previousSourceChunk == null, isLastChunkOfRegion,
             Bytes.toStringBinary(sourceChunk.startKey), Bytes.toStringBinary(sourceChunk.endKey),
-            Bytes.toStringBinary(
-              previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey),
-            Bytes.toStringBinary(isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey),
+            Bytes.toStringBinary(targetStart), Bytes.toStringBinary(targetEnd),
             Bytes.toStringBinary(targetChunk.startKey), Bytes.toStringBinary(targetChunk.endKey),
             sourceChunk.rowCount, targetChunk.rowCount, matched);
         }
         sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis());
         String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-          .formatChunk(sourceChunk.rowCount, targetChunk.rowCount);
+          .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L);
         if (matched) {
           handleVerifiedChunk(sourceChunk, context, counters);
         } else {
           handleMismatchedChunk(sourceChunk, context, counters);
+          if (!isDryRun) {
+            repairChunk(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd,
+              isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount,
+              targetChunk.rowCount, sourceChunk.executionStartTime, context);
+          }
         }
         previousSourceChunk = sourceChunk;
         sourceChunk = nextSourceChunk;
@@ -556,11 +600,13 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk,
   }
 
   /**
-   * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all
-   * cell versions based on command-line options.
+   * Builds the common Scan shape used by both verification and repair: same key range,
+   * inclusivity, time window, raw-scan, and all-versions semantics. Callers layer on their
+   * own caching, limits, and coprocessor attributes. Keeping the base shared guarantees that
+   * the cells visited by repair are exactly the cells the verifier hashed.
    */
-  private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
-    boolean isEndKeyInclusive, boolean isTargetScan) throws IOException {
+  private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
+    boolean isEndKeyInclusive) throws IOException {
     Scan scan = new Scan();
     scan.withStartRow(startKey, isStartKeyInclusive);
     scan.withStopRow(endKey, isEndKeyInclusive);
@@ -570,6 +616,16 @@ private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyI
     }
     scan.setCacheBlocks(false);
     scan.setTimeRange(fromTime, toTime);
+    return scan;
+  }
+
+  /**
+   * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all
+   * cell versions based on command-line options.
+   */
+  private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
+    boolean isEndKeyInclusive, boolean isTargetScan) throws IOException {
+    Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive);
     // Set limit and caching to 1 for sequential partial digest retrieval from target.
     // Enables digest continuation: each target chunk's digest feeds into the next until scanning
     // completes
@@ -691,6 +747,304 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
     return Bytes.compareTo(processedChunks.get(0).getStartRowKey(), mapperRegionStart) > 0;
   }
 
+  /**
+   * Builds a row-level HBase scan for repair. Differs from {@link #createChunkScan} in that it
+   * does NOT set {@code SYNC_TABLE_CHUNK_FORMATION} or {@code SYNC_TABLE_CHUNK_SIZE_BYTES}, so
+   * the scanner returns actual {@link Result} rows rather than coprocessor chunk metadata.
+   * Shares the {@link #createBaseScan} core (time range, raw-scan, all-versions, inclusivity)
+   * with verification so the cells visited here are the same cells that produced the chunk
+   * hash. Adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL} attributes.
+   */
+  private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
+    boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException {
+    Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive);
+    scan.setCaching(1000);
+    ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn);
+    scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES);
+    return scan;
+  }
+
+  /**
+   * Returns true if both Results carry identical cell arrays. Walks the two arrays in
+   * lock-step using {@link CellComparator} for coordinate ordering and {@link CellUtil#matchingValue}
+   * for value equality (CellComparator does not compare values). Both scanners use the same
+   * configuration on byte-ordered storage, so cells come back in canonical KV order.
+   */
+  private boolean rowCellsEqual(Result sourceResult, Result targetResult) {
+    Cell[] sourceCells = sourceResult.rawCells();
+    Cell[] targetCells = targetResult.rawCells();
+    if (sourceCells.length != targetCells.length) {
+      return false;
+    }
+    CellComparator comparator = CellComparator.getInstance();
+    for (int i = 0; i < sourceCells.length; i++) {
+      if (
+        comparator.compare(sourceCells[i], targetCells[i]) != 0
+          || !CellUtil.matchingValue(sourceCells[i], targetCells[i])
+      ) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Builds a Put preserving the original cell timestamps from the source row. Used when an
+   * entire row is missing on target.
+   */
+  private Put buildPutFromResult(Result result) throws IOException {
+    Put put = new Put(result.getRow());
+    for (Cell cell : result.rawCells()) {
+      put.add(cell);
+    }
+    return put;
+  }
+
+  /**
+   * Builds a Delete for every cell in the target row. The repair scan's time range
+   * {@code [fromTime, toTime]} guarantees the cells we read here are within the sync window;
+   * cells outside the window are never read, therefore never deleted.
+   */
+  private Delete buildDeleteFromResult(Result result) {
+    Delete delete = new Delete(result.getRow());
+    for (Cell cell : result.rawCells()) {
+      delete.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell),
+        cell.getTimestamp());
+    }
+    return delete;
+  }
+
+  /**
+   * Computes cell-level diffs for a row that exists on both clusters but with a different
+   * cell set. Cells present in source but missing from target are added to a single Put for
+   * the row; cells present in target but missing from source are added to a single Delete.
+   */
+  private void addRepairMutations(Result sourceResult, Result targetResult,
+    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+    byte[] rowKey = sourceResult.getRow();
+    Cell[] sourceCells = sourceResult.rawCells();
+    Cell[] targetCells = targetResult.rawCells();
+    CellComparator comparator = CellComparator.getInstance();
+
+    Put put = null;
+    Delete delete = null;
+
+    int sourceIdx = 0;
+    int targetIdx = 0;
+    while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) {
+      Cell sourceCell = sourceCells[sourceIdx];
+      Cell targetCell = targetCells[targetIdx];
+      int cmp = comparator.compare(sourceCell, targetCell);
+      if (cmp == 0) {
+        // Same coordinates (family/qualifier/timestamp/type) on both sides. CellComparator
+        // does NOT compare values, so check value separately. If values differ, Put the
+        // source cell — it overwrites the target cell at the same timestamp.
+        if (!CellUtil.matchingValue(sourceCell, targetCell)) {
+          if (put == null) {
+            put = new Put(rowKey);
+          }
+          put.add(sourceCell);
+        }
+        sourceIdx++;
+        targetIdx++;
+      } else if (cmp < 0) {
+        // Source-only cell, Put on target.
+        if (put == null) {
+          put = new Put(rowKey);
+        }
+        put.add(sourceCell);
+        sourceIdx++;
+      } else {
+        // Target-only cell, Delete from target.
+        if (delete == null) {
+          delete = new Delete(rowKey);
+        }
+        delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell),
+          targetCell.getTimestamp());
+        targetIdx++;
+      }
+    }
+    // Tail: any remaining source cells are source-only.
+    while (sourceIdx < sourceCells.length) {
+      if (put == null) {
+        put = new Put(rowKey);
+      }
+      put.add(sourceCells[sourceIdx++]);
+    }
+    // Tail: any remaining target cells are target-only.
+    while (targetIdx < targetCells.length) {
+      Cell targetCell = targetCells[targetIdx++];
+      if (delete == null) {
+        delete = new Delete(rowKey);
+      }
+      delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell),
+        targetCell.getTimestamp());
+    }
+
+    if (put != null) {
+      pendingPuts.add(put);
+    }
+    if (delete != null) {
+      pendingDeletes.add(delete);
+    }
+  }
+
+  /**
+   * Flushes the accumulated Put and Delete batches to the target HTable and clears both
+   * lists. Called every {@code repairBatchSize} rows and once more at the end of a chunk.
+   */
+  private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delete> deletes)
+    throws IOException {
+    if (!puts.isEmpty()) {
+      targetHTable.put(puts);
+      puts.clear();
+    }
+    if (!deletes.isEmpty()) {
+      targetHTable.delete(deletes);
+      deletes.clear();
+    }
+  }
+
+  /**
+   * Performs row-level repair for a mismatched chunk by merge-scanning source and target
+   * cluster data and applying targeted mutations to target. The two scan ranges may differ:
+   * the verifier reads target over a wider range than source (covers extra-on-target rows
+   * that fall between consecutive source chunks); repair must mirror the same boundaries so
+   * those extras are visible here as {@code cmp > 0} rows and get deleted.
+   *
+   * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee).
+   *   cmp == 0 (same row): compare cells; repair if different.
+   *   cmp <  0 (source-only): Put all source cells.
+   *   cmp >  0 (target-only): Delete target cells within [fromTime, toTime].
+   *
+   * Cells outside [fromTime, toTime] are never read (scan time range), so never mutated.
+   *
+   * Only called when isDryRun == false.
+   *
+   * @param sourceStart           Source chunk start key (also the checkpoint PK) — inclusive
+   * @param sourceEnd             Source chunk end key (also the checkpoint PK) — inclusive
+   * @param targetStart           Target scan start (matches verifier-side boundary)
+   * @param targetEnd             Target scan end (matches verifier-side boundary)
+   * @param targetStartInclusive  Inclusivity of target scan start — matches verify side
+   * @param targetEndInclusive    Inclusivity of target scan end — matches verify side
+   * @param verifyStartTime       When the verify pass began for this chunk; reused as the
+   *                              REPAIRED row's START_TIME so the row spans the full
+   *                              verify+repair lifecycle that overwrites the MISMATCHED row.
+   */
+  private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart,
+    byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive,
+    long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context)
+    throws IOException, SQLException {
+    long rowsPut = 0;
+    long rowsDeleted = 0;
+
+    LOGGER.info(
+      "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}",
+      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd),
+      targetStartInclusive ? "[" : "(", Bytes.toStringBinary(targetStart),
+      Bytes.toStringBinary(targetEnd), targetEndInclusive ? "]" : ")", tableName);
+
+    PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class);
+    PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class);
+
+    Scan sourceScan = createRepairScan(sourceStart, sourceEnd, true, true, sourcePhoenixConn);
+    Scan targetScan = createRepairScan(targetStart, targetEnd, targetStartInclusive,
+      targetEndInclusive, targetPhoenixConn);
+
+    List<Put> pendingPuts = new ArrayList<>();
+    List<Delete> pendingDeletes = new ArrayList<>();
+
+    try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName);
+      Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName);
+      ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan);
+      ResultScanner targetScanner = targetHTable.getScanner(targetScan)) {
+
+      Result sourceResult = sourceScanner.next();
+      Result targetResult = targetScanner.next();
+
+      while (sourceResult != null || targetResult != null) {
+        int cmp;
+        if (sourceResult == null) {
+          cmp = 1;
+        } else if (targetResult == null) {
+          cmp = -1;
+        } else {
+          cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow());
+        }
+
+        if (cmp == 0) {
+          // Same row key on both clusters — diff at cell level and repair only if cells differ.
+          if (!rowCellsEqual(sourceResult, targetResult)) {
+            addRepairMutations(sourceResult, targetResult, pendingPuts, pendingDeletes);
+            rowsPut++;
+          }
+          sourceResult = sourceScanner.next();
+          targetResult = targetScanner.next();
+        } else if (cmp < 0) {
+          // Source-only row (target is missing it, or target has advanced past) — Put it on target.
+          pendingPuts.add(buildPutFromResult(sourceResult));
+          rowsPut++;
+          sourceResult = sourceScanner.next();
+        } else {
+          // Target-only row (source has no such row in this range) — Delete it from target.
+          pendingDeletes.add(buildDeleteFromResult(targetResult));
+          rowsDeleted++;
+          targetResult = targetScanner.next();
+        }
+
+        if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) {
+          flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
+        }
+        context.progress();
+      }
+      flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
+    } catch (IOException e) {
+      // Per-chunk fault isolation. Mark this chunk REPAIR_FAILED, increment the counter,
+      // and return so the mapper continues with the next chunk. Phase 2's STATUS filter
+      // (VERIFIED, REPAIRED) excludes REPAIR_FAILED, so a re-run will re-attempt this chunk
+      // as an unprocessed gap.
+      LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}",
+        Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName,
+        e.getMessage(), e);
+      context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).increment(1);
+
+      Timestamp failedAt = new Timestamp(System.currentTimeMillis());
+      // Capture partial progress in the COUNTERS column for triage.
+      String failedCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
+        .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted);
+      syncTableOutputRepository
+        .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+          .setTableName(tableName).setTargetCluster(targetZkQuorum)
+          .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
+          .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun)
+          .setStartRowKey(sourceStart).setEndRowKey(sourceEnd)
+          .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)
+          .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt)
+          .setCounters(failedCounters).build());
+      return;
+    }
+
+    context.getCounter(SyncCounters.ROWS_PUT_TO_TARGET).increment(rowsPut);
+    context.getCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).increment(rowsDeleted);
+    context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1);
+
+    Timestamp repairEndTime = new Timestamp(System.currentTimeMillis());
+    String repairCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
+      .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted);
+
+    syncTableOutputRepository
+      .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetZkQuorum)
+        .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
+        .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart)
+        .setEndRowKey(sourceEnd).setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)
+        .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime)
+        .setCounters(repairCounters).build());
+
+    LOGGER.info("Completed repair for chunk source=[{}, {}]: rowsPut={}, rowsDeleted={}",
+      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), rowsPut, rowsDeleted);
+  }
+
   @Override
   protected void cleanup(Context context) throws IOException, InterruptedException {
     tryClosingResources();
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java
index fddfea32ddd..29176683c2a 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java
@@ -120,16 +120,18 @@ public void checkpointSyncTableResult(PhoenixSyncTableCheckpointOutputRow row)
    * @param fromTime      Start timestamp
    * @param toTime        End timestamp
    * @param tenantId      Tenant ID
+   * @param isDryRun      When false (repair mode)
    * @return List of completed mapper regions
    */
   public List<PhoenixSyncTableCheckpointOutputRow> getProcessedMapperRegions(String tableName,
-    String targetCluster, Long fromTime, Long toTime, String tenantId) throws SQLException {
+    String targetCluster, Long fromTime, Long toTime, String tenantId, boolean isDryRun)
+    throws SQLException {
 
     StringBuilder queryBuilder = new StringBuilder();
     queryBuilder.append("SELECT START_ROW_KEY, END_ROW_KEY FROM ")
       .append(SYNC_TABLE_CHECKPOINT_TABLE_NAME)
       .append(" WHERE TABLE_NAME = ?  AND TARGET_CLUSTER = ?")
-      .append(" AND TYPE = ? AND FROM_TIME = ? AND TO_TIME = ? ");
+      .append(" AND TYPE = 'REGION' AND FROM_TIME = ? AND TO_TIME = ? ");
 
     // Conditionally build TENANT_ID clause based on whether tenantId is null
     if (tenantId == null) {
@@ -138,6 +140,11 @@ public List<PhoenixSyncTableCheckpointOutputRow> getProcessedMapperRegions(Strin
       queryBuilder.append(" AND TENANT_ID = ?");
     }
 
+    // In repair mode: only skip regions that are fully done (VERIFIED or REPAIRED).
+    if (!isDryRun) {
+      queryBuilder.append(" AND STATUS IN ('VERIFIED', 'REPAIRED')");
+    }
+
     queryBuilder.append(
       " ORDER BY TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY");
 
@@ -146,7 +153,6 @@ public List<PhoenixSyncTableCheckpointOutputRow> getProcessedMapperRegions(Strin
       int paramIndex = 1;
       ps.setString(paramIndex++, tableName);
       ps.setString(paramIndex++, targetCluster);
-      ps.setString(paramIndex++, Type.REGION.name());
       ps.setLong(paramIndex++, fromTime);
       ps.setLong(paramIndex++, toTime);
       // Only bind tenantId parameter if non-null
@@ -175,11 +181,12 @@ public List<PhoenixSyncTableCheckpointOutputRow> getProcessedMapperRegions(Strin
    * @param tenantId          Tenant ID
    * @param mapperRegionStart Mapper region start key
    * @param mapperRegionEnd   Mapper region end key
+   * @param isDryRun          When false (repair mode)
    * @return List of processed chunks in the region
    */
   public List<PhoenixSyncTableCheckpointOutputRow> getProcessedChunks(String tableName,
     String targetCluster, Long fromTime, Long toTime, String tenantId, byte[] mapperRegionStart,
-    byte[] mapperRegionEnd) throws SQLException {
+    byte[] mapperRegionEnd, boolean isDryRun) throws SQLException {
     StringBuilder queryBuilder = new StringBuilder();
     queryBuilder.append("SELECT START_ROW_KEY, END_ROW_KEY FROM " + SYNC_TABLE_CHECKPOINT_TABLE_NAME
       + " WHERE TABLE_NAME = ? AND TARGET_CLUSTER = ? "
@@ -192,6 +199,11 @@ public List<PhoenixSyncTableCheckpointOutputRow> getProcessedChunks(String table
       queryBuilder.append(" AND TENANT_ID = ?");
     }
 
+    // In repair mode: only skip chunks that are fully done (VERIFIED or REPAIRED).
+    if (!isDryRun) {
+      queryBuilder.append(" AND STATUS IN ('VERIFIED', 'REPAIRED')");
+    }
+
     // Check if mapper region boundaries are non-empty (i.e., NOT first/last regions)
     // Only add boundary conditions for non-empty boundaries
     boolean hasEndBoundary = mapperRegionEnd != null && mapperRegionEnd.length > 0;
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
index 80eacde25e7..7cb3f034138 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
@@ -134,6 +134,9 @@ public class PhoenixSyncTableTool extends Configured implements Tool {
   public static final String PHOENIX_SYNC_TABLE_RAW_SCAN = "phoenix.sync.table.raw.scan";
   public static final String PHOENIX_SYNC_TABLE_READ_ALL_VERSIONS =
     "phoenix.sync.table.read.all.versions";
+  public static final String PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE =
+    "phoenix.sync.table.repair.batch.size";
+  public static final int DEFAULT_PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE = 1000;
 
   private String schemaName;
   private String tableName;
@@ -233,7 +236,6 @@ private void setPhoenixSyncTableToolConfiguration(Configuration configuration) {
     if (tenantId != null) {
       PhoenixConfigurationUtil.setTenantId(configuration, tenantId);
     }
-    PhoenixConfigurationUtil.setCurrentScnValue(configuration, endTime);
     configuration
       .setBooleanIfUnset(PhoenixConfigurationUtil.MAPREDUCE_RANDOMIZE_MAPPER_EXECUTION_ORDER, true);
   }
@@ -411,20 +413,35 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_VERIFIED).getValue();
       long mismatchedMappers =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue();
+      long repairedMappers =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue();
+      long repairFailedMappers = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
       long chunksVerified =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED).getValue();
       long chunksMismatched =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue();
+      long chunksRepaired =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue();
+      long chunksRepairFailed = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
       long sourceRowsProcessed =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
       long targetRowsProcessed =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue();
+      long rowsPutToTarget =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET).getValue();
+      long rowsDeletedFromTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET).getValue();
       LOGGER.info(
         "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n"
-          + "Mismatched Mappers: {}, \n Chunks Verified: {}, \n"
-          + "Chunks Mismatched: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}",
-        taskCreated, verifiedMappers, mismatchedMappers, chunksVerified, chunksMismatched,
-        sourceRowsProcessed, targetRowsProcessed);
+          + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n"
+          + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n"
+          + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n"
+          + "Rows Put To Target: {}, \n Rows Deleted From Target: {}",
+        taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers,
+        chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed,
+        targetRowsProcessed, rowsPutToTarget, rowsDeletedFromTarget);
     } else {
       LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed "
         + "during initialization", qTable);
@@ -558,6 +575,17 @@ public static boolean getPhoenixSyncTableSplitCoalescing(Configuration conf) {
       DEFAULT_PHOENIX_SYNC_TABLE_SPLIT_COALESCING);
   }
 
+  public static void setPhoenixSyncTableRepairBatchSize(Configuration conf, int batchSize) {
+    Preconditions.checkNotNull(conf);
+    conf.setInt(PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE, batchSize);
+  }
+
+  public static int getPhoenixSyncTableRepairBatchSize(Configuration conf) {
+    Preconditions.checkNotNull(conf);
+    return conf.getInt(PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE,
+      DEFAULT_PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE);
+  }
+
   public Job getJob() {
     return job;
   }
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 0661973dbe7..57321b4ce9f 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -55,6 +55,7 @@
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants;
 import org.apache.phoenix.jdbc.HighAvailabilityTestingUtility.HBaseTestingUtilityPair;
 import org.apache.phoenix.jdbc.PhoenixConnection;
 import org.apache.phoenix.jdbc.PhoenixDriver;
@@ -155,7 +156,17 @@ public void testSyncTableValidateWithDataDifference() throws Exception {
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
-    Job job = runSyncToolWithLargeChunks(uniqueTableName);
+    // Pin the time window so both runs share the same checkpoint PK
+    // (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY).
+    // Without this, runSyncToolWithLargeChunks would assign a fresh System.currentTimeMillis()
+    // to --to-time on each call and the repair pass would append new rows instead of
+    // overwriting the dry-run pass's MISMATCHED rows.
+    String fromTime = "0";
+    String toTime = String.valueOf(System.currentTimeMillis());
+
+    // First run: --dry-run, only detect mismatches.
+    Job job = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", fromTime,
+      "--to-time", toTime);
     SyncCountersResult counters = getSyncCounters(job);
 
     validateSyncCounters(counters, 10, 10, 1, 3);
@@ -166,6 +177,60 @@ public void testSyncTableValidateWithDataDifference() throws Exception {
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4,
       3, null);
+
+    // Second run: no --dry-run, repair the mismatched chunks. Same time window so the
+    // PK matches and CHUNK/REPAIRED overwrites CHUNK/MISMATCHED in place.
+    Job repairJob =
+      runSyncToolWithLargeChunks(uniqueTableName, "--from-time", fromTime, "--to-time", toTime);
+    Counters repairCounters = repairJob.getCounters();
+
+    // The repair re-verifies the previously MISMATCHED chunks (excluded by Phase 2 filter)
+    // and now repairs them, producing CHUNK/REPAIRED + REGION/REPAIRED checkpoint rows.
+    long chunksRepaired = repairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue();
+    long chunksRepairFailed =
+      repairCounters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
+    long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
+    long mappersRepairFailed =
+      repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
+    long rowsPutToTarget = repairCounters.findCounter(SyncCounters.ROWS_PUT_TO_TARGET).getValue();
+    long rowsDeletedFromTarget =
+      repairCounters.findCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).getValue();
+    assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired);
+    assertEquals("No chunk repair should fail", 0, chunksRepairFailed);
+    assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired);
+    assertEquals("No mapper repair should fail", 0, mappersRepairFailed);
+    assertEquals("Three rows on target should be Put back to source values", 3, rowsPutToTarget);
+    assertEquals("No rows should be deleted from target (only value diffs, not extras)", 0,
+      rowsDeletedFromTarget);
+
+    // Target rows should now match source.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+
+    // Checkpoint table now has 3 CHUNK/REPAIRED + 3 REGION/REPAIRED in addition to the
+    // VERIFIED rows. Phase 2's STATUS-IN filter caused the MISMATCHED rows to be re-entered
+    // as gaps and repair overwrote them in place.
+    List<PhoenixSyncTableCheckpointOutputRow> postRepairEntries =
+      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    int chunkRepairedRows = 0;
+    int regionRepairedRows = 0;
+    int mismatchedRows = 0;
+    for (PhoenixSyncTableCheckpointOutputRow entry : postRepairEntries) {
+      if (PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED.equals(entry.getStatus())) {
+        if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) {
+          chunkRepairedRows++;
+        } else if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) {
+          regionRepairedRows++;
+        }
+      } else if (
+        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())
+      ) {
+        mismatchedRows++;
+      }
+    }
+    assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, chunkRepairedRows);
+    assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, regionRepairedRows);
+    assertEquals("MISMATCHED rows should be overwritten in place — none should remain", 0,
+      mismatchedRows);
   }
 
   @Test
@@ -1771,6 +1836,69 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception {
       3, null);
   }
 
+  /**
+   * Verifies that the sync job completes successfully when {@code endTime} (--to-time) is older
+   * than {@code phoenix.max.lookback.age.seconds}.
+   *
+   * <p>Root cause without fix: {@link PhoenixSyncTableTool} sets {@code CURRENT_SCN_VALUE =
+   * endTime} in the MR job configuration. During split generation, {@code
+   * PhoenixInputFormat.getQueryPlan()} creates a Phoenix connection with that SCN. {@code
+   * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is
+   * older than {@code phoenix.max.lookback.age.seconds}.
+   *
+   * <p>Fix: {@link PhoenixSyncTableInputFormat#getQueryPlan} overrides the parent to strip {@code
+   * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code
+   * verifySCN()} returns early (SCN == null), so no exception is thrown.
+   *
+   * <p>Data access correctness: The mapper uses raw HBase {@code Scan.setTimeRange(fromTime,
+   * toTime)}, which does NOT go through {@code QueryCompiler.compile()} or {@code verifySCN()}, so
+   * data within [fromTime, toTime] is always accessible regardless of max lookback age.
+   */
+  @Test
+  public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exception {
+    // Setup: create tables on both clusters and replicate 10 rows
+    createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
+    insertTestData(sourceConnection, uniqueTableName, 1, 10);
+    waitForReplication(targetConnection, uniqueTableName, 10);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+
+    // Capture toTime BEFORE the lookback window will expire
+    long toTime = System.currentTimeMillis();
+
+    // Configure a short max lookback age (5 seconds) in the MR job configuration.
+    // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from
+    // conn.getQueryServices().getConfiguration(), which is the client-side MR conf.
+    long maxLookbackAgeSeconds = 5;
+    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+    conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY,
+      maxLookbackAgeSeconds);
+
+    // Wait until toTime is older than the lookback age.
+    // After this sleep: (now - maxLookbackAgeMillis) > toTime  → verifySCN would throw ERROR 538
+    Thread.sleep((maxLookbackAgeSeconds + 2) * 1000L);
+
+    // Run the sync tool with the now-stale toTime.
+    // Without PhoenixSyncTableInputFormat.getQueryPlan() override:
+    //   getSplits() → PhoenixInputFormat.getQueryPlan() sets SCN=toTime on Phoenix connection
+    //   → QueryCompiler.verifySCN() → ERROR 538 (toTime older than maxLookbackAge)
+    // With the fix:
+    //   getSplits() → overridden getQueryPlan() strips CURRENT_SCN_VALUE from conf copy
+    //   → verifySCN() sees scn == null → returns early → no exception thrown
+    // The mapper still uses raw HBase Scan.setTimeRange(0, toTime), bypassing verifySCN entirely,
+    // so all 10 rows within [0, toTime] are accessible and compared correctly.
+    Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--from-time", "0", "--to-time",
+      String.valueOf(toTime));
+
+    assertTrue(
+      "Sync job should complete successfully even when endTime is older than maxLookbackAge",
+      job.isSuccessful());
+
+    // Verify the mapper processed all 10 rows via raw HBase scan (bypasses verifySCN)
+    SyncCountersResult counters = getSyncCounters(job);
+    validateSyncCounters(counters, 10, 10, 10, 0);
+    validateMapperCounters(counters, 4, 0);
+  }
+
   /**
    * Helper class to hold separated mapper and chunk entries.
    */
@@ -1857,7 +1985,7 @@ private List<PhoenixSyncTableCheckpointOutputRow> findChunksBelongingToMapper(Co
     PhoenixSyncTableCheckpointOutputRow mapper) throws SQLException {
     PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn);
     return repository.getProcessedChunks(tableName, targetCluster, fromTime, toTime, tenantId,
-      mapper.getStartRowKey(), mapper.getEndRowKey());
+      mapper.getStartRowKey(), mapper.getEndRowKey(), true);
   }
 
   /**
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index 3b7fec50569..673b3f0cbc0 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -20,12 +20,14 @@
 import static org.apache.phoenix.hbase.index.IndexRegionObserver.PHOENIX_INDEX_CDC_CONSUMER_ENABLED;
 import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
 import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
 import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
 import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED;
 import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
index dfdaabecd27..2c2b05c2c6b 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
@@ -148,7 +148,7 @@ public void testCheckpointMapperRegionVerified() throws Exception {
       .setCounters("SOURCE_ROWS_PROCESSED=10,TARGET_ROWS_PROCESSED=10").build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals(1, results.size());
     assertArrayEquals(startKey, results.get(0).getStartRowKey());
@@ -171,7 +171,7 @@ public void testCheckpointChunkVerified() throws Exception {
       .build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     assertFalse("Should find chunk within mapper region", results.isEmpty());
   }
@@ -199,7 +199,7 @@ public void testCheckpointWithEmptyStartKey() throws Exception {
       .build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals(2, results.size());
     // Phoenix returns null for empty byte arrays in primary key columns
@@ -222,7 +222,7 @@ public void testCheckpointWithNullEndKey() throws Exception {
       .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals(1, results.size());
   }
@@ -348,7 +348,7 @@ public void testCheckpointUpsertBehavior() throws Exception {
 
     // Verify only one row exists with updated values
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals("Should have only one row after upsert", 1, results.size());
   }
@@ -358,7 +358,7 @@ public void testGetProcessedMapperRegionsEmpty() throws Exception {
     String tableName = generateUniqueName();
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals(0, results.size());
   }
@@ -386,7 +386,7 @@ public void testGetProcessedMapperRegionsBoth() throws Exception {
       .build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals(2, results.size());
     // Verify ORDER BY START_ROW_KEY: row1 should come before row50
@@ -416,7 +416,7 @@ public void testGetProcessedMapperRegionsFiltersChunks() throws Exception {
         .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
 
     assertEquals("Should only return REGION entries", 1, results.size());
   }
@@ -437,7 +437,7 @@ public void testGetProcessedMapperRegionsFiltersTimeRange() throws Exception {
 
     // Query with different time range
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 2000L, 3000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 2000L, 3000L, null, true);
 
     assertEquals("Should not find entry with different time range", 0, results.size());
   }
@@ -449,7 +449,7 @@ public void testGetProcessedChunksEmpty() throws Exception {
     byte[] mapperEnd = Bytes.toBytes("row100");
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     assertEquals(0, results.size());
   }
@@ -479,7 +479,7 @@ public void testGetProcessedChunksBothStatuses() throws Exception {
       .build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     assertEquals(2, results.size());
     // Verify ORDER BY START_ROW_KEY: row10 should come before row30
@@ -512,7 +512,7 @@ public void testGetProcessedChunksFiltersMapperRegions() throws Exception {
         .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     assertEquals("Should only return CHUNK entries", 1, results.size());
   }
@@ -532,7 +532,7 @@ public void testGetProcessedChunksWithNoBoundaries() throws Exception {
 
     // Query with no boundaries (entire table)
     List<PhoenixSyncTableCheckpointOutputRow> results =
-      repository.getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, null, null);
+      repository.getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, null, null, true);
 
     assertEquals(1, results.size());
   }
@@ -575,7 +575,7 @@ public void testGetProcessedChunksWithOnlyEndBoundary() throws Exception {
     byte[] mapperEnd = Bytes.toBytes("row50");
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     // Should return chunk1 and chunk2 (both have startKey <= row50)
     // Should NOT return chunk3 (startKey row60 > row50)
@@ -622,7 +622,7 @@ public void testGetProcessedChunksWithOnlyStartBoundary() throws Exception {
     byte[] mapperEnd = new byte[0]; // Empty end key (last region)
 
     List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
 
     // Should return chunk2 and chunk3 (both have endKey >= row30)
     // Should NOT return chunk1 (endKey row20 < row30)
@@ -784,17 +784,17 @@ public void testCheckpointMapperRegionWithTenantId() throws Exception {
 
     // Query for TENANT_001 - should return only TENANT_001's checkpoint
     List<PhoenixSyncTableCheckpointOutputRow> results1 =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId1);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId1, true);
     assertEquals("TENANT_001 should have 1 checkpoint", 1, results1.size());
 
     // Query for TENANT_002 - should return only TENANT_002's checkpoint
     List<PhoenixSyncTableCheckpointOutputRow> results2 =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId2);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId2, true);
     assertEquals("TENANT_002 should have 1 checkpoint", 1, results2.size());
 
     // Query for null tenant - should return only null-tenant checkpoint (tenant isolation)
     List<PhoenixSyncTableCheckpointOutputRow> results3 =
-      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null);
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
     assertEquals("Null tenant query should return only null-tenant checkpoint", 1, results3.size());
   }
 
@@ -832,17 +832,17 @@ public void testChunkCheckpointChunkWithDifferentTenants() throws Exception {
 
     // Query for TENANT_001 chunks
     List<PhoenixSyncTableCheckpointOutputRow> results1 = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, tenantId1, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, tenantId1, mapperStart, mapperEnd, true);
     assertEquals("TENANT_001 should have 1 chunk", 1, results1.size());
 
     // Query for TENANT_002 chunks
     List<PhoenixSyncTableCheckpointOutputRow> results2 = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, tenantId2, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, tenantId2, mapperStart, mapperEnd, true);
     assertEquals("TENANT_002 should have 1 chunk", 1, results2.size());
 
     // Query for null tenant - should return only null-tenant chunk (tenant isolation)
     List<PhoenixSyncTableCheckpointOutputRow> results3 = repository.getProcessedChunks(tableName,
-      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd);
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
     assertEquals("Null tenant query should return only null-tenant chunk", 1, results3.size());
   }
 }

From de96b293fae1578caa6175a931355561e4e76e3d Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Thu, 14 May 2026 10:04:13 +0530
Subject: [PATCH 07/18] checkpoint before implmenting repair fix for
 source/target with same ts as put/delete

---
 .../PhoenixSyncTableCheckpointOutputRow.java  |  56 ++-
 .../mapreduce/PhoenixSyncTableMapper.java     | 335 ++++++++++++------
 .../mapreduce/PhoenixSyncTableTool.java       |  24 +-
 .../end2end/PhoenixSyncTableToolIT.java       |  35 +-
 4 files changed, 320 insertions(+), 130 deletions(-)

diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
index 49cc9c4e4a0..d261d524574 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
@@ -158,27 +158,55 @@ public long getTargetRowsProcessed() {
    * contract to ensure consistency between formatting (in mapper) and parsing (in tests).
    */
   public static class CounterFormatter {
-    private static final String FORMAT_CHUNK = "%s=%d,%s=%d,%s=%d,%s=%d";
+    private static final String FORMAT_CHUNK =
+      "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d";
     private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d";
 
     /**
-     * Formats chunk counters as comma-separated key=value pairs. Always emits all four
-     * counters; for verify-only chunks (no repair) {@code rowsPut} and {@code rowsDeleted}
-     * are 0 so operators querying the checkpoint table see a uniform format.
-     * @param sourceRows  Source rows processed
-     * @param targetRows  Target rows processed
-     * @param rowsPut     Rows put to target during repair (0 if not repaired)
-     * @param rowsDeleted Rows deleted from target during repair (0 if not repaired)
-     * @return Formatted string: "SOURCE_ROWS_PROCESSED=...,TARGET_ROWS_PROCESSED=...,
-     *         ROWS_PUT_TO_TARGET=...,ROWS_DELETED_FROM_TARGET=..."
+     * Formats chunk counters as comma-separated key=value pairs. Always emits all eight
+     * counters; for verify-only chunks (no repair) the six drift counters are 0 so
+     * operators querying the checkpoint table see a uniform format.
+     *
+     * Drift signals partition into two layers:
+     *   row-level  — whole row missing or extra on target, or unrepairable (target row is
+     *                entirely tombstones — HBase cannot remove tombstones, only major
+     *                compaction does)
+     *   cell-level — for rows present on both clusters, individual cells missing, extra,
+     *                or differing in value at matching coordinates
+     * The two layers are disjoint per row: a row drift case contributes only to row
+     * counters; a cell drift case contributes only to cell counters.
+     *
+     * @param sourceRows         Source rows processed
+     * @param targetRows         Target rows processed
+     * @param rowsMissingOnTarget Rows present on source but missing on target (0 if not
+     *                            repaired)
+     * @param rowsExtraOnTarget  Rows present on target but missing on source whose live
+     *                           cells repair tombstoned (0 if not repaired)
+     * @param rowsCannotRepair   Rows present on target but missing on source whose contents
+     *                           are entirely tombstones — repair cannot act on them and an
+     *                           operator-driven major compaction is required to make the
+     *                           verifier converge under {@code --raw-scan}
+     * @param cellsMissingOnTarget Cells (across rows present on both sides) that source had
+     *                             at coordinates target lacked (0 if not repaired)
+     * @param cellsExtraOnTarget  Cells (across rows present on both sides) that target had
+     *                            at coordinates source lacked (0 if not repaired)
+     * @param cellsDifferentOnTarget Cells (across rows present on both sides) at matching
+     *                               coordinates whose values differed (0 if not repaired)
+     * @return Formatted string with all eight counters
      */
-    public static String formatChunk(long sourceRows, long targetRows, long rowsPut,
-      long rowsDeleted) {
+    public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget,
+      long rowsExtraOnTarget, long rowsCannotRepair, long cellsMissingOnTarget,
+      long cellsExtraOnTarget, long cellsDifferentOnTarget) {
       return String.format(FORMAT_CHUNK,
         PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows,
         PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows,
-        PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET.name(), rowsPut,
-        PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET.name(), rowsDeleted);
+        PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET.name(),
+        cellsDifferentOnTarget);
     }
 
     /**
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index 9c3349464b0..1818f97ef1c 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -49,7 +49,6 @@
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.schema.PTable;
 import org.apache.phoenix.util.MetaDataUtil;
-import org.apache.phoenix.util.PhoenixRuntime;
 import org.apache.phoenix.util.SHA256DigestUtil;
 import org.apache.phoenix.util.ScanUtil;
 import org.slf4j.Logger;
@@ -76,8 +75,12 @@ public enum SyncCounters {
     CHUNKS_REPAIR_FAILED,
     SOURCE_ROWS_PROCESSED,
     TARGET_ROWS_PROCESSED,
-    ROWS_PUT_TO_TARGET,
-    ROWS_DELETED_FROM_TARGET
+    ROWS_MISSING_ON_TARGET,
+    ROWS_EXTRA_ON_TARGET,
+    ROWS_CANNOT_REPAIR,
+    CELLS_MISSING_ON_TARGET,
+    CELLS_EXTRA_ON_TARGET,
+    CELLS_DIFFERENT_ON_TARGET
   }
 
   private String tableName;
@@ -417,7 +420,7 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
         }
         sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis());
         String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-          .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L);
+          .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L);
         if (matched) {
           handleVerifiedChunk(sourceChunk, context, counters);
         } else {
@@ -765,128 +768,245 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey
   }
 
   /**
-   * Returns true if both Results carry identical cell arrays. Walks the two arrays in
-   * lock-step using {@link CellComparator} for coordinate ordering and {@link CellUtil#matchingValue}
-   * for value equality (CellComparator does not compare values). Both scanners use the same
-   * configuration on byte-ordered storage, so cells come back in canonical KV order.
+   * Lazily-built per-row Put and Delete mutations. Each field is created on first use so a
+   * row that needs only Puts produces no Delete (and vice versa); a row that needs no
+   * mutation at all produces neither. After construction, callers append the produced
+   * mutations to the pending batches via {@link #flush(List, List)}.
    */
-  private boolean rowCellsEqual(Result sourceResult, Result targetResult) {
-    Cell[] sourceCells = sourceResult.rawCells();
-    Cell[] targetCells = targetResult.rawCells();
-    if (sourceCells.length != targetCells.length) {
-      return false;
+  private static final class RowRepairMutations {
+    private final byte[] rowKey;
+    Put put;
+    Delete delete;
+
+    RowRepairMutations(byte[] rowKey) {
+      this.rowKey = rowKey;
     }
-    CellComparator comparator = CellComparator.getInstance();
-    for (int i = 0; i < sourceCells.length; i++) {
-      if (
-        comparator.compare(sourceCells[i], targetCells[i]) != 0
-          || !CellUtil.matchingValue(sourceCells[i], targetCells[i])
-      ) {
-        return false;
+
+    Put put() {
+      if (put == null) {
+        put = new Put(rowKey);
+      }
+      return put;
+    }
+
+    Delete delete() {
+      if (delete == null) {
+        delete = new Delete(rowKey);
+      }
+      return delete;
+    }
+
+    void flush(List<Put> pendingPuts, List<Delete> pendingDeletes) {
+      if (put != null) {
+        pendingPuts.add(put);
+      }
+      if (delete != null) {
+        pendingDeletes.add(delete);
       }
     }
+  }
+
+  /**
+   * Cell-level drift counts produced by {@link #diffCellsForRow}. Populated only for rows
+   * present on both clusters; whole-row drift is signaled by the caller directly at the
+   * {@code cmp != 0} branches in {@link #repairChunk}. Three counters partition the cell
+   * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
+   */
+  private static final class CellDriftCounts {
+    static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0);
+
+    final int missing;
+    final int extra;
+    final int different;
+
+    CellDriftCounts(int missing, int extra, int different) {
+      this.missing = missing;
+      this.extra = extra;
+      this.different = different;
+    }
+  }
+
+  /**
+   * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing /
+   * extra on target, plus rows that cannot be repaired because target's row is entirely
+   * tombstones — HBase has no API to remove tombstones, only major compaction does) plus
+   * three cell-level (cells missing / extra / different on rows present on both clusters).
+   * Owns the bookkeeping that was previously scattered across {@link #repairChunk} — local
+   * accumulators, MapReduce job-counter increments, the
+   * {@link PhoenixSyncTableCheckpointOutputRow.CounterFormatter#formatChunk} call, and the
+   * end-of-chunk log line. Adding a new drift signal means touching this class and the one
+   * place in the merge loop that produces it; everything else (commit to job context,
+   * checkpoint COUNTERS string, log) flows through these methods.
+   */
+  private static final class DriftCounters {
+    long rowsMissingOnTarget;
+    long rowsExtraOnTarget;
+    long rowsCannotRepair;
+    long cellsMissingOnTarget;
+    long cellsExtraOnTarget;
+    long cellsDifferentOnTarget;
+
+    void addCellDrift(CellDriftCounts cellDrift) {
+      cellsMissingOnTarget += cellDrift.missing;
+      cellsExtraOnTarget += cellDrift.extra;
+      cellsDifferentOnTarget += cellDrift.different;
+    }
+
+    /** Increments the job's MapReduce counters with this chunk's drift totals. */
+    void commitTo(Context context) {
+      context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(rowsMissingOnTarget);
+      context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(rowsExtraOnTarget);
+      context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(rowsCannotRepair);
+      context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(cellsMissingOnTarget);
+      context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(cellsExtraOnTarget);
+      context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).increment(cellsDifferentOnTarget);
+    }
+
+    /** Formats the chunk's COUNTERS string for persistence in the checkpoint table. */
+    String formatChunkCounters(long verifySourceRows, long verifyTargetRows) {
+      return PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(verifySourceRows,
+        verifyTargetRows, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair,
+        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
+    }
+
+    /** Compact end-of-chunk log line summarizing all six drift signals. */
+    String toLogString() {
+      return String.format(
+        "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsCannotRepair=%d, "
+          + "cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, cellsDifferentOnTarget=%d",
+        rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget,
+        cellsExtraOnTarget, cellsDifferentOnTarget);
+    }
+  }
+
+  /**
+   * Routes a source cell to the right mutation kind. Put cells go to a {@link Put}; tombstone
+   * cells go to a {@link Delete} via {@link Delete#add(Cell)} which preserves the tombstone's
+   * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under
+   * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells.
+   */
+  private void mirrorSourceCell(Cell cell, RowRepairMutations rowMutations) throws IOException {
+    if (CellUtil.isDelete(cell)) {
+      rowMutations.delete().add(cell);
+    } else {
+      rowMutations.put().add(cell);
+    }
+  }
+
+  /**
+   * Tombstones a target-only cell at its exact timestamp via {@code addColumn}. Skips cells
+   * that are themselves already tombstones: HBase has no API to remove a tombstone cell —
+   * tombstones can only be reaped by major compaction once they age past the keep-deleted-
+   * cells window. Issuing another Delete at the same coordinates writes a duplicate marker,
+   * does not change the row's effective state, and only adds compaction load. Combined with
+   * the absence of a source-side counterpart to mirror, the right action is to leave the
+   * existing tombstone untouched. The repair scan's time range {@code [fromTime, toTime]}
+   * guarantees cells outside the window are never read (and therefore never deleted).
+   *
+   * @return true if the cell was a live cell that contributed a tombstone marker, false if
+   *         the cell was already a tombstone and was skipped.
+   */
+  private boolean tombstoneTargetCell(Cell cell, RowRepairMutations rowMutations) {
+    if (CellUtil.isDelete(cell)) {
+      return false;
+    }
+    rowMutations.delete().addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell),
+      cell.getTimestamp());
     return true;
   }
 
   /**
-   * Builds a Put preserving the original cell timestamps from the source row. Used when an
-   * entire row is missing on target.
+   * Mirrors every source cell of a row that is missing on target. Source cells route by
+   * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via
+   * {@link Delete#add(Cell)}.
    */
-  private Put buildPutFromResult(Result result) throws IOException {
-    Put put = new Put(result.getRow());
-    for (Cell cell : result.rawCells()) {
-      put.add(cell);
+  private void mirrorWholeRow(Result sourceResult, List<Put> pendingPuts,
+    List<Delete> pendingDeletes) throws IOException {
+    RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow());
+    for (Cell cell : sourceResult.rawCells()) {
+      mirrorSourceCell(cell, rowMutations);
     }
-    return put;
+    rowMutations.flush(pendingPuts, pendingDeletes);
   }
 
   /**
-   * Builds a Delete for every cell in the target row. The repair scan's time range
-   * {@code [fromTime, toTime]} guarantees the cells we read here are within the sync window;
-   * cells outside the window are never read, therefore never deleted.
+   * Tombstones every live cell of a row that is extra on target. Existing tombstones on the
+   * target row are skipped — HBase cannot remove tombstone cells; only major compaction
+   * reaps them.
+   *
+   * @return the number of live cells that contributed a tombstone marker. {@code 0} means
+   *         the row was already entirely tombstones — repair could not act on it, and the
+   *         caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}.
    */
-  private Delete buildDeleteFromResult(Result result) {
-    Delete delete = new Delete(result.getRow());
-    for (Cell cell : result.rawCells()) {
-      delete.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell),
-        cell.getTimestamp());
+  private int tombstoneWholeRow(Result targetResult, List<Put> pendingPuts,
+    List<Delete> pendingDeletes) {
+    RowRepairMutations rowMutations = new RowRepairMutations(targetResult.getRow());
+    int liveCellsTombstoned = 0;
+    for (Cell cell : targetResult.rawCells()) {
+      if (tombstoneTargetCell(cell, rowMutations)) {
+        liveCellsTombstoned++;
+      }
     }
-    return delete;
+    rowMutations.flush(pendingPuts, pendingDeletes);
+    return liveCellsTombstoned;
   }
 
   /**
-   * Computes cell-level diffs for a row that exists on both clusters but with a different
-   * cell set. Cells present in source but missing from target are added to a single Put for
-   * the row; cells present in target but missing from source are added to a single Delete.
+   * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator}
+   * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the
+   * pending lists. Returns a {@link CellDriftCounts} classifying the cell-level drift:
+   *
+   *   same coords + matching value         → no drift, no signal
+   *   same coords + different value        → different++; mirror source cell
+   *   source-only cell at unique coords    → missing++;   mirror source cell
+   *   target-only live cell at unique coords → extra++;   tombstone target cell
+   *   target-only tombstone cell           → skip (HBase cannot remove tombstones)
    */
-  private void addRepairMutations(Result sourceResult, Result targetResult,
+  private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult,
     List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
-    byte[] rowKey = sourceResult.getRow();
     Cell[] sourceCells = sourceResult.rawCells();
     Cell[] targetCells = targetResult.rawCells();
     CellComparator comparator = CellComparator.getInstance();
 
-    Put put = null;
-    Delete delete = null;
+    RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow());
+    int missing = 0;
+    int extra = 0;
+    int different = 0;
 
     int sourceIdx = 0;
     int targetIdx = 0;
     while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) {
-      Cell sourceCell = sourceCells[sourceIdx];
-      Cell targetCell = targetCells[targetIdx];
-      int cmp = comparator.compare(sourceCell, targetCell);
+      int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]);
       if (cmp == 0) {
-        // Same coordinates (family/qualifier/timestamp/type) on both sides. CellComparator
-        // does NOT compare values, so check value separately. If values differ, Put the
-        // source cell — it overwrites the target cell at the same timestamp.
-        if (!CellUtil.matchingValue(sourceCell, targetCell)) {
-          if (put == null) {
-            put = new Put(rowKey);
-          }
-          put.add(sourceCell);
+        // Same coordinates; CellComparator does not compare values, check separately.
+        if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) {
+          mirrorSourceCell(sourceCells[sourceIdx], rowMutations);
+          different++;
         }
         sourceIdx++;
         targetIdx++;
       } else if (cmp < 0) {
-        // Source-only cell, Put on target.
-        if (put == null) {
-          put = new Put(rowKey);
-        }
-        put.add(sourceCell);
-        sourceIdx++;
-      } else {
-        // Target-only cell, Delete from target.
-        if (delete == null) {
-          delete = new Delete(rowKey);
-        }
-        delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell),
-          targetCell.getTimestamp());
-        targetIdx++;
+        mirrorSourceCell(sourceCells[sourceIdx++], rowMutations);
+        missing++;
+      } else if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) {
+        extra++;
       }
     }
-    // Tail: any remaining source cells are source-only.
     while (sourceIdx < sourceCells.length) {
-      if (put == null) {
-        put = new Put(rowKey);
-      }
-      put.add(sourceCells[sourceIdx++]);
+      mirrorSourceCell(sourceCells[sourceIdx++], rowMutations);
+      missing++;
     }
-    // Tail: any remaining target cells are target-only.
     while (targetIdx < targetCells.length) {
-      Cell targetCell = targetCells[targetIdx++];
-      if (delete == null) {
-        delete = new Delete(rowKey);
+      if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) {
+        extra++;
       }
-      delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell),
-        targetCell.getTimestamp());
     }
 
-    if (put != null) {
-      pendingPuts.add(put);
-    }
-    if (delete != null) {
-      pendingDeletes.add(delete);
+    if (missing == 0 && extra == 0 && different == 0) {
+      return CellDriftCounts.NONE;
     }
+    rowMutations.flush(pendingPuts, pendingDeletes);
+    return new CellDriftCounts(missing, extra, different);
   }
 
   /**
@@ -935,8 +1055,7 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
     byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive,
     long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context)
     throws IOException, SQLException {
-    long rowsPut = 0;
-    long rowsDeleted = 0;
+    DriftCounters driftCounters = new DriftCounters();
 
     LOGGER.info(
       "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}",
@@ -972,23 +1091,31 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
           cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow());
         }
 
+        // Drift signals are bumped at the branch that semantically caused them: row-level
+        // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch.
         if (cmp == 0) {
           // Same row key on both clusters — diff at cell level and repair only if cells differ.
-          if (!rowCellsEqual(sourceResult, targetResult)) {
-            addRepairMutations(sourceResult, targetResult, pendingPuts, pendingDeletes);
-            rowsPut++;
-          }
+          driftCounters.addCellDrift(
+            diffCellsForRow(sourceResult, targetResult, pendingPuts, pendingDeletes));
           sourceResult = sourceScanner.next();
           targetResult = targetScanner.next();
         } else if (cmp < 0) {
-          // Source-only row (target is missing it, or target has advanced past) — Put it on target.
-          pendingPuts.add(buildPutFromResult(sourceResult));
-          rowsPut++;
+          // Source-only row — mirror it onto target.
+          mirrorWholeRow(sourceResult, pendingPuts, pendingDeletes);
+          driftCounters.rowsMissingOnTarget++;
           sourceResult = sourceScanner.next();
         } else {
-          // Target-only row (source has no such row in this range) — Delete it from target.
-          pendingDeletes.add(buildDeleteFromResult(targetResult));
-          rowsDeleted++;
+          // Target-only row — tombstone its live cells. If the row is already entirely
+          // tombstones, repair has nothing to do (HBase cannot remove tombstones; only
+          // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can
+          // see the unrepairable drift volume.
+          int liveCellsTombstoned =
+            tombstoneWholeRow(targetResult, pendingPuts, pendingDeletes);
+          if (liveCellsTombstoned == 0) {
+            driftCounters.rowsCannotRepair++;
+          } else {
+            driftCounters.rowsExtraOnTarget++;
+          }
           targetResult = targetScanner.next();
         }
 
@@ -1010,8 +1137,8 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
 
       Timestamp failedAt = new Timestamp(System.currentTimeMillis());
       // Capture partial progress in the COUNTERS column for triage.
-      String failedCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-        .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted);
+      String failedCounters =
+        driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
       syncTableOutputRepository
         .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
           .setTableName(tableName).setTargetCluster(targetZkQuorum)
@@ -1024,13 +1151,12 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
       return;
     }
 
-    context.getCounter(SyncCounters.ROWS_PUT_TO_TARGET).increment(rowsPut);
-    context.getCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).increment(rowsDeleted);
+    driftCounters.commitTo(context);
     context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1);
 
     Timestamp repairEndTime = new Timestamp(System.currentTimeMillis());
-    String repairCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-      .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted);
+    String repairCounters =
+      driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
 
     syncTableOutputRepository
       .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
@@ -1041,8 +1167,9 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
         .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime)
         .setCounters(repairCounters).build());
 
-    LOGGER.info("Completed repair for chunk source=[{}, {}]: rowsPut={}, rowsDeleted={}",
-      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), rowsPut, rowsDeleted);
+    LOGGER.info("Completed repair for chunk source=[{}, {}]: {}",
+      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd),
+      driftCounters.toLogString());
   }
 
   @Override
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
index 7cb3f034138..8ec66abf7ca 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
@@ -429,19 +429,31 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
       long targetRowsProcessed =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue();
-      long rowsPutToTarget =
-        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET).getValue();
-      long rowsDeletedFromTarget = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET).getValue();
+      long rowsMissingOnTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
+      long rowsExtraOnTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+      long rowsCannotRepair = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue();
+      long cellsMissingOnTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
+      long cellsExtraOnTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+      long cellsDifferentOnTarget = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
       LOGGER.info(
         "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n"
           + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n"
           + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n"
           + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n"
-          + "Rows Put To Target: {}, \n Rows Deleted From Target: {}",
+          + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n"
+          + "Rows Cannot Repair: {}, \n"
+          + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n"
+          + "Cells Different On Target: {}",
         taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers,
         chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed,
-        targetRowsProcessed, rowsPutToTarget, rowsDeletedFromTarget);
+        targetRowsProcessed, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair,
+        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
     } else {
       LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed "
         + "during initialization", qTable);
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 57321b4ce9f..599f18337d8 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -192,16 +192,39 @@ public void testSyncTableValidateWithDataDifference() throws Exception {
     long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
     long mappersRepairFailed =
       repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
-    long rowsPutToTarget = repairCounters.findCounter(SyncCounters.ROWS_PUT_TO_TARGET).getValue();
-    long rowsDeletedFromTarget =
-      repairCounters.findCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).getValue();
+    long rowsMissingOnTarget =
+      repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
+    long rowsExtraOnTarget =
+      repairCounters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+    long rowsCannotRepair =
+      repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
+    long cellsMissingOnTarget =
+      repairCounters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
+    long cellsExtraOnTarget =
+      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+    long cellsDifferentOnTarget =
+      repairCounters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
     assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired);
     assertEquals("No chunk repair should fail", 0, chunksRepairFailed);
     assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired);
     assertEquals("No mapper repair should fail", 0, mappersRepairFailed);
-    assertEquals("Three rows on target should be Put back to source values", 3, rowsPutToTarget);
-    assertEquals("No rows should be deleted from target (only value diffs, not extras)", 0,
-      rowsDeletedFromTarget);
+    // The three drifted rows exist on both clusters (only NAME values were modified on
+    // target via separate upserts, producing cells at different timestamps), so all drift
+    // is cell-level — no whole-row missing or extra signals.
+    assertEquals("No whole rows should be missing on target", 0, rowsMissingOnTarget);
+    assertEquals("No whole rows should be extra on target", 0, rowsExtraOnTarget);
+    assertEquals("No rows should be unrepairable (target has no all-tombstone rows)", 0,
+      rowsCannotRepair);
+    // Each modified row contributes at least one missing cell (source's original NAME at
+    // its original timestamp) and one extra cell (target's modified NAME at the new
+    // timestamp). cellsDifferent stays 0 because the modifications wrote at new timestamps
+    // rather than overwriting at the same coordinates.
+    assertTrue("Source-only cells across the 3 drifted rows should be detected, got "
+      + cellsMissingOnTarget, cellsMissingOnTarget >= 3);
+    assertTrue("Target-only cells across the 3 drifted rows should be detected, got "
+      + cellsExtraOnTarget, cellsExtraOnTarget >= 3);
+    assertEquals("No same-coord value diffs (modifications wrote at new timestamps)", 0,
+      cellsDifferentOnTarget);
 
     // Target rows should now match source.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);

From 71c25d81d595cfba3102400d18e105554f532bba Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Fri, 15 May 2026 17:04:38 +0530
Subject: [PATCH 08/18] checkpoint all possible combination fixed??

---
 .../PhoenixSyncTableCheckpointOutputRow.java  |   1 +
 .../mapreduce/PhoenixSyncTableMapper.java     | 683 +++++++++++++++---
 .../mapreduce/PhoenixSyncTableTool.java       |  34 +-
 3 files changed, 596 insertions(+), 122 deletions(-)

diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
index d261d524574..86574326ad2 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
@@ -39,6 +39,7 @@ public enum Status {
     VERIFIED,
     MISMATCHED,
     REPAIRED,
+    UNREPAIRABLE,
     REPAIR_FAILED
   }
 
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index 1818f97ef1c..d09151b1e27 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -20,13 +20,21 @@
 import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.security.MessageDigest;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.TreeMap;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
@@ -36,6 +44,7 @@
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Row;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -68,11 +77,14 @@ public enum SyncCounters {
     MAPPERS_VERIFIED,
     MAPPERS_MISMATCHED,
     MAPPERS_REPAIRED,
+    MAPPERS_UNREPAIRABLE,
     MAPPERS_REPAIR_FAILED,
     CHUNKS_VERIFIED,
     CHUNKS_MISMATCHED,
     CHUNKS_REPAIRED,
+    CHUNKS_UNREPAIRABLE,
     CHUNKS_REPAIR_FAILED,
+    CHECKPOINT_WRITE_FAILED,
     SOURCE_ROWS_PROCESSED,
     TARGET_ROWS_PROCESSED,
     ROWS_MISSING_ON_TARGET,
@@ -218,6 +230,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
     // Track counters before processing this region
     long verifiedBefore = context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
     long mismatchedBefore = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
+    long unrepairableBefore = context.getCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
     long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
     long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
     long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
@@ -235,6 +248,8 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
       context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue() - verifiedBefore;
     long mismatchedChunks =
       context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue() - mismatchedBefore;
+    long unrepairableChunks =
+      context.getCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue() - unrepairableBefore;
     long repairFailedChunks =
       context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() - repairFailedBefore;
     long sourceRowsProcessed =
@@ -247,7 +262,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
       .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed);
     if (sourceRowsProcessed > 0) {
       recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks,
-        mismatchedChunks, repairFailedChunks, counters, context);
+        mismatchedChunks, unrepairableChunks, repairFailedChunks, counters, context);
     } else {
       LOGGER.info(
         "No rows pending to process. All region boundaries are covered for startKey:{}, endKey: {}",
@@ -264,23 +279,31 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
    * @param regionEndTime      Region processing end time
    * @param verifiedChunks     Number of verified chunks
    * @param mismatchedChunks   Number of mismatched chunks
+   * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR;
+   *                           if > 0 (and no repair-failed chunks) the region rolls up to
+   *                           UNREPAIRABLE, signalling operator intervention is needed
    * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the
-   *                           region rolls up to MISMATCHED (drift remains, re-run will retry)
+   *                           region rolls up to REPAIR_FAILED (highest precedence)
    * @param counters           Formatted counter string
    * @param context            Mapper context
    */
   private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
     Timestamp regionStartTime, Timestamp regionEndTime, long verifiedChunks, long mismatchedChunks,
-    long repairFailedChunks, String counters, Context context) throws SQLException {
+    long unrepairableChunks, long repairFailedChunks, String counters, Context context)
+    throws SQLException {
 
-    // Region rolls up its child chunks' outcomes into one of four statuses:
-    //   VERIFIED      — every chunk matched; no drift in this region.
+    // Region rolls up its child chunks' outcomes into one of five statuses, in precedence
+    // order (most-severe wins):
+    //   REPAIR_FAILED — at least one chunk threw during merge-scan or flush.
+    //   UNREPAIRABLE  — repair completed but at least one chunk has rows that cannot be
+    //                   repaired (target tombstones shadow source Puts, or target row is
+    //                   entirely tombstones). Operator action (typically major compaction
+    //                   on target) needed before a re-run can converge.
     //   MISMATCHED    — drift was detected but repair was not attempted (dry-run mode).
-    //   REPAIRED      — drift was detected and every chunk's repair succeeded.
-    //   REPAIR_FAILED — drift was detected, repair was attempted, and at least one chunk
-    //                   threw during merge-scan or flush. The failed chunks remain as
-    //                   CHUNK/REPAIR_FAILED rows; a re-run will re-attempt them via the
-    //                   Phase 2 STATUS-IN filter that excludes REPAIR_FAILED.
+    //   REPAIRED      — drift was detected and every chunk's repair fully succeeded.
+    //   VERIFIED      — every chunk matched; no drift in this region.
+    // The resume filter on re-invocation skips VERIFIED and REPAIRED — UNREPAIRABLE,
+    // MISMATCHED, and REPAIR_FAILED chunks are re-entered as gaps and re-attempted.
     PhoenixSyncTableCheckpointOutputRow.Status status;
     SyncCounters mapperCounter;
     if (mismatchedChunks == 0) {
@@ -289,12 +312,15 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
     } else if (isDryRun) {
       status = PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED;
       mapperCounter = SyncCounters.MAPPERS_MISMATCHED;
-    } else if (repairFailedChunks == 0) {
-      status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
-      mapperCounter = SyncCounters.MAPPERS_REPAIRED;
-    } else {
+    } else if (repairFailedChunks > 0) {
       status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED;
       mapperCounter = SyncCounters.MAPPERS_REPAIR_FAILED;
+    } else if (unrepairableChunks > 0) {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE;
+      mapperCounter = SyncCounters.MAPPERS_UNREPAIRABLE;
+    } else {
+      status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
+      mapperCounter = SyncCounters.MAPPERS_REPAIRED;
     }
 
     context.getCounter(mapperCounter).increment(1);
@@ -303,12 +329,15 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
       counters);
 
     String logMessage = String.format(
-      "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, %d repair-failed",
+      "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, "
+        + "%d unrepairable, %d repair-failed",
       Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd),
-      status.name().toLowerCase(), verifiedChunks, mismatchedChunks, repairFailedChunks);
+      status.name().toLowerCase(), verifiedChunks, mismatchedChunks, unrepairableChunks,
+      repairFailedChunks);
 
     if (
       status == PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED
+        || status == PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE
         || status == PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED
     ) {
       LOGGER.warn(logMessage);
@@ -603,18 +632,25 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk,
   }
 
   /**
-   * Builds the common Scan shape used by both verification and repair: same key range,
-   * inclusivity, time window, raw-scan, and all-versions semantics. Callers layer on their
-   * own caching, limits, and coprocessor attributes. Keeping the base shared guarantees that
-   * the cells visited by repair are exactly the cells the verifier hashed.
+   * Builds the common Scan shape used by verification, repair, and tombstone loading: key
+   * range, inclusivity, time window, cache-blocks, plus raw-scan and all-versions semantics
+   * controlled by the user's {@code --raw-scan} / {@code --read-all-versions} flags.
+   *
+   * Callers that need to force raw-scan or all-versions on a specific call (e.g., the
+   * tombstone loader, which must surface tombstones regardless of user flags) pass
+   * {@code forceRaw=true} or {@code forceAllVersions=true} to override.
+   *
+   * Callers layer on their own caching, limits, and coprocessor attributes. Keeping the
+   * base shared guarantees that the cells visited by repair are exactly the cells the
+   * verifier hashed (when both pass the same force flags).
    */
   private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
-    boolean isEndKeyInclusive) throws IOException {
+    boolean isEndKeyInclusive, boolean forceRaw, boolean forceAllVersions) throws IOException {
     Scan scan = new Scan();
     scan.withStartRow(startKey, isStartKeyInclusive);
     scan.withStopRow(endKey, isEndKeyInclusive);
-    scan.setRaw(isRawScan);
-    if (isReadAllVersions) {
+    scan.setRaw(forceRaw || isRawScan);
+    if (forceAllVersions || isReadAllVersions) {
       scan.readAllVersions();
     }
     scan.setCacheBlocks(false);
@@ -628,7 +664,8 @@ private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyIn
    */
   private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
     boolean isEndKeyInclusive, boolean isTargetScan) throws IOException {
-    Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive);
+    Scan scan =
+      createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false);
     // Set limit and caching to 1 for sequential partial digest retrieval from target.
     // Enables digest continuation: each target chunk's digest feeds into the next until scanning
     // completes
@@ -760,7 +797,8 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
    */
   private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
     boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException {
-    Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive);
+    Scan scan =
+      createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false);
     scan.setCaching(1000);
     ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn);
     scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES);
@@ -768,17 +806,64 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey
   }
 
   /**
-   * Lazily-built per-row Put and Delete mutations. Each field is created on first use so a
-   * row that needs only Puts produces no Delete (and vice versa); a row that needs no
-   * mutation at all produces neither. After construction, callers append the produced
-   * mutations to the pending batches via {@link #flush(List, List)}.
+   * Loads the target row's tombstone index for shadow detection. Issues a single-row scan
+   * against target with raw=true and all-versions forced (regardless of user flags), so the
+   * tombstone subtypes that would otherwise be filtered out are surfaced. Live cells in the
+   * response are ignored — they were already visible to the repair scan and are handled by
+   * the merge logic. (HBase 2.x exposes {@code setRaw} only on Scan, not on Get — so a
+   * one-row scan stands in for what would otherwise be a raw Get.)
+   *
+   * Time range: the base scan applies {@code [fromTime, toTime]}, but shadow detection
+   * needs tombstones at ts >= fromTime regardless of the upper bound. A DeleteColumn /
+   * DeleteFamily at ts > toTime can still shadow a Put we mirror at ts in window during
+   * application reads (HBase tombstones don't respect the verifier's sync window). Lower
+   * bound stays at fromTime since tombstones below the window can't shadow anything we'd
+   * write inside the window.
    */
-  private static final class RowRepairMutations {
+  private TargetRowTombstones loadTargetRowTombstones(byte[] rowKey, Table targetHTable)
+    throws IOException {
+    Scan scan = createBaseScan(rowKey, rowKey, true, true, true, true);
+    scan.setTimeRange(fromTime, Long.MAX_VALUE);
+    scan.setCaching(1);
+    scan.setLimit(1);
+    TargetRowTombstones tombstones = new TargetRowTombstones();
+    try (ResultScanner scanner = targetHTable.getScanner(scan)) {
+      Result raw = scanner.next();
+      if (raw != null) {
+        for (Cell cell : raw.rawCells()) {
+          // Record both tombstones (for shadow detection) and Puts (for hidden-version
+          // discovery during cmp > 0 tombstoning of target-only cells).
+          tombstones.record(cell);
+        }
+      }
+    }
+    return tombstones;
+  }
+
+  /**
+   * Lazily-built per-row Put and Delete mutations plus per-row unrepairable-drift state.
+   * Each mutation field is created on first use so a row that needs only Puts produces no
+   * Delete (and vice versa); a row that needs no mutation at all produces neither.
+   *
+   * Tombstone-index state is lazy: {@link #tombstones} is loaded only on the first
+   * shadow check via {@link #tombstones(Table)} (one raw scan per row at most).
+   * {@link #anyCellUnrepairable} accumulates whether the row carries any drift that
+   * repair could not act on — either a source-side Put was shadow-suppressed by an
+   * existing target tombstone, or a target-only tombstone exists that source lacks
+   * (HBase has no API to remove tombstones). The caller reads it after the merge to
+   * decide whether to bump {@link SyncCounters#ROWS_CANNOT_REPAIR}.
+   *
+   * After construction, callers append produced mutations to the pending batches via
+   * {@link #flush(List, List)}.
+   */
+  private final class RowRepairState {
     private final byte[] rowKey;
     Put put;
     Delete delete;
+    TargetRowTombstones tombstones;
+    boolean anyCellUnrepairable;
 
-    RowRepairMutations(byte[] rowKey) {
+    RowRepairState(byte[] rowKey) {
       this.rowKey = rowKey;
     }
 
@@ -796,6 +881,18 @@ Delete delete() {
       return delete;
     }
 
+    /**
+     * Lazily loads the target row's tombstone index on first call (one raw single-row scan
+     * via {@link #loadTargetRowTombstones}); cached thereafter for reuse across the row's
+     * subsequent shadow checks.
+     */
+    TargetRowTombstones targetRowTombstones(Table targetHTable) throws IOException {
+      if (tombstones == null) {
+        tombstones = loadTargetRowTombstones(rowKey, targetHTable);
+      }
+      return tombstones;
+    }
+
     void flush(List<Put> pendingPuts, List<Delete> pendingDeletes) {
       if (put != null) {
         pendingPuts.add(put);
@@ -826,6 +923,149 @@ private static final class CellDriftCounts {
     }
   }
 
+  /**
+   * Per-row index of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]}, built
+   * lazily from a single raw single-row scan with all-versions enabled. Used in two roles:
+   *
+   * <ol>
+   *   <li><b>Shadow detection</b> ({@link #wouldShadow}): would a source Put we're about
+   *       to mirror be suppressed by an existing target tombstone?</li>
+   *   <li><b>Hidden-version discovery</b> ({@link #targetPutTimestampsBetween}): what
+   *       max-versions-filtered target Puts sit between source's max ts at a column and
+   *       target's visible Put? Used in the {@code cmp > 0} tombstoning path to issue
+   *       point Deletes for hidden versions that would otherwise surface on read after
+   *       the visible Put is shadowed.</li>
+   * </ol>
+   *
+   * HBase has four tombstone subtypes, each with distinct shadow semantics:
+   *   Delete                — shadows a Put at {@code (cf, q, ts == T)} exactly
+   *   DeleteColumn          — shadows Puts at {@code (cf, q, ts <= T)}
+   *   DeleteFamily          — shadows Puts at {@code (cf, *, ts <= T)}
+   *   DeleteFamilyVersion   — shadows Puts at {@code (cf, *, ts == T)}
+   * {@link #wouldShadow(Cell)} consults all four indices and returns true on any match.
+   */
+  private static final class TargetRowTombstones {
+    private final Map<ColumnKey, Set<Long>> deletePointTs = new HashMap<>();
+    private final Map<ColumnKey, Long> deleteColumnUpperBound = new HashMap<>();
+    private final Map<ByteBuffer, Long> deleteFamilyUpperBound = new HashMap<>();
+    private final Map<ByteBuffer, Set<Long>> deleteFamilyVersionTs = new HashMap<>();
+    /** Per-column ts-ordered set of target's Put timestamps. */
+    private final Map<ColumnKey, NavigableMap<Long, Boolean>> targetPutTs = new HashMap<>();
+
+    void record(Cell cell) {
+      if (CellUtil.isDelete(cell)) {
+        recordTombstone(cell);
+      } else {
+        targetPutTs.computeIfAbsent(columnKey(cell), k -> new TreeMap<>())
+          .put(cell.getTimestamp(), Boolean.TRUE);
+      }
+    }
+
+    private void recordTombstone(Cell tombstone) {
+      long ts = tombstone.getTimestamp();
+      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone));
+      switch (tombstone.getType()) {
+        case Delete:
+          deletePointTs.computeIfAbsent(columnKey(tombstone), k -> new HashSet<>()).add(ts);
+          break;
+        case DeleteColumn:
+          deleteColumnUpperBound.merge(columnKey(tombstone), ts, Math::max);
+          break;
+        case DeleteFamily:
+          deleteFamilyUpperBound.merge(family, ts, Math::max);
+          break;
+        case DeleteFamilyVersion:
+          deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts);
+          break;
+        default:
+          // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here.
+      }
+    }
+
+    /** Returns true if any tombstone in this index would shadow a Put at the cell's coords. */
+    boolean wouldShadow(Cell sourcePut) {
+      long ts = sourcePut.getTimestamp();
+      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut));
+      ColumnKey column = columnKey(sourcePut);
+
+      Set<Long> pointTs = deletePointTs.get(column);
+      if (pointTs != null && pointTs.contains(ts)) {
+        return true;
+      }
+      Long deleteColTs = deleteColumnUpperBound.get(column);
+      if (deleteColTs != null && ts <= deleteColTs) {
+        return true;
+      }
+      Long deleteFamTs = deleteFamilyUpperBound.get(family);
+      if (deleteFamTs != null && ts <= deleteFamTs) {
+        return true;
+      }
+      Set<Long> dfvTs = deleteFamilyVersionTs.get(family);
+      return dfvTs != null && dfvTs.contains(ts);
+    }
+
+    /**
+     * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than
+     * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find
+     * hidden (max-versions-filtered) target versions sitting between source's max ts and
+     * target's visible ts so they can be point-Deleted.
+     */
+    Set<Long> targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive,
+      long upperExclusive) {
+      NavigableMap<Long, Boolean> tss = targetPutTs.get(new ColumnKey(family, qualifier));
+      if (tss == null) {
+        return Collections.emptySet();
+      }
+      return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet();
+    }
+
+    private static ColumnKey columnKey(Cell cell) {
+      return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell));
+    }
+  }
+
+  /** Composite (family, qualifier) key with byte-array equality semantics. */
+  private static final class ColumnKey {
+    private final byte[] family;
+    private final byte[] qualifier;
+
+    ColumnKey(byte[] family, byte[] qualifier) {
+      this.family = family;
+      this.qualifier = qualifier;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof ColumnKey)) {
+        return false;
+      }
+      ColumnKey other = (ColumnKey) o;
+      return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier);
+    }
+
+    @Override
+    public int hashCode() {
+      return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier);
+    }
+  }
+
+  /**
+   * Result of {@link #diffCellsForRow}. Carries the cell-level drift counts plus a flag
+   * indicating whether any source-side mirror was suppressed because target tombstones
+   * would shadow it — making the row partially or fully unrepairable.
+   */
+  private static final class RowDiffOutcome {
+    static final RowDiffOutcome NONE = new RowDiffOutcome(CellDriftCounts.NONE, false);
+
+    final CellDriftCounts cells;
+    final boolean rowCannotRepair;
+
+    RowDiffOutcome(CellDriftCounts cells, boolean rowCannotRepair) {
+      this.cells = cells;
+      this.rowCannotRepair = rowCannotRepair;
+    }
+  }
+
   /**
    * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing /
    * extra on target, plus rows that cannot be repaired because target's row is entirely
@@ -885,48 +1125,135 @@ String toLogString() {
    * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under
    * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells.
    */
-  private void mirrorSourceCell(Cell cell, RowRepairMutations rowMutations) throws IOException {
+  private void mirrorSourceCell(Cell cell, RowRepairState rowState) throws IOException {
     if (CellUtil.isDelete(cell)) {
-      rowMutations.delete().add(cell);
+      rowState.delete().add(cell);
     } else {
-      rowMutations.put().add(cell);
+      rowState.put().add(cell);
     }
   }
 
   /**
-   * Tombstones a target-only cell at its exact timestamp via {@code addColumn}. Skips cells
-   * that are themselves already tombstones: HBase has no API to remove a tombstone cell —
-   * tombstones can only be reaped by major compaction once they age past the keep-deleted-
-   * cells window. Issuing another Delete at the same coordinates writes a duplicate marker,
-   * does not change the row's effective state, and only adds compaction load. Combined with
-   * the absence of a source-side counterpart to mirror, the right action is to leave the
-   * existing tombstone untouched. The repair scan's time range {@code [fromTime, toTime]}
-   * guarantees cells outside the window are never read (and therefore never deleted).
+   * Mirrors a source cell onto target only if no existing target tombstone would shadow
+   * the resulting Put. Lazily loads the target row's tombstone index on first Put cell
+   * (one raw single-row scan per row at most) via {@link RowRepairState#tombstones}. If
+   * the cell would be shadowed, the mirror is suppressed, {@code rowState.anyCellUnrepairable}
+   * is set, and the caller is expected to record {@link SyncCounters#ROWS_CANNOT_REPAIR}.
+   *
+   * Shadow detection only applies to Put cells. Tombstone source cells (under
+   * {@code --raw-scan}) bypass the check and always mirror via {@link #mirrorSourceCell}:
+   * {@link TargetRowTombstones#wouldShadow} is defined for live cells (does an existing target
+   * tombstone hide a new Put on read), and is not meaningful for delete-marker cells —
+   * mirroring a tombstone over an existing tombstone is a benign duplicate, not a suppression.
+   *
+   * @return {@code true} if the cell was mirrored, {@code false} if suppressed by shadowing.
+   */
+  private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
+      RowRepairState rowState) throws IOException {
+    if (!CellUtil.isDelete(cell) && rowState.targetRowTombstones(targetHTable).wouldShadow(cell)) {
+      rowState.anyCellUnrepairable = true;
+      return false;
+    }
+    mirrorSourceCell(cell, rowState);
+    return true;
+  }
+
+  /**
+   * Tombstones a target-only cell to make target's read view at this column match source's.
+   * Skips cells that are themselves already tombstones: HBase has no API to remove a
+   * tombstone cell — tombstones can only be reaped by major compaction once they age past
+   * the keep-deleted-cells window. Issuing another Delete at the same coordinates writes a
+   * duplicate marker, does not change the row's effective state, and only adds compaction
+   * load. Combined with the absence of a source-side counterpart to mirror, the right
+   * action is to leave the existing tombstone untouched.
+   *
+   * Tombstone subtype depends on what source has at this {@code (cf, q)}:
+   * <ul>
+   *   <li><b>Source has no cell at this column</b> — target's read at this column should
+   *       be empty. Use {@link Delete#addColumns(byte[], byte[], long)} (DeleteColumn,
+   *       scope {@code ts <= T}) so even max-versions-hidden older target versions are
+   *       shadowed. A point Delete would only shadow this exact ts, surfacing the next
+   *       hidden version on read.</li>
+   *   <li><b>Source's max ts at this column is >= target's ts</b> — point-Delete only
+   *       target's exact ts. Source's equal-or-higher ts mirror will surface; no hidden
+   *       version can surface above it.</li>
+   *   <li><b>Source's max ts at this column is &lt; target's ts</b> — point-Delete target's
+   *       ts, AND point-Delete every max-versions-hidden Put on target with ts in
+   *       {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is
+   *       shadowed, the next hidden version surfaces above source's mirror — silent
+   *       divergence. Hidden versions are discovered via the row's tombstone+Put index
+   *       loaded from a single raw all-versions scan.</li>
+   * </ul>
+   *
+   * Pre-build {@code sourceMaxTsByColumn} once per row from the source cell array.
    *
    * @return true if the cell was a live cell that contributed a tombstone marker, false if
    *         the cell was already a tombstone and was skipped.
    */
-  private boolean tombstoneTargetCell(Cell cell, RowRepairMutations rowMutations) {
+  private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairState rowState,
+    Map<ColumnKey, Long> sourceMaxTsByColumn) throws IOException {
     if (CellUtil.isDelete(cell)) {
       return false;
     }
-    rowMutations.delete().addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell),
-      cell.getTimestamp());
+    byte[] family = CellUtil.cloneFamily(cell);
+    byte[] qualifier = CellUtil.cloneQualifier(cell);
+    long ts = cell.getTimestamp();
+    Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier));
+    if (sourceMaxTs == null) {
+      // Source has no cell at this column; shadow ts <= T so hidden older versions
+      // don't surface on read.
+      rowState.delete().addColumns(family, qualifier, ts);
+    } else if (sourceMaxTs >= ts) {
+      // Source's mirror is at >= target's ts; point-Delete only target's exact ts.
+      rowState.delete().addColumn(family, qualifier, ts);
+    } else {
+      // Source's max ts at this column < target's ts. Point-Delete target's visible cell,
+      // AND point-Delete every hidden target Put in (sourceMaxTs, ts) so they don't
+      // surface above source's mirror after the visible cell is shadowed.
+      rowState.delete().addColumn(family, qualifier, ts);
+      Set<Long> hiddenTs = rowState.targetRowTombstones(targetHTable)
+        .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts);
+      for (Long hidden : hiddenTs) {
+        rowState.delete().addColumn(family, qualifier, hidden);
+      }
+    }
     return true;
   }
 
+  /**
+   * Outcome of {@link #mirrorWholeRow}. {@code FULLY_MIRRORED} = every source cell mirrored
+   * onto target with no shadowing. {@code PARTIALLY_MIRRORED} = at least one cell mirrored,
+   * at least one suppressed by an existing target tombstone (caller bumps both
+   * ROWS_MISSING_ON_TARGET and ROWS_CANNOT_REPAIR). {@code FULLY_SHADOWED} = every source
+   * cell suppressed; the row is unrepairable until target's tombstones are reaped (caller
+   * bumps only ROWS_CANNOT_REPAIR).
+   */
+  private enum WholeRowMirrorOutcome {
+    FULLY_MIRRORED, PARTIALLY_MIRRORED, FULLY_SHADOWED
+  }
+
   /**
    * Mirrors every source cell of a row that is missing on target. Source cells route by
    * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via
-   * {@link Delete#add(Cell)}.
+   * {@link Delete#add(Cell)}. Each cell is shadow-checked against existing target
+   * tombstones (lazy raw single-row scan on first cell) — even though target's filtered scan
+   * returned no cells for this row, target may carry tombstones that would shadow our Put.
    */
-  private void mirrorWholeRow(Result sourceResult, List<Put> pendingPuts,
-    List<Delete> pendingDeletes) throws IOException {
-    RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow());
+  private WholeRowMirrorOutcome mirrorWholeRow(Result sourceResult, Table targetHTable,
+    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+    RowRepairState rowState = new RowRepairState(sourceResult.getRow());
+    int mirrored = 0;
     for (Cell cell : sourceResult.rawCells()) {
-      mirrorSourceCell(cell, rowMutations);
+      if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowState)) {
+        mirrored++;
+      }
     }
-    rowMutations.flush(pendingPuts, pendingDeletes);
+    rowState.flush(pendingPuts, pendingDeletes);
+    if (mirrored == 0) {
+      return WholeRowMirrorOutcome.FULLY_SHADOWED;
+    }
+    return rowState.anyCellUnrepairable ? WholeRowMirrorOutcome.PARTIALLY_MIRRORED
+      : WholeRowMirrorOutcome.FULLY_MIRRORED;
   }
 
   /**
@@ -938,40 +1265,75 @@ private void mirrorWholeRow(Result sourceResult, List<Put> pendingPuts,
    *         the row was already entirely tombstones — repair could not act on it, and the
    *         caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}.
    */
-  private int tombstoneWholeRow(Result targetResult, List<Put> pendingPuts,
-    List<Delete> pendingDeletes) {
-    RowRepairMutations rowMutations = new RowRepairMutations(targetResult.getRow());
+  private int tombstoneWholeRow(Result targetResult, Table targetHTable,
+    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+    RowRepairState rowState = new RowRepairState(targetResult.getRow());
+    // Source has no row at all here, so source has no cell at any column — every target
+    // cell hits the "shadow ts <= T" path inside tombstoneTargetCell.
+    Map<ColumnKey, Long> sourceMaxTsByColumn = Collections.emptyMap();
     int liveCellsTombstoned = 0;
     for (Cell cell : targetResult.rawCells()) {
-      if (tombstoneTargetCell(cell, rowMutations)) {
+      if (tombstoneTargetCell(cell, targetHTable, rowState, sourceMaxTsByColumn)) {
         liveCellsTombstoned++;
       }
     }
-    rowMutations.flush(pendingPuts, pendingDeletes);
+    rowState.flush(pendingPuts, pendingDeletes);
     return liveCellsTombstoned;
   }
 
   /**
    * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator}
    * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the
-   * pending lists. Returns a {@link CellDriftCounts} classifying the cell-level drift:
+   * pending lists. Returns a {@link RowDiffOutcome} carrying both the cell-level drift
+   * counts and a flag indicating whether any source-side mirror was suppressed by target
+   * tombstone shadowing — letting the caller decide whether to bump
+   * {@link SyncCounters#ROWS_CANNOT_REPAIR}.
    *
+   * Branches:
    *   same coords + matching value         → no drift, no signal
-   *   same coords + different value        → different++; mirror source cell
-   *   source-only cell at unique coords    → missing++;   mirror source cell
+   *   same coords + different value        → different++; mirror source cell (shadow-checked)
+   *   source-only cell at unique coords    → missing++;   mirror source cell (shadow-checked)
    *   target-only live cell at unique coords → extra++;   tombstone target cell
    *   target-only tombstone cell           → skip (HBase cannot remove tombstones)
+   *
+   * Cells whose mirror is suppressed by shadowing do NOT bump the cell counter — the
+   * cell wasn't written to target, so it isn't repaired drift. The row-level shadow
+   * signal is surfaced via {@link RowDiffOutcome#rowCannotRepair}.
    */
-  private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult,
-    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+  private RowDiffOutcome diffCellsForRow(Result sourceResult, Result targetResult,
+    Table targetHTable, List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
     Cell[] sourceCells = sourceResult.rawCells();
     Cell[] targetCells = targetResult.rawCells();
     CellComparator comparator = CellComparator.getInstance();
 
-    RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow());
-    int missing = 0;
-    int extra = 0;
-    int different = 0;
+    // Always use the lazy raw single-row fetch path for shadow detection. Even under
+    // --raw-scan, the merge-scan's targetCells came from a time-range-filtered scan and
+    // would not surface tombstones at ts > toTime — and those out-of-window tombstones
+    // can still shadow Puts we mirror inside the window during application reads. The
+    // lazy fetch in {@link RowRepairState#tombstones} loads tombstones at ts >= fromTime
+    // regardless of upper bound, catching that case correctly.
+    RowRepairState rowState = new RowRepairState(sourceResult.getRow());
+
+    // Pre-compute the set of (cf, q) coordinates source has any cell at. tombstoneTargetCell
+    // uses this to choose between point-Delete (when source has the column at some ts —
+    // mirrored cell will surface on read) and DeleteColumn (when source has nothing at the
+    // column — must shadow ts <= T to keep max-versions-hidden older versions invisible.
+    // The map records source's max ts at each column. When tombstoning a target cell at a
+    // column source has at lower ts, hidden target Puts in (sourceMax, targetTs) need
+    // their own point Deletes too — otherwise after the visible cell is shadowed, a hidden
+    // version surfaces above source's mirror.
+    Map<ColumnKey, Long> sourceMaxTsByColumn = new HashMap<>();
+    for (Cell sourceCell : sourceCells) {
+      if (!CellUtil.isDelete(sourceCell)) {
+        ColumnKey key = new ColumnKey(CellUtil.cloneFamily(sourceCell),
+          CellUtil.cloneQualifier(sourceCell));
+        sourceMaxTsByColumn.merge(key, sourceCell.getTimestamp(), Math::max);
+      }
+    }
+
+    int cellMissing = 0;
+    int cellExtra = 0;
+    int cellDifferent = 0;
 
     int sourceIdx = 0;
     int targetIdx = 0;
@@ -980,48 +1342,119 @@ private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult
       if (cmp == 0) {
         // Same coordinates; CellComparator does not compare values, check separately.
         if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) {
-          mirrorSourceCell(sourceCells[sourceIdx], rowMutations);
-          different++;
+          if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
+            cellDifferent++;
+          }
         }
         sourceIdx++;
         targetIdx++;
       } else if (cmp < 0) {
-        mirrorSourceCell(sourceCells[sourceIdx++], rowMutations);
-        missing++;
-      } else if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) {
-        extra++;
+        if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
+          cellMissing++;
+        }
+        sourceIdx++;
+      } else {
+        // Target-only cell. Live cell → tombstone it (cellExtra++). Tombstone cell →
+        // can't act on it (HBase has no API to remove tombstones), so the row carries
+        // unrepairable drift and re-verify will mismatch.
+        if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState,
+          sourceMaxTsByColumn)) {
+          cellExtra++;
+        } else {
+          rowState.anyCellUnrepairable = true;
+        }
       }
     }
     while (sourceIdx < sourceCells.length) {
-      mirrorSourceCell(sourceCells[sourceIdx++], rowMutations);
-      missing++;
+      if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
+        cellMissing++;
+      }
+      sourceIdx++;
     }
     while (targetIdx < targetCells.length) {
-      if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) {
-        extra++;
+      if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState,
+        sourceMaxTsByColumn)) {
+        cellExtra++;
+      } else {
+        rowState.anyCellUnrepairable = true;
       }
     }
 
-    if (missing == 0 && extra == 0 && different == 0) {
-      return CellDriftCounts.NONE;
+    if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowState.anyCellUnrepairable) {
+      return RowDiffOutcome.NONE;
     }
-    rowMutations.flush(pendingPuts, pendingDeletes);
-    return new CellDriftCounts(missing, extra, different);
+    rowState.flush(pendingPuts, pendingDeletes);
+    return new RowDiffOutcome(new CellDriftCounts(cellMissing, cellExtra, cellDifferent),
+      rowState.anyCellUnrepairable);
   }
 
   /**
-   * Flushes the accumulated Put and Delete batches to the target HTable and clears both
-   * lists. Called every {@code repairBatchSize} rows and once more at the end of a chunk.
+   * Flushes the accumulated Put and Delete batches to the target HTable as a single mixed
+   * RPC and clears both lists. Called every {@code repairBatchSize} rows and once more at
+   * the end of a chunk.
+   *
+   * Issuing both Puts and Deletes via {@link Table#batch} (one network round-trip) instead
+   * of separate {@code put()} + {@code delete()} calls (two round-trips) eliminates the
+   * inter-RPC failure window where a JVM/regionserver crash between the two would leave
+   * target with Puts applied but matching Deletes not yet tombstoned. Server-side, batch
+   * mutations are still applied per-row sequentially, so a regionserver crash mid-batch
+   * can still leave partial application — but the mid-flush gap on the client side is
+   * gone.
+   *
+   * {@link InterruptedException} from {@code batch} is converted to {@link IOException}
+   * so the existing per-chunk catch path treats interruption like any other transient
+   * write failure (chunk → REPAIR_FAILED, retry on next invocation). The interrupt flag is
+   * restored so an outer cancellation still observes the interrupted state.
    */
   private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delete> deletes)
     throws IOException {
-    if (!puts.isEmpty()) {
-      targetHTable.put(puts);
-      puts.clear();
+    if (puts.isEmpty() && deletes.isEmpty()) {
+      return;
     }
-    if (!deletes.isEmpty()) {
-      targetHTable.delete(deletes);
-      deletes.clear();
+    List<Row> mutations = new ArrayList<>(puts.size() + deletes.size());
+    mutations.addAll(puts);
+    mutations.addAll(deletes);
+    Object[] results = new Object[mutations.size()];
+    try {
+      targetHTable.batch(mutations, results);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new IOException("Interrupted while flushing repair mutations", e);
+    }
+    puts.clear();
+    deletes.clear();
+  }
+
+  /**
+   * Writes a chunk-level checkpoint row and bumps the matching outcome counter, as a single
+   * "this attempt is recorded" unit. The outcome counter is bumped only after a successful
+   * checkpoint write, so on-disk audit and in-memory counters stay in sync.
+   *
+   * If the checkpoint write throws {@link SQLException}, the failure is logged and the
+   * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is
+   * NOT propagated. Reasons:
+   * <ul>
+   *   <li>Target's data was already mutated during the merge — failing the mapper task
+   *       wouldn't roll that back, and would trigger a MapReduce retry that re-verifies
+   *       against already-mutated target state (audit trail loss).</li>
+   *   <li>Other chunks in this mapper still deserve a chance to be processed.</li>
+   *   <li>The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to
+   *       operators and drives a non-zero exit at job end.</li>
+   * </ul>
+   */
+  private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row,
+    SyncCounters outcomeCounter, Context context) {
+    try {
+      syncTableOutputRepository.checkpointSyncTableResult(row);
+      context.getCounter(outcomeCounter).increment(1);
+    } catch (SQLException e) {
+      LOGGER.error(
+        "Failed to write {} checkpoint for chunk source=[{}, {}] on table {}: target data "
+          + "was mutated during the merge, but no checkpoint row will exist for this chunk. "
+          + "CHECKPOINT_WRITE_FAILED counter is incremented; mapper continues.",
+        row.getStatus(), Bytes.toStringBinary(row.getStartRowKey()),
+        Bytes.toStringBinary(row.getEndRowKey()), tableName, e);
+      context.getCounter(SyncCounters.CHECKPOINT_WRITE_FAILED).increment(1);
     }
   }
 
@@ -1093,16 +1526,30 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
 
         // Drift signals are bumped at the branch that semantically caused them: row-level
         // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch.
+        // ROWS_CANNOT_REPAIR is bumped whenever any source mirror is suppressed by an
+        // existing target tombstone (or the cmp > 0 row was already entirely tombstones).
         if (cmp == 0) {
           // Same row key on both clusters — diff at cell level and repair only if cells differ.
-          driftCounters.addCellDrift(
-            diffCellsForRow(sourceResult, targetResult, pendingPuts, pendingDeletes));
+          RowDiffOutcome outcome = diffCellsForRow(sourceResult, targetResult, targetHTable,
+            pendingPuts, pendingDeletes);
+          driftCounters.addCellDrift(outcome.cells);
+          if (outcome.rowCannotRepair) {
+            driftCounters.rowsCannotRepair++;
+          }
           sourceResult = sourceScanner.next();
           targetResult = targetScanner.next();
         } else if (cmp < 0) {
-          // Source-only row — mirror it onto target.
-          mirrorWholeRow(sourceResult, pendingPuts, pendingDeletes);
-          driftCounters.rowsMissingOnTarget++;
+          // Source-only row — mirror it onto target. Even though target's filtered scan
+          // returned no row at this key, target may carry tombstones that would shadow
+          // some or all of the Puts.
+          WholeRowMirrorOutcome outcome =
+            mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes);
+          if (outcome != WholeRowMirrorOutcome.FULLY_SHADOWED) {
+            driftCounters.rowsMissingOnTarget++;
+          }
+          if (outcome != WholeRowMirrorOutcome.FULLY_MIRRORED) {
+            driftCounters.rowsCannotRepair++;
+          }
           sourceResult = sourceScanner.next();
         } else {
           // Target-only row — tombstone its live cells. If the row is already entirely
@@ -1110,7 +1557,7 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
           // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can
           // see the unrepairable drift volume.
           int liveCellsTombstoned =
-            tombstoneWholeRow(targetResult, pendingPuts, pendingDeletes);
+            tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes);
           if (liveCellsTombstoned == 0) {
             driftCounters.rowsCannotRepair++;
           } else {
@@ -1133,42 +1580,48 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar
       LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}",
         Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName,
         e.getMessage(), e);
-      context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).increment(1);
 
       Timestamp failedAt = new Timestamp(System.currentTimeMillis());
       // Capture partial progress in the COUNTERS column for triage.
       String failedCounters =
         driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
-      syncTableOutputRepository
-        .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-          .setTableName(tableName).setTargetCluster(targetZkQuorum)
-          .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
-          .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun)
-          .setStartRowKey(sourceStart).setEndRowKey(sourceEnd)
-          .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)
-          .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt)
-          .setCounters(failedCounters).build());
+      writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetZkQuorum)
+        .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
+        .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun)
+        .setStartRowKey(sourceStart).setEndRowKey(sourceEnd)
+        .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)
+        .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt)
+        .setCounters(failedCounters).build(), SyncCounters.CHUNKS_REPAIR_FAILED, context);
       return;
     }
 
     driftCounters.commitTo(context);
-    context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1);
+    // Chunk transitions to UNREPAIRABLE if any row landed in ROWS_CANNOT_REPAIR — operator
+    // intervention (typically major compaction on target to reap shadowing tombstones) is
+    // needed before re-running the tool. Otherwise REPAIRED.
+    boolean unrepairable = driftCounters.rowsCannotRepair > 0;
+    PhoenixSyncTableCheckpointOutputRow.Status status = unrepairable
+      ? PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE
+      : PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
 
     Timestamp repairEndTime = new Timestamp(System.currentTimeMillis());
     String repairCounters =
       driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
 
-    syncTableOutputRepository
-      .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetZkQuorum)
-        .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
-        .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart)
-        .setEndRowKey(sourceEnd).setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)
-        .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime)
-        .setCounters(repairCounters).build());
-
-    LOGGER.info("Completed repair for chunk source=[{}, {}]: {}",
-      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd),
+    // Write checkpoint first; outcome counter is bumped inside writeChunkCheckpoint only
+    // on a successful write so the audit row and the counter stay consistent.
+    writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetZkQuorum)
+      .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
+      .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart)
+      .setEndRowKey(sourceEnd).setStatus(status)
+      .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime)
+      .setCounters(repairCounters).build(),
+      unrepairable ? SyncCounters.CHUNKS_UNREPAIRABLE : SyncCounters.CHUNKS_REPAIRED, context);
+
+    LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}",
+      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), status,
       driftCounters.toLogString());
   }
 
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
index 8ec66abf7ca..b939a064472 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
@@ -415,6 +415,8 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue();
       long repairedMappers =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue();
+      long unrepairableMappers = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
       long repairFailedMappers = counters
         .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
       long chunksVerified =
@@ -423,6 +425,8 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue();
       long chunksRepaired =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue();
+      long chunksUnrepairable = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
       long chunksRepairFailed = counters
         .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
       long sourceRowsProcessed =
@@ -441,19 +445,35 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
       long cellsDifferentOnTarget = counters
         .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
+      long checkpointWriteFailed = counters
+        .findCounter(PhoenixSyncTableMapper.SyncCounters.CHECKPOINT_WRITE_FAILED).getValue();
       LOGGER.info(
         "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n"
-          + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n"
+          + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Unrepairable Mappers: {}, \n"
+          + "Repair Failed Mappers: {}, \n"
           + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n"
-          + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n"
+          + "Chunks Unrepairable: {}, \n Chunks Repair Failed: {}, \n"
+          + "Source Rows Processed: {}, \n Target Rows Processed: {}, \n"
           + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n"
           + "Rows Cannot Repair: {}, \n"
           + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n"
-          + "Cells Different On Target: {}",
-        taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers,
-        chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed,
-        targetRowsProcessed, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair,
-        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
+          + "Cells Different On Target: {}, \n"
+          + "Checkpoint Write Failed: {}",
+        taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, unrepairableMappers,
+        repairFailedMappers, chunksVerified, chunksMismatched, chunksRepaired, chunksUnrepairable,
+        chunksRepairFailed, sourceRowsProcessed, targetRowsProcessed, rowsMissingOnTarget,
+        rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, cellsExtraOnTarget,
+        cellsDifferentOnTarget, checkpointWriteFailed);
+      if (checkpointWriteFailed > 0) {
+        LOGGER.error(
+          "{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row "
+            + "for table {}. Target data was mutated but the audit trail is incomplete. "
+            + "Investigate the checkpoint table state before relying on it; affected chunks "
+            + "will be re-attempted on the next invocation since they have no terminal "
+            + "checkpoint status.",
+          checkpointWriteFailed, qTable);
+        return false;
+      }
     } else {
       LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed "
         + "during initialization", qTable);

From b204914773f976e01d507b7566adc9b3bfaf7c9c Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Wed, 3 Jun 2026 20:19:35 +0530
Subject: [PATCH 09/18] checkpointing test implementation

---
 .../PhoenixSyncTableCheckpointOutputRow.java  |   95 +-
 .../PhoenixSyncTableChunkRepairer.java        | 1015 +++++++++++++++++
 .../mapreduce/PhoenixSyncTableMapper.java     |  977 ++--------------
 3 files changed, 1163 insertions(+), 924 deletions(-)
 create mode 100644 phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java

diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
index 86574326ad2..ef3abbbeb55 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
@@ -146,12 +146,14 @@ public String getCounters() {
 
   @VisibleForTesting
   public long getSourceRowsProcessed() {
-    return CounterFormatter.parseSourceRows(counters);
+    return CounterFormatter.parseCounterValue(counters,
+      PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name());
   }
 
   @VisibleForTesting
   public long getTargetRowsProcessed() {
-    return CounterFormatter.parseTargetRows(counters);
+    return CounterFormatter.parseCounterValue(counters,
+      PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name());
   }
 
   /**
@@ -160,49 +162,25 @@ public long getTargetRowsProcessed() {
    */
   public static class CounterFormatter {
     private static final String FORMAT_CHUNK =
-      "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d";
-    private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d";
+      "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d";
+    private static final String FORMAT_MAPPER =
+      "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d";
 
     /**
-     * Formats chunk counters as comma-separated key=value pairs. Always emits all eight
-     * counters; for verify-only chunks (no repair) the six drift counters are 0 so
-     * operators querying the checkpoint table see a uniform format.
-     *
-     * Drift signals partition into two layers:
-     *   row-level  — whole row missing or extra on target, or unrepairable (target row is
-     *                entirely tombstones — HBase cannot remove tombstones, only major
-     *                compaction does)
-     *   cell-level — for rows present on both clusters, individual cells missing, extra,
-     *                or differing in value at matching coordinates
-     * The two layers are disjoint per row: a row drift case contributes only to row
-     * counters; a cell drift case contributes only to cell counters.
-     *
-     * @param sourceRows         Source rows processed
-     * @param targetRows         Target rows processed
-     * @param rowsMissingOnTarget Rows present on source but missing on target (0 if not
-     *                            repaired)
-     * @param rowsExtraOnTarget  Rows present on target but missing on source whose live
-     *                           cells repair tombstoned (0 if not repaired)
-     * @param rowsCannotRepair   Rows present on target but missing on source whose contents
-     *                           are entirely tombstones — repair cannot act on them and an
-     *                           operator-driven major compaction is required to make the
-     *                           verifier converge under {@code --raw-scan}
-     * @param cellsMissingOnTarget Cells (across rows present on both sides) that source had
-     *                             at coordinates target lacked (0 if not repaired)
-     * @param cellsExtraOnTarget  Cells (across rows present on both sides) that target had
-     *                            at coordinates source lacked (0 if not repaired)
-     * @param cellsDifferentOnTarget Cells (across rows present on both sides) at matching
-     *                               coordinates whose values differed (0 if not repaired)
-     * @return Formatted string with all eight counters
+     * Formats chunk counters as comma-separated key=value pairs. Always emits all nine
+     * counters; unpopulated counters are 0 so operators querying the checkpoint table see
+     * a uniform format. {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run;
+     * cell-level counters and {@code ROWS_CANNOT_REPAIR} are populated only in repair mode.
      */
     public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget,
-      long rowsExtraOnTarget, long rowsCannotRepair, long cellsMissingOnTarget,
-      long cellsExtraOnTarget, long cellsDifferentOnTarget) {
+      long rowsExtraOnTarget, long rowsDifferentOnTarget, long rowsCannotRepair,
+      long cellsMissingOnTarget, long cellsExtraOnTarget, long cellsDifferentOnTarget) {
       return String.format(FORMAT_CHUNK,
         PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows,
         PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows,
         PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget,
         PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_DIFFERENT_ON_TARGET.name(), rowsDifferentOnTarget,
         PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair,
         PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget,
         PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget,
@@ -211,40 +189,27 @@ public static String formatChunk(long sourceRows, long targetRows, long rowsMiss
     }
 
     /**
-     * Formats mapper counters as comma-separated key=value pairs.
-     * @param chunksVerified   Chunks verified count
-     * @param chunksMismatched Chunks mismatched count
-     * @param sourceRows       Source rows processed
-     * @param targetRows       Target rows processed
-     * @return Formatted string with all mapper counters
+     * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven
+     * drift counters are the per-region sum of the same fields emitted by
+     * {@link #formatChunk}.
      */
     public static String formatMapper(long chunksVerified, long chunksMismatched, long sourceRows,
-      long targetRows) {
+      long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget,
+      long rowsDifferentOnTarget, long rowsCannotRepair, long cellsMissingOnTarget,
+      long cellsExtraOnTarget, long cellsDifferentOnTarget) {
       return String.format(FORMAT_MAPPER,
         PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED.name(), chunksVerified,
         PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED.name(), chunksMismatched,
         PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows,
-        PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows);
-    }
-
-    /**
-     * Parses SOURCE_ROWS_PROCESSED value from counter string.
-     * @param counters Counter string in format "KEY1=val1,KEY2=val2,..."
-     * @return Source rows processed, or 0 if not found
-     */
-    public static long parseSourceRows(String counters) {
-      return parseCounterValue(counters,
-        PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name());
-    }
-
-    /**
-     * Parses TARGET_ROWS_PROCESSED value from counter string.
-     * @param counters Counter string in format "KEY1=val1,KEY2=val2,..."
-     * @return Target rows processed, or 0 if not found
-     */
-    public static long parseTargetRows(String counters) {
-      return parseCounterValue(counters,
-        PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name());
+        PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_DIFFERENT_ON_TARGET.name(), rowsDifferentOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget,
+        PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET.name(),
+        cellsDifferentOnTarget);
     }
 
     /**
@@ -253,7 +218,7 @@ public static long parseTargetRows(String counters) {
      * @param counterName Name of the counter to extract
      * @return Counter value, or 0 if not found
      */
-    private static long parseCounterValue(String counters, String counterName) {
+    public static long parseCounterValue(String counters, String counterName) {
       if (counters == null || counters.isEmpty()) {
         return 0;
       }
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
new file mode 100644
index 00000000000..6a1da24bc45
--- /dev/null
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
@@ -0,0 +1,1015 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.TreeMap;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Row;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.Progressable;
+import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.util.ScanUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Performs row-level repair for a mismatched chunk by merge-scanning source and target
+ * cluster data and applying targeted mutations to target.
+ *
+ * <p>The two scan ranges may differ: the verifier reads target over a wider range than
+ * source (covers extra-on-target rows that fall between consecutive source chunks);
+ * repair must mirror the same boundaries so those extras are visible here as
+ * {@code cmp > 0} rows and get deleted.
+ *
+ * <p>Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee).
+ * <ul>
+ *   <li>{@code cmp == 0} (same row): compare cells; repair only differing cells.</li>
+ *   <li>{@code cmp <  0} (source-only): mirror all source cells onto target.</li>
+ *   <li>{@code cmp >  0} (target-only): tombstone target cells within {@code [fromTime, toTime]}.</li>
+ * </ul>
+ * Cells outside {@code [fromTime, toTime]} are never read (scan time range), so never mutated.
+ *
+ * <p>Tombstone semantics: HBase has four tombstone subtypes ({@code Delete},
+ * {@code DeleteColumn}, {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts
+ * we mirror onto target may be silently shadowed by an existing target tombstone; in that
+ * case the mirror is suppressed and the row carries unrepairable drift (operator must
+ * major-compact target to reap shadowing tombstones before a re-run can converge). See
+ * {@link TargetRowRecord}.
+ */
+public final class PhoenixSyncTableChunkRepairer {
+
+  private static final Logger LOGGER =
+    LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class);
+
+  private final Connection sourceConnection;
+  private final Connection targetConnection;
+  private final PTable pTable;
+  private final byte[] physicalTableName;
+  private final long fromTime;
+  private final long toTime;
+  private final boolean isRawScan;
+  private final boolean isReadAllVersions;
+  private final int repairBatchSize;
+  private final String tableName;
+
+  public PhoenixSyncTableChunkRepairer(Connection sourceConnection, Connection targetConnection,
+    PTable pTable, byte[] physicalTableName, String tableName, long fromTime, long toTime,
+    boolean isRawScan, boolean isReadAllVersions, int repairBatchSize) {
+    this.sourceConnection = sourceConnection;
+    this.targetConnection = targetConnection;
+    this.pTable = pTable;
+    this.physicalTableName = physicalTableName;
+    this.tableName = tableName;
+    this.fromTime = fromTime;
+    this.toTime = toTime;
+    this.isRawScan = isRawScan;
+    this.isReadAllVersions = isReadAllVersions;
+    this.repairBatchSize = repairBatchSize;
+  }
+
+  /**
+   * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the
+   * terminal status and accumulated {@link DriftCounters}; never throws on per-chunk
+   * scan/flush failure (returns {@link ChunkRepairResult.Status#REPAIR_FAILED}). The
+   * only declared {@link SQLException} surfaces from {@link Connection#unwrap}, which
+   * indicates a misconfigured connection rather than a per-chunk fault.
+   */
+  public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress)
+    throws SQLException {
+    DriftCounters drift = new DriftCounters();
+
+    LOGGER.info("Starting repair for chunk source=[{}, {}] target={}{}, {}{} on table {}",
+      Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd),
+      req.targetStartInclusive ? "[" : "(", Bytes.toStringBinary(req.targetStart),
+      Bytes.toStringBinary(req.targetEnd), req.targetEndInclusive ? "]" : ")", tableName);
+
+    PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class);
+    PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class);
+
+    Scan sourceScan;
+    Scan targetScan;
+    try {
+      sourceScan = createRepairScan(req.sourceStart, req.sourceEnd, true, true, sourcePhoenixConn);
+      targetScan = createRepairScan(req.targetStart, req.targetEnd, req.targetStartInclusive,
+        req.targetEndInclusive, targetPhoenixConn);
+    } catch (IOException e) {
+      LOGGER.error("Repair failed to build scans for chunk source=[{}, {}] on table {}: {}",
+        Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), tableName,
+        e.getMessage(), e);
+      return ChunkRepairResult.failed(drift, e);
+    }
+
+    try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName);
+      Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName);
+      ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan);
+      ResultScanner targetScanner = targetHTable.getScanner(targetScan)) {
+      if (req.dryRun) {
+        walkAndCountDrift(sourceScanner, targetScanner, drift, progress);
+      } else {
+        repairDiffRows(sourceScanner, targetScanner, targetHTable, drift, progress);
+      }
+    } catch (IOException e) {
+      // Per-chunk fault isolation. The mapper marks this chunk REPAIR_FAILED and continues
+      // with the next chunk
+      LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}",
+        Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), tableName,
+        e.getMessage(), e);
+      return ChunkRepairResult.failed(drift, e);
+    }
+
+    ChunkRepairResult result = ChunkRepairResult.completed(drift);
+    LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}",
+      Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd),
+      result.status, drift.toLogString());
+    return result;
+  }
+
+  /**
+   * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row;
+   * never touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose
+   * contents differ — verifier-only signal, not produced in repair mode (which reports cell
+   * granularity instead).
+   */
+  private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner targetScanner,
+    DriftCounters drift, Progressable progress) throws IOException {
+    Result sourceResult = sourceScanner.next();
+    Result targetResult = targetScanner.next();
+
+    while (sourceResult != null || targetResult != null) {
+      int cmp = compareRowKeys(sourceResult, targetResult);
+      if (cmp == 0) {
+        if (!rowsEqual(sourceResult, targetResult)) {
+          drift.rowsDifferentOnTarget++;
+          LOGGER.warn("Row different on target for table {} row={}", tableName,
+            Bytes.toStringBinary(sourceResult.getRow()));
+        }
+        sourceResult = sourceScanner.next();
+        targetResult = targetScanner.next();
+      } else if (cmp < 0) {
+        drift.rowsMissingOnTarget++;
+        LOGGER.warn("Row missing on target for table {} row={}", tableName,
+          Bytes.toStringBinary(sourceResult.getRow()));
+        sourceResult = sourceScanner.next();
+      } else {
+        drift.rowsExtraOnTarget++;
+        LOGGER.warn("Row extra on target for table {} row={}", tableName,
+          Bytes.toStringBinary(targetResult.getRow()));
+        targetResult = targetScanner.next();
+      }
+      if (progress != null) {
+        progress.progress();
+      }
+    }
+  }
+
+  /**
+   * Repair-mode merge-walk: resolves drift by emitting mutations into pending batches, flushing
+   * each time the batch reaches {@link #repairBatchSize}, and finally draining the tail. Per
+   * branch:
+   * <ul>
+   *   <li>{@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable
+   *       flag.</li>
+   *   <li>{@code cmp <  0} — mirror the source row onto target; bump {@code rowsMissing} unless
+   *       the whole row was shadowed, and {@code rowsCannotRepair} unless every cell was
+   *       mirrored.</li>
+   *   <li>{@code cmp >  0} — tombstone the extra row on target; bump {@code rowsExtra} when at
+   *       least one live cell was tombstoned, else {@code rowsCannotRepair} (row was already
+   *       all tombstones).</li>
+   * </ul>
+   */
+  private void repairDiffRows(ResultScanner sourceScanner, ResultScanner targetScanner,
+    Table targetHTable, DriftCounters drift, Progressable progress) throws IOException {
+    List<Put> pendingPuts = new ArrayList<>();
+    List<Delete> pendingDeletes = new ArrayList<>();
+    Result sourceResult = sourceScanner.next();
+    Result targetResult = targetScanner.next();
+
+    while (sourceResult != null || targetResult != null) {
+      int cmp = compareRowKeys(sourceResult, targetResult);
+      if (cmp == 0) {
+        RowDriftInfo rowDriftInfo = generateMutationForDiffCells(sourceResult, targetResult,
+          targetHTable, pendingPuts, pendingDeletes);
+        drift.addCellDrift(rowDriftInfo.cells);
+        if (rowDriftInfo.rowCannotRepair) {
+          drift.rowsCannotRepair++;
+        }
+        if (rowDriftInfo != RowDriftInfo.NONE) {
+          LOGGER.warn(
+            "Row mismatch on table {} row={}: cell drift missing={}, extra={}, different={}, "
+              + "rowCannotRepair={}",
+            tableName, Bytes.toStringBinary(sourceResult.getRow()), rowDriftInfo.cells.missing,
+            rowDriftInfo.cells.extra, rowDriftInfo.cells.different, rowDriftInfo.rowCannotRepair);
+        }
+        sourceResult = sourceScanner.next();
+        targetResult = targetScanner.next();
+      } else if (cmp < 0) {
+        byte[] missingRowKey = sourceResult.getRow();
+        RowMirrorStatus outcome =
+          mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes);
+        if (outcome != RowMirrorStatus.FULLY_SHADOWED) {
+          drift.rowsMissingOnTarget++;
+        }
+        if (outcome != RowMirrorStatus.FULLY_MIRRORED) {
+          drift.rowsCannotRepair++;
+        }
+        LOGGER.warn("Row missing on target for table {} row={}: mirrorOutcome={}", tableName,
+          Bytes.toStringBinary(missingRowKey), outcome);
+        sourceResult = sourceScanner.next();
+      } else {
+        byte[] extraRowKey = targetResult.getRow();
+        int liveCellsTombstoned =
+          tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes);
+        if (liveCellsTombstoned == 0) {
+          drift.rowsCannotRepair++;
+        } else {
+          drift.rowsExtraOnTarget++;
+        }
+        LOGGER.warn("Row extra on target for table {} row={}: liveCellsTombstoned={}", tableName,
+          Bytes.toStringBinary(extraRowKey), liveCellsTombstoned);
+        targetResult = targetScanner.next();
+      }
+
+      if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) {
+        flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
+      }
+      if (progress != null) {
+        progress.progress();
+      }
+    }
+    flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
+  }
+
+  /**
+   * Compares the row keys of two scanner results; treats a null result as past-end so a
+   * {@code null/non-null} pair sorts the non-null side first.
+   */
+  private static int compareRowKeys(Result sourceResult, Result targetResult) {
+    if (sourceResult == null) {
+      return 1;
+    }
+    if (targetResult == null) {
+      return -1;
+    }
+    return Bytes.compareTo(sourceResult.getRow(), targetResult.getRow());
+  }
+
+  /**
+   * Whole-row content equality check used by dry-run row-level diffing. Delegates to
+   * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level
+   * mismatch (family, qualifier, timestamp, type, value); we map the throw to {@code false}
+   * so the cmp==0 path can flag the row without producing repair mutations.
+   */
+  private boolean rowsEqual(Result src, Result tgt) {
+    try {
+      Result.compareResults(src, tgt, false);
+      return true;
+    } catch (Exception e) {
+      return false;
+    }
+  }
+
+  /**
+   * Mirrors every source cell of a row that is missing on target. Each cell is
+   * shadow-checked against target's per-row record (see {@link TargetRowRecord}).
+   */
+  private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable,
+    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+    RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow());
+    int mirrored = 0;
+    for (Cell cell : sourceResult.rawCells()) {
+      if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowRepairBuffer)) {
+        mirrored++;
+      }
+    }
+    rowRepairBuffer.flush(pendingPuts, pendingDeletes);
+    if (mirrored == 0) {
+      return RowMirrorStatus.FULLY_SHADOWED;
+    }
+    return rowRepairBuffer.anyCellUnrepairable ? RowMirrorStatus.PARTIALLY_MIRRORED
+      : RowMirrorStatus.FULLY_MIRRORED;
+  }
+
+  /**
+   * Tombstones every live cell of a row that is extra on target. Skips cells that are
+   * themselves already tombstones (see {@link #tombstoneTargetCell}).
+   *
+   * @return the number of live cells that contributed a tombstone marker. {@code 0} means
+   *         the row was already entirely tombstones; the caller records this as
+   *         {@code ROWS_CANNOT_REPAIR}.
+   */
+  private int tombstoneWholeRow(Result targetResult, Table targetHTable, List<Put> pendingPuts,
+    List<Delete> pendingDeletes) throws IOException {
+    RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(targetResult.getRow());
+    // Empty source map drives every target cell into tombstoneTargetCell's "no source column"
+    // branch (DeleteColumn at ts <= T).
+    Map<ColumnKey, Long> sourceMaxTsByColumn = Collections.emptyMap();
+    int liveCellsTombstoned = 0;
+    for (Cell cell : targetResult.rawCells()) {
+      if (tombstoneTargetCell(cell, targetHTable, rowRepairBuffer, sourceMaxTsByColumn)) {
+        liveCellsTombstoned++;
+      }
+    }
+    rowRepairBuffer.flush(pendingPuts, pendingDeletes);
+    return liveCellsTombstoned;
+  }
+
+  /**
+   * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator}
+   * order and emits {@link Put}/{@link Delete} mutations.
+   *
+   * <p>Branches:
+   * <ul>
+   *   <li>same coords + matching value → no drift</li>
+   *   <li>same coords + different value → different++; mirror source cell (shadow-checked)</li>
+   *   <li>source-only cell → missing++; mirror source cell (shadow-checked)</li>
+   *   <li>target-only live cell → extra++; tombstone target cell</li>
+   *   <li>target-only tombstone cell → skip; row carries unrepairable drift</li>
+   * </ul>
+   * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written);
+   * the row-level signal flows through {@link RowDriftInfo#rowCannotRepair}.
+   */
+  private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result targetResult,
+    Table targetHTable, List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
+    Cell[] sourceCells = sourceResult.rawCells();
+    Cell[] targetCells = targetResult.rawCells();
+    CellComparator comparator = CellComparator.getInstance();
+    RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow());
+
+    // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the
+    // tombstone shape for a target-extra cell. Three cases:
+    //   key absent              → DeleteColumn at target's ts (wipe hidden older versions too)
+    //   sourceMaxTs >= targetTs → point Delete at target's ts
+    //   sourceMaxTs <  targetTs → point Delete at target's ts AND every hidden target Put in
+    //                             (sourceMaxTs, targetTs); else hidden version surfaces.
+    // Math::max collapses multi-version source so the third case only fires when target sits above
+    // ALL of source's versions.
+    // Check tombstoneTargetCell() for its usage.
+    Map<ColumnKey, Long> sourceMaxTsByColumn = new HashMap<>();
+    for (Cell sourceCell : sourceCells) {
+      if (!CellUtil.isDelete(sourceCell)) {
+        sourceMaxTsByColumn.merge(ColumnKey.of(sourceCell), sourceCell.getTimestamp(), Math::max);
+      }
+    }
+
+    int cellMissing = 0;
+    int cellExtra = 0;
+    int cellDifferent = 0;
+
+    int sourceIdx = 0;
+    int targetIdx = 0;
+    while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) {
+      int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]);
+      if (cmp == 0) {
+        // Same coordinates, compare values.
+        if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) {
+          if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) {
+            cellDifferent++;
+          }
+        }
+        sourceIdx++;
+        targetIdx++;
+      } else if (cmp < 0) {
+        // Missing on target
+        if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) {
+          cellMissing++;
+        }
+        sourceIdx++;
+      } else {
+        // extra on target
+        if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
+          sourceMaxTsByColumn)) {
+          cellExtra++;
+        } else {
+          rowRepairBuffer.anyCellUnrepairable = true;
+        }
+      }
+    }
+    while (sourceIdx < sourceCells.length) {
+      if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) {
+        cellMissing++;
+      }
+      sourceIdx++;
+    }
+    while (targetIdx < targetCells.length) {
+      if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
+        sourceMaxTsByColumn)) {
+        cellExtra++;
+      } else {
+        rowRepairBuffer.anyCellUnrepairable = true;
+      }
+    }
+
+    if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowRepairBuffer.anyCellUnrepairable) {
+      return RowDriftInfo.NONE;
+    }
+    rowRepairBuffer.flush(pendingPuts, pendingDeletes);
+    return new RowDriftInfo(new CellDriftCounts(cellMissing, cellExtra, cellDifferent),
+      rowRepairBuffer.anyCellUnrepairable);
+  }
+
+  /**
+   * Routes a source cell to the right mutation kind. Tombstone cells go through
+   * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan}
+   * this matters because {@link Put#add(Cell)} rejects non-Put cells.
+   */
+  private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws IOException {
+    if (CellUtil.isDelete(cell)) {
+      rowRepairBuffer.delete().add(cell);
+    } else {
+      rowRepairBuffer.put().add(cell);
+    }
+  }
+
+  /**
+   * Mirrors a source cell onto target unless an existing target tombstone would shadow it.
+   * Shadow detection runs only if source has Put cells; tombstoned source cells always mirror.
+   *
+   * @return {@code true} if mirrored, {@code false} if suppressed (caller marks the row
+   *         unrepairable).
+   */
+  private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
+    RowRepairBuffer rowRepairBuffer) throws IOException {
+    // Source Puts can be shadowed by an existing target tombstone, the Put lands on
+    // disk but stays invisible to reads, so writing it is wasted work and the row stays
+    // diverged. e.g. src Put(name, T=200) vs tgt DeleteColumn(name, T=300) covering
+    // ts<=300. Skip the write and flag the row unrepairable; operator must major-compact
+    // target to reap the shadow. Source tombstones can't be shadowed, hence skip the check.
+    if (!CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell)) {
+      rowRepairBuffer.anyCellUnrepairable = true;
+      return false;
+    }
+    mirrorSourceCell(cell, rowRepairBuffer);
+    return true;
+  }
+
+  /**
+   * Tombstones a target-only cell to make target's read view at this column match source's.
+   * Skips cells that are themselves already tombstones.
+   *
+   * <p>Tombstone subtype depends on what source has at this {@code (cf, q)}:
+   * <ul>
+   *   <li><b>Source has no cell at this column</b> — use {@link Delete#addColumns} (DeleteColumn,
+   *       scope {@code ts <= T}) so even max-versions-hidden older target versions are
+   *       shadowed.</li>
+   *   <li><b>Source's max ts at this column is &gt;= target's ts</b> — point-Delete only
+   *       target's exact ts.</li>
+   *   <li><b>Source's max ts at this column is &lt; target's ts</b> — point-Delete target's
+   *       ts, AND point-Delete every max-versions-hidden Put on target with ts in
+   *       {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is
+   *       shadowed, the next hidden version surfaces above source's mirror.</li>
+   * </ul>
+   *
+   * @return true if the cell was a live cell that contributed a tombstone marker, false if
+   *         the cell was already a tombstone and was skipped.
+   */
+  private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuffer rowRepairBuffer,
+    Map<ColumnKey, Long> sourceMaxTsByColumn) throws IOException {
+    if (CellUtil.isDelete(cell)) {
+      return false;
+    }
+    byte[] family = CellUtil.cloneFamily(cell);
+    byte[] qualifier = CellUtil.cloneQualifier(cell);
+    long ts = cell.getTimestamp();
+    Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier));
+    if (sourceMaxTs == null) {
+      rowRepairBuffer.delete().addColumns(family, qualifier, ts);
+    } else if (sourceMaxTs >= ts) {
+      rowRepairBuffer.delete().addColumn(family, qualifier, ts);
+    } else {
+      rowRepairBuffer.delete().addColumn(family, qualifier, ts);
+      Set<Long> hiddenTs = rowRepairBuffer.targetRowRecord(targetHTable)
+        .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts);
+      for (Long hidden : hiddenTs) {
+        rowRepairBuffer.delete().addColumn(family, qualifier, hidden);
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Loads target's per-row record with a single raw, all-versions scan. Captures both
+   * tombstones and Puts:
+   * <ul>
+   *   <li><b>Tombstones</b> drive shadow detection — would a source Put we mirror be
+   *       suppressed by an existing target tombstone on read? e.g. tgt has
+   *       DeleteColumn(NAME)@900, src wants Put(NAME, "alice")@500. Mirror would land
+   *       on disk but stay invisible because DeleteColumn@900 covers ts &lt;= 900.
+   *       Detect upfront, skip the doomed write, flag the row unrepairable.</li>
+   *   <li><b>Puts</b> drive hidden-version discovery — when target's visible Put is at a
+   *       higher ts than source's mirror, max-versions-filtered older Puts may sit between
+   *       the two and surface above source's mirror after we shadow the visible one. We
+   *       need their timestamps so we can point-Delete each. e.g. tgt has Put(NAME,
+   *       "carol")@T2 visible plus Put(NAME, "bob")@T1 hidden under MAX_VERSIONS=2; src
+   *       has Put(NAME, "alice")@T0. Point-Deleting only T2 exposes T1 above source's T0
+   *       mirror — wrong; we must point-Delete T1 too so T0 surfaces.</li>
+   * </ul>
+   * raw=true and all-versions are forced regardless of user flags so tombstones and
+   * hidden Put versions that would otherwise be filtered out are surfaced.
+   *
+   * <p>Time range: lower bound stays at {@code fromTime} since cells below the window
+   * can't affect repair inside the window; upper bound is {@code MAX_VALUE} because a
+   * tombstone at {@code ts > toTime} can still shadow a Put we mirror at
+   * {@code ts in window} during application reads.
+   */
+  private TargetRowRecord loadTargetRowRecord(byte[] rowKey, Table targetHTable)
+    throws IOException {
+    Scan scan = new Scan();
+    scan.withStartRow(rowKey, true);
+    scan.withStopRow(rowKey, true);
+    scan.setRaw(true);
+    scan.readAllVersions();
+    scan.setCacheBlocks(false);
+    scan.setTimeRange(fromTime, Long.MAX_VALUE);
+    scan.setCaching(1);
+    scan.setLimit(1);
+    TargetRowRecord rowRecord = new TargetRowRecord();
+    try (ResultScanner scanner = targetHTable.getScanner(scan)) {
+      Result raw = scanner.next();
+      if (raw != null) {
+        for (Cell cell : raw.rawCells()) {
+          rowRecord.record(cell);
+        }
+      }
+    }
+    return rowRecord;
+  }
+
+  /**
+   * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and
+   * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL /
+   * {@code IS_STRICT_TTL} attributes so the cells visited here are the same cells the
+   * verifier hashed.
+   */
+  private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
+    boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException {
+    Scan scan = new Scan();
+    scan.withStartRow(startKey, isStartKeyInclusive);
+    scan.withStopRow(endKey, isEndKeyInclusive);
+    scan.setRaw(isRawScan);
+    if (isReadAllVersions) {
+      scan.readAllVersions();
+    }
+    scan.setCacheBlocks(false);
+    scan.setTimeRange(fromTime, toTime);
+    scan.setCaching(1000);
+    ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn);
+    scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES);
+    return scan;
+  }
+
+  /**
+   * Flushes the accumulated Put and Delete batches to target as a single mixed RPC via
+   * {@link Table#batch}. The mixed batch (rather than separate {@code put()} +
+   * {@code delete()} calls) closes the inter-RPC window where a JVM/regionserver crash
+   * between the two could leave target with Puts applied but matching Deletes missing.
+   *
+   * <p>{@link Table#batch} does NOT throw for partial failures — per-mutation failures
+   * (e.g. {@code NotServingRegionException} from a region split mid-batch,
+   * {@code WrongRegionException} from a merge) land in the {@code results} array as
+   * {@link Throwable} entries. We surface the first such failure as {@link IOException}
+   * so the caller treats this chunk as {@code REPAIR_FAILED} rather than silently
+   * marking it {@code REPAIRED}; on re-run the resume filter excludes
+   * {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap.
+   */
+  private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delete> deletes)
+    throws IOException {
+    if (puts.isEmpty() && deletes.isEmpty()) {
+      return;
+    }
+    List<Row> mutations = new ArrayList<>(puts.size() + deletes.size());
+    mutations.addAll(puts);
+    mutations.addAll(deletes);
+    Object[] results = new Object[mutations.size()];
+    try {
+      targetHTable.batch(mutations, results);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new IOException("Interrupted while flushing repair mutations", e);
+    }
+    int failureCount = 0;
+    int firstFailureIdx = -1;
+    for (int i = 0; i < results.length; i++) {
+      if (results[i] instanceof Throwable) {
+        failureCount++;
+        if (firstFailureIdx < 0) {
+          firstFailureIdx = i;
+        }
+      }
+    }
+    if (failureCount > 0) {
+      Throwable firstFailure = (Throwable) results[firstFailureIdx];
+      Row failedRow = mutations.get(firstFailureIdx);
+      throw new IOException(String.format(
+        "Repair batch had %d/%d mutation failure(s); first failure on row %s: %s",
+        failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()),
+        firstFailure.getMessage()), firstFailure);
+    }
+    puts.clear();
+    deletes.clear();
+  }
+
+  // ===========================================================================
+  // Public input/output value types
+  // ===========================================================================
+
+  /**
+   * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may
+   * be wider so the repair scan sees the same cells (including extra-on-target rows between
+   * consecutive source chunks) that the verifier hashed.
+   *
+   * <p>{@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier
+   * recorded; threaded into the COUNTERS column on the resulting checkpoint row.
+   * {@link #verifyStartTime} is the timestamp captured when verification began for this
+   * chunk; reused as EXECUTION_START_TIME on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED
+   * checkpoint row so the row spans the full verify+repair lifecycle that overwrites the
+   * MISMATCHED row.
+   */
+  public static final class ChunkRepairRequest {
+    public final byte[] sourceStart;
+    public final byte[] sourceEnd;
+    public final byte[] targetStart;
+    public final byte[] targetEnd;
+    public final boolean targetStartInclusive;
+    public final boolean targetEndInclusive;
+    public final long verifySourceRows;
+    public final long verifyTargetRows;
+    public final Timestamp verifyStartTime;
+    public final boolean dryRun;
+
+    public ChunkRepairRequest(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart,
+      byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive,
+      long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, boolean dryRun) {
+      this.sourceStart = sourceStart;
+      this.sourceEnd = sourceEnd;
+      this.targetStart = targetStart;
+      this.targetEnd = targetEnd;
+      this.targetStartInclusive = targetStartInclusive;
+      this.targetEndInclusive = targetEndInclusive;
+      this.verifySourceRows = verifySourceRows;
+      this.verifyTargetRows = verifyTargetRows;
+      this.verifyStartTime = verifyStartTime;
+      this.dryRun = dryRun;
+    }
+  }
+
+  /**
+   * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift
+   * counters, end-of-attempt timestamp, and the failure exception when status is
+   * {@link Status#REPAIR_FAILED}. Status precedence (most-severe wins):
+   * {@link Status#REPAIR_FAILED} &gt; {@link Status#UNREPAIRABLE} &gt; {@link Status#REPAIRED}.
+   */
+  public static final class ChunkRepairResult {
+
+    public enum Status {
+      /** Every cell-level drift handled and no row landed in ROWS_CANNOT_REPAIR. */
+      REPAIRED,
+      /**
+       * Repair completed but at least one row carries drift the tool cannot remove —
+       * a source-side Put was shadow-suppressed by an existing target tombstone, or
+       * target's row is entirely tombstones. Operator must major-compact target to
+       * reap shadowing tombstones, then re-run.
+       */
+      UNREPAIRABLE,
+      /**
+       * Repair scan or batch flush threw {@link IOException}. Per-chunk fault isolation:
+       * mapper continues with the next chunk; the chunk re-enters as an unprocessed gap
+       * on the next invocation.
+       */
+      REPAIR_FAILED
+    }
+
+    public final Status status;
+    public final DriftCounters drift;
+    public final Timestamp endTime;
+    public final IOException failure;
+
+    private ChunkRepairResult(Status status, DriftCounters drift, Timestamp endTime,
+      IOException failure) {
+      this.status = status;
+      this.drift = drift;
+      this.endTime = endTime;
+      this.failure = failure;
+    }
+
+    static ChunkRepairResult completed(DriftCounters drift) {
+      Status status = drift.rowsCannotRepair > 0 ? Status.UNREPAIRABLE : Status.REPAIRED;
+      return new ChunkRepairResult(status, drift, new Timestamp(System.currentTimeMillis()), null);
+    }
+
+    static ChunkRepairResult failed(DriftCounters drift, IOException failure) {
+      return new ChunkRepairResult(Status.REPAIR_FAILED, drift,
+        new Timestamp(System.currentTimeMillis()), failure);
+    }
+  }
+
+  /**
+   * Per-chunk aggregate of six drift counters — three row-level
+   * ({@code rowsMissingOnTarget}, {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and
+   * three cell-level ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the
+   * caller maps fields onto MapReduce job counters and the checkpoint COUNTERS string.
+   */
+  public static final class DriftCounters {
+    public long rowsMissingOnTarget;
+    public long rowsExtraOnTarget;
+    public long rowsDifferentOnTarget;
+    public long rowsCannotRepair;
+    public long cellsMissingOnTarget;
+    public long cellsExtraOnTarget;
+    public long cellsDifferentOnTarget;
+
+    void addCellDrift(CellDriftCounts cellDrift) {
+      cellsMissingOnTarget += cellDrift.missing;
+      cellsExtraOnTarget += cellDrift.extra;
+      cellsDifferentOnTarget += cellDrift.different;
+    }
+
+    /** Compact end-of-chunk log line summarizing all drift signals. */
+    public String toLogString() {
+      return String.format(
+        "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsDifferentOnTarget=%d, "
+          + "rowsCannotRepair=%d, cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, "
+          + "cellsDifferentOnTarget=%d",
+        rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair,
+        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
+    }
+  }
+
+  /**
+   * Per-row capture of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]},
+   * built lazily from a single raw single-row scan with all-versions enabled. Used in two
+   * roles:
+   *
+   * <ol>
+   *   <li><b>Shadow detection</b> ({@link #wouldShadow}): would a source Put we're about
+   *       to mirror be suppressed by an existing target tombstone?</li>
+   *   <li><b>Hidden-version discovery</b> ({@link #targetPutTimestampsBetween}): when
+   *       max-versions on target hides older Puts behind a newer visible one, those hidden
+   *       Puts surface on read the moment we shadow the visible one. We need their
+   *       timestamps so we can point-Delete each. e.g. src has NAME@T0="alice", tgt has
+   *       NAME@T2="carol" visible plus NAME@T1 hidden under max-versions. If we only
+   *       point-Delete T2, target reads return T1 — wrong. Discovering T1 lets us
+   *       point-Delete it too, so source's T0 mirror surfaces.</li>
+   * </ol>
+   *
+   * HBase has four tombstone subtypes, each with distinct shadow semantics:
+   *   Delete                — shadows a Put at {@code (cf, q, ts == T)} exactly
+   *   DeleteColumn          — shadows Puts at {@code (cf, q, ts <= T)}
+   *   DeleteFamily          — shadows Puts at {@code (cf, *, ts <= T)}
+   *   DeleteFamilyVersion   — shadows Puts at {@code (cf, *, ts == T)}
+   * {@link #wouldShadow(Cell)} consults all four tombstone maps and returns true on any
+   * match.
+   */
+  static final class TargetRowRecord {
+    private final Map<ColumnKey, Set<Long>> deletePointTs = new HashMap<>();
+    private final Map<ColumnKey, Long> deleteColumnUpperBound = new HashMap<>();
+    private final Map<ByteBuffer, Long> deleteFamilyUpperBound = new HashMap<>();
+    private final Map<ByteBuffer, Set<Long>> deleteFamilyVersionTs = new HashMap<>();
+    /** Per-column ts-ordered set of target's Put timestamps. */
+    private final Map<ColumnKey, NavigableMap<Long, Boolean>> targetPutTs = new HashMap<>();
+
+    void record(Cell cell) {
+      if (CellUtil.isDelete(cell)) {
+        recordTombstone(cell);
+      } else {
+        targetPutTs.computeIfAbsent(ColumnKey.of(cell), k -> new TreeMap<>())
+          .put(cell.getTimestamp(), Boolean.TRUE);
+      }
+    }
+
+    private void recordTombstone(Cell tombstone) {
+      long ts = tombstone.getTimestamp();
+      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone));
+      switch (tombstone.getType()) {
+        case Delete:
+          deletePointTs.computeIfAbsent(ColumnKey.of(tombstone), k -> new HashSet<>()).add(ts);
+          break;
+        case DeleteColumn:
+          deleteColumnUpperBound.merge(ColumnKey.of(tombstone), ts, Math::max);
+          break;
+        case DeleteFamily:
+          deleteFamilyUpperBound.merge(family, ts, Math::max);
+          break;
+        case DeleteFamilyVersion:
+          deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts);
+          break;
+        default:
+          // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here.
+      }
+    }
+
+    /** Returns true if any tombstone recorded here would shadow a Put at the cell's coords. */
+    boolean wouldShadow(Cell sourcePut) {
+      long ts = sourcePut.getTimestamp();
+      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut));
+      ColumnKey column = ColumnKey.of(sourcePut);
+
+      // Delete: shadows Put at exactly (cf, q, ts == T).
+      Set<Long> pointTs = deletePointTs.get(column);
+      if (pointTs != null && pointTs.contains(ts)) {
+        return true;
+      }
+      // DeleteColumn: shadows every Put at (cf, q) with ts <= T.
+      Long deleteColTs = deleteColumnUpperBound.get(column);
+      if (deleteColTs != null && ts <= deleteColTs) {
+        return true;
+      }
+      // DeleteFamily: shadows every Put across all qualifiers in cf with ts <= T.
+      Long deleteFamTs = deleteFamilyUpperBound.get(family);
+      if (deleteFamTs != null && ts <= deleteFamTs) {
+        return true;
+      }
+      // DeleteFamilyVersion: shadows Puts across all qualifiers in cf at exactly ts == T.
+      Set<Long> dfvTs = deleteFamilyVersionTs.get(family);
+      return dfvTs != null && dfvTs.contains(ts);
+    }
+
+    /**
+     * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than
+     * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find
+     * hidden (max-versions-filtered) target versions sitting between source's max ts and
+     * target's visible ts so they can be point-Deleted.
+     */
+    Set<Long> targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive,
+      long upperExclusive) {
+      NavigableMap<Long, Boolean> tss = targetPutTs.get(new ColumnKey(family, qualifier));
+      if (tss == null) {
+        return Collections.emptySet();
+      }
+      return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet();
+    }
+  }
+
+  /** Composite (family, qualifier) key with byte-array equality semantics. */
+  static final class ColumnKey {
+    private final byte[] family;
+    private final byte[] qualifier;
+
+    ColumnKey(byte[] family, byte[] qualifier) {
+      this.family = family;
+      this.qualifier = qualifier;
+    }
+
+    static ColumnKey of(Cell cell) {
+      return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof ColumnKey)) {
+        return false;
+      }
+      ColumnKey other = (ColumnKey) o;
+      return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier);
+    }
+
+    @Override
+    public int hashCode() {
+      return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier);
+    }
+  }
+
+  /**
+   * Per-row scratch buffer: lazily-built {@link Put}/{@link Delete} mutations, lazily-loaded
+   * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the
+   * merge.
+   */
+  final class RowRepairBuffer {
+    private final byte[] rowKey;
+    Put put;
+    Delete delete;
+    TargetRowRecord targetRowRecord;
+    boolean anyCellUnrepairable;
+
+    RowRepairBuffer(byte[] rowKey) {
+      this.rowKey = rowKey;
+    }
+
+    Put put() {
+      if (put == null) {
+        put = new Put(rowKey);
+      }
+      return put;
+    }
+
+    Delete delete() {
+      if (delete == null) {
+        delete = new Delete(rowKey);
+      }
+      return delete;
+    }
+
+    /**
+     * Loads target's per-row record (tombstones + Puts) once per row, cached for the
+     * lifetime of this buffer. Two consumers:
+     *
+     * <p><b>Shadow detection</b> (used by {@link #mirrorSourceCell} via
+     * {@link TargetRowRecord#wouldShadow}). Time range is {@code [fromTime, MAX_VALUE]} —
+     * a target tombstone outside the user's verify window can still suppress a Put we
+     * mirror inside it. e.g. tgt has DeleteColumn@900, src wants Put@500, scan timeRange
+     * [0, 600). Put@500 is masked because DeleteColumn@900 covers ts &lt;= 900. Detecting
+     * this upfront lets repair skip the doomed write and flag the row unrepairable.
+     *
+     * <p><b>Hidden-version discovery</b> (used by {@link #tombstoneTargetCell} via
+     * {@link TargetRowRecord#targetPutTimestampsBetween}). When target's MAX_VERSIONS
+     * hides older Puts behind a newer visible one, point-Deleting only the visible ts
+     * exposes the hidden version above source's mirror. The record's all-versions raw
+     * scan surfaces those hidden timestamps so we can tombstone each.
+     */
+    TargetRowRecord targetRowRecord(Table targetHTable) throws IOException {
+      if (targetRowRecord == null) {
+        targetRowRecord = loadTargetRowRecord(rowKey, targetHTable);
+      }
+      return targetRowRecord;
+    }
+
+    void flush(List<Put> pendingPuts, List<Delete> pendingDeletes) {
+      if (put != null) {
+        pendingPuts.add(put);
+      }
+      if (delete != null) {
+        pendingDeletes.add(delete);
+      }
+    }
+  }
+
+  /**
+   * Cell-level drift counts produced by per-row diff. Three counters partition the cell
+   * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
+   */
+  static final class CellDriftCounts {
+    static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0);
+
+    final int missing;
+    final int extra;
+    final int different;
+
+    CellDriftCounts(int missing, int extra, int different) {
+      this.missing = missing;
+      this.extra = extra;
+      this.different = different;
+    }
+  }
+
+  /** Per-row drift summary: cell-level drift counts plus a row-unrepairable flag. */
+  static final class RowDriftInfo {
+    static final RowDriftInfo NONE = new RowDriftInfo(CellDriftCounts.NONE, false);
+
+    final CellDriftCounts cells;
+    final boolean rowCannotRepair;
+
+    RowDriftInfo(CellDriftCounts cells, boolean rowCannotRepair) {
+      this.cells = cells;
+      this.rowCannotRepair = rowCannotRepair;
+    }
+  }
+
+  /** Terminal classification of a per-row mirror attempt onto target. */
+  enum RowMirrorStatus {
+    /** All source cells mirrored in row. */
+    FULLY_MIRRORED,
+    /** Some mirrored, some suppressed by target tombstones. */
+    PARTIALLY_MIRRORED,
+    /** Every source cell suppressed by target tombstones. */
+    FULLY_SHADOWED
+  }
+}
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index d09151b1e27..d782cceadfa 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -20,31 +20,19 @@
 import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.security.MessageDigest;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.TreeMap;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Row;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -53,6 +41,9 @@
 import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
 import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants;
 import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairRequest;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairResult;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.DriftCounters;
 import org.apache.phoenix.mapreduce.util.ConnectionUtil;
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
 import org.apache.phoenix.query.KeyRange;
@@ -89,6 +80,7 @@ public enum SyncCounters {
     TARGET_ROWS_PROCESSED,
     ROWS_MISSING_ON_TARGET,
     ROWS_EXTRA_ON_TARGET,
+    ROWS_DIFFERENT_ON_TARGET,
     ROWS_CANNOT_REPAIR,
     CELLS_MISSING_ON_TARGET,
     CELLS_EXTRA_ON_TARGET,
@@ -104,7 +96,6 @@ public enum SyncCounters {
   private long chunkSizeBytes;
   private boolean isRawScan;
   private boolean isReadAllVersions;
-  private int repairBatchSize;
   private Configuration conf;
   private Connection sourceConnection;
   private Connection targetConnection;
@@ -113,6 +104,7 @@ public enum SyncCounters {
   private byte[] physicalTableName;
   private List<KeyRange> regionKeyRanges;
   private PhoenixSyncTableOutputRepository syncTableOutputRepository;
+  private PhoenixSyncTableChunkRepairer chunkRepairer;
 
   @Override
   protected void setup(Context context) throws InterruptedException {
@@ -128,7 +120,7 @@ protected void setup(Context context) throws InterruptedException {
       chunkSizeBytes = PhoenixSyncTableTool.getPhoenixSyncTableChunkSizeBytes(conf);
       isRawScan = PhoenixSyncTableTool.getPhoenixSyncTableRawScan(conf);
       isReadAllVersions = PhoenixSyncTableTool.getPhoenixSyncTableReadAllVersions(conf);
-      repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf);
+      int repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf);
       extractRegionBoundariesFromSplit(context);
       sourceConnection = ConnectionUtil.getInputConnection(conf);
       pTable = sourceConnection.unwrap(PhoenixConnection.class).getTable(tableName);
@@ -136,6 +128,9 @@ protected void setup(Context context) throws InterruptedException {
       connectToTargetCluster();
       globalConnection = createGlobalConnection(conf);
       syncTableOutputRepository = new PhoenixSyncTableOutputRepository(globalConnection);
+      chunkRepairer = new PhoenixSyncTableChunkRepairer(sourceConnection, targetConnection, pTable,
+        physicalTableName, tableName, fromTime, toTime, isRawScan, isReadAllVersions,
+        repairBatchSize);
     } catch (Exception e) {
       tryClosingResources();
       throw new RuntimeException(
@@ -234,6 +229,15 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
     long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
     long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
     long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
+    long rowsMissingBefore = context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
+    long rowsExtraBefore = context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+    long rowsDifferentBefore =
+      context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
+    long rowsCannotRepairBefore = context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
+    long cellsMissingBefore = context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
+    long cellsExtraBefore = context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+    long cellsDifferentBefore =
+      context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
 
     // Process all unprocessed ranges in this region
     boolean isStartKeyInclusive = shouldStartKeyBeInclusive(regionStart, processedChunks);
@@ -256,10 +260,26 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
       context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue() - sourceRowsBefore;
     long targetRowsProcessed =
       context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue() - targetRowsBefore;
+    long rowsMissingOnTarget =
+      context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() - rowsMissingBefore;
+    long rowsExtraOnTarget =
+      context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() - rowsExtraBefore;
+    long rowsDifferentOnTarget =
+      context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue() - rowsDifferentBefore;
+    long rowsCannotRepair =
+      context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue() - rowsCannotRepairBefore;
+    long cellsMissingOnTarget =
+      context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() - cellsMissingBefore;
+    long cellsExtraOnTarget =
+      context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() - cellsExtraBefore;
+    long cellsDifferentOnTarget =
+      context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue() - cellsDifferentBefore;
 
     Timestamp regionEndTime = new Timestamp(System.currentTimeMillis());
-    String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-      .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed);
+    String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatMapper(
+      verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed,
+      rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair,
+      cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
     if (sourceRowsProcessed > 0) {
       recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks,
         mismatchedChunks, unrepairableChunks, repairFailedChunks, counters, context);
@@ -448,16 +468,33 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
             sourceChunk.rowCount, targetChunk.rowCount, matched);
         }
         sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis());
-        String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
-          .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L);
         if (matched) {
+          String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter
+            .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L, 0L);
           handleVerifiedChunk(sourceChunk, context, counters);
         } else {
-          handleMismatchedChunk(sourceChunk, context, counters);
-          if (!isDryRun) {
-            repairChunk(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd,
-              isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount,
-              targetChunk.rowCount, sourceChunk.executionStartTime, context);
+          ChunkRepairRequest request = new ChunkRepairRequest(sourceChunk.startKey,
+            sourceChunk.endKey, targetStart, targetEnd, isTargetStartKeyInclusive,
+            targetEndInclusive, sourceChunk.rowCount, targetChunk.rowCount,
+            sourceChunk.executionStartTime, isDryRun);
+          ChunkRepairResult result = chunkRepairer.repair(request, context::progress);
+          if (isDryRun) {
+            // Dry-run: write CHUNK/MISMATCHED with real row-level drift in COUNTERS so the
+            // checkpoint audit row matches the job counters. No CHUNK/REPAIRED row and no
+            // target mutations.
+            DriftCounters drift = result.drift;
+            context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET)
+              .increment(drift.rowsMissingOnTarget);
+            context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET)
+              .increment(drift.rowsExtraOnTarget);
+            context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET)
+              .increment(drift.rowsDifferentOnTarget);
+            String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(
+              sourceChunk.rowCount, targetChunk.rowCount, drift.rowsMissingOnTarget,
+              drift.rowsExtraOnTarget, drift.rowsDifferentOnTarget, 0L, 0L, 0L, 0L);
+            handleMismatchedChunk(sourceChunk, context, counters);
+          } else {
+            recordRepairOutcome(sourceChunk, request, result, context);
           }
         }
         previousSourceChunk = sourceChunk;
@@ -632,43 +669,22 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk,
   }
 
   /**
-   * Builds the common Scan shape used by verification, repair, and tombstone loading: key
-   * range, inclusivity, time window, cache-blocks, plus raw-scan and all-versions semantics
-   * controlled by the user's {@code --raw-scan} / {@code --read-all-versions} flags.
-   *
-   * Callers that need to force raw-scan or all-versions on a specific call (e.g., the
-   * tombstone loader, which must surface tombstones regardless of user flags) pass
-   * {@code forceRaw=true} or {@code forceAllVersions=true} to override.
-   *
-   * Callers layer on their own caching, limits, and coprocessor attributes. Keeping the
-   * base shared guarantees that the cells visited by repair are exactly the cells the
-   * verifier hashed (when both pass the same force flags).
+   * Creates an HBase scan for a chunk range. Honors the user's {@code --raw-scan} and
+   * {@code --read-all-versions} flags. For target-side scans, sets caching/limit to 1 to enable
+   * sequential partial-digest retrieval — each target chunk's digest feeds into the next until
+   * scanning completes.
    */
-  private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
-    boolean isEndKeyInclusive, boolean forceRaw, boolean forceAllVersions) throws IOException {
+  private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
+    boolean isEndKeyInclusive, boolean isTargetScan) throws IOException {
     Scan scan = new Scan();
     scan.withStartRow(startKey, isStartKeyInclusive);
     scan.withStopRow(endKey, isEndKeyInclusive);
-    scan.setRaw(forceRaw || isRawScan);
-    if (forceAllVersions || isReadAllVersions) {
+    scan.setRaw(isRawScan);
+    if (isReadAllVersions) {
       scan.readAllVersions();
     }
     scan.setCacheBlocks(false);
     scan.setTimeRange(fromTime, toTime);
-    return scan;
-  }
-
-  /**
-   * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all
-   * cell versions based on command-line options.
-   */
-  private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
-    boolean isEndKeyInclusive, boolean isTargetScan) throws IOException {
-    Scan scan =
-      createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false);
-    // Set limit and caching to 1 for sequential partial digest retrieval from target.
-    // Enables digest continuation: each target chunk's digest feeds into the next until scanning
-    // completes
     if (isTargetScan) {
       scan.setLimit(1);
       scan.setCaching(1);
@@ -788,649 +804,60 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
   }
 
   /**
-   * Builds a row-level HBase scan for repair. Differs from {@link #createChunkScan} in that it
-   * does NOT set {@code SYNC_TABLE_CHUNK_FORMATION} or {@code SYNC_TABLE_CHUNK_SIZE_BYTES}, so
-   * the scanner returns actual {@link Result} rows rather than coprocessor chunk metadata.
-   * Shares the {@link #createBaseScan} core (time range, raw-scan, all-versions, inclusivity)
-   * with verification so the cells visited here are the same cells that produced the chunk
-   * hash. Adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL} attributes.
-   */
-  private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
-    boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException {
-    Scan scan =
-      createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false);
-    scan.setCaching(1000);
-    ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn);
-    scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES);
-    return scan;
-  }
-
-  /**
-   * Loads the target row's tombstone index for shadow detection. Issues a single-row scan
-   * against target with raw=true and all-versions forced (regardless of user flags), so the
-   * tombstone subtypes that would otherwise be filtered out are surfaced. Live cells in the
-   * response are ignored — they were already visible to the repair scan and are handled by
-   * the merge logic. (HBase 2.x exposes {@code setRaw} only on Scan, not on Get — so a
-   * one-row scan stands in for what would otherwise be a raw Get.)
-   *
-   * Time range: the base scan applies {@code [fromTime, toTime]}, but shadow detection
-   * needs tombstones at ts >= fromTime regardless of the upper bound. A DeleteColumn /
-   * DeleteFamily at ts > toTime can still shadow a Put we mirror at ts in window during
-   * application reads (HBase tombstones don't respect the verifier's sync window). Lower
-   * bound stays at fromTime since tombstones below the window can't shadow anything we'd
-   * write inside the window.
-   */
-  private TargetRowTombstones loadTargetRowTombstones(byte[] rowKey, Table targetHTable)
-    throws IOException {
-    Scan scan = createBaseScan(rowKey, rowKey, true, true, true, true);
-    scan.setTimeRange(fromTime, Long.MAX_VALUE);
-    scan.setCaching(1);
-    scan.setLimit(1);
-    TargetRowTombstones tombstones = new TargetRowTombstones();
-    try (ResultScanner scanner = targetHTable.getScanner(scan)) {
-      Result raw = scanner.next();
-      if (raw != null) {
-        for (Cell cell : raw.rawCells()) {
-          // Record both tombstones (for shadow detection) and Puts (for hidden-version
-          // discovery during cmp > 0 tombstoning of target-only cells).
-          tombstones.record(cell);
-        }
-      }
-    }
-    return tombstones;
-  }
-
-  /**
-   * Lazily-built per-row Put and Delete mutations plus per-row unrepairable-drift state.
-   * Each mutation field is created on first use so a row that needs only Puts produces no
-   * Delete (and vice versa); a row that needs no mutation at all produces neither.
-   *
-   * Tombstone-index state is lazy: {@link #tombstones} is loaded only on the first
-   * shadow check via {@link #tombstones(Table)} (one raw scan per row at most).
-   * {@link #anyCellUnrepairable} accumulates whether the row carries any drift that
-   * repair could not act on — either a source-side Put was shadow-suppressed by an
-   * existing target tombstone, or a target-only tombstone exists that source lacks
-   * (HBase has no API to remove tombstones). The caller reads it after the merge to
-   * decide whether to bump {@link SyncCounters#ROWS_CANNOT_REPAIR}.
-   *
-   * After construction, callers append produced mutations to the pending batches via
-   * {@link #flush(List, List)}.
-   */
-  private final class RowRepairState {
-    private final byte[] rowKey;
-    Put put;
-    Delete delete;
-    TargetRowTombstones tombstones;
-    boolean anyCellUnrepairable;
-
-    RowRepairState(byte[] rowKey) {
-      this.rowKey = rowKey;
-    }
-
-    Put put() {
-      if (put == null) {
-        put = new Put(rowKey);
-      }
-      return put;
-    }
-
-    Delete delete() {
-      if (delete == null) {
-        delete = new Delete(rowKey);
-      }
-      return delete;
-    }
-
-    /**
-     * Lazily loads the target row's tombstone index on first call (one raw single-row scan
-     * via {@link #loadTargetRowTombstones}); cached thereafter for reuse across the row's
-     * subsequent shadow checks.
-     */
-    TargetRowTombstones targetRowTombstones(Table targetHTable) throws IOException {
-      if (tombstones == null) {
-        tombstones = loadTargetRowTombstones(rowKey, targetHTable);
-      }
-      return tombstones;
-    }
-
-    void flush(List<Put> pendingPuts, List<Delete> pendingDeletes) {
-      if (put != null) {
-        pendingPuts.add(put);
-      }
-      if (delete != null) {
-        pendingDeletes.add(delete);
-      }
-    }
-  }
-
-  /**
-   * Cell-level drift counts produced by {@link #diffCellsForRow}. Populated only for rows
-   * present on both clusters; whole-row drift is signaled by the caller directly at the
-   * {@code cmp != 0} branches in {@link #repairChunk}. Three counters partition the cell
-   * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
-   */
-  private static final class CellDriftCounts {
-    static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0);
-
-    final int missing;
-    final int extra;
-    final int different;
-
-    CellDriftCounts(int missing, int extra, int different) {
-      this.missing = missing;
-      this.extra = extra;
-      this.different = different;
-    }
-  }
-
-  /**
-   * Per-row index of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]}, built
-   * lazily from a single raw single-row scan with all-versions enabled. Used in two roles:
-   *
-   * <ol>
-   *   <li><b>Shadow detection</b> ({@link #wouldShadow}): would a source Put we're about
-   *       to mirror be suppressed by an existing target tombstone?</li>
-   *   <li><b>Hidden-version discovery</b> ({@link #targetPutTimestampsBetween}): what
-   *       max-versions-filtered target Puts sit between source's max ts at a column and
-   *       target's visible Put? Used in the {@code cmp > 0} tombstoning path to issue
-   *       point Deletes for hidden versions that would otherwise surface on read after
-   *       the visible Put is shadowed.</li>
-   * </ol>
-   *
-   * HBase has four tombstone subtypes, each with distinct shadow semantics:
-   *   Delete                — shadows a Put at {@code (cf, q, ts == T)} exactly
-   *   DeleteColumn          — shadows Puts at {@code (cf, q, ts <= T)}
-   *   DeleteFamily          — shadows Puts at {@code (cf, *, ts <= T)}
-   *   DeleteFamilyVersion   — shadows Puts at {@code (cf, *, ts == T)}
-   * {@link #wouldShadow(Cell)} consults all four indices and returns true on any match.
-   */
-  private static final class TargetRowTombstones {
-    private final Map<ColumnKey, Set<Long>> deletePointTs = new HashMap<>();
-    private final Map<ColumnKey, Long> deleteColumnUpperBound = new HashMap<>();
-    private final Map<ByteBuffer, Long> deleteFamilyUpperBound = new HashMap<>();
-    private final Map<ByteBuffer, Set<Long>> deleteFamilyVersionTs = new HashMap<>();
-    /** Per-column ts-ordered set of target's Put timestamps. */
-    private final Map<ColumnKey, NavigableMap<Long, Boolean>> targetPutTs = new HashMap<>();
-
-    void record(Cell cell) {
-      if (CellUtil.isDelete(cell)) {
-        recordTombstone(cell);
-      } else {
-        targetPutTs.computeIfAbsent(columnKey(cell), k -> new TreeMap<>())
-          .put(cell.getTimestamp(), Boolean.TRUE);
-      }
-    }
-
-    private void recordTombstone(Cell tombstone) {
-      long ts = tombstone.getTimestamp();
-      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone));
-      switch (tombstone.getType()) {
-        case Delete:
-          deletePointTs.computeIfAbsent(columnKey(tombstone), k -> new HashSet<>()).add(ts);
-          break;
-        case DeleteColumn:
-          deleteColumnUpperBound.merge(columnKey(tombstone), ts, Math::max);
-          break;
-        case DeleteFamily:
-          deleteFamilyUpperBound.merge(family, ts, Math::max);
-          break;
-        case DeleteFamilyVersion:
-          deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts);
-          break;
-        default:
-          // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here.
-      }
-    }
-
-    /** Returns true if any tombstone in this index would shadow a Put at the cell's coords. */
-    boolean wouldShadow(Cell sourcePut) {
-      long ts = sourcePut.getTimestamp();
-      ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut));
-      ColumnKey column = columnKey(sourcePut);
-
-      Set<Long> pointTs = deletePointTs.get(column);
-      if (pointTs != null && pointTs.contains(ts)) {
-        return true;
-      }
-      Long deleteColTs = deleteColumnUpperBound.get(column);
-      if (deleteColTs != null && ts <= deleteColTs) {
-        return true;
-      }
-      Long deleteFamTs = deleteFamilyUpperBound.get(family);
-      if (deleteFamTs != null && ts <= deleteFamTs) {
-        return true;
-      }
-      Set<Long> dfvTs = deleteFamilyVersionTs.get(family);
-      return dfvTs != null && dfvTs.contains(ts);
-    }
-
-    /**
-     * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than
-     * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find
-     * hidden (max-versions-filtered) target versions sitting between source's max ts and
-     * target's visible ts so they can be point-Deleted.
-     */
-    Set<Long> targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive,
-      long upperExclusive) {
-      NavigableMap<Long, Boolean> tss = targetPutTs.get(new ColumnKey(family, qualifier));
-      if (tss == null) {
-        return Collections.emptySet();
-      }
-      return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet();
-    }
-
-    private static ColumnKey columnKey(Cell cell) {
-      return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell));
-    }
-  }
-
-  /** Composite (family, qualifier) key with byte-array equality semantics. */
-  private static final class ColumnKey {
-    private final byte[] family;
-    private final byte[] qualifier;
-
-    ColumnKey(byte[] family, byte[] qualifier) {
-      this.family = family;
-      this.qualifier = qualifier;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof ColumnKey)) {
-        return false;
-      }
-      ColumnKey other = (ColumnKey) o;
-      return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier);
-    }
-
-    @Override
-    public int hashCode() {
-      return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier);
-    }
-  }
-
-  /**
-   * Result of {@link #diffCellsForRow}. Carries the cell-level drift counts plus a flag
-   * indicating whether any source-side mirror was suppressed because target tombstones
-   * would shadow it — making the row partially or fully unrepairable.
+   * Translates a {@link ChunkRepairResult} into MapReduce side effects: bumps the cell/row drift
+   * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED),
+   * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
+   * successful checkpoint write (audit row and counter stay consistent).
    */
-  private static final class RowDiffOutcome {
-    static final RowDiffOutcome NONE = new RowDiffOutcome(CellDriftCounts.NONE, false);
+  private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request,
+    ChunkRepairResult result, Context context) {
+    DriftCounters drift = result.drift;
+    context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(drift.rowsMissingOnTarget);
+    context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(drift.rowsExtraOnTarget);
+    context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(drift.rowsCannotRepair);
+    context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(drift.cellsMissingOnTarget);
+    context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(drift.cellsExtraOnTarget);
+    context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET)
+      .increment(drift.cellsDifferentOnTarget);
 
-    final CellDriftCounts cells;
-    final boolean rowCannotRepair;
-
-    RowDiffOutcome(CellDriftCounts cells, boolean rowCannotRepair) {
-      this.cells = cells;
-      this.rowCannotRepair = rowCannotRepair;
-    }
-  }
-
-  /**
-   * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing /
-   * extra on target, plus rows that cannot be repaired because target's row is entirely
-   * tombstones — HBase has no API to remove tombstones, only major compaction does) plus
-   * three cell-level (cells missing / extra / different on rows present on both clusters).
-   * Owns the bookkeeping that was previously scattered across {@link #repairChunk} — local
-   * accumulators, MapReduce job-counter increments, the
-   * {@link PhoenixSyncTableCheckpointOutputRow.CounterFormatter#formatChunk} call, and the
-   * end-of-chunk log line. Adding a new drift signal means touching this class and the one
-   * place in the merge loop that produces it; everything else (commit to job context,
-   * checkpoint COUNTERS string, log) flows through these methods.
-   */
-  private static final class DriftCounters {
-    long rowsMissingOnTarget;
-    long rowsExtraOnTarget;
-    long rowsCannotRepair;
-    long cellsMissingOnTarget;
-    long cellsExtraOnTarget;
-    long cellsDifferentOnTarget;
-
-    void addCellDrift(CellDriftCounts cellDrift) {
-      cellsMissingOnTarget += cellDrift.missing;
-      cellsExtraOnTarget += cellDrift.extra;
-      cellsDifferentOnTarget += cellDrift.different;
-    }
-
-    /** Increments the job's MapReduce counters with this chunk's drift totals. */
-    void commitTo(Context context) {
-      context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(rowsMissingOnTarget);
-      context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(rowsExtraOnTarget);
-      context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(rowsCannotRepair);
-      context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(cellsMissingOnTarget);
-      context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(cellsExtraOnTarget);
-      context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).increment(cellsDifferentOnTarget);
-    }
-
-    /** Formats the chunk's COUNTERS string for persistence in the checkpoint table. */
-    String formatChunkCounters(long verifySourceRows, long verifyTargetRows) {
-      return PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(verifySourceRows,
-        verifyTargetRows, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair,
-        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
-    }
+    String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(
+      request.verifySourceRows, request.verifyTargetRows, drift.rowsMissingOnTarget,
+      drift.rowsExtraOnTarget, drift.rowsDifferentOnTarget, drift.rowsCannotRepair,
+      drift.cellsMissingOnTarget, drift.cellsExtraOnTarget, drift.cellsDifferentOnTarget);
 
-    /** Compact end-of-chunk log line summarizing all six drift signals. */
-    String toLogString() {
-      return String.format(
-        "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsCannotRepair=%d, "
-          + "cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, cellsDifferentOnTarget=%d",
-        rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget,
-        cellsExtraOnTarget, cellsDifferentOnTarget);
-    }
-  }
-
-  /**
-   * Routes a source cell to the right mutation kind. Put cells go to a {@link Put}; tombstone
-   * cells go to a {@link Delete} via {@link Delete#add(Cell)} which preserves the tombstone's
-   * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under
-   * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells.
-   */
-  private void mirrorSourceCell(Cell cell, RowRepairState rowState) throws IOException {
-    if (CellUtil.isDelete(cell)) {
-      rowState.delete().add(cell);
-    } else {
-      rowState.put().add(cell);
-    }
-  }
-
-  /**
-   * Mirrors a source cell onto target only if no existing target tombstone would shadow
-   * the resulting Put. Lazily loads the target row's tombstone index on first Put cell
-   * (one raw single-row scan per row at most) via {@link RowRepairState#tombstones}. If
-   * the cell would be shadowed, the mirror is suppressed, {@code rowState.anyCellUnrepairable}
-   * is set, and the caller is expected to record {@link SyncCounters#ROWS_CANNOT_REPAIR}.
-   *
-   * Shadow detection only applies to Put cells. Tombstone source cells (under
-   * {@code --raw-scan}) bypass the check and always mirror via {@link #mirrorSourceCell}:
-   * {@link TargetRowTombstones#wouldShadow} is defined for live cells (does an existing target
-   * tombstone hide a new Put on read), and is not meaningful for delete-marker cells —
-   * mirroring a tombstone over an existing tombstone is a benign duplicate, not a suppression.
-   *
-   * @return {@code true} if the cell was mirrored, {@code false} if suppressed by shadowing.
-   */
-  private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
-      RowRepairState rowState) throws IOException {
-    if (!CellUtil.isDelete(cell) && rowState.targetRowTombstones(targetHTable).wouldShadow(cell)) {
-      rowState.anyCellUnrepairable = true;
-      return false;
-    }
-    mirrorSourceCell(cell, rowState);
-    return true;
-  }
-
-  /**
-   * Tombstones a target-only cell to make target's read view at this column match source's.
-   * Skips cells that are themselves already tombstones: HBase has no API to remove a
-   * tombstone cell — tombstones can only be reaped by major compaction once they age past
-   * the keep-deleted-cells window. Issuing another Delete at the same coordinates writes a
-   * duplicate marker, does not change the row's effective state, and only adds compaction
-   * load. Combined with the absence of a source-side counterpart to mirror, the right
-   * action is to leave the existing tombstone untouched.
-   *
-   * Tombstone subtype depends on what source has at this {@code (cf, q)}:
-   * <ul>
-   *   <li><b>Source has no cell at this column</b> — target's read at this column should
-   *       be empty. Use {@link Delete#addColumns(byte[], byte[], long)} (DeleteColumn,
-   *       scope {@code ts <= T}) so even max-versions-hidden older target versions are
-   *       shadowed. A point Delete would only shadow this exact ts, surfacing the next
-   *       hidden version on read.</li>
-   *   <li><b>Source's max ts at this column is >= target's ts</b> — point-Delete only
-   *       target's exact ts. Source's equal-or-higher ts mirror will surface; no hidden
-   *       version can surface above it.</li>
-   *   <li><b>Source's max ts at this column is &lt; target's ts</b> — point-Delete target's
-   *       ts, AND point-Delete every max-versions-hidden Put on target with ts in
-   *       {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is
-   *       shadowed, the next hidden version surfaces above source's mirror — silent
-   *       divergence. Hidden versions are discovered via the row's tombstone+Put index
-   *       loaded from a single raw all-versions scan.</li>
-   * </ul>
-   *
-   * Pre-build {@code sourceMaxTsByColumn} once per row from the source cell array.
-   *
-   * @return true if the cell was a live cell that contributed a tombstone marker, false if
-   *         the cell was already a tombstone and was skipped.
-   */
-  private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairState rowState,
-    Map<ColumnKey, Long> sourceMaxTsByColumn) throws IOException {
-    if (CellUtil.isDelete(cell)) {
-      return false;
-    }
-    byte[] family = CellUtil.cloneFamily(cell);
-    byte[] qualifier = CellUtil.cloneQualifier(cell);
-    long ts = cell.getTimestamp();
-    Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier));
-    if (sourceMaxTs == null) {
-      // Source has no cell at this column; shadow ts <= T so hidden older versions
-      // don't surface on read.
-      rowState.delete().addColumns(family, qualifier, ts);
-    } else if (sourceMaxTs >= ts) {
-      // Source's mirror is at >= target's ts; point-Delete only target's exact ts.
-      rowState.delete().addColumn(family, qualifier, ts);
-    } else {
-      // Source's max ts at this column < target's ts. Point-Delete target's visible cell,
-      // AND point-Delete every hidden target Put in (sourceMaxTs, ts) so they don't
-      // surface above source's mirror after the visible cell is shadowed.
-      rowState.delete().addColumn(family, qualifier, ts);
-      Set<Long> hiddenTs = rowState.targetRowTombstones(targetHTable)
-        .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts);
-      for (Long hidden : hiddenTs) {
-        rowState.delete().addColumn(family, qualifier, hidden);
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Outcome of {@link #mirrorWholeRow}. {@code FULLY_MIRRORED} = every source cell mirrored
-   * onto target with no shadowing. {@code PARTIALLY_MIRRORED} = at least one cell mirrored,
-   * at least one suppressed by an existing target tombstone (caller bumps both
-   * ROWS_MISSING_ON_TARGET and ROWS_CANNOT_REPAIR). {@code FULLY_SHADOWED} = every source
-   * cell suppressed; the row is unrepairable until target's tombstones are reaped (caller
-   * bumps only ROWS_CANNOT_REPAIR).
-   */
-  private enum WholeRowMirrorOutcome {
-    FULLY_MIRRORED, PARTIALLY_MIRRORED, FULLY_SHADOWED
-  }
-
-  /**
-   * Mirrors every source cell of a row that is missing on target. Source cells route by
-   * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via
-   * {@link Delete#add(Cell)}. Each cell is shadow-checked against existing target
-   * tombstones (lazy raw single-row scan on first cell) — even though target's filtered scan
-   * returned no cells for this row, target may carry tombstones that would shadow our Put.
-   */
-  private WholeRowMirrorOutcome mirrorWholeRow(Result sourceResult, Table targetHTable,
-    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
-    RowRepairState rowState = new RowRepairState(sourceResult.getRow());
-    int mirrored = 0;
-    for (Cell cell : sourceResult.rawCells()) {
-      if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowState)) {
-        mirrored++;
-      }
-    }
-    rowState.flush(pendingPuts, pendingDeletes);
-    if (mirrored == 0) {
-      return WholeRowMirrorOutcome.FULLY_SHADOWED;
-    }
-    return rowState.anyCellUnrepairable ? WholeRowMirrorOutcome.PARTIALLY_MIRRORED
-      : WholeRowMirrorOutcome.FULLY_MIRRORED;
-  }
-
-  /**
-   * Tombstones every live cell of a row that is extra on target. Existing tombstones on the
-   * target row are skipped — HBase cannot remove tombstone cells; only major compaction
-   * reaps them.
-   *
-   * @return the number of live cells that contributed a tombstone marker. {@code 0} means
-   *         the row was already entirely tombstones — repair could not act on it, and the
-   *         caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}.
-   */
-  private int tombstoneWholeRow(Result targetResult, Table targetHTable,
-    List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
-    RowRepairState rowState = new RowRepairState(targetResult.getRow());
-    // Source has no row at all here, so source has no cell at any column — every target
-    // cell hits the "shadow ts <= T" path inside tombstoneTargetCell.
-    Map<ColumnKey, Long> sourceMaxTsByColumn = Collections.emptyMap();
-    int liveCellsTombstoned = 0;
-    for (Cell cell : targetResult.rawCells()) {
-      if (tombstoneTargetCell(cell, targetHTable, rowState, sourceMaxTsByColumn)) {
-        liveCellsTombstoned++;
-      }
-    }
-    rowState.flush(pendingPuts, pendingDeletes);
-    return liveCellsTombstoned;
-  }
-
-  /**
-   * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator}
-   * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the
-   * pending lists. Returns a {@link RowDiffOutcome} carrying both the cell-level drift
-   * counts and a flag indicating whether any source-side mirror was suppressed by target
-   * tombstone shadowing — letting the caller decide whether to bump
-   * {@link SyncCounters#ROWS_CANNOT_REPAIR}.
-   *
-   * Branches:
-   *   same coords + matching value         → no drift, no signal
-   *   same coords + different value        → different++; mirror source cell (shadow-checked)
-   *   source-only cell at unique coords    → missing++;   mirror source cell (shadow-checked)
-   *   target-only live cell at unique coords → extra++;   tombstone target cell
-   *   target-only tombstone cell           → skip (HBase cannot remove tombstones)
-   *
-   * Cells whose mirror is suppressed by shadowing do NOT bump the cell counter — the
-   * cell wasn't written to target, so it isn't repaired drift. The row-level shadow
-   * signal is surfaced via {@link RowDiffOutcome#rowCannotRepair}.
-   */
-  private RowDiffOutcome diffCellsForRow(Result sourceResult, Result targetResult,
-    Table targetHTable, List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
-    Cell[] sourceCells = sourceResult.rawCells();
-    Cell[] targetCells = targetResult.rawCells();
-    CellComparator comparator = CellComparator.getInstance();
-
-    // Always use the lazy raw single-row fetch path for shadow detection. Even under
-    // --raw-scan, the merge-scan's targetCells came from a time-range-filtered scan and
-    // would not surface tombstones at ts > toTime — and those out-of-window tombstones
-    // can still shadow Puts we mirror inside the window during application reads. The
-    // lazy fetch in {@link RowRepairState#tombstones} loads tombstones at ts >= fromTime
-    // regardless of upper bound, catching that case correctly.
-    RowRepairState rowState = new RowRepairState(sourceResult.getRow());
-
-    // Pre-compute the set of (cf, q) coordinates source has any cell at. tombstoneTargetCell
-    // uses this to choose between point-Delete (when source has the column at some ts —
-    // mirrored cell will surface on read) and DeleteColumn (when source has nothing at the
-    // column — must shadow ts <= T to keep max-versions-hidden older versions invisible.
-    // The map records source's max ts at each column. When tombstoning a target cell at a
-    // column source has at lower ts, hidden target Puts in (sourceMax, targetTs) need
-    // their own point Deletes too — otherwise after the visible cell is shadowed, a hidden
-    // version surfaces above source's mirror.
-    Map<ColumnKey, Long> sourceMaxTsByColumn = new HashMap<>();
-    for (Cell sourceCell : sourceCells) {
-      if (!CellUtil.isDelete(sourceCell)) {
-        ColumnKey key = new ColumnKey(CellUtil.cloneFamily(sourceCell),
-          CellUtil.cloneQualifier(sourceCell));
-        sourceMaxTsByColumn.merge(key, sourceCell.getTimestamp(), Math::max);
-      }
-    }
-
-    int cellMissing = 0;
-    int cellExtra = 0;
-    int cellDifferent = 0;
-
-    int sourceIdx = 0;
-    int targetIdx = 0;
-    while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) {
-      int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]);
-      if (cmp == 0) {
-        // Same coordinates; CellComparator does not compare values, check separately.
-        if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) {
-          if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
-            cellDifferent++;
-          }
-        }
-        sourceIdx++;
-        targetIdx++;
-      } else if (cmp < 0) {
-        if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
-          cellMissing++;
-        }
-        sourceIdx++;
-      } else {
-        // Target-only cell. Live cell → tombstone it (cellExtra++). Tombstone cell →
-        // can't act on it (HBase has no API to remove tombstones), so the row carries
-        // unrepairable drift and re-verify will mismatch.
-        if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState,
-          sourceMaxTsByColumn)) {
-          cellExtra++;
-        } else {
-          rowState.anyCellUnrepairable = true;
-        }
-      }
-    }
-    while (sourceIdx < sourceCells.length) {
-      if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) {
-        cellMissing++;
-      }
-      sourceIdx++;
-    }
-    while (targetIdx < targetCells.length) {
-      if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState,
-        sourceMaxTsByColumn)) {
-        cellExtra++;
-      } else {
-        rowState.anyCellUnrepairable = true;
-      }
-    }
-
-    if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowState.anyCellUnrepairable) {
-      return RowDiffOutcome.NONE;
-    }
-    rowState.flush(pendingPuts, pendingDeletes);
-    return new RowDiffOutcome(new CellDriftCounts(cellMissing, cellExtra, cellDifferent),
-      rowState.anyCellUnrepairable);
-  }
-
-  /**
-   * Flushes the accumulated Put and Delete batches to the target HTable as a single mixed
-   * RPC and clears both lists. Called every {@code repairBatchSize} rows and once more at
-   * the end of a chunk.
-   *
-   * Issuing both Puts and Deletes via {@link Table#batch} (one network round-trip) instead
-   * of separate {@code put()} + {@code delete()} calls (two round-trips) eliminates the
-   * inter-RPC failure window where a JVM/regionserver crash between the two would leave
-   * target with Puts applied but matching Deletes not yet tombstoned. Server-side, batch
-   * mutations are still applied per-row sequentially, so a regionserver crash mid-batch
-   * can still leave partial application — but the mid-flush gap on the client side is
-   * gone.
-   *
-   * {@link InterruptedException} from {@code batch} is converted to {@link IOException}
-   * so the existing per-chunk catch path treats interruption like any other transient
-   * write failure (chunk → REPAIR_FAILED, retry on next invocation). The interrupt flag is
-   * restored so an outer cancellation still observes the interrupted state.
-   */
-  private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delete> deletes)
-    throws IOException {
-    if (puts.isEmpty() && deletes.isEmpty()) {
-      return;
-    }
-    List<Row> mutations = new ArrayList<>(puts.size() + deletes.size());
-    mutations.addAll(puts);
-    mutations.addAll(deletes);
-    Object[] results = new Object[mutations.size()];
-    try {
-      targetHTable.batch(mutations, results);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new IOException("Interrupted while flushing repair mutations", e);
-    }
-    puts.clear();
-    deletes.clear();
-  }
-
-  /**
-   * Writes a chunk-level checkpoint row and bumps the matching outcome counter, as a single
-   * "this attempt is recorded" unit. The outcome counter is bumped only after a successful
-   * checkpoint write, so on-disk audit and in-memory counters stay in sync.
+    PhoenixSyncTableCheckpointOutputRow.Status status;
+    SyncCounters outcomeCounter;
+    switch (result.status) {
+      case REPAIRED:
+        status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
+        outcomeCounter = SyncCounters.CHUNKS_REPAIRED;
+        break;
+      case UNREPAIRABLE:
+        status = PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE;
+        outcomeCounter = SyncCounters.CHUNKS_UNREPAIRABLE;
+        break;
+      case REPAIR_FAILED:
+        status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED;
+        outcomeCounter = SyncCounters.CHUNKS_REPAIR_FAILED;
+        break;
+      default:
+        throw new IllegalStateException("Unexpected repair status: " + result.status);
+    }
+
+    writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName)
+      .setTargetCluster(targetZkQuorum).setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK)
+      .setFromTime(fromTime).setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun)
+      .setStartRowKey(sourceChunk.startKey).setEndRowKey(sourceChunk.endKey).setStatus(status)
+      .setExecutionStartTime(request.verifyStartTime).setExecutionEndTime(result.endTime)
+      .setCounters(counters).build(), outcomeCounter, context);
+  }
+
+  /**
+   * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome
+   * counter is bumped only after a successful checkpoint write, so on-disk audit and in-memory
+   * counters stay in sync.
    *
-   * If the checkpoint write throws {@link SQLException}, the failure is logged and the
+   * <p>If the checkpoint write throws {@link SQLException}, the failure is logged and the
    * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is
    * NOT propagated. Reasons:
    * <ul>
@@ -1458,174 +885,6 @@ private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row,
     }
   }
 
-  /**
-   * Performs row-level repair for a mismatched chunk by merge-scanning source and target
-   * cluster data and applying targeted mutations to target. The two scan ranges may differ:
-   * the verifier reads target over a wider range than source (covers extra-on-target rows
-   * that fall between consecutive source chunks); repair must mirror the same boundaries so
-   * those extras are visible here as {@code cmp > 0} rows and get deleted.
-   *
-   * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee).
-   *   cmp == 0 (same row): compare cells; repair if different.
-   *   cmp <  0 (source-only): Put all source cells.
-   *   cmp >  0 (target-only): Delete target cells within [fromTime, toTime].
-   *
-   * Cells outside [fromTime, toTime] are never read (scan time range), so never mutated.
-   *
-   * Only called when isDryRun == false.
-   *
-   * @param sourceStart           Source chunk start key (also the checkpoint PK) — inclusive
-   * @param sourceEnd             Source chunk end key (also the checkpoint PK) — inclusive
-   * @param targetStart           Target scan start (matches verifier-side boundary)
-   * @param targetEnd             Target scan end (matches verifier-side boundary)
-   * @param targetStartInclusive  Inclusivity of target scan start — matches verify side
-   * @param targetEndInclusive    Inclusivity of target scan end — matches verify side
-   * @param verifyStartTime       When the verify pass began for this chunk; reused as the
-   *                              REPAIRED row's START_TIME so the row spans the full
-   *                              verify+repair lifecycle that overwrites the MISMATCHED row.
-   */
-  private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart,
-    byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive,
-    long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context)
-    throws IOException, SQLException {
-    DriftCounters driftCounters = new DriftCounters();
-
-    LOGGER.info(
-      "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}",
-      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd),
-      targetStartInclusive ? "[" : "(", Bytes.toStringBinary(targetStart),
-      Bytes.toStringBinary(targetEnd), targetEndInclusive ? "]" : ")", tableName);
-
-    PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class);
-    PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class);
-
-    Scan sourceScan = createRepairScan(sourceStart, sourceEnd, true, true, sourcePhoenixConn);
-    Scan targetScan = createRepairScan(targetStart, targetEnd, targetStartInclusive,
-      targetEndInclusive, targetPhoenixConn);
-
-    List<Put> pendingPuts = new ArrayList<>();
-    List<Delete> pendingDeletes = new ArrayList<>();
-
-    try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName);
-      Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName);
-      ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan);
-      ResultScanner targetScanner = targetHTable.getScanner(targetScan)) {
-
-      Result sourceResult = sourceScanner.next();
-      Result targetResult = targetScanner.next();
-
-      while (sourceResult != null || targetResult != null) {
-        int cmp;
-        if (sourceResult == null) {
-          cmp = 1;
-        } else if (targetResult == null) {
-          cmp = -1;
-        } else {
-          cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow());
-        }
-
-        // Drift signals are bumped at the branch that semantically caused them: row-level
-        // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch.
-        // ROWS_CANNOT_REPAIR is bumped whenever any source mirror is suppressed by an
-        // existing target tombstone (or the cmp > 0 row was already entirely tombstones).
-        if (cmp == 0) {
-          // Same row key on both clusters — diff at cell level and repair only if cells differ.
-          RowDiffOutcome outcome = diffCellsForRow(sourceResult, targetResult, targetHTable,
-            pendingPuts, pendingDeletes);
-          driftCounters.addCellDrift(outcome.cells);
-          if (outcome.rowCannotRepair) {
-            driftCounters.rowsCannotRepair++;
-          }
-          sourceResult = sourceScanner.next();
-          targetResult = targetScanner.next();
-        } else if (cmp < 0) {
-          // Source-only row — mirror it onto target. Even though target's filtered scan
-          // returned no row at this key, target may carry tombstones that would shadow
-          // some or all of the Puts.
-          WholeRowMirrorOutcome outcome =
-            mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes);
-          if (outcome != WholeRowMirrorOutcome.FULLY_SHADOWED) {
-            driftCounters.rowsMissingOnTarget++;
-          }
-          if (outcome != WholeRowMirrorOutcome.FULLY_MIRRORED) {
-            driftCounters.rowsCannotRepair++;
-          }
-          sourceResult = sourceScanner.next();
-        } else {
-          // Target-only row — tombstone its live cells. If the row is already entirely
-          // tombstones, repair has nothing to do (HBase cannot remove tombstones; only
-          // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can
-          // see the unrepairable drift volume.
-          int liveCellsTombstoned =
-            tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes);
-          if (liveCellsTombstoned == 0) {
-            driftCounters.rowsCannotRepair++;
-          } else {
-            driftCounters.rowsExtraOnTarget++;
-          }
-          targetResult = targetScanner.next();
-        }
-
-        if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) {
-          flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
-        }
-        context.progress();
-      }
-      flushRepairMutations(targetHTable, pendingPuts, pendingDeletes);
-    } catch (IOException e) {
-      // Per-chunk fault isolation. Mark this chunk REPAIR_FAILED, increment the counter,
-      // and return so the mapper continues with the next chunk. Phase 2's STATUS filter
-      // (VERIFIED, REPAIRED) excludes REPAIR_FAILED, so a re-run will re-attempt this chunk
-      // as an unprocessed gap.
-      LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}",
-        Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName,
-        e.getMessage(), e);
-
-      Timestamp failedAt = new Timestamp(System.currentTimeMillis());
-      // Capture partial progress in the COUNTERS column for triage.
-      String failedCounters =
-        driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
-      writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetZkQuorum)
-        .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
-        .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun)
-        .setStartRowKey(sourceStart).setEndRowKey(sourceEnd)
-        .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)
-        .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt)
-        .setCounters(failedCounters).build(), SyncCounters.CHUNKS_REPAIR_FAILED, context);
-      return;
-    }
-
-    driftCounters.commitTo(context);
-    // Chunk transitions to UNREPAIRABLE if any row landed in ROWS_CANNOT_REPAIR — operator
-    // intervention (typically major compaction on target to reap shadowing tombstones) is
-    // needed before re-running the tool. Otherwise REPAIRED.
-    boolean unrepairable = driftCounters.rowsCannotRepair > 0;
-    PhoenixSyncTableCheckpointOutputRow.Status status = unrepairable
-      ? PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE
-      : PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED;
-
-    Timestamp repairEndTime = new Timestamp(System.currentTimeMillis());
-    String repairCounters =
-      driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows);
-
-    // Write checkpoint first; outcome counter is bumped inside writeChunkCheckpoint only
-    // on a successful write so the audit row and the counter stay consistent.
-    writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder()
-      .setTableName(tableName).setTargetCluster(targetZkQuorum)
-      .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime)
-      .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart)
-      .setEndRowKey(sourceEnd).setStatus(status)
-      .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime)
-      .setCounters(repairCounters).build(),
-      unrepairable ? SyncCounters.CHUNKS_UNREPAIRABLE : SyncCounters.CHUNKS_REPAIRED, context);
-
-    LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}",
-      Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), status,
-      driftCounters.toLogString());
-  }
-
-  @Override
   protected void cleanup(Context context) throws IOException, InterruptedException {
     tryClosingResources();
     super.cleanup(context);

From a24c6e4dab91502b89bb3bc1df15e8742c540142 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Thu, 4 Jun 2026 18:01:17 +0530
Subject: [PATCH 10/18] checkpointing test implementation

---
 .../apache/phoenix/jdbc/PhoenixDriver.java    |    4 +-
 .../phoenix/jdbc/PhoenixEmbeddedDriver.java   |   85 +-
 .../apache/phoenix/monitoring/MetricType.java |    2 -
 .../ConnectionQueryServicesMetrics.java       |    3 +-
 .../PhoenixSyncTableChunkRepairer.java        |   17 +-
 .../mapreduce/PhoenixSyncTableMapper.java     |    6 +
 .../end2end/PhoenixSyncTableToolIT.java       | 2360 ++++++++++++-----
 .../ConnectionQueryServicesMetricsIT.java     |   22 -
 .../PhoenixSyncTableOutputRepositoryTest.java |  227 ++
 ...ectionQueryServicesMetricsManagerTest.java |   21 -
 10 files changed, 2036 insertions(+), 711 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
index f3784183555..953bbc5bd57 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java
@@ -41,7 +41,6 @@
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesImpl;
 import org.apache.phoenix.query.QueryServicesOptions;
-import org.apache.phoenix.util.EnvironmentEdgeManager;
 import org.apache.phoenix.util.PropertiesUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -206,7 +205,6 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
-    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment();
     if (!acceptsURL(url)) {
       GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
@@ -215,7 +213,7 @@ public Connection connect(String url, Properties info) throws SQLException {
     lockInterruptibly(LockMode.READ);
     try {
       checkClosed();
-      return createConnection(url, info, connectionStartTime);
+      return createConnection(url, info);
     } catch (SQLException sqlException) {
       if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) {
         GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment();
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
index ea1d416fd8a..524789464b4 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
@@ -17,8 +17,6 @@
  */
 package org.apache.phoenix.jdbc;
 
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
-import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM;
 
 import java.sql.Connection;
@@ -26,18 +24,17 @@
 import java.sql.DriverPropertyInfo;
 import java.sql.SQLException;
 import java.sql.SQLFeatureNotSupportedException;
-import java.util.List;
-import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.logging.Logger;
 import javax.annotation.concurrent.Immutable;
 import org.apache.phoenix.coprocessorclient.MetaDataProtocol;
-import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
-import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager;
 import org.apache.phoenix.query.ConnectionQueryServices;
 import org.apache.phoenix.query.QueryServices;
-import org.apache.phoenix.util.*;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.apache.phoenix.util.SQLCloseable;
 
 import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap;
 
@@ -53,13 +50,13 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable {
   private final static String DNC_JDBC_PROTOCOL_SUFFIX = "//";
   private final static String DRIVER_NAME = "PhoenixEmbeddedDriver";
   private static final String TEST_URL_AT_END =
-    "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM;
+      "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM;
   private static final String TEST_URL_IN_MIDDLE =
-    TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR;
+      TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR;
 
   private static final String[] SUPPORTED_PROTOCOLS =
-    new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK,
-      PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC };
+      new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK,
+          PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC };
 
   private final static DriverPropertyInfo[] EMPTY_INFO = new DriverPropertyInfo[0];
   public final static String MAJOR_VERSION_PROP = "DriverMajorVersion";
@@ -67,9 +64,9 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable {
   public final static String DRIVER_NAME_PROP = "DriverName";
 
   public static final ReadOnlyProps DEFAULT_PROPS =
-    new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP,
-      Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP,
-      Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME));
+      new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP,
+          Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP,
+          Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME));
 
   PhoenixEmbeddedDriver() {
   }
@@ -122,59 +119,31 @@ public boolean acceptsURL(String url) throws SQLException {
 
   @Override
   public Connection connect(String url, Properties info) throws SQLException {
-    long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis();
     if (!acceptsURL(url)) {
       return null;
     }
 
-    return createConnection(url, info, connectionStartTime);
+    return createConnection(url, info);
   }
 
-  protected final Connection createConnection(String url, Properties info,
-      long connectionCreationTime) throws SQLException {
+  protected final Connection createConnection(String url, Properties info) throws SQLException {
     Properties augmentedInfo = PropertiesUtil.deepCopy(info);
     augmentedInfo.putAll(getDefaultProps().asMap());
-    Connection connection = null;
-    try {
-      if (url.contains("|")) {
-        // Get HAURLInfo to pass it to connection creation
-        HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
-        // High availability connection using two clusters
-        Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
-        if (haGroup.isPresent()) {
-          connection = haGroup.get().connect(augmentedInfo, haurlInfo);
-          setPhoenixConnectionTime(connectionCreationTime, connection);
-          return connection;
-        } else {
-          // If empty HA group is returned, fall back to single cluster.
-          url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
-              () -> new SQLException(
-                  "HA group can not be initialized, fallback to single cluster"));
-        }
-      }
-      ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
-      connection = cqs.connect(url, augmentedInfo);
-      setPhoenixConnectionTime(connectionCreationTime, connection);
-      Map<String, List<ConnectionQueryServicesMetric>> metrics =
-          ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-      if (!metrics.isEmpty()) {
-        List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-      }
-      return connection;
-    } catch (SQLException e) {
-      if (connection != null) {
-        connection.close();
+    if (url.contains("|")) {
+      // Get HAURLInfo to pass it to connection creation
+      HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo);
+      // High availability connection using two clusters
+      Optional<HighAvailabilityGroup> haGroup = HighAvailabilityGroup.get(url, augmentedInfo);
+      if (haGroup.isPresent()) {
+        return haGroup.get().connect(augmentedInfo, haurlInfo);
+      } else {
+        // If empty HA group is returned, fall back to single cluster.
+        url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
+            () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
       }
-      throw e;
     }
-  }
-
-  private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) {
-    String connectionQueryServiceName =
-        ((PhoenixConnection) connection).getQueryServices().getConfiguration()
-            .get(QUERY_SERVICES_NAME);
-    ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
-        PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime);
+    ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
+    return cqs.connect(url, augmentedInfo);
   }
 
   /**
@@ -186,7 +155,7 @@ private void setPhoenixConnectionTime(long connectionCreationTime, Connection co
    * @return new or cached QuerySerices used to establish a new Connection.
    */
   protected abstract ConnectionQueryServices getConnectionQueryServices(String url, Properties info)
-    throws SQLException;
+      throws SQLException;
 
   @Override
   public int getMajorVersion() {
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
index de559e4aff4..ff80705c0d4 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java
@@ -236,8 +236,6 @@ public enum MetricType {
   PHOENIX_CONNECTIONS_FAILED_COUNTER("cf",
     "Number of client Phoenix Connections Failed to open" + ", not including throttled connections",
     LogLevel.OFF, PLong.INSTANCE),
-  PHOENIX_CONNECTION_CREATION_TIME_MS("cct",
-      "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.",
     LogLevel.DEBUG, PLong.INSTANCE),
   CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.",
diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index 8c3ac719d27..c87e9ad7984 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -40,8 +40,7 @@ public enum QueryServiceMetrics {
     CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER(
       OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER),
     CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER(
-      PHOENIX_CONNECTIONS_THROTTLED_COUNTER),
-    CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS);
+      PHOENIX_CONNECTIONS_THROTTLED_COUNTER);
 
     private MetricType metricType;
     private ConnectionQueryServicesMetric metric;
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
index 6a1da24bc45..4afdbcf9130 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
@@ -375,15 +375,16 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
     CellComparator comparator = CellComparator.getInstance();
     RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow());
 
-    // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the
-    // tombstone shape for a target-extra cell. Three cases:
-    //   key absent              → DeleteColumn at target's ts (wipe hidden older versions too)
+    // Per-column max source PUT timestamp; consumed by tombstoneTargetCell (target-extra
+    // cell) to pick the delete shape. Three cases:
+    //   key absent              → DeleteColumn at target's ts (also wipes older hidden versions)
     //   sourceMaxTs >= targetTs → point Delete at target's ts
-    //   sourceMaxTs <  targetTs → point Delete at target's ts AND every hidden target Put in
-    //                             (sourceMaxTs, targetTs); else hidden version surfaces.
-    // Math::max collapses multi-version source so the third case only fires when target sits above
-    // ALL of source's versions.
-    // Check tombstoneTargetCell() for its usage.
+    //   sourceMaxTs <  targetTs → point Delete at target's ts, PLUS a point Delete for every
+    //                             hidden target Put whose ts ∈ (sourceMaxTs, targetTs); without
+    //                             these, the next hidden Put surfaces above source's mirror
+    //                             once target's visible cell is shadowed.
+    // Math::max collapses multi-version source, so case 3 only fires when target sits above
+    // ALL source versions at this column. See tombstoneTargetCell() for the application.
     Map<ColumnKey, Long> sourceMaxTsByColumn = new HashMap<>();
     for (Cell sourceCell : sourceCells) {
       if (!CellUtil.isDelete(sourceCell)) {
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index d782cceadfa..aa19bcf1a89 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -808,9 +808,15 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
    * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED),
    * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
    * successful checkpoint write (audit row and counter stay consistent).
+   *
+   * <p>{@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and
+   * target hashes differed — the drift-detected signal — regardless of whether repair ran.
+   * Without this, repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks
+   * == 0} for fully-repaired regions and roll them up as VERIFIED instead of REPAIRED.
    */
   private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request,
     ChunkRepairResult result, Context context) {
+    context.getCounter(SyncCounters.CHUNKS_MISMATCHED).increment(1);
     DriftCounters drift = result.drift;
     context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(drift.rowsMissingOnTarget);
     context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(drift.rowsExtraOnTarget);
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 599f18337d8..0dc84f5cf6c 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -39,13 +39,17 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
@@ -60,12 +64,15 @@
 import org.apache.phoenix.jdbc.PhoenixConnection;
 import org.apache.phoenix.jdbc.PhoenixDriver;
 import org.apache.phoenix.mapreduce.PhoenixSyncTableCheckpointOutputRow;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableInputFormat;
 import org.apache.phoenix.mapreduce.PhoenixSyncTableMapper.SyncCounters;
 import org.apache.phoenix.mapreduce.PhoenixSyncTableOutputRepository;
 import org.apache.phoenix.mapreduce.PhoenixSyncTableTool;
 import org.apache.phoenix.query.BaseTest;
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.SchemaUtil;
 import org.apache.phoenix.util.TestUtil;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -156,104 +163,57 @@ public void testSyncTableValidateWithDataDifference() throws Exception {
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
-    // Pin the time window so both runs share the same checkpoint PK
-    // (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY).
-    // Without this, runSyncToolWithLargeChunks would assign a fresh System.currentTimeMillis()
-    // to --to-time on each call and the repair pass would append new rows instead of
-    // overwriting the dry-run pass's MISMATCHED rows.
-    String fromTime = "0";
-    String toTime = String.valueOf(System.currentTimeMillis());
-
-    // First run: --dry-run, only detect mismatches.
-    Job job = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", fromTime,
-      "--to-time", toTime);
-    SyncCountersResult counters = getSyncCounters(job);
+    // Pin the time window so the dry-run and repair share the same checkpoint PK.
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
 
-    validateSyncCounters(counters, 10, 10, 1, 3);
-    validateMapperCounters(counters, 1, 3);
-    assertEquals("Expected 4 mapper task to be created", 4, counters.taskCreated);
+    // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows.
+    Job dryRunJob = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
+
+    validateSyncCounters(dryRunCounters, 10, 10, 1, 3);
+    validateMapperCounters(dryRunCounters, 1, 3);
+    assertEquals("Expected 4 mapper task to be created", 4, dryRunCounters.taskCreated);
+    // Dry-run row-level logging should flag the 3 same-key/different-value rows as
+    // ROWS_DIFFERENT_ON_TARGET; nothing missing or extra (replication seeded both sides
+    // with the same row keys before introduceAndVerifyTargetDifferences mutated three).
+    assertEquals("Dry-run should detect 3 rows different on target", 3,
+      dryRunCounters.rowsDifferentOnTarget);
+    assertEquals("Dry-run should report 0 rows missing on target", 0,
+      dryRunCounters.rowsMissingOnTarget);
+    assertEquals("Dry-run should report 0 rows extra on target", 0,
+      dryRunCounters.rowsExtraOnTarget);
 
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4,
       3, null);
 
-    // Second run: no --dry-run, repair the mismatched chunks. Same time window so the
-    // PK matches and CHUNK/REPAIRED overwrites CHUNK/MISMATCHED in place.
-    Job repairJob =
-      runSyncToolWithLargeChunks(uniqueTableName, "--from-time", fromTime, "--to-time", toTime);
+    // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in
+    // place.
+    Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     Counters repairCounters = repairJob.getCounters();
-
-    // The repair re-verifies the previously MISMATCHED chunks (excluded by Phase 2 filter)
-    // and now repairs them, producing CHUNK/REPAIRED + REGION/REPAIRED checkpoint rows.
-    long chunksRepaired = repairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue();
-    long chunksRepairFailed =
-      repairCounters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
-    long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
-    long mappersRepairFailed =
-      repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
-    long rowsMissingOnTarget =
-      repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
-    long rowsExtraOnTarget =
-      repairCounters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
-    long rowsCannotRepair =
-      repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
-    long cellsMissingOnTarget =
-      repairCounters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
-    long cellsExtraOnTarget =
-      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
-    long cellsDifferentOnTarget =
-      repairCounters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
-    assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired);
-    assertEquals("No chunk repair should fail", 0, chunksRepairFailed);
-    assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired);
-    assertEquals("No mapper repair should fail", 0, mappersRepairFailed);
-    // The three drifted rows exist on both clusters (only NAME values were modified on
-    // target via separate upserts, producing cells at different timestamps), so all drift
-    // is cell-level — no whole-row missing or extra signals.
-    assertEquals("No whole rows should be missing on target", 0, rowsMissingOnTarget);
-    assertEquals("No whole rows should be extra on target", 0, rowsExtraOnTarget);
-    assertEquals("No rows should be unrepairable (target has no all-tombstone rows)", 0,
-      rowsCannotRepair);
-    // Each modified row contributes at least one missing cell (source's original NAME at
-    // its original timestamp) and one extra cell (target's modified NAME at the new
-    // timestamp). cellsDifferent stays 0 because the modifications wrote at new timestamps
-    // rather than overwriting at the same coordinates.
-    assertTrue("Source-only cells across the 3 drifted rows should be detected, got "
-      + cellsMissingOnTarget, cellsMissingOnTarget >= 3);
-    assertTrue("Target-only cells across the 3 drifted rows should be detected, got "
-      + cellsExtraOnTarget, cellsExtraOnTarget >= 3);
-    assertEquals("No same-coord value diffs (modifications wrote at new timestamps)", 0,
-      cellsDifferentOnTarget);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 3, 0, 0);
+    assertRepairRowCounters(repairCounters, 0, 0, 0);
+    // 3 rows × 2 mismatched cells (NAME + Phoenix's _0 empty-key cell) = 6 missing and 6 extra.
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
 
     // Target rows should now match source.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
 
-    // Checkpoint table now has 3 CHUNK/REPAIRED + 3 REGION/REPAIRED in addition to the
-    // VERIFIED rows. Phase 2's STATUS-IN filter caused the MISMATCHED rows to be re-entered
-    // as gaps and repair overwrote them in place.
     List<PhoenixSyncTableCheckpointOutputRow> postRepairEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    int chunkRepairedRows = 0;
-    int regionRepairedRows = 0;
-    int mismatchedRows = 0;
-    for (PhoenixSyncTableCheckpointOutputRow entry : postRepairEntries) {
-      if (PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED.equals(entry.getStatus())) {
-        if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) {
-          chunkRepairedRows++;
-        } else if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) {
-          regionRepairedRows++;
-        }
-      } else if (
-        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())
-      ) {
-        mismatchedRows++;
-      }
-    }
-    assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, chunkRepairedRows);
-    assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, regionRepairedRows);
-    assertEquals("MISMATCHED rows should be overwritten in place — none should remain", 0,
-      mismatchedRows);
+    assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3,
+      countCheckpointsByTypeAndStatus(postRepairEntries,
+        PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
+    assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3,
+      countCheckpointsByTypeAndStatus(postRepairEntries,
+        PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -271,11 +231,17 @@ public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception
     };
 
     for (String zkQuorum : zkQuorumFormats) {
-      Job job = runSyncToolWithZkQuorum(uniqueTableName, zkQuorum);
+      Job job = runSyncToolWithZkQuorum(uniqueTableName, zkQuorum, "--dry-run");
       SyncCountersResult counters = getSyncCounters(job);
       validateSyncCounters(counters, 10, 10, 7, 3);
       cleanupCheckpointTable(sourceConnection, uniqueTableName, zkQuorum, null);
     }
+
+    // After validating detection across ZK formats, run dry-run + repair against the default
+    // targetZkQuorum to confirm the tool converges source and target.
+    runSyncToolWithRepair(uniqueTableName);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -290,13 +256,19 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception {
     assertEquals("Source should have 10 rows", 10, sourceCount);
     assertEquals("Target should have 7 rows (3 deleted)", 7, targetCount);
 
-    Job job = runSyncTool(uniqueTableName);
-    SyncCountersResult counters = getSyncCounters(job);
+    // Dry-run: detects 3 mismatched chunks.
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     validateSyncCounters(counters, 10, 7, 7, 3);
     validateMapperCounters(counters, 1, 3);
     assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated);
 
+    // Repair pass only re-runs the 3 mismatched chunks (verified chunks are excluded by the
+    // resume filter). Target's DELETEs left tombstones that shadow source's Puts at lower
+    // timestamps, so each re-run mapper rolls up to UNREPAIRABLE.
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    validateMapperCountersRepair(repairCounters, 0, 0, 3, 0);
   }
 
   @Test
@@ -325,11 +297,14 @@ public void testSyncTableWithConditionalTTLExpiredRows() throws Exception {
       new String[] { "MODIFIED_5", "MODIFIED_8" });
 
     // Run sync tool, TTL-expired rows (1-3) should be skipped on both source and target
-    Job job = runSyncTool(uniqueTableName);
-    SyncCountersResult counters = getSyncCounters(job);
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     validateSyncCounters(counters, 7, 7, 5, 2);
     validateMapperCounters(counters, 2, 2);
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -373,6 +348,15 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception
 
     validateSyncCounters(counters2, 7, 7, 7, 0);
     validateMapperCounters(counters2, 4, 0);
+
+    // Source and target each show 7 live rows under the conditional TTL filter the sync tool
+    // applies on both sides, so the repair pass is a no-op and no MISMATCHED rows are written.
+    // Note: the standard Phoenix query (without the TTL filter the tool applies) sees 10 rows
+    // on source vs 7 on target because IS_STRICT_TTL=false returns expired rows on source
+    // (uncompacted) but compaction on target physically removed them — that asymmetry is by
+    // design, not drift the tool can converge.
+    runSyncToolWithRepair(uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -396,16 +380,22 @@ public void testSyncValidateIndexTable() throws Exception {
     deleteHBaseRows(CLUSTERS.getHBaseCluster2(), uniqueTableName, 3);
     deleteHBaseRows(CLUSTERS.getHBaseCluster2(), indexName, 3);
 
-    Job job = runSyncTool(indexName);
-    SyncCountersResult counters = getSyncCounters(job);
+    RepairRunResult result = runSyncToolWithRepair(indexName);
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     validateSyncCounters(counters, 10, 7, 7, 3);
 
-    // Verify checkpoint entries show mismatches
+    // Verify checkpoint entries show mismatches (from dry-run pass) before repair runs.
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null);
 
     assertFalse("Should have checkpointEntries", checkpointEntries.isEmpty());
+
+    // The repair pass syncs the index physical table on target with the source index. Since the
+    // data table was also corrupted on target (3 rows deleted via deleteHBaseRows) but we only
+    // ran sync on the index, the data table itself is still drifted — only assert on index
+    // checkpoint rows.
+    assertNoMismatchedCheckpoints(indexName, null);
   }
 
   @Test
@@ -428,9 +418,11 @@ public void testSyncValidateLocalIndexTable() throws Exception {
 
     deleteHBaseRows(CLUSTERS.getHBaseCluster2(), uniqueTableName, 5);
 
-    // Run sync tool on the LOCAL INDEX table (not the data table)
-    Job job = runSyncTool(indexName);
-    SyncCountersResult counters = getSyncCounters(job);
+    // Run sync tool on the LOCAL INDEX table (not the data table). Local indexes share regions
+    // with the data table — the dry-run pass detects drift, the repair pass writes back the
+    // missing index rows on target.
+    RepairRunResult result = runSyncToolWithRepair(indexName);
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     assertTrue(String.format("Should have at least %d verified chunks, actual: %d", 1,
       counters.chunksVerified), counters.chunksVerified >= 1);
@@ -442,6 +434,9 @@ public void testSyncValidateLocalIndexTable() throws Exception {
       queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null);
 
     assertFalse("Should have checkpoint entries for local index", checkpointEntries.isEmpty());
+
+    // After repair, the local-index physical table on target should match source's index.
+    assertNoMismatchedCheckpoints(indexName, null);
   }
 
   @Test
@@ -490,15 +485,17 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception
       tenantSourceConn.close();
     }
 
-    // TENANT_001 has no differences, expect all rows verified
-    Job job1 = runSyncTool(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime);
-    SyncCountersResult counters1 = getSyncCounters(job1);
+    // TENANT_001 has no differences, expect all rows verified. Use dry-run + repair to confirm
+    // the no-drift case still leaves no MISMATCHED rows.
+    RepairRunResult t1 =
+      runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime);
+    SyncCountersResult counters1 = getSyncCounters(t1.dryRunJob);
     validateSyncCounters(counters1, 10, 10, 10, 0);
     validateMapperCounters(counters1, 4, 0);
 
-    // TENANT_002 has 3 modified rows
-    Job job2 = runSyncTool(uniqueTableName, "--tenant-id", tenantIds[1]);
-    SyncCountersResult counters2 = getSyncCounters(job2);
+    // TENANT_002 has 3 modified rows. Dry-run detects, repair writes back source values.
+    RepairRunResult t2 = runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[1]);
+    SyncCountersResult counters2 = getSyncCounters(t2.dryRunJob);
     validateSyncCounters(counters2, 10, 10, 7, 3);
     validateMapperCounters(counters2, 2, 2);
 
@@ -511,6 +508,13 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_001");
     assertFalse("Should have checkpoint entries for TENANT_001", checkpointEntries.isEmpty());
 
+    // No MISMATCHED rows should remain after repair pass for either tenant.
+    assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_001");
+    assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_002");
+
+    // After repair, TENANT_002's data should be identical between source and target.
+    withTenantConnections(tenantIds[1],
+      (sourceConn, targetConn) -> verifyDataIdentical(sourceConn, targetConn, uniqueTableName));
   }
 
   @Test
@@ -551,6 +555,12 @@ public void testSyncTableValidateWithTimeRangeFilter() throws Exception {
 
     validateSyncCounters(counters, 10, 10, 10, 0);
     validateMapperCounters(counters, 1, 0);
+
+    // Within-window data (IDs 11-20) was identical, so the repair flow is a no-op there and
+    // no MISMATCHED rows are written. Out-of-window drift (IDs 3,5,8,23,25,28) is invisible
+    // to the time-range filter and remains on target by design — full convergence is NOT
+    // expected here, only checkpoint cleanliness for the window we scanned.
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -562,58 +572,14 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr
 
     // Introduce differences on target scattered across the dataset
     List<Integer> mismatchIds = Arrays.asList(10, 25, 40, 55, 70, 85, 95);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     // Capture consistent time range for both runs
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Run sync tool for the FIRST time with explicit time range
-    Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    SyncCountersResult counters1 = getSyncCounters(job1);
-
-    // Validate first run counters - should process all 100 rows
-    validateSyncCountersWithMinChunk(counters1, 100, 100, 1, 1);
-
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-
-    assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty());
-
-    // Separate mapper and chunk entries using utility method
-    SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries);
-    List<PhoenixSyncTableCheckpointOutputRow> allMappers = separated.mappers;
-    List<PhoenixSyncTableCheckpointOutputRow> allChunks = separated.chunks;
-
-    assertFalse("Should have mapper region entries", allMappers.isEmpty());
-    assertFalse("Should have chunk entries", allChunks.isEmpty());
-
-    // Select 3/4th of chunks from each mapper to delete (simulating partial rerun)
-    // We repro the partial run via deleting some entries from checkpoint table and re-running the
-    // tool
-    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete = selectChunksToDeleteFromMappers(
-      sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75);
-
-    // Delete all mappers and selected chunks
-    int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum,
-      null, allMappers, chunksToDelete);
-
-    assertEquals("Should have deleted all mapper and selected chunk entries",
-      allMappers.size() + chunksToDelete.size(), deletedCount);
-
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterDelete =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    assertEquals("Should have fewer checkpoint entries after deletion",
-      allMappers.size() + chunksToDelete.size(),
-      checkpointEntries.size() - checkpointEntriesAfterDelete.size());
-
-    // Calculate totals from REMAINING CHUNK entries in checkpoint table using utility method
-    CheckpointAggregateCounters remainingCounters =
-      calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete);
+    PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
+    validateSyncCountersWithMinChunk(setup.firstRunCounters, 100, 100, 1, 1);
 
     List<Integer> additionalSourceSplits =
       Arrays.asList(12, 22, 28, 32, 42, 52, 58, 62, 72, 78, 82, 92);
@@ -634,34 +600,32 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr
       counters2.chunksMismatched);
 
     // (Remaining chunks from checkpoint) + (Second run) should equal (First run)
-    long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
-    long totalVerifiedChunks = remainingCounters.chunksVerified + counters2.chunksVerified;
-    long totalMismatchedChunks = remainingCounters.chunksMismatched + counters2.chunksMismatched;
+    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified;
 
     assertEquals(
       "Remaining + Second run source rows should equal first run source rows. " + "Remaining: "
-        + remainingCounters.sourceRowsProcessed + ", Second run: " + counters2.sourceRowsProcessed
-        + ", Total: " + totalSourceRows + ", Expected: " + counters1.sourceRowsProcessed,
-      counters1.sourceRowsProcessed, totalSourceRows);
+        + setup.remainingCounters.sourceRowsProcessed + ", Second run: "
+        + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: "
+        + setup.firstRunCounters.sourceRowsProcessed,
+      setup.firstRunCounters.sourceRowsProcessed, totalSourceRows);
 
     assertEquals(
       "Remaining + Second run target rows should equal first run target rows. " + "Remaining: "
-        + remainingCounters.targetRowsProcessed + ", Second run: " + counters2.targetRowsProcessed
-        + ", Total: " + totalTargetRows + ", Expected: " + counters1.targetRowsProcessed,
-      counters1.targetRowsProcessed, totalTargetRows);
-
-    assertEquals("Remaining + Second run verified chunks should equal first run verified chunks. "
-      + "Remaining: " + remainingCounters.chunksVerified + ", Second run: "
-      + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected: "
-      + counters1.chunksVerified, counters1.chunksVerified, totalVerifiedChunks);
-
-    assertEquals(
-      "Remaining + Second run mismatched chunks should equal first run mismatched chunks. "
-        + "Remaining: " + remainingCounters.chunksMismatched + ", Second run: "
-        + counters2.chunksMismatched + ", Total: " + totalMismatchedChunks + ", Expected: "
-        + counters1.chunksMismatched,
-      counters1.chunksMismatched, totalMismatchedChunks);
+        + setup.remainingCounters.targetRowsProcessed + ", Second run: "
+        + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: "
+        + setup.firstRunCounters.targetRowsProcessed,
+      setup.firstRunCounters.targetRowsProcessed, totalTargetRows);
+
+    // Splits introduced between runs widen the second-run chunk count beyond the deleted
+    // 75% of the first run's chunks (extra region boundaries → extra chunks). So we relax
+    // the strict equality to >=. The row-count invariant above is unaffected by splits.
+    assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. "
+      + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: "
+      + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): "
+      + setup.firstRunCounters.chunksVerified,
+      totalVerifiedChunks >= setup.firstRunCounters.chunksVerified);
 
     // Verify checkpoint table has entries for the reprocessed regions
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterRerun =
@@ -669,7 +633,16 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr
 
     // After rerun, we should have at least more entries compared to delete table
     assertTrue("Should have checkpoint entries after rerun",
-      checkpointEntriesAfterRerun.size() > checkpointEntriesAfterDelete.size());
+      checkpointEntriesAfterRerun.size() > setup.entriesAfterDelete.size());
+
+    // The partial-rerun pattern (delete checkpoints + re-split + rerun) leaves chunks marked
+    // REPAIRED with stale boundaries (relative to the post-split layout); the resume filter
+    // skips those, so a final run can leave residual drift. Cleanup the checkpoint and run
+    // a dry-run + repair pass on the stable layout to converge.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -680,49 +653,19 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E
     splitTableAt(sourceConnection, uniqueTableName, sourceSplits);
 
     List<Integer> mismatchIds = Arrays.asList(10, 30, 60, 90);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // First run with large chunk size
+    // First run with large chunk size, then delete 75% of chunks for partial rerun.
     int largeChunkSize = 10240;
-    Job job1 = runSyncToolWithChunkSize(uniqueTableName, largeChunkSize, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
-    SyncCountersResult counters1 = getSyncCounters(job1);
-
+    PartialRerunSetup setup =
+      setupPartialRerun(uniqueTableName, fromTime, toTime, largeChunkSize, 0.75);
+    SyncCountersResult counters1 = setup.firstRunCounters;
     validateSyncCounters(counters1, 100, 100, counters1.chunksVerified, counters1.chunksMismatched);
-
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty());
-
-    SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries);
-    List<PhoenixSyncTableCheckpointOutputRow> allMappers = separated.mappers;
-    List<PhoenixSyncTableCheckpointOutputRow> allChunks = separated.chunks;
-    int mapperCountAfterFirstRun = allMappers.size();
-    int chunkCountAfterFirstRun = allChunks.size();
-
-    assertFalse("Should have mapper entries", allMappers.isEmpty());
-    assertFalse("Should have chunk entries", allChunks.isEmpty());
-
-    // Delete all mappers and 3/4th of chunks from each mapper
-    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete = selectChunksToDeleteFromMappers(
-      sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75);
-
-    int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum,
-      null, allMappers, chunksToDelete);
-    assertEquals("Should have deleted all mapper and selected chunk entries",
-      allMappers.size() + chunksToDelete.size(), deletedCount);
-
-    // Calculate counters from remaining (1/4th) chunk entries
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterDelete =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    CheckpointAggregateCounters remainingCounters =
-      calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete);
+    int mapperCountAfterFirstRun = setup.mappers.size();
+    int chunkCountAfterFirstRun = setup.chunks.size();
 
     // Re-run with smaller chunk size (1 byte) - produces more, smaller chunks
     int smallChunkSize = 1;
@@ -731,8 +674,8 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E
     SyncCountersResult counters2 = getSyncCounters(job2);
 
     // (Remaining chunks) + (Second run) should equal (First run) for row counts
-    long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
 
     assertEquals("Remaining + rerun source rows should equal first run",
       counters1.sourceRowsProcessed, totalSourceRows);
@@ -756,6 +699,16 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E
       "Chunk count after rerun (" + separatedAfterRerun.chunks.size()
         + ") should be greater than first run (" + chunkCountAfterFirstRun + ")",
       separatedAfterRerun.chunks.size() > chunkCountAfterFirstRun);
+
+    // The partial-rerun pattern (delete chunks, rerun with smaller chunks) exercises the
+    // checkpoint resume path. Once that has been validated, run a clean dry-run + repair
+    // pass on the same window so the repair flow has a stable boundary set to converge.
+    // Use the dry-run+repair pattern so any chunk that landed in a non-resumable state
+    // (REPAIRED with stale boundaries) is re-validated rather than skipped.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -766,59 +719,17 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr
     splitTableAt(sourceConnection, uniqueTableName, sourceSplits);
 
     List<Integer> mismatchIds = Arrays.asList(5, 15, 25, 35, 45, 55, 65, 75, 85, 95);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    SyncCountersResult counters1 = getSyncCounters(job1);
-
+    PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
+    SyncCountersResult counters1 = setup.firstRunCounters;
     validateSyncCounters(counters1, 100, 100, 90, 10);
 
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-
-    assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty());
-
-    // Separate mapper and chunk entries using utility method
-    SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries);
-    List<PhoenixSyncTableCheckpointOutputRow> allMappers = separated.mappers;
-    List<PhoenixSyncTableCheckpointOutputRow> allChunks = separated.chunks;
-
-    assertFalse("Should have mapper region entries", allMappers.isEmpty());
-    assertFalse("Should have chunk entries", allChunks.isEmpty());
-
-    // Select 3/4th of chunks from each mapper to delete (simulating partial rerun)
-    // We repro the partial run via deleting some entries from checkpoint table and re-running the
-    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete = selectChunksToDeleteFromMappers(
-      sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75);
-
-    // Delete all mappers and selected chunks
-    int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum,
-      null, allMappers, chunksToDelete);
-
-    assertEquals("Should have deleted all mapper and selected chunk entries",
-      allMappers.size() + chunksToDelete.size(), deletedCount);
-
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterDelete =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    assertEquals("Should have fewer checkpoint entries after deletion",
-      allMappers.size() + chunksToDelete.size(),
-      checkpointEntries.size() - checkpointEntriesAfterDelete.size());
-
-    // Calculate totals from REMAINING CHUNK entries in checkpoint table using utility method
-    CheckpointAggregateCounters remainingCounters =
-      calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete);
-
-    // Merge adjacent regions on source (merge 6 pairs of regions)
+    // Merge adjacent regions on source and target (6 pairs each).
     mergeAdjacentRegions(sourceConnection, uniqueTableName, 6);
-
-    // Merge adjacent regions on target (merge 6 pairs of regions)
     mergeAdjacentRegions(targetConnection, uniqueTableName, 6);
 
     // Run sync tool again with SAME time range - should reprocess only deleted regions
@@ -827,41 +738,43 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr
       String.valueOf(toTime));
     SyncCountersResult counters2 = getSyncCounters(job2);
 
-    long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
-    long totalVerifiedChunks = remainingCounters.chunksVerified + counters2.chunksVerified;
-    long totalMismatchedChunks = remainingCounters.chunksMismatched + counters2.chunksMismatched;
+    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified;
 
     assertEquals(
       "Remaining + Second run source rows should equal first run source rows. " + "Remaining: "
-        + remainingCounters.sourceRowsProcessed + ", Second run: " + counters2.sourceRowsProcessed
-        + ", Total: " + totalSourceRows + ", Expected: " + counters1.sourceRowsProcessed,
+        + setup.remainingCounters.sourceRowsProcessed + ", Second run: "
+        + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: "
+        + counters1.sourceRowsProcessed,
       counters1.sourceRowsProcessed, totalSourceRows);
 
     assertEquals(
       "Remaining + Second run target rows should equal first run target rows. " + "Remaining: "
-        + remainingCounters.targetRowsProcessed + ", Second run: " + counters2.targetRowsProcessed
-        + ", Total: " + totalTargetRows + ", Expected: " + counters1.targetRowsProcessed,
+        + setup.remainingCounters.targetRowsProcessed + ", Second run: "
+        + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: "
+        + counters1.targetRowsProcessed,
       counters1.targetRowsProcessed, totalTargetRows);
 
-    assertEquals("Remaining + Second run verified chunks should equal first run verified chunks. "
-      + "Remaining: " + remainingCounters.chunksVerified + ", Second run: "
-      + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected: "
-      + counters1.chunksVerified, counters1.chunksVerified, totalVerifiedChunks);
-
-    assertEquals(
-      "Remaining + Second run mismatched chunks should equal first run mismatched chunks. "
-        + "Remaining: " + remainingCounters.chunksMismatched + ", Second run: "
-        + counters2.chunksMismatched + ", Total: " + totalMismatchedChunks + ", Expected: "
-        + counters1.chunksMismatched,
-      counters1.chunksMismatched, totalMismatchedChunks);
+    // Region merges between the two runs change mapper region boundaries, so the resume
+    // filter sees stale chunks that don't align to the new mapper's range and reprocesses
+    // them. The "remaining + second run >= first run" invariant still holds; equality does
+    // not. Row-count invariant above is preserved.
+    assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. "
+      + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: "
+      + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): "
+      + counters1.chunksVerified, totalVerifiedChunks >= counters1.chunksVerified);
 
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterRerun =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
 
     // After rerun with merges, we should have more entries as after deletion
     assertTrue("Should have checkpoint entries after rerun",
-      checkpointEntriesAfterRerun.size() > checkpointEntriesAfterDelete.size());
+      checkpointEntriesAfterRerun.size() > setup.entriesAfterDelete.size());
+
+    // Both runs were non-dry-run, so repair ran inline. Target should converge.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -899,6 +812,11 @@ public void testSyncTableValidateIdempotentOnReRun() throws Exception {
 
     assertEquals("Checkpoint entries should be identical after idempotent run",
       checkpointEntriesAfterFirstRun, checkpointEntriesAfterSecondRun);
+
+    // Both passes were non-dry-run with no drift to begin with; the repair flow ran as a
+    // no-op, target should still match source and no MISMATCHED rows should exist.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -950,6 +868,10 @@ public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception
     // checkpointed
     assertFalse("Should have checkpoint entries after second run",
       checkpointEntriesAfterSecondRun.isEmpty());
+
+    // No drift was introduced; repair flow should be a no-op even after concurrent splits.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -960,12 +882,15 @@ public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exceptio
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
     // Run sync tool with both --schema and --table-name options
-    Job job = runSyncTool(uniqueTableName, "--schema", "");
-    SyncCountersResult counters = getSyncCounters(job);
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--schema", "");
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     // Validate counters
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -974,10 +899,15 @@ public void testSyncTableValidateInBackgroundMode() throws Exception {
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
-    String[] args =
-      new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum,
-        "--chunk-size", "1", "--to-time", String.valueOf(System.currentTimeMillis()) };
+    // Pin the time window so the background dry-run pass and the repair pass below share
+    // the same checkpoint PK and the repair pass overwrites MISMATCHED → REPAIRED in place.
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
+    Configuration conf = sourceClusterConf();
+    String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
+      targetZkQuorum, "--chunk-size", "1", "--dry-run", "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime) };
 
     PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
     tool.setConf(conf);
@@ -995,6 +925,14 @@ public void testSyncTableValidateInBackgroundMode() throws Exception {
 
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+
+    // Now run the repair pass (foreground for synchronous assertions). Same time window so
+    // the dry-run-pass MISMATCHED rows are overwritten with REPAIRED.
+    runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1004,7 +942,7 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception {
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
     // Create configuration with custom timeout values
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+    Configuration conf = sourceClusterConf();
 
     // Set custom timeout values (higher than defaults to ensure job succeeds)
     long customQueryTimeout = 900000L; // 15 minutes
@@ -1017,17 +955,10 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception {
     conf.setLong(QueryServices.SYNC_TABLE_CLIENT_SCANNER_TIMEOUT_ATTRIB, customScannerTimeout);
     conf.setInt(QueryServices.SYNC_TABLE_RPC_RETRIES_COUNTER, customRpcRetries);
 
-    String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
-      targetZkQuorum, "--chunk-size", "1", "--run-foreground", "--to-time",
-      String.valueOf(System.currentTimeMillis()) };
-
-    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
-    tool.setConf(conf);
-    int exitCode = tool.run(args);
-
-    Job job = tool.getJob();
-    assertNotNull("Job should not be null", job);
-    assertEquals("Tool should complete successfully with custom timeouts", 0, exitCode);
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+    Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--dry-run", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
 
     // Verify that custom timeout configurations were applied to the job
     Configuration jobConf = job.getConfiguration();
@@ -1045,6 +976,12 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception {
     counters.logCounters(testName.getMethodName());
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+
+    // Repair pass over the same window: convergence + no MISMATCHED rows remaining.
+    runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1073,9 +1010,9 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception {
     assertEquals("Source should have 10 rows (odd numbers 1-19)", 10, sourceCount);
     assertEquals("Target should have 15 rows (odd 1-19 + even 2-10)", 15, targetCount);
 
-    // Run sync tool to detect the extra rows interspersed on target
-    Job job = runSyncTool(uniqueTableName);
-    SyncCountersResult counters = getSyncCounters(job);
+    // Run dry-run + repair sharing the same time window.
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
+    SyncCountersResult counters = getSyncCounters(result.dryRunJob);
 
     validateSyncCounters(counters, 10, 15, 5, 5);
     validateMapperCounters(counters, 0, 4);
@@ -1083,35 +1020,22 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception {
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
 
-    // Count mismatched entries in checkpoint table
-    int mismatchedCount = 0;
-    for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) {
-      if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) {
-        mismatchedCount++;
-      }
-    }
-    assertTrue("Should have mismatched entries for chunks with extra rows", mismatchedCount > 0);
-
-    // Verify source and target are still different
-    List<TestRow> sourceRows = queryAllRows(sourceConnection,
-      "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID");
-    List<TestRow> targetRows = queryAllRows(targetConnection,
-      "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID");
-    assertEquals("Source should still have 10 rows", 10, sourceRows.size());
-    assertEquals("Target should still have 15 rows", 15, targetRows.size());
-    assertNotEquals("Source and target should have different data", sourceRows, targetRows);
-
-    // Verify that source has only odd numbers
-    for (TestRow row : sourceRows) {
-      assertEquals("Source should only have odd IDs", 1, row.id % 2);
-    }
-
-    // Verify that target has all numbers 1-11 (with gaps filled) and 13,15,17,19
-    assertEquals("Target should have ID=1", 1, targetRows.get(0).id);
-    assertEquals("Target should have ID=2", 2, targetRows.get(1).id);
-    assertEquals("Target should have ID=10", 10, targetRows.get(9).id);
-    assertEquals("Target should have ID=11", 11, targetRows.get(10).id);
-    assertEquals("Target should have ID=19", 19, targetRows.get(14).id);
+    // Count mismatched entries in checkpoint table — after the repair pass, all MISMATCHED
+    // rows from the dry-run pass should have been overwritten with REPAIRED.
+    long mismatchedCount = countCheckpointsByStatus(checkpointEntries,
+      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
+    long repairedCount = countCheckpointsByStatus(checkpointEntries,
+      PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
+    assertEquals("After repair, no MISMATCHED rows should remain", 0, mismatchedCount);
+    assertTrue("Should have REPAIRED rows after repair pass", repairedCount > 0);
+
+    // After repair: target should converge to source (10 odd-id rows). The 5 extra even-id
+    // rows on target had only live cells, so tombstoneWholeRow can remove them.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertEquals("Source should have 10 rows", 10,
+      TestUtil.getRowCount(sourceConnection, uniqueTableName));
+    assertEquals("Target should now also have 10 rows after repair tombstones the extras", 10,
+      TestUtil.getRowCount(targetConnection, uniqueTableName));
   }
 
   @Test
@@ -1119,48 +1043,25 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
     // Introduce some mismatches on target before sync
     List<Integer> mismatchIds = Arrays.asList(15, 35, 55, 75, 95);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     // Capture time range for the sync
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Create a thread that will perform splits on source cluster during sync
-    Thread sourceSplitThread = new Thread(() -> {
-      try {
-        // Split source at multiple points (creating more regions during sync)
-        List<Integer> sourceSplits = Arrays.asList(20, 25, 40, 45, 60, 65, 80, 85, 95);
-        splitTableAt(sourceConnection, uniqueTableName, sourceSplits);
-      } catch (Exception e) {
-        LOGGER.error("Error during source splits", e);
-      }
-    });
-
-    // Create a thread that will perform splits on target cluster during sync
-    Thread targetSplitThread = new Thread(() -> {
-      try {
-        // Split target at different points than source (asymmetric region boundaries)
-        List<Integer> targetSplits = Arrays.asList(11, 21, 31, 41, 51, 75, 81, 91);
-        splitTableAt(targetConnection, uniqueTableName, targetSplits);
-      } catch (Exception e) {
-        LOGGER.error("Error during target splits", e);
-      }
-    });
-
-    // Start split threads
-    sourceSplitThread.start();
-    targetSplitThread.start();
+    // Run splits on source/target concurrently with the sync.
+    Runnable splitJoiner = startConcurrentRegionWork(
+      () -> splitTableAt(sourceConnection, uniqueTableName,
+        Arrays.asList(20, 25, 40, 45, 60, 65, 80, 85, 95)),
+      () -> splitTableAt(targetConnection, uniqueTableName,
+        Arrays.asList(11, 21, 31, 41, 51, 75, 81, 91)),
+      "splits");
 
     // Run sync tool while splits are happening
     Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
       String.valueOf(toTime));
 
-    // Wait for split threads to complete
-    sourceSplitThread.join(30000); // 30 second timeout
-    targetSplitThread.join(30000);
+    splitJoiner.run();
 
     // Verify the job completed successfully despite concurrent splits
     assertTrue("Sync job should complete successfully despite concurrent splits",
@@ -1176,14 +1077,178 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception {
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty());
 
-    // Count mismatched entries
-    int mismatchedCount = 0;
-    for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) {
-      if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) {
-        mismatchedCount++;
-      }
-    }
-    assertTrue("Should have mismatched entries for modified rows", mismatchedCount >= 1);
+    // Concurrent splits may race with the first sync pass — a chunk that straddled a region
+    // boundary mid-flight can land in REPAIRED with stale boundaries; once REPAIRED, the
+    // resume filter skips it. Cleanup the checkpoint and run a dry-run + repair pass on the
+    // stable region layout to converge.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+  }
+
+  /**
+   * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during
+   * the dry-run pass; this exercises the repair pass against splitting target regions, which is
+   * the production reality that exercises {@code flushRepairMutations}'s
+   * {@code NotServingRegionException} path → {@code firstFailureIdx} → {@code REPAIR_FAILED}.
+   *
+   * <p>Convergence strategy:
+   * <ol>
+   *   <li>Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.</li>
+   *   <li>Start concurrent splits on the target cluster, then run the repair pass. Some chunks
+   *       may land in {@code REPAIR_FAILED} if a flush hits a region in transition — the
+   *       resume filter re-enters those chunks on the next pass.</li>
+   *   <li>Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED.
+   *       {@code verifyDataIdentical} must succeed.</li>
+   * </ol>
+   */
+  @Test
+  public void testRepairWithConcurrentTargetSplits() throws Exception {
+    setupStandardTestWithReplication(uniqueTableName, 1, 100);
+    List<Integer> mismatchIds = Arrays.asList(12, 24, 36, 48, 60, 72, 84, 96);
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
+
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
+    // Stage 1: stable dry-run populates MISMATCHED checkpoint rows.
+    Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Stable dry-run should succeed", dryRunJob.isSuccessful());
+    SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
+    assertTrue("Dry-run should detect at least one mismatched chunk",
+      dryRunCounters.chunksMismatched >= 1);
+
+    // Stage 2: kick off target-side splits while the repair pass runs. Source splits left out
+    // because a target-side split is what surfaces flushRepairMutations failures.
+    Runnable splitJoiner = startConcurrentRegionWork(() -> {
+      // No source-side work; pass a trivial Runnable so startConcurrentRegionWork still wires
+      // both threads and the joiner times out cleanly.
+    }, () -> splitTableAt(targetConnection, uniqueTableName,
+      Arrays.asList(15, 25, 35, 45, 55, 65, 75, 85, 95)), "repair-splits");
+
+    Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    splitJoiner.run();
+    assertTrue("Repair pass should not throw despite concurrent splits", repairJob.isSuccessful());
+
+    // Stage 3: stable convergence pass. Cleanup checkpoint so the resume filter doesn't skip
+    // chunks that were marked REPAIRED with stale boundaries during the racing pass.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+  }
+
+  /**
+   * P4 (idempotent repair): Guards against a regression where repair claims REPAIRED but does
+   * not actually converge. Run a full dry-run + repair on a divergent table, clean the
+   * checkpoint, then run the same dry-run + repair again on the now-converged tables and assert
+   * the second pass is a no-op (zero mismatches detected, zero chunks repaired).
+   */
+  @Test
+  public void testRepairIsIdempotent() throws Exception {
+    setupStandardTestWithReplication(uniqueTableName, 1, 50);
+    List<Integer> mismatchIds = Arrays.asList(7, 14, 21, 28, 35, 42, 49);
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
+
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
+    // Pass 1: detect + repair.
+    RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("First dry-run should succeed", firstRun.dryRunJob.isSuccessful());
+    assertTrue("First repair should succeed", firstRun.repairJob.isSuccessful());
+
+    SyncCountersResult firstDryRunCounters = getSyncCounters(firstRun.dryRunJob);
+    assertTrue("First dry-run should detect mismatched chunks",
+      firstDryRunCounters.chunksMismatched >= 1);
+    Counters firstRepairCounters = firstRun.repairJob.getCounters();
+    assertTrue("First repair should mark chunks REPAIRED",
+      firstRepairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue() >= 1);
+
+    // Tables must be data-identical after the first repair.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+
+    // Pass 2 prep: clean the checkpoint so the resume filter doesn't skip already-VERIFIED
+    // chunks — the second dry-run must scan the full layout from scratch.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+
+    // Pass 2: same window, no further mutations. Both passes must be no-ops.
+    RepairRunResult secondRun = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Second dry-run should succeed", secondRun.dryRunJob.isSuccessful());
+    assertTrue("Second repair should succeed", secondRun.repairJob.isSuccessful());
+
+    SyncCountersResult secondDryRunCounters = getSyncCounters(secondRun.dryRunJob);
+    assertEquals("Second dry-run should detect zero mismatches", 0,
+      secondDryRunCounters.chunksMismatched);
+
+    // Second repair pass should be a no-op: nothing repaired, nothing failed.
+    assertRepairChunkAndMapperCounters(secondRun.repairJob.getCounters(), 0, 0, 0, 0, 0);
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+  }
+
+  /**
+   * P5 (all-tombstoned target-extra row): Source has no row K; target has row K but every cell
+   * is already a tombstone. {@code tombstoneWholeRow} returns {@code liveCellsTombstoned == 0}
+   * (line ~217 of {@code PhoenixSyncTableChunkRepairer}) → {@code drift.rowsCannotRepair++},
+   * {@code rowsExtraOnTarget} unchanged. Pins the rare "target row extra but already
+   * fully-tombstoned" path that currently has zero coverage.
+   */
+  @Test
+  public void testRepairAllTombstonedTargetRowExtra() throws Exception {
+    final int rowId = 5;
+    final int otherRowId = 4;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    long fromTime = 0L;
+    final long ts = base + 1L;
+    final long tombstoneTs = base + 2L;
+
+    // Plant a sentinel row on both sides so the verifier has *something* to compare and
+    // produces a non-empty chunk hash. The sentinel row stays clean — the test focuses on
+    // rowId=5 only.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')");
+      scnSrc.commit();
+    }
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')");
+      scnTgt.commit();
+    }
+
+    // Target only: plant raw DeleteColumn tombstones for row K with NO underlying Put cells.
+    // Under --raw-scan the row surfaces (tombstones are themselves cells) but every cell is a
+    // Delete, so tombstoneWholeRow() returns liveCellsTombstoned == 0 → drift.rowsCannotRepair
+    // increments and rowsExtraOnTarget stays 0. This pins the rare "row exists in raw view but
+    // has no live cells to tombstone" branch.
+    byte[] rowKey = integerRowKey(rowId);
+    writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs);
+    writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE",
+      tombstoneTs);
+    writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs);
+
+    // Spin until wall-clock advances past the highest cell timestamp so --to-time
+    // (which defaults to currentTimeMillis()) covers our planted cells.
+    while (System.currentTimeMillis() <= tombstoneTs) {
+      // spin
+    }
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      "--raw-scan");
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    // Row K's cells were already all tombstones — no live cells to tombstone again, and
+    // the row is flagged unrepairable.
+    assertRepairRowCounters(result.repairJob.getCounters(), 0, 0, 1);
   }
 
   @Test
@@ -1213,24 +1278,23 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception
       "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID");
     assertEquals("Row values should be identical", sourceRows, targetRows);
 
-    // Run sync tool - should detect timestamp differences as mismatches
-    Job job = runSyncTool(uniqueTableName);
+    // Dry-run sync — should detect timestamp differences as mismatches because timestamps are
+    // included in the hash calculation. We use dry-run so the MISMATCHED rows persist for the
+    // assertions below; this is a residual-drift case where Phoenix-level queries already
+    // see identical values (timestamps differ but values match) so the repair phase is not
+    // exercised here.
+    Job job = runSyncTool(uniqueTableName, "--dry-run");
     SyncCountersResult counters = getSyncCounters(job);
 
     // Validate counters - all rows should be processed and all chunks mismatched
-    // because timestamps are included in the hash calculation
     validateSyncCounters(counters, 10, 10, 0, 10);
 
     // Verify checkpoint entries show mismatches
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
 
-    int mismatchedCount = 0;
-    for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) {
-      if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) {
-        mismatchedCount++;
-      }
-    }
+    long mismatchedCount = countCheckpointsByStatus(checkpointEntries,
+      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
     assertTrue("Should have mismatched entries due to timestamp differences", mismatchedCount > 0);
   }
 
@@ -1246,45 +1310,23 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception {
 
     // Introduce some mismatches on target before sync
     List<Integer> mismatchIds = Arrays.asList(10, 30, 50, 70, 90);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     // Capture time range for the sync
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Create a thread that will perform merges on source cluster during sync
-    Thread sourceMergeThread = new Thread(() -> {
-      try {
-        // Merge adjacent regions on source
-        mergeAdjacentRegions(sourceConnection, uniqueTableName, 6);
-      } catch (Exception e) {
-        LOGGER.error("Error during source merges", e);
-      }
-    });
-
-    // Create a thread that will perform merges on target cluster during sync
-    Thread targetMergeThread = new Thread(() -> {
-      try {
-        mergeAdjacentRegions(targetConnection, uniqueTableName, 6);
-      } catch (Exception e) {
-        LOGGER.error("Error during target merges", e);
-      }
-    });
-
-    // Start merge threads
-    sourceMergeThread.start();
-    targetMergeThread.start();
+    // Run merges on source/target concurrently with the sync.
+    Runnable mergeJoiner = startConcurrentRegionWork(
+      () -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6),
+      () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6),
+      "merges");
 
     // Run sync tool while merges are happening
     Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
       String.valueOf(toTime));
 
-    // Wait for merge threads to complete
-    sourceMergeThread.join(30000); // 30 second timeout
-    targetMergeThread.join(30000);
+    mergeJoiner.run();
 
     // Verify the job completed successfully despite concurrent merges
     assertTrue("Sync job should complete successfully despite concurrent merges",
@@ -1307,6 +1349,14 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception {
 
     // Second run should process ZERO rows (all checkpointed despite region merges)
     validateSyncCounters(counters2, 0, 0, 0, 0);
+
+    // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter
+    // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass
+    // on the stable region layout to converge.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1316,36 +1366,23 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception {
 
     // Introduce mismatches scattered across the dataset
     List<Integer> mismatchIds = Arrays.asList(15, 25, 35, 45, 55, 75);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
-    // First, run without aggressive paging to establish baseline chunk count
-    Configuration baselineConf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
-    String[] baselineArgs = new String[] { "--table-name", uniqueTableName, "--target-cluster",
-      targetZkQuorum, "--run-foreground", "--chunk-size", "10240", "--to-time",
-      String.valueOf(System.currentTimeMillis()) };
-
-    PhoenixSyncTableTool baselineTool = new PhoenixSyncTableTool();
-    baselineTool.setConf(baselineConf);
-    baselineTool.run(baselineArgs);
-    Job baselineJob = baselineTool.getJob();
-    long baselineChunkCount =
-      baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
+    // First, run --dry-run without aggressive paging to establish baseline chunk count.
+    // Dry-run so the baseline doesn't repair the drift before the paging pass below sees it.
+    int chunkSize = 10240;
+    long baselineChunkCount = captureBaselineChunkCount(uniqueTableName, chunkSize);
 
     // Configure paging with aggressive timeouts to force mid-chunk timeouts
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+    Configuration conf = sourceClusterConf();
     conf.setBoolean(QueryServices.PHOENIX_SERVER_PAGING_ENABLED_ATTRIB, true);
     conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1);
 
-    int chunkSize = 10240;
-
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Run sync tool while splits are happening
-    Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+    // Dry-run with paging to assert chunk-count expansion under mid-chunk timeouts.
+    Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
 
     // Verify the job completed successfully despite paging timeouts
@@ -1357,7 +1394,7 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception {
     // Despite paging timeouts, no rows should be lost
     validateSyncCountersWithMinChunk(counters, 100, 100, 1, 1);
 
-    long pagingChunkCount = counters.chunksVerified;
+    long pagingChunkCount = counters.chunksVerified + counters.chunksMismatched;
 
     assertTrue(
       "Paging should create more chunks than baseline due to mid-chunk timeouts. " + "Baseline: "
@@ -1368,6 +1405,17 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception {
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty());
+
+    // Now run the repair pass over the same window so target converges. Confirms paging
+    // does not block the repair flow. Clean up the dry-run pass's MISMATCHED checkpoint
+    // rows first so the repair pass starts fresh — paging-driven chunk boundaries differ
+    // between passes and stale MISMATCHED rows from the dry-run can land outside the new
+    // boundary set.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1377,65 +1425,35 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception
 
     // Introduce mismatches scattered across the dataset
     List<Integer> mismatchIds = Arrays.asList(15, 25, 35, 45, 55, 75);
-    for (int id : mismatchIds) {
-      upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id },
-        new String[] { "MODIFIED_NAME_" + id });
-    }
-
-    // First, run without aggressive paging to establish baseline chunk count
-    Configuration baselineConf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
-    String[] baselineArgs = new String[] { "--table-name", uniqueTableName, "--target-cluster",
-      targetZkQuorum, "--run-foreground", "--chunk-size", "10240", "--to-time",
-      String.valueOf(System.currentTimeMillis()) };
+    introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
-    PhoenixSyncTableTool baselineTool = new PhoenixSyncTableTool();
-    baselineTool.setConf(baselineConf);
-    baselineTool.run(baselineArgs);
-    Job baselineJob = baselineTool.getJob();
-    long baselineChunkCount =
-      baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
+    // First, run --dry-run without aggressive paging to establish baseline chunk count.
+    // Dry-run so the baseline doesn't repair the drift before the paging pass below sees it.
+    int chunkSize = 10240;
+    long baselineChunkCount = captureBaselineChunkCount(uniqueTableName, chunkSize);
 
     // Configure paging with aggressive timeouts to force mid-chunk timeouts
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+    Configuration conf = sourceClusterConf();
     conf.setBoolean(QueryServices.PHOENIX_SERVER_PAGING_ENABLED_ATTRIB, true);
     conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1);
 
-    int chunkSize = 10240;
-
-    // Create a thread that will perform splits on source cluster during sync
-    Thread sourceSplitThread = new Thread(() -> {
-      try {
-        List<Integer> sourceSplits = Arrays.asList(12, 22, 32, 42, 52, 63, 72, 82, 92, 98);
-        splitTableAt(sourceConnection, uniqueTableName, sourceSplits);
-      } catch (Exception e) {
-        LOGGER.error("Error during source splits", e);
-      }
-    });
-
-    // Create a thread that will perform splits on target cluster during sync
-    Thread targetSplitThread = new Thread(() -> {
-      try {
-        List<Integer> targetSplits = Arrays.asList(13, 23, 33, 43, 53, 64, 74, 84, 95, 99);
-        splitTableAt(targetConnection, uniqueTableName, targetSplits);
-      } catch (Exception e) {
-        LOGGER.error("Error during target splits", e);
-      }
-    });
-
-    // Start split threads
-    sourceSplitThread.start();
-    targetSplitThread.start();
+    // Run splits on source/target concurrently with the sync.
+    Runnable splitJoiner = startConcurrentRegionWork(
+      () -> splitTableAt(sourceConnection, uniqueTableName,
+        Arrays.asList(12, 22, 32, 42, 52, 63, 72, 82, 92, 98)),
+      () -> splitTableAt(targetConnection, uniqueTableName,
+        Arrays.asList(13, 23, 33, 43, 53, 64, 74, 84, 95, 99)),
+      "splits");
 
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Run sync tool while splits are happening
-    Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+    // Dry-run sync while splits are happening — drift must remain on target so the chunk-count
+    // assertion below has work to do (otherwise an inline repair would converge mid-pass).
+    Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
 
-    // Wait for split threads to complete
-    sourceSplitThread.join(30000); // 30 second timeout
-    targetSplitThread.join(30000);
+    splitJoiner.run();
 
     // Verify the job completed successfully despite concurrent splits and paging timeouts
     assertTrue("Sync job should complete successfully despite paging and concurrent splits",
@@ -1449,7 +1467,7 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception
 
     // Paging should create MORE chunks than baseline
     // Concurrent region splits may also create additional chunks as mappers process new regions
-    long pagingChunkCount = counters.chunksVerified;
+    long pagingChunkCount = counters.chunksVerified + counters.chunksMismatched;
 
     assertTrue(
       "Paging should create more chunks than baseline due to mid-chunk timeouts. " + "Baseline: "
@@ -1460,6 +1478,16 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty());
+
+    // Run the repair pass over the same window so target converges. Confirms paging plus
+    // concurrent splits do not block the repair flow. Clean up the dry-run pass's MISMATCHED
+    // checkpoint rows first so the resume filter doesn't leave stale MISMATCHED entries that
+    // sit outside the repair pass's chunk boundaries (paging + splits change boundary set).
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1468,22 +1496,11 @@ public void testSyncTableMapperFailsWithNonExistentTable() throws Exception {
 
     // Try to run sync tool on a NON-EXISTENT table
     String nonExistentTable = "NON_EXISTENT_TABLE_" + System.currentTimeMillis();
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
     String[] args = new String[] { "--table-name", nonExistentTable, "--target-cluster",
       targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
 
-    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
-    tool.setConf(conf);
-
-    try {
-      int exitCode = tool.run(args);
-      assertTrue(
-        String.format("Table %s does not exist, mapper setup should fail", nonExistentTable),
-        exitCode != 0);
-    } catch (Exception ex) {
-      fail("Tool should return non-zero exit code on failure instead of throwing exception: "
-        + ex.getMessage());
-    }
+    assertSyncToolFails(args,
+      String.format("Table %s does not exist, mapper setup should fail", nonExistentTable));
   }
 
   @Test
@@ -1493,23 +1510,12 @@ public void testSyncTableMapperFailsWithInvalidTargetCluster() throws Exception
 
     // Try to run sync tool with INVALID target cluster ZK quorum.
     String invalidTargetZk = "invalid-zk-host:2181:/hbase";
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
     String[] args =
       new String[] { "--table-name", uniqueTableName, "--target-cluster", invalidTargetZk,
         "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
 
-    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
-    tool.setConf(conf);
-
-    try {
-      int exitCode = tool.run(args);
-      assertTrue(
-        String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk),
-        exitCode != 0);
-    } catch (Exception ex) {
-      fail("Tool should return non-zero exit code on failure instead of throwing exception: "
-        + ex.getMessage());
-    }
+    assertSyncToolFails(args,
+      String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk));
   }
 
   @Test
@@ -1523,22 +1529,12 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception {
 
     // Don't create table on target - this will cause mapper map() to fail
     // when trying to scan the non-existent target table
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
     String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
       targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
 
-    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
-    tool.setConf(conf);
-
-    try {
-      int exitCode = tool.run(args);
-      assertTrue(String.format(
-        "Table %s does not exist on target cluster, mapper map() should fail during target scan",
-        uniqueTableName), exitCode != 0);
-    } catch (Exception ex) {
-      fail("Tool should return non-zero exit code on failure instead of throwing exception: "
-        + ex.getMessage());
-    }
+    assertSyncToolFails(args, String.format(
+      "Table %s does not exist on target cluster, mapper map() should fail during target scan",
+      uniqueTableName));
   }
 
   @Test
@@ -1551,47 +1547,21 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // First run: Sync should succeed and create checkpoint entries for all mappers
-    Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    SyncCountersResult counters1 = getSyncCounters(job1);
+    // First run + 75% deletion preamble (shared with other partial-rerun tests)
+    PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
+    SyncCountersResult counters1 = setup.firstRunCounters;
 
     // Validate first run succeeded
-    assertTrue("First run should succeed", job1.isSuccessful());
+    assertTrue("First run should succeed", setup.firstRunJob.isSuccessful());
     validateSyncCounters(counters1, 10, 10, 10, 0);
 
-    // Query checkpoint table to get all mapper entries
-    List<PhoenixSyncTableCheckpointOutputRow> allCheckpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-
-    // Separate mapper and chunk entries using utility method
-    SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(allCheckpointEntries);
-    List<PhoenixSyncTableCheckpointOutputRow> mapperEntries = separated.mappers;
-    List<PhoenixSyncTableCheckpointOutputRow> allChunks = separated.chunks;
-
-    assertFalse("Should have at least 1 mapper entries after first run", mapperEntries.isEmpty());
-
-    // Select 3/4th of chunks from each mapper to delete (simulating partial rerun)
-    // We repro the partial run via deleting some entries from checkpoint table and re-running the
-    // tool.
-    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete =
-      selectChunksToDeleteFromMappers(sourceConnection, uniqueTableName, targetZkQuorum, fromTime,
-        toTime, null, mapperEntries, 0.75);
-
-    // Delete all mappers and selected chunks using utility method
-    deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum, null, mapperEntries,
-      chunksToDelete);
-
-    // Verify mapper entries were deleted
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterDelete =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     SeparatedCheckpointEntries separatedAfterDelete =
-      separateMapperAndChunkEntries(checkpointEntriesAfterDelete);
+      separateMapperAndChunkEntries(setup.entriesAfterDelete);
 
     assertEquals("Should have 0 mapper entries after deleting all mappers", 0,
       separatedAfterDelete.mappers.size());
     assertEquals("Should have remaining chunk entries after deletion",
-      allChunks.size() - chunksToDelete.size(), separatedAfterDelete.chunks.size());
+      setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterDelete.chunks.size());
 
     // Drop target table to cause mapper failures during second run.
     // Use HBase Admin directly because Phoenix DROP TABLE IF EXISTS via targetConnection
@@ -1602,37 +1572,637 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
       targetAdmin.disableTable(hbaseTableName);
       targetAdmin.deleteTable(hbaseTableName);
     }
-    LOGGER.info("Dropped target table to cause mapper failures");
+    LOGGER.info("Dropped target table to cause mapper failures");
+
+    // Second run: Job should fail (exit code != 0) because target table is missing
+    String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
+      targetZkQuorum, "--run-foreground", "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime) };
+
+    assertSyncToolFails(args,
+      "Second run should fail with non-zero exit code due to missing target table");
+
+    // Remaining chunk entries that we dint delete should still persist despite job failure
+    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterFailedRun =
+      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    SeparatedCheckpointEntries separatedAfterFailedRun =
+      separateMapperAndChunkEntries(checkpointEntriesAfterFailedRun);
+
+    // After the failed run:
+    // - No mapper entries should exist (we deleted them all, and the job failed before creating new
+    // ones)
+    // - Only the remaining chunk entries (1/4th) should persist
+    assertEquals("Should have 0 mapper entries after failed run", 0,
+      separatedAfterFailedRun.mappers.size());
+    assertEquals("Remaining chunk entries should persist after failed run",
+      setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterFailedRun.chunks.size());
+  }
+
+  /**
+   * P1 (hidden-version unwinding): Verifies the most subtle correctness path in the repairer —
+   * tombstoneTargetCell case 3 from {@code PhoenixSyncTableChunkRepairer.tombstoneTargetCell}.
+   *
+   * <p>Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code
+   * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and
+   * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is
+   * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above
+   * source's mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1.
+   *
+   * <p>Without this test, a regression that "fixes" only the visible cell (case 2) would leave
+   * target reading "bob" after a successful-looking repair pass.
+   */
+  @Test
+  public void testRepairUnwindsHiddenTargetVersions() throws Exception {
+    final int rowId = 5;
+    // Two clusters, no replication — we plant cells deterministically on each side.
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7");
+
+    long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long targetT1 = base + 2L;
+    final long targetT2 = base + 3L;
+
+    byte[] rowKey = integerRowKey(rowId);
+    String family = "0"; // COLUMN_ENCODED_BYTES=NONE → family is "0"
+    String qualifier = "NAME";
+
+    // Source: single Put(NAME, "alice", T=100). Empty-key cell is needed so the row is
+    // visible to Phoenix scans (and thus to the verifier). Use an SCN connection for that.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+
+    // Target: insert via SCN at T1 then again at T2 to leave two NAME versions; with VERSIONS=2
+    // both versions are retained. Visible read is "carol" (T2); "bob" (T1) is one-version-hidden.
+    try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) {
+      scnTgtT1.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
+      scnTgtT1.commit();
+    }
+    try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) {
+      scnTgtT2.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
+      scnTgtT2.commit();
+    }
+
+    // Sanity: target's visible NAME is "carol" before repair.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("Pre-repair target visible NAME should be carol", "carol", rs.getString(1));
+      }
+    }
+
+    // Run dry-run + repair sharing the same time window with --read-all-versions so the
+    // verifier and repairer both see the hidden version.
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      "--read-all-versions");
+
+    assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    Counters repairCounters = result.repairJob.getCounters();
+    // Two NAME versions ("bob"@T1 + "carol"@T2) and two empty-key versions on target — all sit
+    // at timestamps that don't match source's single sourceTs, so each gets counted as
+    // either "different" or "extra" depending on the diff branch. We require at least 2 extras
+    // (the two extra NAME versions vs source's single mirror) — the exact split between
+    // CELLS_EXTRA and CELLS_DIFFERENT depends on per-qualifier matching. Post-repair raw scan
+    // assertions below pin the structural outcome.
+    assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions",
+      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2);
+
+    // Post-repair, Phoenix's standard read on target must see source's "alice", NOT "bob".
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("Post-repair target NAME must be alice (hidden version unwound)", "alice",
+          rs.getString(1));
+      }
+    }
+
+    // Raw scan: target should have two Delete markers at T2 and T1 plus source's mirror Put@T0.
+    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
+      Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true);
+      scan.readAllVersions();
+      int nameDeletes = 0;
+      int namePutAtSourceTs = 0;
+      try (ResultScanner sc = targetHTable.getScanner(scan)) {
+        for (Result r; (r = sc.next()) != null;) {
+          for (Cell c : r.rawCells()) {
+            if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family))
+              && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) {
+              if (CellUtil.isDelete(c)) {
+                nameDeletes++;
+              } else if (c.getTimestamp() == sourceTs) {
+                namePutAtSourceTs++;
+              }
+            }
+          }
+        }
+      }
+      assertEquals("Two delete markers (one for each target NAME version) expected", 2,
+        nameDeletes);
+      assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
+    }
+  }
+
+  /**
+   * P2 (partial-mirror shadow): Verifies the {@code RowMirrorStatus.PARTIALLY_MIRRORED} branch
+   * indirectly via {@code generateMutationForDiffCells} — both rows exist; one source cell is
+   * shadowed by a target tombstone, sibling cells mirror successfully. {@code anyCellUnrepairable}
+   * propagates up to {@code drift.rowsCannotRepair++} while no cell counter increments for the
+   * shadowed cell (mirror returned false, so nothing was written and nothing counted).
+   *
+   * <p>Setup: row K exists on both sides via a matching {@code NAME_VALUE} cell. Target has a
+   * pre-existing {@code DeleteColumn(NAME, T=300)} shadowing any future {@code NAME} Put at
+   * {@code ts <= 300}. Source's {@code NAME, "alice", T=200} would land on disk but stay
+   * invisible — repair detects this upfront via {@code wouldShadow} and skips the doomed write.
+   */
+  @Test
+  public void testRepairPartialShadowWithinRow() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long shadowTombstoneTs = base + 2L;
+
+    // Source: row K with NAME="alice" and NAME_VALUE=99, all at sourceTs.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName
+        + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+      scnSrc.commit();
+    }
+
+    // Target: row K with only NAME_VALUE=99 at sourceTs (matches source's NAME_VALUE).
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), sourceTs)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnTgt.commit();
+    }
+    // Plant a DeleteColumn tombstone on target's NAME at shadowTombstoneTs, which shadows any
+    // source mirror at ts <= shadowTombstoneTs.
+    writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
+      shadowTombstoneTs);
+
+    // Spin until wall-clock advances past the highest cell timestamp so --to-time
+    // (which defaults to currentTimeMillis()) covers our planted cells.
+    while (System.currentTimeMillis() <= shadowTombstoneTs) {
+      // spin
+    }
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      "--raw-scan");
+
+    assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
+    assertTrue("Repair pass should succeed (shadowing is correctness-only, not a job error)",
+      result.repairJob.isSuccessful());
+
+    Counters repairCounters = result.repairJob.getCounters();
+    // Source's NAME mirror is suppressed by the shadow → no cell counter ticks; row is unrepairable.
+    assertRepairCellCounters(repairCounters, 0, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
+      repairCounters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+
+    // Post-repair, target's read view of NAME for row K is still null (DeleteColumn at T=300
+    // covers everything <= T=300 — including any source mirror we *might* have written). The
+    // assertion validates the repair refused to write the doomed Put.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertNull("NAME should still be null on target — shadow was respected",
+          rs.getString(1));
+      }
+    }
+  }
+
+  /**
+   * P2 (cell-missing branch): same row exists on both sides, source has an extra column the
+   * target lacks. {@code generateMutationForDiffCells} should mirror it through the
+   * {@code cellMissing++} branch (source-only cell, no shadow on target).
+   */
+  @Test
+  public void testRepairCellMissingOnTarget() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long ts = base + 1L;
+
+    // Source: row K with NAME and NAME_VALUE.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
+      scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName
+        + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+      scnSrc.commit();
+    }
+    // Target: row K with only NAME_VALUE matching source. NAME is absent.
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnTgt.commit();
+    }
+
+    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
+    // covers the planted cells.
+    while (System.currentTimeMillis() <= ts) {
+      // spin
+    }
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(System.currentTimeMillis()));
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    assertRepairCellCounters(result.repairJob.getCounters(), 1, 0, 0, 0);
+
+    // Post-repair: target's NAME should equal source's "alice".
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("alice", rs.getString(1));
+      }
+    }
+  }
+
+  /**
+   * P2 (cell-extra branch): same row exists on both sides, target has an extra column source
+   * lacks. {@code generateMutationForDiffCells} should tombstone it via the
+   * {@code cellExtra++} branch ({@code tombstoneTargetCell} with {@code sourceMaxTs == null}
+   * → {@code Delete.addColumns}).
+   */
+  @Test
+  public void testRepairCellExtraOnTarget() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long ts = base + 1L;
+
+    // Source: row K with only NAME_VALUE.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnSrc.commit();
+    }
+    // Target: row K with same NAME_VALUE plus an extra raw NAME cell at the same ts.
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnTgt.commit();
+    }
+    writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts,
+      Bytes.toBytes("bob"));
+
+    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
+    // covers the planted cells.
+    while (System.currentTimeMillis() <= ts) {
+      // spin
+    }
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(System.currentTimeMillis()));
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    assertRepairCellCounters(result.repairJob.getCounters(), 0, 1, 0, 0);
+
+    // Post-repair: target's NAME should be tombstoned and read as null.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertNull(rs.getString(1));
+      }
+    }
+  }
+
+  /**
+   * P2 (cell-different branch): same row, same {@code (cf, q, ts)} coords, different value.
+   * {@code generateMutationForDiffCells} should hit the {@code cellDifferent++} branch via the
+   * {@code !matchingValue} check at the head of the loop.
+   */
+  @Test
+  public void testRepairCellDifferentValue() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long ts = base + 1L;
+
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
+      scnTgt.commit();
+    }
+
+    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
+    // covers the planted cells.
+    while (System.currentTimeMillis() <= ts) {
+      // spin
+    }
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(System.currentTimeMillis()));
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    assertRepairCellCounters(result.repairJob.getCounters(), 0, 0, 1, 0);
+
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("alice", rs.getString(1));
+      }
+    }
+  }
+
+  /**
+   * P6 (asymmetric load-target time-range): Source has Put at T=200 inside the user's window;
+   * target has a {@code DeleteColumn} planted at T=600 — strictly above {@code --to-time T=500}.
+   * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so
+   * the diff routes to {@code mirrorWholeRow} in
+   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside,
+   * {@code loadTargetRowRecord} deliberately uses {@code (fromTime, MAX_VALUE)} (line 487) — so it
+   * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200
+   * (DeleteColumn covers ts &lt;= T=600). Result: source's mirror is suppressed, the row is
+   * flagged unrepairable.
+   */
+  @Test
+  public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long toTime = base + 2L;
+    final long tombstoneTs = base + 3L;
+
+    // Source has the row inside the diff window.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+    // Target has a tombstone strictly above --to-time. Diff scan won't see it; loadTargetRowRecord
+    // still will.
+    writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
+      tombstoneTs);
+
+    // Phoenix requires --to-time <= currentTimeMillis() at tool-run. Spin until wall-clock
+    // moves past tombstoneTs (the highest cell ts in this test) so toTime is unambiguously in
+    // the past.
+    while (System.currentTimeMillis() <= tombstoneTs) {
+      // spin
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
+    assertTrue("Repair should succeed (shadowing is correctness-only)",
+      result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // Phoenix UPSERT plants NAME *and* the empty-key cell ("_0"). DeleteColumn shadows only NAME —
+    // "_0" mirrors through (rowsMissing++), and the row is unrepairable because NAME was suppressed.
+    assertRepairRowCounters(c, 1, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
+      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+
+    // Post-repair: target's NAME should still read as null. The shadow was respected, so no Put
+    // for NAME landed (only the empty-key cell, which gives the row visible existence with NAME
+    // covered by the DeleteColumn tombstone above it).
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue("Row exists on target via mirrored empty-key cell", rs.next());
+        assertNull("NAME should still be null — DeleteColumn shadow respected", rs.getString(1));
+      }
+    }
+  }
+
+  /**
+   * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny
+   * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple
+   * times. Validates that every row converges despite the mid-flush boundary — i.e., no Put
+   * gets dropped because pendingPuts/pendingDeletes were drained mid-iteration.
+   */
+  @Test
+  public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
+    // No replication — seed source manually so target legitimately lacks the rows.
+    createRepairTestTableOnBothClusters(uniqueTableName, 1, null);
+
+    // Introduce extra rows on source that target lacks. Each row → at least 2 cells (NAME and the
+    // empty-key cell), so a batch size of 2 forces a flush every row, exercising the mid-stream
+    // flush in generateMutationForDiffRows.
+    int[] sourceOnlyIds = new int[] { 100, 101, 102, 103, 104, 105, 106, 107 };
+    String[] sourceOnlyNames = new String[sourceOnlyIds.length];
+    for (int i = 0; i < sourceOnlyIds.length; i++) {
+      sourceOnlyNames[i] = "extra_" + sourceOnlyIds[i];
+    }
+    upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnlyIds, sourceOnlyNames);
+    sourceConnection.commit();
+
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
+    Configuration conf = sourceClusterConfWithRepairBatchSize(2);
+
+    // Stage 1: dry-run.
+    Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run",
+      "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Dry-run should succeed", dryRunJob.isSuccessful());
+    SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
+    assertTrue("Dry-run should detect mismatched chunks", dryRunCounters.chunksMismatched >= 1);
+
+    // Stage 2: repair with the same small batch size.
+    Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Repair should succeed despite small batch size", repairJob.isSuccessful());
+
+    Counters repairCounters = repairJob.getCounters();
+    assertTrue("All source-only rows should be marked missing on target",
+      repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue()
+          >= sourceOnlyIds.length);
+    assertEquals("No row should be flagged unrepairable", 0,
+      repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+
+    // Verify each source-only row landed on target with the right NAME.
+    for (int i = 0; i < sourceOnlyIds.length; i++) {
+      try (PreparedStatement ps = targetConnection
+        .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+        ps.setInt(1, sourceOnlyIds[i]);
+        try (ResultSet rs = ps.executeQuery()) {
+          assertTrue("Row " + sourceOnlyIds[i] + " should exist on target after repair",
+            rs.next());
+          assertEquals("Row " + sourceOnlyIds[i] + " NAME should match source",
+            sourceOnlyNames[i], rs.getString(1));
+        }
+      }
+    }
+  }
+
+  /**
+   * P8 ({@code --raw-scan} + {@code --read-all-versions} interplay): a multi-version row on
+   * source that includes an in-window {@code DeleteColumn} between two Puts. Target lags with
+   * only the older Put. Repair must mirror the missing tombstone (preserving its subtype via
+   * {@code mirrorSourceCell} in
+   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} routing Delete cells
+   * through {@code Delete#add}) and the missing newer Put.
+   */
+  @Test
+  public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 3, "3, 7");
+
+    final long fromTime = 0L;
+    final long t1 = base + 1L;
+    final long t2 = base + 2L;
+    final long t3 = base + 3L;
+
+    // Source: Put@T1 → DeleteColumn@T2 → Put@T3.
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), t1)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
+      scnSrc.commit();
+    }
+    writeRawDeleteColumn(sourceConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", t2);
+    try (Connection scnSrc2 = openConnectionAtScn(CLUSTERS.getZkUrl1(), t3)) {
+      scnSrc2.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')");
+      scnSrc2.commit();
+    }
+
+    // Target: only the oldest Put@T1.
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), t1)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
+      scnTgt.commit();
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      "--raw-scan", "--read-all-versions");
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // Source has NAME@T1 Put, NAME@T2 DeleteColumn, NAME@T3 Put + empty-key@T1 and @T3. Target has
+    // only NAME@T1 + empty-key@T1, so 3 cells missing: NAME-tombstone@T2, NAME-Put@T3, empty-key@T3.
+    assertRepairCellCounters(c, 3, 0, 0, 0);
+
+    // Post-repair raw scan on target should show: Put@T3, DeleteColumn@T2, Put@T1 for the NAME
+    // qualifier (rawCells reverse-ts ordered).
+    int observedPuts = 0;
+    int observedDeleteColumns = 0;
+    long observedNewestPutTs = -1L;
+    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
+      Scan scan = new Scan();
+      scan.withStartRow(integerRowKey(rowId), true);
+      scan.withStopRow(integerRowKey(rowId), true);
+      scan.setRaw(true);
+      scan.readAllVersions();
+      try (ResultScanner scanner = targetHTable.getScanner(scan)) {
+        Result r = scanner.next();
+        assertNotNull("Target row should exist", r);
+        for (Cell cell : r.rawCells()) {
+          if (Bytes.equals(CellUtil.cloneQualifier(cell), Bytes.toBytes("NAME"))) {
+            if (CellUtil.isDelete(cell)) {
+              observedDeleteColumns++;
+            } else {
+              observedPuts++;
+              observedNewestPutTs = Math.max(observedNewestPutTs, cell.getTimestamp());
+            }
+          }
+        }
+      }
+    }
+    assertEquals("Target should have both NAME Puts after repair", 2, observedPuts);
+    assertEquals("Target should have the mirrored DeleteColumn after repair", 1,
+      observedDeleteColumns);
+    assertEquals("Newest mirrored Put should sit at T3", t3, observedNewestPutTs);
+
+    // Read-side: NAME under default visibility should now be null (T3 Put → T2 DeleteColumn covers
+    // T1; visible state is "deleted but tombstone caps NAME"). Phoenix sees no value.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        // The newest Put is T3 — reads see "v3" since T3 > tombstone T2.
+        assertTrue(rs.next());
+        assertEquals("v3", rs.getString(1));
+      }
+    }
+  }
 
-    // Second run: Job should fail (exit code != 0) because target table is missing
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
-    String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
-      targetZkQuorum, "--run-foreground", "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime) };
+  /**
+   * P9 (mixed Put+Delete batch under small {@code repairBatchSize}): many missing source rows AND
+   * many extra target rows in the same chunk. With {@code repairBatchSize=4}, most flushes
+   * straddle a Put/Delete boundary and exercise {@code flushRepairMutations} in
+   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} on its mixed
+   * {@code Table#batch} path. Validates that no mutation gets dropped at the batch boundary.
+   */
+  @Test
+  public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
+    // No replication — seed each side independently so source-only and target-only rows truly
+    // diverge.
+    createRepairTestTableOnBothClusters(uniqueTableName, 1, null);
+
+    // Add 5 source-only rows (will need Puts) and 5 target-only rows (will need Deletes), all
+    // inside the same chunk so they queue together in a single mapper's pendingPuts/pendingDeletes
+    // and get flushed as mixed batches.
+    int[] sourceOnly = new int[] { 200, 201, 202, 203, 204 };
+    String[] sourceOnlyNames = new String[] { "s200", "s201", "s202", "s203", "s204" };
+    upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnly, sourceOnlyNames);
+    sourceConnection.commit();
 
-    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
-    tool.setConf(conf);
-    int exitCode = tool.run(args);
+    int[] targetOnly = new int[] { 300, 301, 302, 303, 304 };
+    String[] targetOnlyNames = new String[] { "t300", "t301", "t302", "t303", "t304" };
+    upsertRowsOnTarget(targetConnection, uniqueTableName, targetOnly, targetOnlyNames);
+    targetConnection.commit();
 
-    // Job should fail
-    assertTrue("Second run should fail with non-zero exit code due to missing target table",
-      exitCode != 0);
-    LOGGER.info("Second run failed as expected with exit code: {}", exitCode);
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
 
-    // Remaining chunk entries that we dint delete should still persist despite job failure
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntriesAfterFailedRun =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    SeparatedCheckpointEntries separatedAfterFailedRun =
-      separateMapperAndChunkEntries(checkpointEntriesAfterFailedRun);
+    Configuration conf = sourceClusterConfWithRepairBatchSize(4);
 
-    // After the failed run:
-    // - No mapper entries should exist (we deleted them all, and the job failed before creating new
-    // ones)
-    // - Only the remaining chunk entries (1/4th) should persist
-    assertEquals("Should have 0 mapper entries after failed run", 0,
-      separatedAfterFailedRun.mappers.size());
-    assertEquals("Remaining chunk entries should persist after failed run",
-      allChunks.size() - chunksToDelete.size(), separatedAfterFailedRun.chunks.size());
+    Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run",
+      "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Dry-run should succeed", dryRunJob.isSuccessful());
+
+    SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
+    assertTrue("Dry-run should detect all source-only rows as missing",
+      dryRunCounters.rowsMissingOnTarget >= sourceOnly.length);
+    assertTrue("Dry-run should detect all target-only rows as extra",
+      dryRunCounters.rowsExtraOnTarget >= targetOnly.length);
+
+    Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    assertTrue("Repair should succeed", repairJob.isSuccessful());
+
+    Counters c = repairJob.getCounters();
+    assertTrue("All source-only rows should be marked missing",
+      c.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() >= sourceOnly.length);
+    assertTrue("All target-only rows should be marked extra",
+      c.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() >= targetOnly.length);
+    assertEquals("No chunk should fail repair", 0,
+      c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue());
+
+    // Convergence pass: cleanup checkpoint and re-run a stable repair to assert no chunks are
+    // mismatched after the mixed-batch flushes.
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   @Test
@@ -1694,7 +2264,13 @@ public void testSyncTableWithDeleteAndCompactionOnSource() throws Exception {
 
     assertTrue("Third run with raw scan after compaction should succeed", job3.isSuccessful());
     validateSyncCounters(counters3, 10, 11, 9, 1);
-    validateMapperCounters(counters3, 3, 1);
+    // Repair runs inline (non-dry-run). Under --raw-scan target has residual cells for row 100
+    // that source no longer has after compaction; repair tombstones the residual target cells,
+    // so the chunk's mapper rolls up to REPAIRED.
+    validateMapperCountersRepair(counters3, 3, 1, 0, 0);
+
+    // The standard Phoenix view (no raw-scan) on both clusters remains identical.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   @Test
@@ -1734,7 +2310,9 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception {
 
     assertTrue("Second run with raw scan should succeed", job2.isSuccessful());
     validateSyncCounters(counters2, 10, 11, 9, 1);
-    validateMapperCounters(counters2, 3, 1);
+    // Non-dry-run: target has a delete marker that source doesn't; repair tombstones at source
+    // ts cover the residual under raw scan, so the mapper rolls up to REPAIRED.
+    validateMapperCountersRepair(counters2, 3, 1, 0, 0);
 
     flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName);
 
@@ -1745,6 +2323,11 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception {
     assertTrue("Third run with raw scan after compaction should succeed", job3.isSuccessful());
     validateSyncCounters(counters3, 10, 10, 10, 0);
     validateMapperCounters(counters3, 4, 0);
+
+    // After major compaction tombstones are gone; the third raw-scan pass is clean and the
+    // standard Phoenix view matches.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1785,7 +2368,12 @@ public void testSyncTableWithMultipleVersionAndCompactionOnSource() throws Excep
     assertTrue("Second run with all versions should succeed", job3.isSuccessful());
     // Target retains old version of row 5 (VERSIONS=2), source does not after compaction.
     validateSyncCounters(counters3, 10, 10, 9, 1);
-    validateMapperCounters(counters3, 3, 1);
+    // Non-dry-run: target has an extra historical version that source no longer has; repair
+    // tombstones the residual cells at the appropriate ts, so the mapper rolls up to REPAIRED.
+    validateMapperCountersRepair(counters3, 3, 1, 0, 0);
+
+    // The standard Phoenix view (latest version only) on both clusters is identical.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   @Test
@@ -1807,33 +2395,36 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep
       new String[] { "EXTRA_ROW" });
     targetConnection.commit();
 
-    // Run sync with --read-all-versions: target has extra old version, should mismatch
+    // Run sync with --read-all-versions: target diverged with EXTRA_ROW; source is unchanged.
+    // Non-dry-run: repair pushes source's row 5 onto target. Status rolls up to REPAIRED (the
+    // diverging cell is a value mismatch with both sides live, not a tombstone shadowing).
     Job job = runSyncTool(uniqueTableName, "--read-all-versions");
     SyncCountersResult counters1 = getSyncCounters(job);
 
     assertTrue("First run with all versions should succeed", job.isSuccessful());
     validateSyncCounters(counters1, 10, 10, 9, 1);
-    validateMapperCounters(counters1, 3, 1);
+    validateMapperCountersRepair(counters1, 3, 1, 0, 0);
 
     flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName);
 
-    // Run sync without reading all versions (default behavior): only latest version compared,
-    // should still mismatch
+    // Subsequent runs see target already converged to source from the first repair pass.
     Job job1 = runSyncTool(uniqueTableName);
     SyncCountersResult counters = getSyncCounters(job1);
 
     assertTrue("Second run without reading all versions should succeed", job1.isSuccessful());
-    validateSyncCounters(counters, 10, 10, 9, 1);
-    validateMapperCounters(counters, 3, 1);
+    validateSyncCounters(counters, 10, 10, 10, 0);
+    validateMapperCounters(counters, 4, 0);
 
-    // Run sync with --read-all-versions, target has extra old version even after compaction, should
-    // mismatch
     Job job3 = runSyncTool(uniqueTableName, "--read-all-versions");
     SyncCountersResult counters3 = getSyncCounters(job3);
 
-    assertTrue("Second run with all versions should succeed", job3.isSuccessful());
-    validateSyncCounters(counters3, 10, 10, 9, 1);
-    validateMapperCounters(counters3, 3, 1);
+    assertTrue("Third run with all versions should succeed", job3.isSuccessful());
+    validateSyncCounters(counters3, 10, 10, 10, 0);
+    validateMapperCounters(counters3, 4, 0);
+
+    // After repair the standard Phoenix view matches.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
@@ -1843,20 +2434,30 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception {
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
     // Enable split coalescing via command-line parameter, all regions will be coalesced into one
-    // mapper
-    Job job = runSyncTool(uniqueTableName, "--coalesce-split");
-    SyncCountersResult counters = getSyncCounters(job);
+    // mapper. Use a pinned window so the dry-run and repair share the same checkpoint PK.
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
+    Job dryRunJob = runSyncTool(uniqueTableName, "--coalesce-split", "--dry-run", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    SyncCountersResult counters = getSyncCounters(dryRunJob);
 
     assertEquals("Should have only 1 Mapper task created with coalescing", 1, counters.taskCreated);
 
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
 
-    // Verify checkpoint entries are created correctly
+    // Verify checkpoint entries from the dry-run pass are created correctly.
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
     validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 7, 3, 4,
       3, null);
+
+    // Repair pass over the same window: MISMATCHED rows transition to REPAIRED in place.
+    runSyncTool(uniqueTableName, "--coalesce-split", "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   /**
@@ -1869,7 +2470,7 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception {
    * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is
    * older than {@code phoenix.max.lookback.age.seconds}.
    *
-   * <p>Fix: {@link PhoenixSyncTableInputFormat#getQueryPlan} overrides the parent to strip {@code
+   * <p>Fix: {@link PhoenixSyncTableInputFormat} overrides the parent to strip {@code
    * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code
    * verifySCN()} returns early (SCN == null), so no exception is thrown.
    *
@@ -1892,7 +2493,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc
     // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from
     // conn.getQueryServices().getConfiguration(), which is the client-side MR conf.
     long maxLookbackAgeSeconds = 5;
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+    Configuration conf = sourceClusterConf();
     conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY,
       maxLookbackAgeSeconds);
 
@@ -1920,6 +2521,11 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc
     SyncCountersResult counters = getSyncCounters(job);
     validateSyncCounters(counters, 10, 10, 10, 0);
     validateMapperCounters(counters, 4, 0);
+
+    // Run was non-dry-run with no drift; repair flow is a no-op and target should match source
+    // even though toTime is older than max lookback age.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   /**
@@ -2003,6 +2609,85 @@ private static class CheckpointAggregateCounters {
       chunksMismatched);
   }
 
+  /**
+   * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of
+   * the checkpoint table before and after deletion, and the aggregate counters re-derived from
+   * the chunks that survived the deletion. Tests use these to assert the
+   * {@code remaining + rerun == first-run} invariant after re-running the sync tool.
+   */
+  private static class PartialRerunSetup {
+    final Job firstRunJob;
+    final SyncCountersResult firstRunCounters;
+    final List<PhoenixSyncTableCheckpointOutputRow> mappers;
+    final List<PhoenixSyncTableCheckpointOutputRow> chunks;
+    final List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete;
+    final int deletedCount;
+    final List<PhoenixSyncTableCheckpointOutputRow> entriesAfterDelete;
+    final CheckpointAggregateCounters remainingCounters;
+
+    PartialRerunSetup(Job firstRunJob, SyncCountersResult firstRunCounters,
+      List<PhoenixSyncTableCheckpointOutputRow> mappers,
+      List<PhoenixSyncTableCheckpointOutputRow> chunks,
+      List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete, int deletedCount,
+      List<PhoenixSyncTableCheckpointOutputRow> entriesAfterDelete,
+      CheckpointAggregateCounters remainingCounters) {
+      this.firstRunJob = firstRunJob;
+      this.firstRunCounters = firstRunCounters;
+      this.mappers = mappers;
+      this.chunks = chunks;
+      this.chunksToDelete = chunksToDelete;
+      this.deletedCount = deletedCount;
+      this.entriesAfterDelete = entriesAfterDelete;
+      this.remainingCounters = remainingCounters;
+    }
+  }
+
+  /**
+   * Runs the partial-rerun preamble shared by all checkpoint-resume tests:
+   * <ol>
+   *   <li>Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime},
+   *       {@code toTime}] window.</li>
+   *   <li>Query the checkpoint table and assert non-empty mapper/chunk results.</li>
+   *   <li>Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all
+   *       current tests) and delete them along with every mapper row.</li>
+   *   <li>Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can
+   *       assert the {@code remaining + rerun == first-run} row-count invariant.</li>
+   * </ol>
+   * Each test then performs its own divergent action (extra splits, merges, smaller chunk size,
+   * dropping the target table) on the returned state.
+   */
+  private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, long toTime,
+    int chunkSize, double deletionFraction) throws Exception {
+    Job firstRunJob = runSyncToolWithChunkSize(tableName, chunkSize, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    SyncCountersResult firstRunCounters = getSyncCounters(firstRunJob);
+
+    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
+      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, null);
+    assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty());
+
+    SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries);
+    assertFalse("Should have mapper region entries", separated.mappers.isEmpty());
+    assertFalse("Should have chunk entries", separated.chunks.isEmpty());
+
+    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete = selectChunksToDeleteFromMappers(
+      sourceConnection, tableName, targetZkQuorum, fromTime, toTime, null, separated.mappers,
+      deletionFraction);
+
+    int deletedCount = deleteCheckpointEntries(sourceConnection, tableName, targetZkQuorum, null,
+      separated.mappers, chunksToDelete);
+    assertEquals("Should have deleted all mapper and selected chunk entries",
+      separated.mappers.size() + chunksToDelete.size(), deletedCount);
+
+    List<PhoenixSyncTableCheckpointOutputRow> entriesAfterDelete =
+      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, null);
+    CheckpointAggregateCounters remainingCounters =
+      calculateAggregateCountersFromCheckpoint(entriesAfterDelete);
+
+    return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers,
+      separated.chunks, chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters);
+  }
+
   private List<PhoenixSyncTableCheckpointOutputRow> findChunksBelongingToMapper(Connection conn,
     String tableName, String targetCluster, long fromTime, long toTime, String tenantId,
     PhoenixSyncTableCheckpointOutputRow mapper) throws SQLException {
@@ -2672,6 +3357,111 @@ private Job runSyncToolWithZkQuorum(String tableName, String zkQuorum, String...
     }
   }
 
+  /**
+   * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration}. Tests that
+   * need to override individual settings (paging, timeouts, etc.) should use this helper rather
+   * than constructing the Configuration inline so that any future change to the base config flows
+   * through one place.
+   */
+  private static Configuration sourceClusterConf() {
+    return new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
+  }
+
+  /**
+   * Builds a {@link PhoenixSyncTableTool}, runs it with the supplied args, and asserts the run
+   * surfaces failure as a non-zero exit code rather than a thrown exception. Used by the
+   * failure-mode tests that previously hand-rolled this same try/run/assertTrue/catch-fail block.
+   */
+  private void assertSyncToolFails(String[] args, String failureContext) {
+    PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
+    tool.setConf(sourceClusterConf());
+    try {
+      int exitCode = tool.run(args);
+      assertTrue(failureContext, exitCode != 0);
+    } catch (Exception ex) {
+      fail("Tool should return non-zero exit code on failure instead of throwing exception: "
+        + ex.getMessage());
+    }
+  }
+
+  /**
+   * Upserts a "MODIFIED_NAME_<id>" row on target for each id in {@code mismatchIds}. Replaces the
+   * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new
+   * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of {@link
+   * #upsertRowsOnTarget}.
+   */
+  private void introduceMismatchesByIds(String tableName, List<Integer> mismatchIds)
+    throws SQLException {
+    int[] ids = new int[mismatchIds.size()];
+    String[] names = new String[mismatchIds.size()];
+    for (int i = 0; i < mismatchIds.size(); i++) {
+      ids[i] = mismatchIds.get(i);
+      names[i] = "MODIFIED_NAME_" + mismatchIds.get(i);
+    }
+    upsertRowsOnTarget(targetConnection, tableName, ids, names);
+  }
+
+  /**
+   * Starts two daemon-style threads that perform region mutations (splits or merges) on the
+   * source and target clusters and returns a {@link Runnable} the caller invokes to join them
+   * with a 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an
+   * unexpected exception is logged rather than killing the JVM thread silently.
+   *
+   * <p>Usage:
+   * <pre>
+   *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
+   *   ... run main sync work ...
+   *   joiner.run();
+   * </pre>
+   *
+   * <p>Caller is responsible for invoking the returned joiner; tests should always join before
+   * asserting on cluster state, otherwise late-arriving region mutations can race the assertions.
+   */
+  private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork,
+    String label) {
+    Thread sourceThread = new Thread(() -> {
+      try {
+        sourceWork.run();
+      } catch (Exception e) {
+        LOGGER.error("Error during source {}", label, e);
+      }
+    });
+    Thread targetThread = new Thread(() -> {
+      try {
+        targetWork.run();
+      } catch (Exception e) {
+        LOGGER.error("Error during target {}", label, e);
+      }
+    });
+    sourceThread.start();
+    targetThread.start();
+    return () -> {
+      try {
+        sourceThread.join(30000);
+        targetThread.join(30000);
+      } catch (InterruptedException ie) {
+        Thread.currentThread().interrupt();
+        throw new RuntimeException("Interrupted while joining " + label + " threads", ie);
+      }
+    };
+  }
+
+  /**
+   * Runs a dry-run sync with the given chunk size to establish a baseline chunk count
+   * (CHUNKS_VERIFIED + CHUNKS_MISMATCHED), then deletes the baseline checkpoint rows so a
+   * subsequent run starts fresh. Used by the paging-timeout tests, which assert that aggressive
+   * paging produces strictly more chunks than the no-paging baseline.
+   */
+  private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception {
+    Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time",
+      "0", "--to-time", String.valueOf(System.currentTimeMillis()));
+    long chunkCount =
+      baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue()
+        + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
+    cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null);
+    return chunkCount;
+  }
+
   /**
    * Runs the PhoenixSyncTableTool with 1KB chunk size for testing multiple rows per chunk. Returns
    * the completed Job for counter verification.
@@ -2687,8 +3477,164 @@ private Job runSyncToolWithLargeChunks(String tableName, String... additionalArg
    */
   private Job runSyncToolWithChunkSize(String tableName, int chunkSize, String... additionalArgs)
     throws Exception {
-    Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
-    return runSyncToolWithChunkSize(tableName, chunkSize, conf, additionalArgs);
+    return runSyncToolWithChunkSize(tableName, chunkSize, sourceClusterConf(), additionalArgs);
+  }
+
+  /**
+   * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation,
+   * along with the pinned time window so callers can re-query the checkpoint table or run
+   * additional assertions against the same range.
+   */
+  private static class RepairRunResult {
+    final Job dryRunJob;
+    final Job repairJob;
+    final long fromTime;
+    final long toTime;
+
+    RepairRunResult(Job dryRunJob, Job repairJob, long fromTime, long toTime) {
+      this.dryRunJob = dryRunJob;
+      this.repairJob = repairJob;
+      this.fromTime = fromTime;
+      this.toTime = toTime;
+    }
+  }
+
+  /**
+   * Runs the sync tool twice with the SAME pinned time window: first as a --dry-run to detect
+   * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED
+   * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is
+   * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without
+   * pinning, each invocation would fall through to System.currentTimeMillis() and the repair
+   * pass would create fresh rows instead of overwriting the dry-run pass's output.
+   *
+   * <p>If the caller does not provide --from-time / --to-time, defaults of 0 / now are pinned.
+   *
+   * <p>Default chunk size is 1 byte (one row per chunk) to mirror {@link #runSyncTool}.
+   */
+  private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs)
+    throws Exception {
+    return runSyncToolWithRepairAndChunkSize(tableName, 1, additionalArgs);
+  }
+
+  /**
+   * Same as {@link #runSyncToolWithRepair} but uses 1KB chunks (multiple rows per chunk).
+   */
+  private RepairRunResult runSyncToolWithRepairLargeChunks(String tableName,
+    String... additionalArgs) throws Exception {
+    return runSyncToolWithRepairAndChunkSize(tableName, 1024, additionalArgs);
+  }
+
+  /**
+   * Same as {@link #runSyncToolWithRepair} but with an explicit chunk size.
+   */
+  private RepairRunResult runSyncToolWithRepairAndChunkSize(String tableName, int chunkSize,
+    String... additionalArgs) throws Exception {
+    long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L);
+    long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis());
+    String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime);
+
+    // First run: --dry-run, only detect mismatches.
+    String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run");
+    Job dryRunJob = runSyncToolWithChunkSize(tableName, chunkSize, dryRunArgs);
+
+    // Second run: no --dry-run. Same time window so the checkpoint PK matches and any
+    // CHUNK/MISMATCHED rows from the dry-run pass are overwritten by CHUNK/REPAIRED.
+    Job repairJob = runSyncToolWithChunkSize(tableName, chunkSize, pinnedArgs);
+
+    return new RepairRunResult(dryRunJob, repairJob, fromTime, toTime);
+  }
+
+  /**
+   * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array.
+   * Returns the default value if the flag is absent.
+   */
+  private static long parseLongFlag(String[] args, String flag, long defaultValue) {
+    for (int i = 0; i < args.length - 1; i++) {
+      if (flag.equals(args[i])) {
+        return Long.parseLong(args[i + 1]);
+      }
+    }
+    return defaultValue;
+  }
+
+  /**
+   * Returns args with --from-time/--to-time appended only if they are not already present.
+   */
+  private static String[] ensureTimeArgs(String[] args, long fromTime, long toTime) {
+    boolean hasFrom = false;
+    boolean hasTo = false;
+    for (String a : args) {
+      if ("--from-time".equals(a)) {
+        hasFrom = true;
+      } else if ("--to-time".equals(a)) {
+        hasTo = true;
+      }
+    }
+    List<String> result = new ArrayList<>(Arrays.asList(args));
+    if (!hasFrom) {
+      result.add("--from-time");
+      result.add(String.valueOf(fromTime));
+    }
+    if (!hasTo) {
+      result.add("--to-time");
+      result.add(String.valueOf(toTime));
+    }
+    return result.toArray(new String[0]);
+  }
+
+  private static String[] appendArg(String[] args, String newArg) {
+    String[] result = new String[args.length + 1];
+    System.arraycopy(args, 0, result, 0, args.length);
+    result[args.length] = newArg;
+    return result;
+  }
+
+  /**
+   * After a repair pass, asserts that no CHUNK or REGION rows in the checkpoint table are
+   * still in MISMATCHED status. They should all have transitioned to REPAIRED, VERIFIED, or
+   * (when target rows are entirely tombstoned) UNREPAIRABLE.
+   */
+  private void assertNoMismatchedCheckpoints(String tableName, String tenantId)
+    throws SQLException {
+    List<PhoenixSyncTableCheckpointOutputRow> entries =
+      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
+    long mismatched = countCheckpointsByStatus(entries,
+      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
+    assertEquals("After repair, no MISMATCHED checkpoint rows should remain for table "
+      + tableName + " tenant=" + tenantId, 0, mismatched);
+  }
+
+  /**
+   * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the
+   * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops
+   * that recurred across several tests.
+   */
+  private static long countCheckpointsByStatus(List<PhoenixSyncTableCheckpointOutputRow> entries,
+    PhoenixSyncTableCheckpointOutputRow.Status status) {
+    long count = 0;
+    for (PhoenixSyncTableCheckpointOutputRow entry : entries) {
+      if (status.equals(entry.getStatus())) {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  /**
+   * Counts checkpoint entries that match BOTH the given type (REGION or CHUNK) and status. Used
+   * when a test needs to discriminate, e.g., REPAIRED CHUNK rows from REPAIRED REGION rows.
+   */
+  private static long countCheckpointsByTypeAndStatus(
+    List<PhoenixSyncTableCheckpointOutputRow> entries,
+    PhoenixSyncTableCheckpointOutputRow.Type type,
+    PhoenixSyncTableCheckpointOutputRow.Status status) {
+    long count = 0;
+    for (PhoenixSyncTableCheckpointOutputRow entry : entries) {
+      if (type.equals(entry.getType()) && status.equals(entry.getStatus())) {
+        count++;
+      }
+    }
+    return count;
   }
 
   /**
@@ -2748,6 +3694,12 @@ private static class SyncCountersResult {
     public final long chunksVerified;
     public final long mappersVerified;
     public final long mappersMismatched;
+    public final long mappersRepaired;
+    public final long mappersUnrepairable;
+    public final long mappersRepairFailed;
+    public final long rowsMissingOnTarget;
+    public final long rowsExtraOnTarget;
+    public final long rowsDifferentOnTarget;
     public final long taskCreated;
 
     SyncCountersResult(Counters counters) {
@@ -2759,15 +3711,29 @@ private static class SyncCountersResult {
       this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
       this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue();
       this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue();
+      this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
+      this.mappersUnrepairable =
+        counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
+      this.mappersRepairFailed =
+        counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
+      this.rowsMissingOnTarget =
+        counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
+      this.rowsExtraOnTarget =
+        counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+      this.rowsDifferentOnTarget =
+        counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
       this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
     }
 
     public void logCounters(String testName) {
       LOGGER.info(
         "{}: source rows={}, target rows={}, chunks mismatched={}, chunks verified={}, "
-          + "mappers verified={}, mappers mismatched={}",
+          + "mappers verified={}, mappers mismatched={}, mappers repaired={}, "
+          + "mappers unrepairable={}, mappers repair_failed={}, rows missing={}, "
+          + "rows extra={}, rows different={}",
         testName, sourceRowsProcessed, targetRowsProcessed, chunksMismatched, chunksVerified,
-        mappersVerified, mappersMismatched);
+        mappersVerified, mappersMismatched, mappersRepaired, mappersUnrepairable,
+        mappersRepairFailed, rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget);
     }
   }
 
@@ -2801,6 +3767,210 @@ private void validateMapperCounters(SyncCountersResult counters, long expectedMa
       counters.mappersMismatched);
   }
 
+  /**
+   * Validates mapper counters when the chunks roll up into different repair outcomes (run was
+   * non-dry-run so mismatches were repaired rather than left as MISMATCHED).
+   */
+  private void validateMapperCountersRepair(SyncCountersResult counters,
+    long expectedMappersVerified, long expectedMappersRepaired, long expectedMappersUnrepairable,
+    long expectedMappersRepairFailed) {
+    assertEquals("Should have expected verified mappers", expectedMappersVerified,
+      counters.mappersVerified);
+    assertEquals("Should have expected repaired mappers", expectedMappersRepaired,
+      counters.mappersRepaired);
+    assertEquals("Should have expected unrepairable mappers", expectedMappersUnrepairable,
+      counters.mappersUnrepairable);
+    assertEquals("Should have expected repair_failed mappers", expectedMappersRepairFailed,
+      counters.mappersRepairFailed);
+  }
+
+  /**
+   * Pins the cell-level repair drift counters to exact expected values. Use in repair tests
+   * where the drift is constructed deterministically and any miscount (off-by-one,
+   * double-counting, missed branch) should fail the test loudly.
+   */
+  private void assertRepairCellCounters(Counters counters, long expectedCellsMissing,
+    long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) {
+    assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing,
+      counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue());
+    assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra,
+      counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue());
+    assertEquals("CELLS_DIFFERENT_ON_TARGET", expectedCellsDifferent,
+      counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue());
+    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair,
+      counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+  }
+
+  /**
+   * Pins the chunk- and mapper-level repair-status counters. Complements
+   * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that
+   * need to assert both layers.
+   */
+  private void assertRepairChunkAndMapperCounters(Counters counters, long expectedChunksRepaired,
+    long expectedChunksRepairFailed, long expectedMappersRepaired, long expectedMappersUnrepairable,
+    long expectedMappersRepairFailed) {
+    assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired,
+      counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue());
+    assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed,
+      counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue());
+    assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired,
+      counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue());
+    assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable,
+      counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue());
+    assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed,
+      counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue());
+  }
+
+  /**
+   * Pins the row-level repair drift counters. Mirror of {@link #assertRepairCellCounters} for
+   * tests that need to assert whole-row outcomes (missing, extra, unrepairable).
+   */
+  private void assertRepairRowCounters(Counters counters, long expectedRowsMissing,
+    long expectedRowsExtra, long expectedRowsCannotRepair) {
+    assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing,
+      counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue());
+    assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra,
+      counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue());
+    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair,
+      counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+  }
+
+  /**
+   * Builds DDL for a "repair test" table that uses {@code COLUMN_ENCODED_BYTES=NONE} so column
+   * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers
+   * inject raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)}
+   * without computing encoded qualifier bytes.
+   *
+   * <p>Set {@code maxVersions > 1} when the test exercises hidden-version unwinding.
+   */
+  private String buildRepairTestTableDdl(String tableName, boolean withReplication, int maxVersions,
+    String splitPoints) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("CREATE TABLE IF NOT EXISTS ").append(tableName).append(" (\n")
+      .append("    ID INTEGER NOT NULL PRIMARY KEY,\n").append("    NAME VARCHAR(50),\n")
+      .append("    NAME_VALUE BIGINT,\n").append("    UPDATED_DATE TIMESTAMP\n")
+      .append(") COLUMN_ENCODED_BYTES=NONE, UPDATE_CACHE_FREQUENCY=0");
+    if (withReplication) {
+      sb.append(", REPLICATION_SCOPE=1");
+    } else {
+      sb.append(", REPLICATION_SCOPE=0");
+    }
+    if (maxVersions > 1) {
+      sb.append(", VERSIONS=").append(maxVersions);
+    }
+    if (splitPoints != null && !splitPoints.isEmpty()) {
+      sb.append(" SPLIT ON (").append(splitPoints).append(")");
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Creates the same {@link #buildRepairTestTableDdl} schema on both source and target clusters.
+   * Used by repair tests that bypass replication and seed the two clusters separately.
+   *
+   * <p>Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps
+   * &ge; the anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as
+   * {@code TableNotFoundException}.
+   */
+  private long createRepairTestTableOnBothClusters(String tableName, int maxVersions,
+    String splitPoints) throws SQLException {
+    executeTableCreation(sourceConnection,
+      buildRepairTestTableDdl(tableName, false, maxVersions, splitPoints));
+    executeTableCreation(targetConnection,
+      buildRepairTestTableDdl(tableName, false, maxVersions, splitPoints));
+    // Wait until the wall clock advances at least one millisecond past the CREATE TABLE
+    // timestamp so any caller-chosen SCN >= the returned anchor is guaranteed to be above the
+    // table's metadata row.
+    long anchor = System.currentTimeMillis() + 1;
+    while (System.currentTimeMillis() < anchor) {
+      // spin
+    }
+    return anchor;
+  }
+
+  /**
+   * Returns a Phoenix connection bound to {@code scnTimestamp} via
+   * {@link PhoenixRuntime#CURRENT_SCN_ATTRIB}. UPSERTs through this connection write cells with
+   * {@code timestamp == scnTimestamp}, giving tests precise control over cell timestamps without
+   * needing to construct raw HBase Puts.
+   */
+  private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws SQLException {
+    Properties props = new Properties();
+    props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(scnTimestamp));
+    return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props);
+  }
+
+  /**
+   * Resolves the HBase {@link Table} backing a Phoenix table for a given Phoenix
+   * {@link Connection}. Used by raw-cell helpers that need to bypass Phoenix and write cells at
+   * exact (cf, q, ts) coordinates.
+   */
+  private Table getHBaseTable(Connection phoenixConn, String phoenixTableName) throws Exception {
+    PhoenixConnection pConn = phoenixConn.unwrap(PhoenixConnection.class);
+    byte[] physicalName = pConn.getTable(phoenixTableName).getPhysicalName().getBytes();
+    return pConn.getQueryServices().getTable(physicalName);
+  }
+
+  /**
+   * Writes a single raw {@link Put} cell at the given {@code (rowKey, family, qualifier, ts,
+   * value)} coordinates, bypassing Phoenix's UPSERT path. Tests use this to plant historical
+   * versions or specific timestamps that Phoenix wouldn't naturally produce.
+   */
+  private void writeRawCell(Connection phoenixConn, String tableName, byte[] rowKey, String family,
+    String qualifier, long ts, byte[] value) throws Exception {
+    try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
+      Put put = new Put(rowKey);
+      put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), ts, value);
+      hTable.put(put);
+    }
+  }
+
+  /**
+   * Plants a raw point-{@link Delete} (single-version tombstone) at {@code (rowKey, family,
+   * qualifier, ts)}. Equivalent to HBase's {@code Delete.addColumn(family, qualifier, ts)}.
+   */
+  private void writeRawPointDelete(Connection phoenixConn, String tableName, byte[] rowKey,
+    String family, String qualifier, long ts) throws Exception {
+    try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
+      Delete del = new Delete(rowKey);
+      del.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), ts);
+      hTable.delete(del);
+    }
+  }
+
+  /**
+   * Plants a raw {@link Delete#addColumns} (DeleteColumn — covers all versions at {@code ts <=
+   * markerTs}) at {@code (rowKey, family, qualifier)}. Used by shadow-detection tests where a
+   * future source Put at {@code ts <= markerTs} must be suppressed.
+   */
+  private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte[] rowKey,
+    String family, String qualifier, long markerTs) throws Exception {
+    try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
+      Delete del = new Delete(rowKey);
+      del.addColumns(Bytes.toBytes(family), Bytes.toBytes(qualifier), markerTs);
+      hTable.delete(del);
+    }
+  }
+
+  /**
+   * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the
+   * encoding used by {@code splitTableAt}.
+   */
+  private static byte[] integerRowKey(int id) {
+    return PInteger.INSTANCE.toBytes(id);
+  }
+
+  /**
+   * Returns a fresh {@link Configuration} clone of the source cluster with a custom
+   * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by
+   * the mid-row-flush boundary test.
+   */
+  private static Configuration sourceClusterConfWithRepairBatchSize(int repairBatchSize) {
+    Configuration conf = sourceClusterConf();
+    conf.setInt(PhoenixSyncTableTool.PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE, repairBatchSize);
+    return conf;
+  }
+
   /**
    * Validates sync counters with exact source/target rows and minimum chunk thresholds. Use this
    * when chunk counts may vary but should be at least certain values.
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index 673b3f0cbc0..b9f7e893a58 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -20,7 +20,6 @@
 import static org.apache.phoenix.hbase.index.IndexRegionObserver.PHOENIX_INDEX_CDC_CONSUMER_ENABLED;
 import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
 import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
-import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS;
 import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
 import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED;
@@ -228,27 +227,6 @@ public void testMultipleCQSIMetricsInParallel() throws Exception {
     assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get());
   }
 
-  @Test
-  public void testConnectionTime() {
-    Map<String, List<ConnectionQueryServicesMetric>> metrics =
-        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
-
-    // Find connection creation time metric
-    boolean foundMetric = false;
-    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
-      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
-      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
-        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
-        foundMetric = true;
-        break;
-      }
-    }
-    assertTrue("Connection creation time metric not found", foundMetric);
-
-  }
-
   private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception {
     String CREATE_TABLE_DDL =
       "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)";
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
index 2c2b05c2c6b..91ad3941746 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
@@ -752,6 +752,233 @@ public void testParseCounterValueNull() {
     assertEquals(0L, row.getTargetRowsProcessed());
   }
 
+  @Test
+  public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exception {
+    String tableName = generateUniqueName();
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // One region per Status, distinguished by start key so result ordering is deterministic.
+    Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED,
+      Status.UNREPAIRABLE, Status.REPAIR_FAILED };
+    for (int i = 0; i < statuses.length; i++) {
+      byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i));
+      byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i));
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
+        .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey)
+        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
+        .setExecutionEndTime(timestamp).build());
+    }
+
+    // Repair mode should skip only fully-done regions (VERIFIED + REPAIRED) so the mapper
+    // re-processes MISMATCHED/UNREPAIRABLE/REPAIR_FAILED on the next run.
+    List<PhoenixSyncTableCheckpointOutputRow> repairResults =
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, false);
+    assertEquals("Repair mode should return only VERIFIED + REPAIRED regions", 2,
+      repairResults.size());
+    assertArrayEquals("First should be region00 (VERIFIED)",
+      Bytes.toBytes("region00_start"), repairResults.get(0).getStartRowKey());
+    assertArrayEquals("Second should be region02 (REPAIRED)",
+      Bytes.toBytes("region02_start"), repairResults.get(1).getStartRowKey());
+  }
+
+  @Test
+  public void testGetProcessedMapperRegionsDryRunReturnsAllStatuses() throws Exception {
+    String tableName = generateUniqueName();
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED,
+      Status.UNREPAIRABLE, Status.REPAIR_FAILED };
+    for (int i = 0; i < statuses.length; i++) {
+      byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i));
+      byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i));
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
+        .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey)
+        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
+        .setExecutionEndTime(timestamp).build());
+    }
+
+    // Dry-run mode does not filter by status; resume should skip every region the previous
+    // dry-run pass touched, regardless of its outcome.
+    List<PhoenixSyncTableCheckpointOutputRow> dryRunResults =
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
+    assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size());
+  }
+
+  @Test
+  public void testGetProcessedChunksRepairModeFiltersByStatus() throws Exception {
+    String tableName = generateUniqueName();
+    // Mapper region bracket — chunkStart must be <= mapperEnd AND chunkEnd must be >=
+    // mapperStart for a chunk to overlap. Use 'a' < chunk* < 'z' so the boundary check is
+    // satisfied for every chunk and only the STATUS filter decides what comes back.
+    byte[] mapperStart = Bytes.toBytes("aaaa");
+    byte[] mapperEnd = Bytes.toBytes("zzzz");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED,
+      Status.UNREPAIRABLE, Status.REPAIR_FAILED };
+    for (int i = 0; i < statuses.length; i++) {
+      byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i));
+      byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i));
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK)
+        .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey)
+        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
+        .setExecutionEndTime(timestamp).build());
+    }
+
+    List<PhoenixSyncTableCheckpointOutputRow> repairResults = repository.getProcessedChunks(
+      tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false);
+    assertEquals("Repair mode should return only VERIFIED + REPAIRED chunks", 2,
+      repairResults.size());
+    assertArrayEquals("First should be chunk00 (VERIFIED)",
+      Bytes.toBytes("chunk00_start"), repairResults.get(0).getStartRowKey());
+    assertArrayEquals("Second should be chunk02 (REPAIRED)",
+      Bytes.toBytes("chunk02_start"), repairResults.get(1).getStartRowKey());
+  }
+
+  @Test
+  public void testGetProcessedChunksDryRunReturnsAllStatuses() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] mapperStart = Bytes.toBytes("aaaa");
+    byte[] mapperEnd = Bytes.toBytes("zzzz");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED,
+      Status.UNREPAIRABLE, Status.REPAIR_FAILED };
+    for (int i = 0; i < statuses.length; i++) {
+      byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i));
+      byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i));
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK)
+        .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey)
+        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
+        .setExecutionEndTime(timestamp).build());
+    }
+
+    List<PhoenixSyncTableCheckpointOutputRow> dryRunResults = repository.getProcessedChunks(
+      tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
+    assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size());
+  }
+
+  @Test
+  public void testCounterFormatterFormatChunkRoundTrip() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey = Bytes.toBytes("row1");
+    byte[] endKey = Bytes.toBytes("row100");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // Args: sourceRows, targetRows, rowsMissing, rowsExtra, rowsDifferent, rowsCannotRepair,
+    // cellsMissing, cellsExtra, cellsDifferent.
+    String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(100L, 95L,
+      3L, 2L, 6L, 1L, 7L, 5L, 4L);
+
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey)
+      .setStatus(Status.REPAIRED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .setCounters(counters).build());
+
+    String query = "SELECT COUNTERS FROM "
+      + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?";
+    String stored;
+    try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) {
+      ps.setString(1, tableName);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        stored = rs.getString("COUNTERS");
+      }
+    }
+
+    // Stored COUNTERS string should pin every counter to the exact value from formatChunk.
+    assertEquals(counters, stored);
+    assertTrue("COUNTERS should contain SOURCE_ROWS_PROCESSED=100",
+      stored.contains("SOURCE_ROWS_PROCESSED=100"));
+    assertTrue("COUNTERS should contain TARGET_ROWS_PROCESSED=95",
+      stored.contains("TARGET_ROWS_PROCESSED=95"));
+    assertTrue("COUNTERS should contain ROWS_MISSING_ON_TARGET=3",
+      stored.contains("ROWS_MISSING_ON_TARGET=3"));
+    assertTrue("COUNTERS should contain ROWS_EXTRA_ON_TARGET=2",
+      stored.contains("ROWS_EXTRA_ON_TARGET=2"));
+    assertTrue("COUNTERS should contain ROWS_DIFFERENT_ON_TARGET=6",
+      stored.contains("ROWS_DIFFERENT_ON_TARGET=6"));
+    assertTrue("COUNTERS should contain ROWS_CANNOT_REPAIR=1",
+      stored.contains("ROWS_CANNOT_REPAIR=1"));
+    assertTrue("COUNTERS should contain CELLS_MISSING_ON_TARGET=7",
+      stored.contains("CELLS_MISSING_ON_TARGET=7"));
+    assertTrue("COUNTERS should contain CELLS_EXTRA_ON_TARGET=5",
+      stored.contains("CELLS_EXTRA_ON_TARGET=5"));
+    assertTrue("COUNTERS should contain CELLS_DIFFERENT_ON_TARGET=4",
+      stored.contains("CELLS_DIFFERENT_ON_TARGET=4"));
+
+    // Public parse helpers should round-trip the source/target row counts.
+    PhoenixSyncTableCheckpointOutputRow parsed = new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setStartRowKey(startKey).setCounters(counters).build();
+    assertEquals(100L, parsed.getSourceRowsProcessed());
+    assertEquals(95L, parsed.getTargetRowsProcessed());
+  }
+
+  @Test
+  public void testCounterFormatterFormatMapperRoundTrip() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey = Bytes.toBytes("region_start");
+    byte[] endKey = Bytes.toBytes("region_end");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // Args: chunksVerified, chunksMismatched, sourceRows, targetRows, rowsMissing, rowsExtra,
+    // rowsDifferent, rowsCannotRepair, cellsMissing, cellsExtra, cellsDifferent.
+    String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatMapper(8L, 2L,
+      500L, 480L, 12L, 9L, 4L, 3L, 25L, 18L, 7L);
+
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey)
+      .setStatus(Status.REPAIRED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .setCounters(counters).build());
+
+    String query = "SELECT COUNTERS FROM "
+      + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?";
+    String stored;
+    try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) {
+      ps.setString(1, tableName);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        stored = rs.getString("COUNTERS");
+      }
+    }
+
+    // Stored COUNTERS string should pin every counter to the exact value from formatMapper.
+    assertEquals(counters, stored);
+    assertTrue("COUNTERS should contain CHUNKS_VERIFIED=8", stored.contains("CHUNKS_VERIFIED=8"));
+    assertTrue("COUNTERS should contain CHUNKS_MISMATCHED=2",
+      stored.contains("CHUNKS_MISMATCHED=2"));
+    assertTrue("COUNTERS should contain SOURCE_ROWS_PROCESSED=500",
+      stored.contains("SOURCE_ROWS_PROCESSED=500"));
+    assertTrue("COUNTERS should contain TARGET_ROWS_PROCESSED=480",
+      stored.contains("TARGET_ROWS_PROCESSED=480"));
+    assertTrue("COUNTERS should contain ROWS_MISSING_ON_TARGET=12",
+      stored.contains("ROWS_MISSING_ON_TARGET=12"));
+    assertTrue("COUNTERS should contain ROWS_EXTRA_ON_TARGET=9",
+      stored.contains("ROWS_EXTRA_ON_TARGET=9"));
+    assertTrue("COUNTERS should contain ROWS_DIFFERENT_ON_TARGET=4",
+      stored.contains("ROWS_DIFFERENT_ON_TARGET=4"));
+    assertTrue("COUNTERS should contain ROWS_CANNOT_REPAIR=3",
+      stored.contains("ROWS_CANNOT_REPAIR=3"));
+    assertTrue("COUNTERS should contain CELLS_MISSING_ON_TARGET=25",
+      stored.contains("CELLS_MISSING_ON_TARGET=25"));
+    assertTrue("COUNTERS should contain CELLS_EXTRA_ON_TARGET=18",
+      stored.contains("CELLS_EXTRA_ON_TARGET=18"));
+    assertTrue("COUNTERS should contain CELLS_DIFFERENT_ON_TARGET=7",
+      stored.contains("CELLS_DIFFERENT_ON_TARGET=7"));
+
+    // Public parse helpers should round-trip the source/target row counts.
+    PhoenixSyncTableCheckpointOutputRow parsed = new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setStartRowKey(startKey).setCounters(counters).build();
+    assertEquals(500L, parsed.getSourceRowsProcessed());
+    assertEquals(480L, parsed.getTargetRowsProcessed());
+  }
+
   @Test
   public void testCheckpointMapperRegionWithTenantId() throws Exception {
     String tableName = generateUniqueName();
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
index 039b7be051f..72c38816d84 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
@@ -101,27 +101,6 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() {
     }
   }
 
-  @Test
-  public void testConnectionTime() {
-    Map<String, List<ConnectionQueryServicesMetric>> metrics =
-        ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics();
-    List<ConnectionQueryServicesMetric> serviceMetrics = metrics.get("DEFAULT_CQSN");
-    assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics);
-
-    // Find connection creation time metric
-    boolean foundMetric = false;
-    for (ConnectionQueryServicesMetric metric : serviceMetrics) {
-      System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue());
-      if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) {
-        assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0);
-        foundMetric = true;
-        break;
-      }
-    }
-    assertTrue("Connection creation time metric not found", foundMetric);
-
-  }
-
   private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) {
     ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName,
       OPEN_PHOENIX_CONNECTIONS_COUNTER, counter);

From 9955587e0d665890155e2171f489aa103976e2bc Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Sat, 6 Jun 2026 00:48:41 +0530
Subject: [PATCH 11/18] checkpointing code comment refactoring

---
 .../ConnectionQueryServicesMetrics.java       |   5 +-
 .../PhoenixSyncTableChunkRepairer.java        | 261 +++++++-------
 .../end2end/PhoenixSyncTableToolIT.java       | 321 +++++++++++++++++-
 3 files changed, 457 insertions(+), 130 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index c87e9ad7984..49110d9d7ed 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -17,6 +17,10 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
+import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -26,7 +30,6 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl;
 import org.apache.phoenix.monitoring.MetricType;
 
-import static org.apache.phoenix.monitoring.MetricType.*;
 
 /**
  * Class for Connection Query Service Metrics.
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
index 4afdbcf9130..3bda5364696 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
@@ -375,16 +375,12 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
     CellComparator comparator = CellComparator.getInstance();
     RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow());
 
-    // Per-column max source PUT timestamp; consumed by tombstoneTargetCell (target-extra
-    // cell) to pick the delete shape. Three cases:
-    //   key absent              → DeleteColumn at target's ts (also wipes older hidden versions)
-    //   sourceMaxTs >= targetTs → point Delete at target's ts
-    //   sourceMaxTs <  targetTs → point Delete at target's ts, PLUS a point Delete for every
-    //                             hidden target Put whose ts ∈ (sourceMaxTs, targetTs); without
-    //                             these, the next hidden Put surfaces above source's mirror
-    //                             once target's visible cell is shadowed.
-    // Math::max collapses multi-version source, so case 3 only fires when target sits above
-    // ALL source versions at this column. See tombstoneTargetCell() for the application.
+    // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the
+    // delete shape for a target-extra cell — see its javadoc for the three cases.
+    // Math::max collapses source's multi-version cells into a single Long per column so
+    // the comparison against target's ts is a scalar check.
+    //
+    // Example: source has Put(NAME)@300 and Put(NAME)@200 → sourceMaxTsByColumn[NAME]=300.
     Map<ColumnKey, Long> sourceMaxTsByColumn = new HashMap<>();
     for (Cell sourceCell : sourceCells) {
       if (!CellUtil.isDelete(sourceCell)) {
@@ -487,18 +483,41 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
    * Tombstones a target-only cell to make target's read view at this column match source's.
    * Skips cells that are themselves already tombstones.
    *
-   * <p>Tombstone subtype depends on what source has at this {@code (cf, q)}:
-   * <ul>
-   *   <li><b>Source has no cell at this column</b> — use {@link Delete#addColumns} (DeleteColumn,
-   *       scope {@code ts <= T}) so even max-versions-hidden older target versions are
-   *       shadowed.</li>
-   *   <li><b>Source's max ts at this column is &gt;= target's ts</b> — point-Delete only
-   *       target's exact ts.</li>
-   *   <li><b>Source's max ts at this column is &lt; target's ts</b> — point-Delete target's
-   *       ts, AND point-Delete every max-versions-hidden Put on target with ts in
-   *       {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is
-   *       shadowed, the next hidden version surfaces above source's mirror.</li>
-   * </ul>
+   * <p>Called only when source has no cell at this target cell's exact
+   * {@code (cf, q, ts)}. If source does have a cell at the same {@code (cf, q, ts)},
+   * the caller takes the mirroring path instead
+   *
+   * <p>Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples
+   * assume {@code MAX_VERSIONS=3} and show only the relevant column.
+   *
+   * <p><b>Case 1 — Source has no cell at this column:</b>
+   * <pre>
+   *   source row: (no NAME)
+   *   target row: Put(NAME, "carol")@900 visible
+   *               Put(NAME, "bob")  @600 hidden
+   *   action    : DeleteColumn(NAME)@900   (covers ts <= 900, wipes "bob" too)
+   *   result    : target reads no NAME — matches source.
+   * </pre>
+   *
+   * <p><b>Case 2 — {@code sourceMaxTs >= targetTs}:</b>
+   * <pre>
+   *   source row: Put(NAME, "alice")@500       (sourceMaxTs = 500)
+   *   target row: Put(NAME, "old",  )@200      (input cell; source has nothing at @200)
+   *   action    : point-Delete(NAME)@200
+   *   result    : "old"@200 is shadowed;
+   *              Put(NAME, "alice")@500 would already have been mirrored
+   * </pre>
+   *
+   * <p><b>Case 3 — {@code sourceMaxTs < targetTs}:</b>
+   * <pre>
+   *   source row: Put(NAME, "alice")@300       (sourceMaxTs = 300)
+   *   target row: Put(NAME, "carol")@900 visible
+   *               Put(NAME, "bob")  @600 hidden
+   *               Put(NAME, "alice")@300 hidden
+   *   action    : point-Delete(NAME)@900 + point-Delete(NAME)@600
+   *               (without the second, "bob"@600 surfaces above source's mirror)
+   *   result    : target's "alice"@300 is the highest live version — matches source.
+   * </pre>
    *
    * @return true if the cell was a live cell that contributed a tombstone marker, false if
    *         the cell was already a tombstone and was skipped.
@@ -527,54 +546,6 @@ private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuff
     return true;
   }
 
-  /**
-   * Loads target's per-row record with a single raw, all-versions scan. Captures both
-   * tombstones and Puts:
-   * <ul>
-   *   <li><b>Tombstones</b> drive shadow detection — would a source Put we mirror be
-   *       suppressed by an existing target tombstone on read? e.g. tgt has
-   *       DeleteColumn(NAME)@900, src wants Put(NAME, "alice")@500. Mirror would land
-   *       on disk but stay invisible because DeleteColumn@900 covers ts &lt;= 900.
-   *       Detect upfront, skip the doomed write, flag the row unrepairable.</li>
-   *   <li><b>Puts</b> drive hidden-version discovery — when target's visible Put is at a
-   *       higher ts than source's mirror, max-versions-filtered older Puts may sit between
-   *       the two and surface above source's mirror after we shadow the visible one. We
-   *       need their timestamps so we can point-Delete each. e.g. tgt has Put(NAME,
-   *       "carol")@T2 visible plus Put(NAME, "bob")@T1 hidden under MAX_VERSIONS=2; src
-   *       has Put(NAME, "alice")@T0. Point-Deleting only T2 exposes T1 above source's T0
-   *       mirror — wrong; we must point-Delete T1 too so T0 surfaces.</li>
-   * </ul>
-   * raw=true and all-versions are forced regardless of user flags so tombstones and
-   * hidden Put versions that would otherwise be filtered out are surfaced.
-   *
-   * <p>Time range: lower bound stays at {@code fromTime} since cells below the window
-   * can't affect repair inside the window; upper bound is {@code MAX_VALUE} because a
-   * tombstone at {@code ts > toTime} can still shadow a Put we mirror at
-   * {@code ts in window} during application reads.
-   */
-  private TargetRowRecord loadTargetRowRecord(byte[] rowKey, Table targetHTable)
-    throws IOException {
-    Scan scan = new Scan();
-    scan.withStartRow(rowKey, true);
-    scan.withStopRow(rowKey, true);
-    scan.setRaw(true);
-    scan.readAllVersions();
-    scan.setCacheBlocks(false);
-    scan.setTimeRange(fromTime, Long.MAX_VALUE);
-    scan.setCaching(1);
-    scan.setLimit(1);
-    TargetRowRecord rowRecord = new TargetRowRecord();
-    try (ResultScanner scanner = targetHTable.getScanner(scan)) {
-      Result raw = scanner.next();
-      if (raw != null) {
-        for (Cell cell : raw.rawCells()) {
-          rowRecord.record(cell);
-        }
-      }
-    }
-    return rowRecord;
-  }
-
   /**
    * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and
    * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL /
@@ -649,10 +620,6 @@ private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delet
     deletes.clear();
   }
 
-  // ===========================================================================
-  // Public input/output value types
-  // ===========================================================================
-
   /**
    * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may
    * be wider so the repair scan sees the same cells (including extra-on-target rows between
@@ -702,20 +669,8 @@ public ChunkRepairRequest(byte[] sourceStart, byte[] sourceEnd, byte[] targetSta
   public static final class ChunkRepairResult {
 
     public enum Status {
-      /** Every cell-level drift handled and no row landed in ROWS_CANNOT_REPAIR. */
       REPAIRED,
-      /**
-       * Repair completed but at least one row carries drift the tool cannot remove —
-       * a source-side Put was shadow-suppressed by an existing target tombstone, or
-       * target's row is entirely tombstones. Operator must major-compact target to
-       * reap shadowing tombstones, then re-run.
-       */
       UNREPAIRABLE,
-      /**
-       * Repair scan or batch flush threw {@link IOException}. Per-chunk fault isolation:
-       * mapper continues with the next chunk; the chunk re-enters as an unprocessed gap
-       * on the next invocation.
-       */
       REPAIR_FAILED
     }
 
@@ -776,29 +731,20 @@ public String toLogString() {
   }
 
   /**
-   * Per-row capture of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]},
-   * built lazily from a single raw single-row scan with all-versions enabled. Used in two
-   * roles:
+   * Per-row snapshot of target's tombstones and Puts. Two queries:
+   * {@link #wouldShadow} (shadow detection) and {@link #targetPutTimestampsBetween}
+   * (hidden-version discovery). For examples of how callers use these, see the
+   * doc on {@link RowRepairBuffer#targetRowRecord}; for scan shape and time-range
+   * rationale, see {@link #load}.
    *
-   * <ol>
-   *   <li><b>Shadow detection</b> ({@link #wouldShadow}): would a source Put we're about
-   *       to mirror be suppressed by an existing target tombstone?</li>
-   *   <li><b>Hidden-version discovery</b> ({@link #targetPutTimestampsBetween}): when
-   *       max-versions on target hides older Puts behind a newer visible one, those hidden
-   *       Puts surface on read the moment we shadow the visible one. We need their
-   *       timestamps so we can point-Delete each. e.g. src has NAME@T0="alice", tgt has
-   *       NAME@T2="carol" visible plus NAME@T1 hidden under max-versions. If we only
-   *       point-Delete T2, target reads return T1 — wrong. Discovering T1 lets us
-   *       point-Delete it too, so source's T0 mirror surfaces.</li>
-   * </ol>
-   *
-   * HBase has four tombstone subtypes, each with distinct shadow semantics:
-   *   Delete                — shadows a Put at {@code (cf, q, ts == T)} exactly
-   *   DeleteColumn          — shadows Puts at {@code (cf, q, ts <= T)}
-   *   DeleteFamily          — shadows Puts at {@code (cf, *, ts <= T)}
-   *   DeleteFamilyVersion   — shadows Puts at {@code (cf, *, ts == T)}
-   * {@link #wouldShadow(Cell)} consults all four tombstone maps and returns true on any
-   * match.
+   * <p>HBase has four tombstone subtypes; each is recorded into its own map because
+   * shadow scope differs:
+   * <pre>
+   *   Delete               shadows Put at (cf, q, ts == T) exactly
+   *   DeleteColumn         shadows Puts at (cf, q, ts &lt;= T)
+   *   DeleteFamily         shadows Puts at (cf, *, ts &lt;= T)
+   *   DeleteFamilyVersion  shadows Puts at (cf, *, ts == T)
+   * </pre>
    */
   static final class TargetRowRecord {
     private final Map<ColumnKey, Set<Long>> deletePointTs = new HashMap<>();
@@ -808,6 +754,47 @@ static final class TargetRowRecord {
     /** Per-column ts-ordered set of target's Put timestamps. */
     private final Map<ColumnKey, NavigableMap<Long, Boolean>> targetPutTs = new HashMap<>();
 
+    /**
+     * Builds a {@link TargetRowRecord} from a single-row HBase scan.
+     *
+     * <p><b>raw=true + all-versions</b> are forced regardless of user flags so tombstones
+     * and max-versions-filtered older Puts (the two things this record exists to capture)
+     * are surfaced.
+     *
+     * <p><b>Time range {@code [fromTime, MAX_VALUE]}</b>:
+     * <ul>
+     *   <li>Lower bound = {@code fromTime}: cells below the verify window can't affect
+     *       repair inside the window.</li>
+     *   <li>Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at
+     *       {@code ts >= toTime} can still shadow a Put we mirror at {@code ts} in window
+     *       during application reads, so we must see it. e.g. window
+     *       {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without the
+     *       wide upper bound we'd miss the 900 tombstone and write a doomed mirror.</li>
+     * </ul>
+     */
+    static TargetRowRecord load(byte[] rowKey, Table targetHTable, long fromTime)
+      throws IOException {
+      Scan scan = new Scan();
+      scan.withStartRow(rowKey, true);
+      scan.withStopRow(rowKey, true);
+      scan.setRaw(true);
+      scan.readAllVersions();
+      scan.setCacheBlocks(false);
+      scan.setTimeRange(fromTime, Long.MAX_VALUE);
+      scan.setCaching(1);
+      scan.setLimit(1);
+      TargetRowRecord rowRecord = new TargetRowRecord();
+      try (ResultScanner scanner = targetHTable.getScanner(scan)) {
+        Result raw = scanner.next();
+        if (raw != null) {
+          for (Cell cell : raw.rawCells()) {
+            rowRecord.record(cell);
+          }
+        }
+      }
+      return rowRecord;
+    }
+
     void record(Cell cell) {
       if (CellUtil.isDelete(cell)) {
         recordTombstone(cell);
@@ -817,6 +804,12 @@ void record(Cell cell) {
       }
     }
 
+    /**
+     * Records one tombstone into its per-subtype map for {@link #wouldShadow} to query.
+     * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to
+     * the max ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion})
+     * accumulate into a set.
+     */
     private void recordTombstone(Cell tombstone) {
       long ts = tombstone.getTimestamp();
       ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone));
@@ -834,7 +827,7 @@ private void recordTombstone(Cell tombstone) {
           deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts);
           break;
         default:
-          // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here.
+          // Unreachable: caller filters via CellUtil.isDelete.
       }
     }
 
@@ -872,11 +865,11 @@ boolean wouldShadow(Cell sourcePut) {
      */
     Set<Long> targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive,
       long upperExclusive) {
-      NavigableMap<Long, Boolean> tss = targetPutTs.get(new ColumnKey(family, qualifier));
-      if (tss == null) {
+      NavigableMap<Long, Boolean> putTimestamps = targetPutTs.get(new ColumnKey(family, qualifier));
+      if (putTimestamps == null) {
         return Collections.emptySet();
       }
-      return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet();
+      return putTimestamps.subMap(lowerExclusive, false, upperExclusive, false).keySet();
     }
   }
 
@@ -940,25 +933,41 @@ Delete delete() {
     }
 
     /**
-     * Loads target's per-row record (tombstones + Puts) once per row, cached for the
-     * lifetime of this buffer. Two consumers:
+     * Returns the cached {@link TargetRowRecord} for this row, loading on first call via
+     * {@link TargetRowRecord#load} (one raw all-versions scan, time range
+     * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the
+     * current row — so repeated cell-level lookups within the row pay one round-trip total.
+     *
+     * <p>Two consumers:
      *
-     * <p><b>Shadow detection</b> (used by {@link #mirrorSourceCell} via
-     * {@link TargetRowRecord#wouldShadow}). Time range is {@code [fromTime, MAX_VALUE]} —
-     * a target tombstone outside the user's verify window can still suppress a Put we
-     * mirror inside it. e.g. tgt has DeleteColumn@900, src wants Put@500, scan timeRange
-     * [0, 600). Put@500 is masked because DeleteColumn@900 covers ts &lt;= 900. Detecting
-     * this upfront lets repair skip the doomed write and flag the row unrepairable.
+     * <p><b>Shadow detection</b> — {@link #mirrorSourceCellUnlessShadowed} asks
+     * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes
+     * that target's existing tombstones would render invisible.
+     * <pre>
+     *   target row state: DeleteColumn(NAME)@T=900   (covers ts &lt;= 900)
+     *   source row state: Put(NAME, "alice")@T=500
+     *   wouldShadow(srcPut@500) → true
+     *   ⇒ skip mirror, mark row unrepairable; operator must major-compact target
+     * </pre>
      *
-     * <p><b>Hidden-version discovery</b> (used by {@link #tombstoneTargetCell} via
-     * {@link TargetRowRecord#targetPutTimestampsBetween}). When target's MAX_VERSIONS
-     * hides older Puts behind a newer visible one, point-Deleting only the visible ts
-     * exposes the hidden version above source's mirror. The record's all-versions raw
-     * scan surfaces those hidden timestamps so we can tombstone each.
+     * <p><b>Hidden-version discovery</b> — {@link #tombstoneTargetCell} asks
+     * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts
+     * sitting between source's max ts and target's visible ts, so each can be point-Deleted
+     * before they surface above source's mirror.
+     * <pre>
+     *   target row state (MAX_VERSIONS=3):
+     *     Put(NAME, "carol")@T=900   visible
+     *     Put(NAME, "bob")  @T=600   hidden
+     *     Put(NAME, "alice")@T=300   hidden
+     *   source row state:
+     *     Put(NAME, "alice")@T=300   (sourceMaxTs=300)
+     *   targetPutTimestampsBetween(NAME, 300, 900) → {600}
+     *     point-Delete T=900 (visible) and T=600 (hidden) so T=300 surfaces
+     * </pre>
      */
     TargetRowRecord targetRowRecord(Table targetHTable) throws IOException {
       if (targetRowRecord == null) {
-        targetRowRecord = loadTargetRowRecord(rowKey, targetHTable);
+        targetRowRecord = TargetRowRecord.load(rowKey, targetHTable, fromTime);
       }
       return targetRowRecord;
     }
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 0dc84f5cf6c..5a9b0b79742 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -1932,7 +1932,7 @@ public void testRepairCellDifferentValue() throws Exception {
    * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so
    * the diff routes to {@code mirrorWholeRow} in
    * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside,
-   * {@code loadTargetRowRecord} deliberately uses {@code (fromTime, MAX_VALUE)} (line 487) — so it
+   * {@code TargetRowRecord.load} deliberately uses {@code (fromTime, MAX_VALUE)} — so it
    * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200
    * (DeleteColumn covers ts &lt;= T=600). Result: source's mirror is suppressed, the row is
    * flagged unrepairable.
@@ -1953,8 +1953,8 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
         "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
       scnSrc.commit();
     }
-    // Target has a tombstone strictly above --to-time. Diff scan won't see it; loadTargetRowRecord
-    // still will.
+    // Target has a tombstone strictly above --to-time. Diff scan won't see it;
+    // TargetRowRecord.load still will.
     writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       tombstoneTs);
 
@@ -1990,6 +1990,292 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
     }
   }
 
+  /**
+   * Shadow detection via {@code DeleteFamily}: target has a {@code DeleteFamily} tombstone on
+   * cf {@code "0"} covering every qualifier in the family at {@code ts <= tombstoneTs}. Source
+   * has every cell of the row inside the diff window at {@code ts < tombstoneTs}, so each
+   * source cell would be shadowed if mirrored. Drives the
+   * {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow} — uncovered
+   * by other shadow ITs which only exercise {@code DeleteColumn}.
+   *
+   * <p>To force the {@code cmp < 0} (whole-row mirror) path, the tombstone is planted strictly
+   * above {@code --to-time} so the diff scan does not see target's row at all, but
+   * {@code TargetRowRecord.load} (range {@code [fromTime, MAX_VALUE]}) still surfaces it.
+   */
+  @Test
+  public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long toTime = base + 2L;
+    final long familyTombstoneTs = base + 3L;
+
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+    // DeleteFamily on cf "0" — covers every qualifier (NAME, NAME_VALUE, _0, ...) at
+    // ts <= familyTombstoneTs. Planted strictly above --to-time so the diff scan can't see it.
+    writeRawDeleteFamily(targetConnection, uniqueTableName, integerRowKey(rowId), "0",
+      familyTombstoneTs);
+
+    while (System.currentTimeMillis() <= familyTombstoneTs) {
+      // spin
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
+    assertTrue("Repair should succeed (shadowing is correctness-only)",
+      result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // Both source cells (NAME and _0) live at sourceTs in cf "0"; DeleteFamily covers the whole
+    // family at ts <= familyTombstoneTs (sourceTs < familyTombstoneTs), so every mirror is
+    // suppressed → mirrorWholeRow returns FULLY_SHADOWED → rowsMissing stays 0,
+    // rowsCannotRepair++.
+    assertRepairRowCounters(c, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
+      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+
+    // Post-repair: target should still have no visible row — every source cell was suppressed.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertFalse("Row should not be visible on target — DeleteFamily covered every source cell",
+          rs.next());
+      }
+    }
+  }
+
+  /**
+   * Shadow detection via {@code DeleteFamilyVersion}: target has a {@code DeleteFamilyVersion}
+   * tombstone on cf {@code "0"} at exactly {@code sourceTs}. Source's cells at the same ts get
+   * shadowed because {@code DeleteFamilyVersion} matches every qualifier in the family at the
+   * exact ts. Drives the {@code TargetRowRecord.deleteFamilyVersionTs} branch in
+   * {@code wouldShadow} — also uncovered prior to this test.
+   *
+   * <p>{@code DeleteFamilyVersion} requires ts equality (not inequality) so the
+   * tombstone-above-{@code toTime} trick used in the {@code DeleteColumn}/{@code DeleteFamily}
+   * shadow tests doesn't apply here. Instead we omit {@code --raw-scan}: target has no live
+   * cells (just the tombstone), so without raw mode the diff scan sees target as empty,
+   * routing to the {@code cmp < 0} mirrorWholeRow path; {@code TargetRowRecord.load} runs
+   * raw internally and surfaces the tombstone for shadow detection.
+   */
+  @Test
+  public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long fromTime = 0L;
+    final long sourceTs = base + 1L;
+
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+    // DeleteFamilyVersion on cf "0" at exactly sourceTs — covers every qualifier in the family
+    // at ts == sourceTs. Source's NAME and _0 cells, both Put at sourceTs, are shadow targets.
+    writeRawDeleteFamilyVersion(targetConnection, uniqueTableName, integerRowKey(rowId), "0",
+      sourceTs);
+
+    while (System.currentTimeMillis() <= sourceTs) {
+      // spin
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()));
+    assertTrue("Repair should succeed (shadowing is correctness-only)",
+      result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    assertRepairRowCounters(c, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
+      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertFalse("Row should not be visible on target — DeleteFamilyVersion shadowed every "
+          + "source cell at sourceTs", rs.next());
+      }
+    }
+  }
+
+  /**
+   * Multi-hidden-version unwinding: extends {@link #testRepairUnwindsHiddenTargetVersions}
+   * with TWO max-versions-hidden Puts beneath target's visible Put. Pins
+   * {@code targetPutTimestampsBetween} (in {@link
+   * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord})
+   * end-to-end: when {@code sourceMaxTs < target visible ts}, the repairer must point-Delete
+   * the visible ts AND every hidden Put in {@code (sourceMaxTs, target visible ts)} —
+   * otherwise after we shadow the visible cell, a hidden Put surfaces above source's mirror.
+   */
+  @Test
+  public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 3, "3, 7");
+
+    final long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long targetT1 = base + 2L;
+    final long targetT2 = base + 3L;
+    final long targetT3 = base + 4L;
+
+    byte[] rowKey = integerRowKey(rowId);
+    String family = "0";
+    String qualifier = "NAME";
+
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+      scnSrc.commit();
+    }
+
+    // Three target NAME versions, all retained under VERSIONS=3:
+    //   T1 "bob" (hidden), T2 "carol" (hidden), T3 "dave" (visible).
+    try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) {
+      scnTgtT1.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
+      scnTgtT1.commit();
+    }
+    try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) {
+      scnTgtT2.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
+      scnTgtT2.commit();
+    }
+    try (Connection scnTgtT3 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT3)) {
+      scnTgtT3.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')");
+      scnTgtT3.commit();
+    }
+
+    // Sanity: pre-repair target visible NAME is "dave" (newest of the three).
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("Pre-repair target visible NAME should be dave", "dave", rs.getString(1));
+      }
+    }
+
+    while (System.currentTimeMillis() <= targetT3) {
+      // spin
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      "--read-all-versions");
+    assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    Counters repairCounters = result.repairJob.getCounters();
+    // Source has one NAME Put@sourceTs; target has three NAME Puts at T1, T2, T3 (all >
+    // sourceTs). Each target NAME cell drives the cellExtra branch (sourceMaxTs < ts) and the
+    // hidden-version unwinding logic point-Deletes every Put in (sourceTs, ts).
+    assertTrue("At least 3 cells should be tombstoned across target's three NAME versions",
+      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 3);
+
+    // Post-repair: standard read on target must see source's "alice" — every hidden version
+    // ("bob"@T1, "carol"@T2) was unwound along with the visible "dave"@T3.
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertEquals("Post-repair target NAME must be alice (all hidden versions unwound)",
+          "alice", rs.getString(1));
+      }
+    }
+
+    // Raw scan: target should have a Delete marker covering each of T1/T2/T3 plus source's
+    // mirror. The exact tombstone cell count may include duplicates from the unwind logic
+    // (each iteration's hidden-set spans a shrinking interval, so T1 appears in T3's hidden
+    // set, then again in T2's), so assert a lower bound on distinct delete-bearing scan cells.
+    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
+      Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true);
+      scan.readAllVersions();
+      int nameDeletes = 0;
+      int namePutAtSourceTs = 0;
+      try (ResultScanner sc = targetHTable.getScanner(scan)) {
+        for (Result r; (r = sc.next()) != null;) {
+          for (Cell c : r.rawCells()) {
+            if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family))
+              && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) {
+              if (CellUtil.isDelete(c)) {
+                nameDeletes++;
+              } else if (c.getTimestamp() == sourceTs) {
+                namePutAtSourceTs++;
+              }
+            }
+          }
+        }
+      }
+      assertTrue("Expected at least 3 NAME delete markers on target, saw " + nameDeletes,
+        nameDeletes >= 3);
+      assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
+    }
+  }
+
+  /**
+   * cmp==0 row carrying a target-only tombstone cell at a coord source lacks: drives the
+   * {@code tombstoneTargetCell} return-{@code false} branch ({@link
+   * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}, around line 420)
+   * inside {@code generateMutationForDiffCells}'s {@code cellExtra} branch — the target cell
+   * is itself a tombstone, so no new tombstone is emitted, but {@code anyCellUnrepairable} is
+   * set. The row contributes to {@code ROWS_CANNOT_REPAIR} without bumping any cell counter.
+   *
+   * <p>Setup: source and target share a matching {@code NAME_VALUE} cell so the row exists on
+   * both. Target also has a raw point-{@link Delete} on {@code NAME} at a coord source lacks;
+   * with {@code --raw-scan} the diff scan surfaces that tombstone cell, taking the
+   * {@code cmp > 0} branch on it.
+   */
+  @Test
+  public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    final long ts = base + 1L;
+    final long tombstoneTs = base + 2L;
+
+    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
+      scnSrc.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnSrc.commit();
+    }
+    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
+      scnTgt.createStatement().execute(
+        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+      scnTgt.commit();
+    }
+    // Plant a raw point-Delete on NAME at tombstoneTs — a coord source has no cell at, but the
+    // target cell is itself a tombstone, so tombstoneTargetCell returns false in repair-mode.
+    writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
+      tombstoneTs);
+
+    while (System.currentTimeMillis() <= tombstoneTs) {
+      // spin
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(System.currentTimeMillis()), "--raw-scan");
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // No mirror, no tombstone emitted — but anyCellUnrepairable was set, so the row rolls up
+    // as unrepairable. All cell counters stay 0.
+    assertRepairCellCounters(c, 0, 0, 0, 1);
+    assertRepairRowCounters(c, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
+      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+  }
+
   /**
    * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny
    * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple
@@ -3952,6 +4238,35 @@ private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte
     }
   }
 
+  /**
+   * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family
+   * at {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests
+   * exercising the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}.
+   */
+  private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte[] rowKey,
+    String family, long markerTs) throws Exception {
+    try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
+      Delete del = new Delete(rowKey);
+      del.addFamily(Bytes.toBytes(family), markerTs);
+      hTable.delete(del);
+    }
+  }
+
+  /**
+   * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier
+   * in the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by
+   * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch
+   * in {@code wouldShadow}.
+   */
+  private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName,
+    byte[] rowKey, String family, long markerTs) throws Exception {
+    try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
+      Delete del = new Delete(rowKey);
+      del.addFamilyVersion(Bytes.toBytes(family), markerTs);
+      hTable.delete(del);
+    }
+  }
+
   /**
    * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the
    * encoding used by {@code splitTableAt}.

From ffeb1015ce57fa695bcdb5f074fd7f9dee013cbb Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Mon, 8 Jun 2026 17:03:34 +0530
Subject: [PATCH 12/18] checkpointing test fixes

---
 .../ConnectionQueryServicesMetrics.java       |    1 -
 .../end2end/PhoenixSyncTableToolIT.java       | 2071 ++++++++++-------
 .../PhoenixSyncTableOutputRepositoryTest.java |  218 ++
 3 files changed, 1464 insertions(+), 826 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
index 49110d9d7ed..575d38530eb 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java
@@ -30,7 +30,6 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl;
 import org.apache.phoenix.monitoring.MetricType;
 
-
 /**
  * Class for Connection Query Service Metrics.
  */
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 5a9b0b79742..1d9131d8e2d 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -43,17 +43,23 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.SimpleRegionObserver;
+import org.apache.hadoop.hbase.regionserver.MiniBatchOperationInProgress;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.Job;
@@ -72,7 +78,6 @@
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.schema.types.PInteger;
 import org.apache.phoenix.util.PhoenixRuntime;
-import org.apache.phoenix.util.SchemaUtil;
 import org.apache.phoenix.util.TestUtil;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -86,6 +91,7 @@
 import org.slf4j.LoggerFactory;
 
 @Category(NeedsOwnMiniClusterTest.class)
+@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve"})
 public class PhoenixSyncTableToolIT {
   private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableToolIT.class);
 
@@ -158,7 +164,7 @@ public void tearDown() throws Exception {
   }
 
   @Test
-  public void testSyncTableValidateWithDataDifference() throws Exception {
+  public void testSyncTableWithDataDifference() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
@@ -178,46 +184,28 @@ public void testSyncTableValidateWithDataDifference() throws Exception {
     // Dry-run row-level logging should flag the 3 same-key/different-value rows as
     // ROWS_DIFFERENT_ON_TARGET; nothing missing or extra (replication seeded both sides
     // with the same row keys before introduceAndVerifyTargetDifferences mutated three).
-    assertEquals("Dry-run should detect 3 rows different on target", 3,
-      dryRunCounters.rowsDifferentOnTarget);
-    assertEquals("Dry-run should report 0 rows missing on target", 0,
-      dryRunCounters.rowsMissingOnTarget);
-    assertEquals("Dry-run should report 0 rows extra on target", 0,
-      dryRunCounters.rowsExtraOnTarget);
+    assertRowDriftCounters(dryRunCounters, 0, 0, 3, 0);
 
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4,
-      3, null);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, null);
 
     // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in
     // place.
     Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
-    Counters repairCounters = repairJob.getCounters();
-    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 3, 0, 0);
-    assertRepairRowCounters(repairCounters, 0, 0, 0);
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRowDriftCounters(repairCounters, 0, 0, 0, 0);
     // 3 rows × 2 mismatched cells (NAME + Phoenix's _0 empty-key cell) = 6 missing and 6 extra.
     assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
 
     // Target rows should now match source.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
 
-    List<PhoenixSyncTableCheckpointOutputRow> postRepairEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3,
-      countCheckpointsByTypeAndStatus(postRepairEntries,
-        PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
-        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
-    assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3,
-      countCheckpointsByTypeAndStatus(postRepairEntries,
-        PhoenixSyncTableCheckpointOutputRow.Type.REGION,
-        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   @Test
-  public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception {
+  public void testSyncTableWithDifferentZkQuorumFormats() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
     introduceAndVerifyTargetDifferences(uniqueTableName);
 
@@ -239,9 +227,17 @@ public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception
 
     // After validating detection across ZK formats, run dry-run + repair against the default
     // targetZkQuorum to confirm the tool converges source and target.
-    runSyncToolWithRepair(uniqueTableName);
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    validateSyncCounters(dryRunCounters, 10, 10, 7, 3);
+    validateMapperCounters(dryRunCounters, 1, 3);
+    assertRowDriftCounters(dryRunCounters, 0, 0, 3, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRowDriftCounters(repairCounters, 0, 0, 0, 0);
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   @Test
@@ -263,12 +259,22 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception {
     validateSyncCounters(counters, 10, 7, 7, 3);
     validateMapperCounters(counters, 1, 3);
     assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated);
+    // Three target rows were Phoenix-deleted, so dry-run sees them as missing on target.
+    assertRowDriftCounters(counters, 3, 0, 0, 0);
 
     // Repair pass only re-runs the 3 mismatched chunks (verified chunks are excluded by the
     // resume filter). Target's DELETEs left tombstones that shadow source's Puts at lower
     // timestamps, so each re-run mapper rolls up to UNREPAIRABLE.
     SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     validateMapperCountersRepair(repairCounters, 0, 0, 3, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 0, 3, 0, 0, 3, 0);
+
+    // Tombstones still shadow source — Phoenix SELECT must still diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+
+    // 7 verified chunks/mappers from the dry-run, 3 unrepairable chunks from repair, mappers
+    // roll up to UNREPAIRABLE because every chunk in their region went UNREPAIRABLE.
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
@@ -299,12 +305,15 @@ public void testSyncTableWithConditionalTTLExpiredRows() throws Exception {
     // Run sync tool, TTL-expired rows (1-3) should be skipped on both source and target
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
     SyncCountersResult counters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
 
     validateSyncCounters(counters, 7, 7, 5, 2);
     validateMapperCounters(counters, 2, 2);
+    assertRowDriftCounters(counters, 0, 0, 2, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 2, 0, 0, 2, 0, 0);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
@@ -355,12 +364,20 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception
     // on source vs 7 on target because IS_STRICT_TTL=false returns expired rows on source
     // (uncompacted) but compaction on target physically removed them — that asymmetry is by
     // design, not drift the tool can converge.
-    runSyncToolWithRepair(uniqueTableName);
+    //
+    // Note: the two runSyncTool calls above each write CHUNK/VERIFIED rows under their own
+    // (from-time, to-time) PK, so the checkpoint table accumulates entries from prior validate
+    // passes — validateCheckpointEntries can't be applied here against a single counter
+    // snapshot. Stick with the MISMATCHED-count invariant.
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    assertRowDriftCounters(repairCounters, 0, 0, 0, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 0, 0, 0, 0, 0, 0);
     assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   @Test
-  public void testSyncValidateIndexTable() throws Exception {
+  public void testSyncIndexTable() throws Exception {
     // Create data table on both clusters with replication
     createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
 
@@ -382,24 +399,24 @@ public void testSyncValidateIndexTable() throws Exception {
 
     RepairRunResult result = runSyncToolWithRepair(indexName);
     SyncCountersResult counters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
 
     validateSyncCounters(counters, 10, 7, 7, 3);
+    assertRowDriftCounters(counters, 3, 0, 0, 0);
 
     // Verify checkpoint entries show mismatches (from dry-run pass) before repair runs.
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
       queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null);
-
     assertFalse("Should have checkpointEntries", checkpointEntries.isEmpty());
 
-    // The repair pass syncs the index physical table on target with the source index. Since the
-    // data table was also corrupted on target (3 rows deleted via deleteHBaseRows) but we only
-    // ran sync on the index, the data table itself is still drifted — only assert on index
-    // checkpoint rows.
-    assertNoMismatchedCheckpoints(indexName, null);
+    // Repair-pass outcome (REPAIRED vs UNREPAIRABLE) depends on tombstone-vs-source timestamps,
+    // which we don't pin here, so fall back to an asserCheckpoint counters check that simply
+    // mirrors whatever the repair pass produced.
+    validateCheckpointEntries(indexName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncValidateLocalIndexTable() throws Exception {
+  public void testSyncLocalIndexTable() throws Exception {
     // Create data table on both clusters with replication
     createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
 
@@ -423,6 +440,7 @@ public void testSyncValidateLocalIndexTable() throws Exception {
     // missing index rows on target.
     RepairRunResult result = runSyncToolWithRepair(indexName);
     SyncCountersResult counters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
 
     assertTrue(String.format("Should have at least %d verified chunks, actual: %d", 1,
       counters.chunksVerified), counters.chunksVerified >= 1);
@@ -434,13 +452,11 @@ public void testSyncValidateLocalIndexTable() throws Exception {
       queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null);
 
     assertFalse("Should have checkpoint entries for local index", checkpointEntries.isEmpty());
-
-    // After repair, the local-index physical table on target should match source's index.
-    assertNoMismatchedCheckpoints(indexName, null);
+    validateCheckpointEntries(indexName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception {
+  public void testSyncMultiTenantSaltedTableDifferences() throws Exception {
     String[] tenantIds = new String[] { "TENANT_001", "TENANT_002", "TENANT_003" };
     int rowsPerTenant = 10;
     createMultiTenantSaltedTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
@@ -490,27 +506,26 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception
     RepairRunResult t1 =
       runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime);
     SyncCountersResult counters1 = getSyncCounters(t1.dryRunJob);
+    SyncCountersResult repairCounters1 = getSyncCounters(t1.repairJob);
     validateSyncCounters(counters1, 10, 10, 10, 0);
     validateMapperCounters(counters1, 4, 0);
+    assertRowDriftCounters(counters1, 0, 0, 0, 0);
+    assertRepairChunkAndMapperCounters(repairCounters1, 0, 0, 0, 0, 0, 0);
 
     // TENANT_002 has 3 modified rows. Dry-run detects, repair writes back source values.
     RepairRunResult t2 = runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[1]);
     SyncCountersResult counters2 = getSyncCounters(t2.dryRunJob);
+    SyncCountersResult repairCounters2 = getSyncCounters(t2.repairJob);
     validateSyncCounters(counters2, 10, 10, 7, 3);
     validateMapperCounters(counters2, 2, 2);
+    assertRowDriftCounters(counters2, 0, 0, 3, 0);
+    assertRepairChunkAndMapperCounters(repairCounters2, 3, 0, 0, 2, 0, 0);
 
-    // Verify checkpoint table has entries for the reprocessed regions
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_002");
-    assertFalse("Should have checkpoint entries for TENANT_002", checkpointEntries.isEmpty());
-
-    checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_001");
-    assertFalse("Should have checkpoint entries for TENANT_001", checkpointEntries.isEmpty());
-
-    // No MISMATCHED rows should remain after repair pass for either tenant.
-    assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_001");
-    assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_002");
+    // Pin checkpoint state per tenant.
+    validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1,
+      repairCounters1);
+    validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2,
+      repairCounters2);
 
     // After repair, TENANT_002's data should be identical between source and target.
     withTenantConnections(tenantIds[1],
@@ -518,7 +533,7 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception
   }
 
   @Test
-  public void testSyncTableValidateWithTimeRangeFilter() throws Exception {
+  public void testSyncTableWithTimeRangeFilter() throws Exception {
     createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
 
     // Insert data BEFORE the time range window
@@ -564,7 +579,7 @@ public void testSyncTableValidateWithTimeRangeFilter() throws Exception {
   }
 
   @Test
-  public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() throws Exception {
+  public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
 
     List<Integer> sourceSplits = Arrays.asList(15, 45, 51, 75, 95);
@@ -639,14 +654,11 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr
     // REPAIRED with stale boundaries (relative to the post-split layout); the resume filter
     // skips those, so a final run can leave residual drift. Cleanup the checkpoint and run
     // a dry-run + repair pass on the stable layout to converge.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   @Test
-  public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws Exception {
+  public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
 
     List<Integer> sourceSplits = Arrays.asList(25, 50, 75);
@@ -705,14 +717,11 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E
     // pass on the same window so the repair flow has a stable boundary set to converge.
     // Use the dry-run+repair pattern so any chunk that landed in a non-resumable state
     // (REPAIRED with stale boundaries) is re-validated rather than skipped.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   @Test
-  public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() throws Exception {
+  public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
 
     List<Integer> sourceSplits = Arrays.asList(10, 20, 30, 40, 50, 60, 70, 80, 90);
@@ -778,7 +787,7 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr
   }
 
   @Test
-  public void testSyncTableValidateIdempotentOnReRun() throws Exception {
+  public void testSyncTableIdempotentOnReRun() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     // Capture consistent time range for both runs (ensures checkpoint lookup will match)
@@ -820,7 +829,7 @@ public void testSyncTableValidateIdempotentOnReRun() throws Exception {
   }
 
   @Test
-  public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception {
+  public void testSyncTableIdempotentAfterRegionSplits() throws Exception {
     // Setup table with initial splits and data
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
@@ -875,7 +884,7 @@ public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception
   }
 
   @Test
-  public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exception {
+  public void testSyncTableWithSchemaAndTableNameOptions() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     // Introduce differences on target
@@ -884,17 +893,21 @@ public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exceptio
     // Run sync tool with both --schema and --table-name options
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--schema", "");
     SyncCountersResult counters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
 
     // Validate counters
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+    assertRowDriftCounters(counters, 0, 0, 3, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncTableValidateInBackgroundMode() throws Exception {
+  public void testSyncTableInBackgroundMode() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
@@ -925,18 +938,22 @@ public void testSyncTableValidateInBackgroundMode() throws Exception {
 
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+    assertRowDriftCounters(counters, 0, 0, 3, 0);
 
     // Now run the repair pass (foreground for synchronous assertions). Same time window so
     // the dry-run-pass MISMATCHED rows are overwritten with REPAIRED.
-    runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
+    Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncTableValidateWithCustomTimeouts() throws Exception {
+  public void testSyncTableWithCustomTimeouts() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
@@ -976,16 +993,20 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception {
     counters.logCounters(testName.getMethodName());
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+    assertRowDriftCounters(counters, 0, 0, 3, 0);
 
     // Repair pass over the same window: convergence + no MISMATCHED rows remaining.
-    runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
+    Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception {
+  public void testSyncTableWithExtraRowsOnTarget() throws Exception {
     // Create tables on both clusters
     createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
 
@@ -1013,21 +1034,12 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception {
     // Run dry-run + repair sharing the same time window.
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
     SyncCountersResult counters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
 
     validateSyncCounters(counters, 10, 15, 5, 5);
     validateMapperCounters(counters, 0, 4);
-
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-
-    // Count mismatched entries in checkpoint table — after the repair pass, all MISMATCHED
-    // rows from the dry-run pass should have been overwritten with REPAIRED.
-    long mismatchedCount = countCheckpointsByStatus(checkpointEntries,
-      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
-    long repairedCount = countCheckpointsByStatus(checkpointEntries,
-      PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
-    assertEquals("After repair, no MISMATCHED rows should remain", 0, mismatchedCount);
-    assertTrue("Should have REPAIRED rows after repair pass", repairedCount > 0);
+    assertRowDriftCounters(counters, 0, 5, 0, 0);
+    assertRepairChunkAndMapperCounters(repairCounters, 5, 0, 0, 4, 0, 0);
 
     // After repair: target should converge to source (10 odd-id rows). The 5 extra even-id
     // rows on target had only live cells, so tombstoneWholeRow can remove them.
@@ -1036,10 +1048,12 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception {
       TestUtil.getRowCount(sourceConnection, uniqueTableName));
     assertEquals("Target should now also have 10 rows after repair tombstones the extras", 10,
       TestUtil.getRowCount(targetConnection, uniqueTableName));
+
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   @Test
-  public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception {
+  public void testSyncTableWithConcurrentRegionSplits() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
     // Introduce some mismatches on target before sync
     List<Integer> mismatchIds = Arrays.asList(15, 35, 55, 75, 95);
@@ -1081,10 +1095,7 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception {
     // boundary mid-flight can land in REPAIRED with stale boundaries; once REPAIRED, the
     // resume filter skips it. Cleanup the checkpoint and run a dry-run + repair pass on the
     // stable region layout to converge.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   /**
@@ -1112,7 +1123,6 @@ public void testRepairWithConcurrentTargetSplits() throws Exception {
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Stage 1: stable dry-run populates MISMATCHED checkpoint rows.
     Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("Stable dry-run should succeed", dryRunJob.isSuccessful());
@@ -1120,11 +1130,9 @@ public void testRepairWithConcurrentTargetSplits() throws Exception {
     assertTrue("Dry-run should detect at least one mismatched chunk",
       dryRunCounters.chunksMismatched >= 1);
 
-    // Stage 2: kick off target-side splits while the repair pass runs. Source splits left out
-    // because a target-side split is what surfaces flushRepairMutations failures.
+    // Target-side splits race with the repair pass — that's where flushRepairMutations failures
+    // would surface.
     Runnable splitJoiner = startConcurrentRegionWork(() -> {
-      // No source-side work; pass a trivial Runnable so startConcurrentRegionWork still wires
-      // both threads and the joiner times out cleanly.
     }, () -> splitTableAt(targetConnection, uniqueTableName,
       Arrays.asList(15, 25, 35, 45, 55, 65, 75, 85, 95)), "repair-splits");
 
@@ -1133,21 +1141,15 @@ public void testRepairWithConcurrentTargetSplits() throws Exception {
     splitJoiner.run();
     assertTrue("Repair pass should not throw despite concurrent splits", repairJob.isSuccessful());
 
-    // Stage 3: stable convergence pass. Cleanup checkpoint so the resume filter doesn't skip
-    // chunks that were marked REPAIRED with stale boundaries during the racing pass.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
-    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    // Cleanup checkpoint so the resume filter doesn't skip chunks marked REPAIRED with stale
+    // boundaries during the racing pass.
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   /**
-   * P4 (idempotent repair): Guards against a regression where repair claims REPAIRED but does
-   * not actually converge. Run a full dry-run + repair on a divergent table, clean the
-   * checkpoint, then run the same dry-run + repair again on the now-converged tables and assert
-   * the second pass is a no-op (zero mismatches detected, zero chunks repaired).
+   * Guards against a regression where repair claims REPAIRED but doesn't actually converge.
+   * Repair the divergent table, clean the checkpoint, then re-run dry-run + repair on the now
+   * converged tables — both passes must be no-ops.
    */
   @Test
   public void testRepairIsIdempotent() throws Exception {
@@ -1158,7 +1160,6 @@ public void testRepairIsIdempotent() throws Exception {
     long fromTime = 0L;
     long toTime = System.currentTimeMillis();
 
-    // Pass 1: detect + repair.
     RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("First dry-run should succeed", firstRun.dryRunJob.isSuccessful());
@@ -1167,39 +1168,39 @@ public void testRepairIsIdempotent() throws Exception {
     SyncCountersResult firstDryRunCounters = getSyncCounters(firstRun.dryRunJob);
     assertTrue("First dry-run should detect mismatched chunks",
       firstDryRunCounters.chunksMismatched >= 1);
-    Counters firstRepairCounters = firstRun.repairJob.getCounters();
+    SyncCountersResult firstRepairCounters = getSyncCounters(firstRun.repairJob);
     assertTrue("First repair should mark chunks REPAIRED",
-      firstRepairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue() >= 1);
+      firstRepairCounters.chunksRepaired >= 1);
 
-    // Tables must be data-identical after the first repair.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters,
+      firstRepairCounters);
 
-    // Pass 2 prep: clean the checkpoint so the resume filter doesn't skip already-VERIFIED
-    // chunks — the second dry-run must scan the full layout from scratch.
+    // Clean the checkpoint so the second dry-run scans the full layout instead of resuming
+    // from VERIFIED chunks.
     cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
 
-    // Pass 2: same window, no further mutations. Both passes must be no-ops.
     RepairRunResult secondRun = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("Second dry-run should succeed", secondRun.dryRunJob.isSuccessful());
     assertTrue("Second repair should succeed", secondRun.repairJob.isSuccessful());
 
     SyncCountersResult secondDryRunCounters = getSyncCounters(secondRun.dryRunJob);
+    SyncCountersResult secondRepairCounters = getSyncCounters(secondRun.repairJob);
     assertEquals("Second dry-run should detect zero mismatches", 0,
       secondDryRunCounters.chunksMismatched);
-
-    // Second repair pass should be a no-op: nothing repaired, nothing failed.
-    assertRepairChunkAndMapperCounters(secondRun.repairJob.getCounters(), 0, 0, 0, 0, 0);
+    assertRepairChunkAndMapperCounters(secondRepairCounters, 0, 0, 0, 0, 0, 0);
+    assertRowDriftCounters(secondRepairCounters, 0, 0, 0, 0);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters,
+      secondRepairCounters);
   }
 
   /**
-   * P5 (all-tombstoned target-extra row): Source has no row K; target has row K but every cell
-   * is already a tombstone. {@code tombstoneWholeRow} returns {@code liveCellsTombstoned == 0}
-   * (line ~217 of {@code PhoenixSyncTableChunkRepairer}) → {@code drift.rowsCannotRepair++},
-   * {@code rowsExtraOnTarget} unchanged. Pins the rare "target row extra but already
-   * fully-tombstoned" path that currently has zero coverage.
+   * Target row K has only tombstones (no live cells) under {@code --raw-scan}, source lacks the
+   * row. {@code tombstoneWholeRow} finds zero live cells to tombstone → row rolls up as
+   * unrepairable, {@code rowsExtraOnTarget} stays 0.
    */
   @Test
   public void testRepairAllTombstonedTargetRowExtra() throws Exception {
@@ -1211,44 +1212,25 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception {
     final long ts = base + 1L;
     final long tombstoneTs = base + 2L;
 
-    // Plant a sentinel row on both sides so the verifier has *something* to compare and
-    // produces a non-empty chunk hash. The sentinel row stays clean — the test focuses on
-    // rowId=5 only.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')");
-      scnSrc.commit();
-    }
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')");
-      scnTgt.commit();
-    }
+    // Sentinel row on both sides so the chunk hash is non-empty.
+    upsertAtScnBoth(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')");
 
-    // Target only: plant raw DeleteColumn tombstones for row K with NO underlying Put cells.
-    // Under --raw-scan the row surfaces (tombstones are themselves cells) but every cell is a
-    // Delete, so tombstoneWholeRow() returns liveCellsTombstoned == 0 → drift.rowsCannotRepair
-    // increments and rowsExtraOnTarget stays 0. This pins the rare "row exists in raw view but
-    // has no live cells to tombstone" branch.
+    // Tombstones with no underlying Puts — row surfaces under raw scan but every cell is a Delete.
     byte[] rowKey = integerRowKey(rowId);
     writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs);
     writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE",
       tombstoneTs);
     writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs);
 
-    // Spin until wall-clock advances past the highest cell timestamp so --to-time
-    // (which defaults to currentTimeMillis()) covers our planted cells.
-    while (System.currentTimeMillis() <= tombstoneTs) {
-      // spin
-    }
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)),
       "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    // Row K's cells were already all tombstones — no live cells to tombstone again, and
-    // the row is flagged unrepairable.
-    assertRepairRowCounters(result.repairJob.getCounters(), 0, 0, 1);
+    assertRowDriftCounters(getSyncCounters(result.repairJob), 0, 0, 0, 1);
+    // Phoenix SELECT already sees both sides as identical (target's row 5 is tombstone-only and
+    // invisible). Divergence is at the raw-cell level only — not asserted via SELECT here.
   }
 
   @Test
@@ -1286,8 +1268,12 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception
     Job job = runSyncTool(uniqueTableName, "--dry-run");
     SyncCountersResult counters = getSyncCounters(job);
 
-    // Validate counters - all rows should be processed and all chunks mismatched
+    // Validate counters - all rows should be processed and all chunks mismatched. The 10 rows
+    // have identical Phoenix-visible values but distinct cell timestamps, so the checksum
+    // diverges per row (10 mismatched chunks) and the row-level diff classifies them as
+    // ROWS_DIFFERENT_ON_TARGET (same key on both sides, different cell payload).
     validateSyncCounters(counters, 10, 10, 0, 10);
+    assertRowDriftCounters(counters, 0, 0, 10, 0);
 
     // Verify checkpoint entries show mismatches
     List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
@@ -1299,7 +1285,7 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception
   }
 
   @Test
-  public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception {
+  public void testSyncTableWithConcurrentRegionMerges() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
     // Explicitly split tables to create many regions for merging
     List<Integer> sourceSplits = Arrays.asList(10, 15, 20, 25, 40, 45, 60, 65, 80, 85);
@@ -1353,14 +1339,11 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception {
     // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter
     // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass
     // on the stable region layout to converge.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   @Test
-  public void testSyncTableValidateWithPagingTimeout() throws Exception {
+  public void testSyncTableWithPagingTimeout() throws Exception {
     // Create tables on both clusters
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
 
@@ -1419,7 +1402,7 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception {
   }
 
   @Test
-  public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception {
+  public void testSyncTableWithPagingTimeoutWithSplits() throws Exception {
     // Create tables on both clusters
     setupStandardTestWithReplication(uniqueTableName, 1, 100);
 
@@ -1537,6 +1520,105 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception {
       uniqueTableName));
   }
 
+  /**
+   * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the
+   * tool must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator
+   * can investigate the audit gap. Failure is injected via a RegionObserver on the
+   * checkpoint table that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}.
+   */
+  @Test
+  public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
+    setupStandardTestWithReplication(uniqueTableName, 1, 5);
+    introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4));
+
+    // Run once first so the checkpoint table exists; we can only attach a coprocessor to a
+    // table that's already been created.
+    Job initial = runSyncTool(uniqueTableName, "--dry-run");
+    assertTrue(initial.isSuccessful());
+
+    String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME;
+    TestUtil.addCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class);
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+    try {
+      // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail.
+      PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
+      tool.setConf(sourceClusterConf());
+      int exitCode = tool.run(
+        new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum,
+          "--run-foreground", "--chunk-size", "1", "--from-time", String.valueOf(fromTime),
+          "--to-time", String.valueOf(toTime) });
+
+      assertNotEquals("Tool must surface non-zero exit when checkpoint writes fail", 0, exitCode);
+      // Note: CHECKPOINT_WRITE_FAILED only increments on the chunk-outcome write path
+      // (writeChunkCheckpoint). Region-level and verified/mismatched-chunk writes propagate
+      // SQLException up and fail the mapper task instead — that still produces a non-zero
+      // exit, which is the operator-visible signal we pin here. We also can't assert on
+      // target convergence inside this block: when the very first checkpoint write throws,
+      // the mapper aborts before reaching all mismatched chunks, so target stays drifted.
+    } finally {
+      TestUtil.removeCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class);
+    }
+
+    // Recovery: re-run repair (no --dry-run) with the SAME time window. The resume filter
+    // skips only VERIFIED/REPAIRED, so the mismatched/non-terminal rows from the failed run
+    // are re-processed and target converges — no checkpoint cleanup needed.
+    Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    assertTrue(recovery.isSuccessful());
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+  }
+
+  /**
+   * Repair-batch flush failure: a RegionObserver on the target data table fails every
+   * {@code preBatchMutate}, so {@code flushRepairMutations} throws and the chunk rolls up
+   * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a
+   * fresh run converges.
+   */
+  @Test
+  public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception {
+    setupStandardTestWithReplication(uniqueTableName, 1, 5);
+    introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4));
+
+    TestUtil.addCoprocessor(targetConnection, uniqueTableName, RepairBatchFailingObserver.class);
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+    try {
+      // dryRun + repair: dry-run only reads from target, so it succeeds; repair flushes
+      // mutations and trips the observer.
+      RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+        String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+      assertTrue("Dry-run should still succeed", result.dryRunJob.isSuccessful());
+
+      Counters c = result.repairJob.getCounters();
+      assertTrue("CHUNKS_REPAIR_FAILED must increment when target writes fail",
+        c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() >= 1);
+      assertTrue("MAPPERS_REPAIR_FAILED must roll up when any chunk fails",
+        c.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue() >= 1);
+
+      List<PhoenixSyncTableCheckpointOutputRow> entries =
+        queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+      assertTrue("At least one REPAIR_FAILED checkpoint row must persist",
+        countCheckpointsByStatus(entries,
+          PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) >= 1);
+    } finally {
+      TestUtil.removeCoprocessor(targetConnection, uniqueTableName,
+        RepairBatchFailingObserver.class);
+    }
+
+    // Repair was blocked end-to-end — source/target must still diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+
+    // Recovery: re-run repair with the SAME time window — the resume filter skips only
+    // VERIFIED/REPAIRED, so REPAIR_FAILED chunks are re-processed and target converges.
+    Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
+    assertTrue(recovery.isSuccessful());
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+  }
+
   @Test
   public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
     // Setup table with replication and insert data
@@ -1599,9 +1681,6 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
   }
 
   /**
-   * P1 (hidden-version unwinding): Verifies the most subtle correctness path in the repairer —
-   * tombstoneTargetCell case 3 from {@code PhoenixSyncTableChunkRepairer.tombstoneTargetCell}.
-   *
    * <p>Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code
    * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and
    * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is
@@ -1614,7 +1693,6 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
   @Test
   public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     final int rowId = 5;
-    // Two clusters, no replication — we plant cells deterministically on each side.
     long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7");
 
     long fromTime = 0L;
@@ -1622,43 +1700,20 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     final long targetT1 = base + 2L;
     final long targetT2 = base + 3L;
 
-    byte[] rowKey = integerRowKey(rowId);
-    String family = "0"; // COLUMN_ENCODED_BYTES=NONE → family is "0"
-    String qualifier = "NAME";
-
-    // Source: single Put(NAME, "alice", T=100). Empty-key cell is needed so the row is
-    // visible to Phoenix scans (and thus to the verifier). Use an SCN connection for that.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
+    // Source: single NAME Put.
+    upsertAtScnSource(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
 
-    // Target: insert via SCN at T1 then again at T2 to leave two NAME versions; with VERSIONS=2
-    // both versions are retained. Visible read is "carol" (T2); "bob" (T1) is one-version-hidden.
-    try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) {
-      scnTgtT1.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
-      scnTgtT1.commit();
-    }
-    try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) {
-      scnTgtT2.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
-      scnTgtT2.commit();
-    }
+    // Target: two NAME versions retained under VERSIONS=2 — "carol"@T2 visible, "bob"@T1 hidden.
+    upsertAtScnTarget(targetT1,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
+    upsertAtScnTarget(targetT2,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
 
     // Sanity: target's visible NAME is "carol" before repair.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertEquals("Pre-repair target visible NAME should be carol", "carol", rs.getString(1));
-      }
-    }
+    assertTargetName(uniqueTableName, rowId, "carol");
 
-    // Run dry-run + repair sharing the same time window with --read-all-versions so the
-    // verifier and repairer both see the hidden version.
+    // --read-all-versions so verifier and repairer both see the hidden version.
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
       "--read-all-versions");
@@ -1667,63 +1722,33 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     Counters repairCounters = result.repairJob.getCounters();
-    // Two NAME versions ("bob"@T1 + "carol"@T2) and two empty-key versions on target — all sit
-    // at timestamps that don't match source's single sourceTs, so each gets counted as
-    // either "different" or "extra" depending on the diff branch. We require at least 2 extras
-    // (the two extra NAME versions vs source's single mirror) — the exact split between
-    // CELLS_EXTRA and CELLS_DIFFERENT depends on per-qualifier matching. Post-repair raw scan
-    // assertions below pin the structural outcome.
     assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions",
       repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2);
 
-    // Post-repair, Phoenix's standard read on target must see source's "alice", NOT "bob".
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertEquals("Post-repair target NAME must be alice (hidden version unwound)", "alice",
-          rs.getString(1));
-      }
+    assertTargetName(uniqueTableName, rowId, "alice");
+
+    // Raw scan: two Delete markers (one per target NAME version) plus source's mirror Put.
+    // Target's two original Puts (at targetT1 and targetT2) remain visible to a raw scan even
+    // after repair's point-Deletes shadow them, so summary.newestPutTs == targetT2 — assert
+    // instead that source's mirror Put landed at exactly sourceTs.
+    RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
+    assertEquals("Two delete markers (one for each target NAME version) expected", 2,
+      summary.totalDeletes());
+    int namePutAtSourceTs = 0;
+    for (long ts : summary.putTimestamps) {
+      if (ts == sourceTs) namePutAtSourceTs++;
     }
+    assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
 
-    // Raw scan: target should have two Delete markers at T2 and T1 plus source's mirror Put@T0.
-    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
-      Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true);
-      scan.readAllVersions();
-      int nameDeletes = 0;
-      int namePutAtSourceTs = 0;
-      try (ResultScanner sc = targetHTable.getScanner(scan)) {
-        for (Result r; (r = sc.next()) != null;) {
-          for (Cell c : r.rawCells()) {
-            if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family))
-              && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) {
-              if (CellUtil.isDelete(c)) {
-                nameDeletes++;
-              } else if (c.getTimestamp() == sourceTs) {
-                namePutAtSourceTs++;
-              }
-            }
-          }
-        }
-      }
-      assertEquals("Two delete markers (one for each target NAME version) expected", 2,
-        nameDeletes);
-      assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
-    }
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
   }
 
   /**
-   * P2 (partial-mirror shadow): Verifies the {@code RowMirrorStatus.PARTIALLY_MIRRORED} branch
-   * indirectly via {@code generateMutationForDiffCells} — both rows exist; one source cell is
-   * shadowed by a target tombstone, sibling cells mirror successfully. {@code anyCellUnrepairable}
-   * propagates up to {@code drift.rowsCannotRepair++} while no cell counter increments for the
-   * shadowed cell (mirror returned false, so nothing was written and nothing counted).
-   *
-   * <p>Setup: row K exists on both sides via a matching {@code NAME_VALUE} cell. Target has a
-   * pre-existing {@code DeleteColumn(NAME, T=300)} shadowing any future {@code NAME} Put at
-   * {@code ts <= 300}. Source's {@code NAME, "alice", T=200} would land on disk but stay
-   * invisible — repair detects this upfront via {@code wouldShadow} and skips the doomed write.
+   * Partial-row shadow: row exists on both sides via a matching NAME_VALUE; target has a
+   * DeleteColumn on NAME above source's NAME Put. Repair must detect the shadow up front via
+   * {@code wouldShadow} and skip the doomed mirror — row rolls up unrepairable, no cell counter
+   * ticks for the suppressed mirror.
    */
   @Test
   public void testRepairPartialShadowWithinRow() throws Exception {
@@ -1734,61 +1759,115 @@ public void testRepairPartialShadowWithinRow() throws Exception {
     final long sourceTs = base + 1L;
     final long shadowTombstoneTs = base + 2L;
 
-    // Source: row K with NAME="alice" and NAME_VALUE=99, all at sourceTs.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName
-        + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
-      scnSrc.commit();
-    }
+    // Source: row K with NAME and NAME_VALUE at sourceTs.
+    upsertAtScnSource(sourceTs, "UPSERT INTO " + uniqueTableName
+      + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
 
-    // Target: row K with only NAME_VALUE=99 at sourceTs (matches source's NAME_VALUE).
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), sourceTs)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnTgt.commit();
-    }
-    // Plant a DeleteColumn tombstone on target's NAME at shadowTombstoneTs, which shadows any
-    // source mirror at ts <= shadowTombstoneTs.
+    // Target: only NAME_VALUE matching source; NAME is shadowed by the DeleteColumn below.
+    upsertAtScnTarget(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
     writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       shadowTombstoneTs);
 
-    // Spin until wall-clock advances past the highest cell timestamp so --to-time
-    // (which defaults to currentTimeMillis()) covers our planted cells.
-    while (System.currentTimeMillis() <= shadowTombstoneTs) {
-      // spin
-    }
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
-      "--raw-scan");
+      String.valueOf(fromTime), "--to-time",
+      String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan");
 
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
-    assertTrue("Repair pass should succeed (shadowing is correctness-only, not a job error)",
-      result.repairJob.isSuccessful());
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters repairCounters = result.repairJob.getCounters();
-    // Source's NAME mirror is suppressed by the shadow → no cell counter ticks; row is unrepairable.
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     assertRepairCellCounters(repairCounters, 0, 0, 0, 1);
     assertTrue("At least one mapper should roll up to UNREPAIRABLE",
-      repairCounters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+      repairCounters.mappersUnrepairable >= 1);
 
-    // Post-repair, target's read view of NAME for row K is still null (DeleteColumn at T=300
-    // covers everything <= T=300 — including any source mirror we *might* have written). The
-    // assertion validates the repair refused to write the doomed Put.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+    assertTargetNameNull(uniqueTableName, rowId);
+
+    // Source NAME='alice' but target NAME=null — Phoenix SELECT must still diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+  }
+
+  /**
+   * A chunk that lands in UNREPAIRABLE due to a shadowing target
+   * tombstone must recover after the operator runs a major compaction on target and re-runs
+   * sync. Pass 1 reproduces the partial-shadow setup and asserts UNREPAIRABLE; major compact
+   * on target reaps the standalone DeleteColumn; pass 2 mirrors the source Put cleanly and
+   * converges.
+   */
+  @Test
+  public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+
+    long fromTime = 0L;
+    final long sourceTs = base + 1L;
+    final long shadowTombstoneTs = base + 2L;
+
+    // Source: row K with NAME and NAME_VALUE at sourceTs.
+    upsertAtScnSource(sourceTs, "UPSERT INTO " + uniqueTableName
+      + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+    // Target: only NAME_VALUE matching source; NAME is shadowed by the DeleteColumn below.
+    upsertAtScnTarget(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+    writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
+      shadowTombstoneTs);
+
+    // Pass 1: shadow makes the chunk UNREPAIRABLE.
+    long pass1ToTime = waitUntilWallClockPasses(shadowTombstoneTs);
+    RepairRunResult pass1 = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(pass1ToTime), "--raw-scan");
+
+    assertTrue(pass1.dryRunJob.isSuccessful());
+    assertTrue(pass1.repairJob.isSuccessful());
+
+    SyncCountersResult pass1Counters = getSyncCounters(pass1.repairJob);
+    assertRepairCellCounters(pass1Counters, 0, 0, 0, 1);
+    assertTrue(pass1Counters.mappersUnrepairable >= 1);
+    assertTrue(countCheckpointsByStatus(
+      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null),
+      PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE) >= 1);
+
+    // Target must not be corrupted: NAME stays null while the shadow exists.
+    assertTargetNameNull(uniqueTableName, rowId);
+    // Pass 1 left the row in an unrepairable state — Phoenix SELECT must still diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+
+    // Operator recovery: reap the tombstone on target, wipe checkpoints so the resume filter
+    // re-enters the chunk, then re-run.
+    flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName);
+    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
+
+    // Pass 2: tombstone is gone; mirror succeeds.
+    long pass2ToTime = waitUntilWallClockPasses(pass1ToTime);
+    RepairRunResult pass2 = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(pass2ToTime), "--raw-scan");
+
+    assertTrue(pass2.dryRunJob.isSuccessful());
+    assertTrue(pass2.repairJob.isSuccessful());
+
+    SyncCountersResult pass2Counters = getSyncCounters(pass2.repairJob);
+    assertRepairCellCounters(pass2Counters, 1, 0, 0, 0);
+    assertTrue(pass2Counters.mappersRepaired >= 1);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    assertTrue(countCheckpointsByStatus(
+      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null),
+      PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1);
+
+    try (PreparedStatement ps = targetConnection.prepareStatement(
+      "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
       ps.setInt(1, rowId);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue(rs.next());
-        assertNull("NAME should still be null on target — shadow was respected",
-          rs.getString(1));
+        assertEquals("alice", rs.getString(1));
+        assertEquals(99L, rs.getLong(2));
       }
     }
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   /**
-   * P2 (cell-missing branch): same row exists on both sides, source has an extra column the
-   * target lacks. {@code generateMutationForDiffCells} should mirror it through the
-   * {@code cellMissing++} branch (source-only cell, no shadow on target).
+   * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the
+   * source cell through the {@code cellMissing++} branch.
    */
   @Test
   public void testRepairCellMissingOnTarget() throws Exception {
@@ -1797,46 +1876,30 @@ public void testRepairCellMissingOnTarget() throws Exception {
 
     final long ts = base + 1L;
 
-    // Source: row K with NAME and NAME_VALUE.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
-      scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName
-        + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
-      scnSrc.commit();
-    }
-    // Target: row K with only NAME_VALUE matching source. NAME is absent.
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnTgt.commit();
-    }
+    // Source: NAME and NAME_VALUE.
+    upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName
+      + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+    // Target: only NAME_VALUE — NAME is missing.
+    upsertAtScnTarget(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
 
-    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
-    // covers the planted cells.
-    while (System.currentTimeMillis() <= ts) {
-      // spin
-    }
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(System.currentTimeMillis()));
+      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    assertRepairCellCounters(result.repairJob.getCounters(), 1, 0, 0, 0);
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    assertRepairCellCounters(repairCounters, 1, 0, 0, 0);
 
-    // Post-repair: target's NAME should equal source's "alice".
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertEquals("alice", rs.getString(1));
-      }
-    }
+    assertTargetName(uniqueTableName, rowId, "alice");
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
-   * P2 (cell-extra branch): same row exists on both sides, target has an extra column source
-   * lacks. {@code generateMutationForDiffCells} should tombstone it via the
-   * {@code cellExtra++} branch ({@code tombstoneTargetCell} with {@code sourceMaxTs == null}
-   * → {@code Delete.addColumns}).
+   * Cell extra on target: target has a column source lacks. Repair tombstones it via the
+   * {@code cellExtra++} branch.
    */
   @Test
   public void testRepairCellExtraOnTarget() throws Exception {
@@ -1845,47 +1908,75 @@ public void testRepairCellExtraOnTarget() throws Exception {
 
     final long ts = base + 1L;
 
-    // Source: row K with only NAME_VALUE.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnSrc.commit();
-    }
-    // Target: row K with same NAME_VALUE plus an extra raw NAME cell at the same ts.
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnTgt.commit();
-    }
+    // Source: only NAME_VALUE.
+    upsertAtScnSource(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+    // Target: matching NAME_VALUE plus an extra raw NAME cell that source doesn't have.
+    upsertAtScnTarget(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
     writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts,
       Bytes.toBytes("bob"));
 
-    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
-    // covers the planted cells.
-    while (System.currentTimeMillis() <= ts) {
-      // spin
-    }
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(System.currentTimeMillis()));
+      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    assertRepairCellCounters(result.repairJob.getCounters(), 0, 1, 0, 0);
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    assertRepairCellCounters(repairCounters, 0, 1, 0, 0);
 
-    // Post-repair: target's NAME should be tombstoned and read as null.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertNull(rs.getString(1));
-      }
-    }
+    assertTargetNameNull(uniqueTableName, rowId);
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
+  }
+
+  /**
+   * {@code tombstoneTargetCell} Case 2 ({@code sourceMaxTs >= ts}): target has a stray older
+   * version of a column that source also has at a higher ts. Repair must point-Delete the
+   * stray version only — no hidden-version sweep — so the visible NAME stays at source's value.
+   */
+  @Test
+  public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7");
+
+    final long olderTs = base + 1L;
+    final long ts = base + 2L;
+
+    // Source and target: NAME='alice' at ts.
+    upsertAtScnBoth(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+    // Target also gets a stray older raw NAME@olderTs that source doesn't have.
+    writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", olderTs,
+      Bytes.toBytes("old"));
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan",
+      "--read-all-versions");
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    // Only the stray older NAME cell counts as extra; sourceMaxTs >= ts ⇒ single point-Delete.
+    assertRepairCellCounters(repairCounters, 0, 1, 0, 0);
+
+    assertTargetName(uniqueTableName, rowId, "alice");
+
+    // Raw scan: exactly one Delete marker at olderTs, no Delete at ts.
+    RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
+    assertEquals("Exactly one tombstone for the stray older NAME version", 1,
+      summary.totalDeletes());
+    assertEquals("Tombstone must target stray version only", olderTs,
+      (long) summary.deleteTimestamps.get(0));
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
-   * P2 (cell-different branch): same row, same {@code (cf, q, ts)} coords, different value.
-   * {@code generateMutationForDiffCells} should hit the {@code cellDifferent++} branch via the
-   * {@code !matchingValue} check at the head of the loop.
+   * Cell different on target: same {@code (cf, q, ts)}, different value. Repair hits the
+   * {@code cellDifferent++} branch.
    */
   @Test
   public void testRepairCellDifferentValue() throws Exception {
@@ -1894,48 +1985,94 @@ public void testRepairCellDifferentValue() throws Exception {
 
     final long ts = base + 1L;
 
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
-      scnTgt.commit();
-    }
+    // Source: NAME='alice' at ts.
+    upsertAtScnSource(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+    // Target: NAME='bob' at the same ts — same (cf, q, ts), different value.
+    upsertAtScnTarget(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
 
-    // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis())
-    // covers the planted cells.
-    while (System.currentTimeMillis() <= ts) {
-      // spin
-    }
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(System.currentTimeMillis()));
+      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    assertRepairCellCounters(result.repairJob.getCounters(), 0, 0, 1, 0);
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    assertRepairCellCounters(repairCounters, 0, 0, 1, 0);
 
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+    assertTargetName(uniqueTableName, rowId, "alice");
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
+  }
+
+  /**
+   * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target)
+   * and {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to
+   * the correct family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep
+   * the two families' cells from clobbering each other.
+   */
+  @Test
+  public void testRepairAcrossMultipleColumnFamilies() throws Exception {
+    final int rowId = 5;
+    long base = createMultiColumnFamilyTableOnBothClusters(uniqueTableName);
+
+    final long ts = base + 1L;
+
+    // Source: A in CF1 set, B in CF2 unset.
+    upsertAtScnSource(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, CF1.A) VALUES (" + rowId + ", 'a-src')");
+    // Target: A missing in CF1, B set in CF2 (extra) — drift in both families at once.
+    upsertAtScnTarget(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, CF2.B) VALUES (" + rowId + ", 'b-tgt')");
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // CF1.A missing → +1, CF2.B extra → +1; nothing different / unrepairable.
+    assertTrue("CF1.A must mirror as missing",
+      c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1);
+    assertTrue("CF2.B must tombstone as extra",
+      c.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 1);
+    assertEquals("No row should be unrepairable", 0,
+      c.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+
+    try (PreparedStatement ps = targetConnection.prepareStatement(
+      "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) {
       ps.setInt(1, rowId);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue(rs.next());
-        assertEquals("alice", rs.getString(1));
+        assertEquals("CF1.A mirrored from source", "a-src", rs.getString(1));
+        assertNull("CF2.B tombstoned to match source", rs.getString(2));
       }
     }
+
+    // Multi-CF schema doesn't match verifyDataIdentical's standard query — compare inline.
+    assertEquals("Source/target rows must match across both column families",
+      collectMultiCfRows(sourceConnection, uniqueTableName),
+      collectMultiCfRows(targetConnection, uniqueTableName));
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+  }
+
+  private List<String> collectMultiCfRows(Connection conn, String tableName) throws SQLException {
+    List<String> rows = new ArrayList<>();
+    try (Statement stmt = conn.createStatement();
+      ResultSet rs = stmt.executeQuery(
+        "SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) {
+      while (rs.next()) {
+        rows.add(rs.getInt(1) + "|" + rs.getString(2) + "|" + rs.getString(3));
+      }
+    }
+    return rows;
   }
 
   /**
-   * P6 (asymmetric load-target time-range): Source has Put at T=200 inside the user's window;
-   * target has a {@code DeleteColumn} planted at T=600 — strictly above {@code --to-time T=500}.
-   * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so
-   * the diff routes to {@code mirrorWholeRow} in
-   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside,
-   * {@code TargetRowRecord.load} deliberately uses {@code (fromTime, MAX_VALUE)} — so it
-   * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200
-   * (DeleteColumn covers ts &lt;= T=600). Result: source's mirror is suppressed, the row is
-   * flagged unrepairable.
+   * Tombstone planted strictly above {@code --to-time}: the diff scan can't see it, but
+   * {@code TargetRowRecord.load} (which uses {@code [fromTime, MAX_VALUE]}) does — so
+   * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors,
+   * giving the row visible existence; NAME stays null.
    */
   @Test
   public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
@@ -1947,60 +2084,35 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
     final long toTime = base + 2L;
     final long tombstoneTs = base + 3L;
 
-    // Source has the row inside the diff window.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
-    // Target has a tombstone strictly above --to-time. Diff scan won't see it;
-    // TargetRowRecord.load still will.
+    // Source: NAME='alice' at sourceTs (within --to-time window).
+    upsertAtScnSource(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+    // Target: DeleteColumn on NAME at tombstoneTs > --to-time — invisible to the diff scan.
     writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       tombstoneTs);
 
-    // Phoenix requires --to-time <= currentTimeMillis() at tool-run. Spin until wall-clock
-    // moves past tombstoneTs (the highest cell ts in this test) so toTime is unambiguously in
-    // the past.
-    while (System.currentTimeMillis() <= tombstoneTs) {
-      // spin
-    }
+    waitUntilWallClockPasses(tombstoneTs);
 
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
-    assertTrue("Repair should succeed (shadowing is correctness-only)",
-      result.repairJob.isSuccessful());
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
-    // Phoenix UPSERT plants NAME *and* the empty-key cell ("_0"). DeleteColumn shadows only NAME —
-    // "_0" mirrors through (rowsMissing++), and the row is unrepairable because NAME was suppressed.
-    assertRepairRowCounters(c, 1, 0, 1);
-    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
-      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+    SyncCountersResult c = getSyncCounters(result.repairJob);
+    // The empty-key cell mirrors (rowsMissing++); NAME is shadow-suppressed (rowsCannotRepair++).
+    assertRowDriftCounters(c, 1, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
 
-    // Post-repair: target's NAME should still read as null. The shadow was respected, so no Put
-    // for NAME landed (only the empty-key cell, which gives the row visible existence with NAME
-    // covered by the DeleteColumn tombstone above it).
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue("Row exists on target via mirrored empty-key cell", rs.next());
-        assertNull("NAME should still be null — DeleteColumn shadow respected", rs.getString(1));
-      }
-    }
+    assertTargetNameNull(uniqueTableName, rowId);
+
+    // NAME shadow leaves target NAME=null while source has 'alice' — Phoenix SELECT diverges.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
   }
 
   /**
-   * Shadow detection via {@code DeleteFamily}: target has a {@code DeleteFamily} tombstone on
-   * cf {@code "0"} covering every qualifier in the family at {@code ts <= tombstoneTs}. Source
-   * has every cell of the row inside the diff window at {@code ts < tombstoneTs}, so each
-   * source cell would be shadowed if mirrored. Drives the
-   * {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow} — uncovered
-   * by other shadow ITs which only exercise {@code DeleteColumn}.
-   *
-   * <p>To force the {@code cmp < 0} (whole-row mirror) path, the tombstone is planted strictly
-   * above {@code --to-time} so the diff scan does not see target's row at all, but
-   * {@code TargetRowRecord.load} (range {@code [fromTime, MAX_VALUE]}) still surfaces it.
+   * Shadow via {@code DeleteFamily}: tombstone covers every qualifier in cf {@code "0"} at
+   * {@code ts <= tombstoneTs}, planted strictly above {@code --to-time} so only
+   * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls
+   * up unrepairable.
    */
   @Test
   public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
@@ -2012,58 +2124,34 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
     final long toTime = base + 2L;
     final long familyTombstoneTs = base + 3L;
 
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
-    // DeleteFamily on cf "0" — covers every qualifier (NAME, NAME_VALUE, _0, ...) at
-    // ts <= familyTombstoneTs. Planted strictly above --to-time so the diff scan can't see it.
+    // Source: NAME='alice' at sourceTs.
+    upsertAtScnSource(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+    // Target: DeleteFamily on cf "0" above --to-time — covers every source qualifier on load.
     writeRawDeleteFamily(targetConnection, uniqueTableName, integerRowKey(rowId), "0",
       familyTombstoneTs);
 
-    while (System.currentTimeMillis() <= familyTombstoneTs) {
-      // spin
-    }
+    waitUntilWallClockPasses(familyTombstoneTs);
 
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
-    assertTrue("Repair should succeed (shadowing is correctness-only)",
-      result.repairJob.isSuccessful());
-
-    Counters c = result.repairJob.getCounters();
-    // Both source cells (NAME and _0) live at sourceTs in cf "0"; DeleteFamily covers the whole
-    // family at ts <= familyTombstoneTs (sourceTs < familyTombstoneTs), so every mirror is
-    // suppressed → mirrorWholeRow returns FULLY_SHADOWED → rowsMissing stays 0,
-    // rowsCannotRepair++.
-    assertRepairRowCounters(c, 0, 0, 1);
-    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
-      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
-
-    // Post-repair: target should still have no visible row — every source cell was suppressed.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertFalse("Row should not be visible on target — DeleteFamily covered every source cell",
-          rs.next());
-      }
-    }
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    SyncCountersResult c = getSyncCounters(result.repairJob);
+    assertRowDriftCounters(c, 0, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
+
+    assertTargetRowAbsent(uniqueTableName, rowId);
+
+    // Source has the row, target's family is shadowed away — Phoenix SELECT must diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
   }
 
   /**
-   * Shadow detection via {@code DeleteFamilyVersion}: target has a {@code DeleteFamilyVersion}
-   * tombstone on cf {@code "0"} at exactly {@code sourceTs}. Source's cells at the same ts get
-   * shadowed because {@code DeleteFamilyVersion} matches every qualifier in the family at the
-   * exact ts. Drives the {@code TargetRowRecord.deleteFamilyVersionTs} branch in
-   * {@code wouldShadow} — also uncovered prior to this test.
-   *
-   * <p>{@code DeleteFamilyVersion} requires ts equality (not inequality) so the
-   * tombstone-above-{@code toTime} trick used in the {@code DeleteColumn}/{@code DeleteFamily}
-   * shadow tests doesn't apply here. Instead we omit {@code --raw-scan}: target has no live
-   * cells (just the tombstone), so without raw mode the diff scan sees target as empty,
-   * routing to the {@code cmp < 0} mirrorWholeRow path; {@code TargetRowRecord.load} runs
-   * raw internally and surfaces the tombstone for shadow detection.
+   * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"}
+   * at exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as
+   * empty (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces
+   * the tombstone for the {@code wouldShadow} check.
    */
   @Test
   public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
@@ -2073,48 +2161,33 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
     final long fromTime = 0L;
     final long sourceTs = base + 1L;
 
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
-    // DeleteFamilyVersion on cf "0" at exactly sourceTs — covers every qualifier in the family
-    // at ts == sourceTs. Source's NAME and _0 cells, both Put at sourceTs, are shadow targets.
+    // Source: NAME='alice' at sourceTs.
+    upsertAtScnSource(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
+    // Target: DeleteFamilyVersion at exactly sourceTs — shadows every source cell at that ts.
     writeRawDeleteFamilyVersion(targetConnection, uniqueTableName, integerRowKey(rowId), "0",
       sourceTs);
 
-    while (System.currentTimeMillis() <= sourceTs) {
-      // spin
-    }
-
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()));
-    assertTrue("Repair should succeed (shadowing is correctness-only)",
-      result.repairJob.isSuccessful());
+      String.valueOf(fromTime), "--to-time",
+      String.valueOf(waitUntilWallClockPasses(sourceTs)));
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
-    assertRepairRowCounters(c, 0, 0, 1);
-    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
-      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+    SyncCountersResult c = getSyncCounters(result.repairJob);
+    assertRowDriftCounters(c, 0, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
 
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertFalse("Row should not be visible on target — DeleteFamilyVersion shadowed every "
-          + "source cell at sourceTs", rs.next());
-      }
-    }
+    assertTargetRowAbsent(uniqueTableName, rowId);
+
+    // Source has 'alice', target's row is fully shadowed — Phoenix SELECT must diverge.
+    verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
   }
 
   /**
-   * Multi-hidden-version unwinding: extends {@link #testRepairUnwindsHiddenTargetVersions}
-   * with TWO max-versions-hidden Puts beneath target's visible Put. Pins
-   * {@code targetPutTimestampsBetween} (in {@link
-   * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord})
-   * end-to-end: when {@code sourceMaxTs < target visible ts}, the repairer must point-Delete
-   * the visible ts AND every hidden Put in {@code (sourceMaxTs, target visible ts)} —
-   * otherwise after we shadow the visible cell, a hidden Put surfaces above source's mirror.
+   * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible)
+   * above source's single Put. The repairer must point-Delete the visible Put AND every hidden
+   * Put in {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a
+   * hidden Put above source's mirror.
    */
   @Test
   public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
@@ -2127,114 +2200,54 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
     final long targetT2 = base + 3L;
     final long targetT3 = base + 4L;
 
-    byte[] rowKey = integerRowKey(rowId);
-    String family = "0";
-    String qualifier = "NAME";
-
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
-      scnSrc.commit();
-    }
-
-    // Three target NAME versions, all retained under VERSIONS=3:
-    //   T1 "bob" (hidden), T2 "carol" (hidden), T3 "dave" (visible).
-    try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) {
-      scnTgtT1.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
-      scnTgtT1.commit();
-    }
-    try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) {
-      scnTgtT2.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
-      scnTgtT2.commit();
-    }
-    try (Connection scnTgtT3 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT3)) {
-      scnTgtT3.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')");
-      scnTgtT3.commit();
-    }
+    // Source: single NAME Put.
+    upsertAtScnSource(sourceTs,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')");
 
-    // Sanity: pre-repair target visible NAME is "dave" (newest of the three).
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertEquals("Pre-repair target visible NAME should be dave", "dave", rs.getString(1));
-      }
-    }
+    // Target: three NAME versions retained under VERSIONS=3 — "bob"@T1 (hidden), "carol"@T2
+    // (hidden), "dave"@T3 (visible).
+    upsertAtScnTarget(targetT1,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
+    upsertAtScnTarget(targetT2,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')");
+    upsertAtScnTarget(targetT3,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')");
 
-    while (System.currentTimeMillis() <= targetT3) {
-      // spin
-    }
+    assertTargetName(uniqueTableName, rowId, "dave");
 
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
+      String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(targetT3)),
       "--read-all-versions");
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters repairCounters = result.repairJob.getCounters();
-    // Source has one NAME Put@sourceTs; target has three NAME Puts at T1, T2, T3 (all >
-    // sourceTs). Each target NAME cell drives the cellExtra branch (sourceMaxTs < ts) and the
-    // hidden-version unwinding logic point-Deletes every Put in (sourceTs, ts).
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     assertTrue("At least 3 cells should be tombstoned across target's three NAME versions",
-      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 3);
-
-    // Post-repair: standard read on target must see source's "alice" — every hidden version
-    // ("bob"@T1, "carol"@T2) was unwound along with the visible "dave"@T3.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        assertTrue(rs.next());
-        assertEquals("Post-repair target NAME must be alice (all hidden versions unwound)",
-          "alice", rs.getString(1));
-      }
+      repairCounters.cellsExtraOnTarget >= 3);
+
+    assertTargetName(uniqueTableName, rowId, "alice");
+
+    // Lower bound on delete markers: the unwind iterates over shrinking intervals so T1 may
+    // appear in both T3's and T2's hidden sets — distinct-marker count can exceed 3.
+    RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
+    assertTrue("Expected at least 3 NAME delete markers on target, saw "
+      + summary.totalDeletes(), summary.totalDeletes() >= 3);
+    int namePutAtSourceTs = 0;
+    for (Long ts : summary.putTimestamps) {
+      if (ts == sourceTs) namePutAtSourceTs++;
     }
+    assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
 
-    // Raw scan: target should have a Delete marker covering each of T1/T2/T3 plus source's
-    // mirror. The exact tombstone cell count may include duplicates from the unwind logic
-    // (each iteration's hidden-set spans a shrinking interval, so T1 appears in T3's hidden
-    // set, then again in T2's), so assert a lower bound on distinct delete-bearing scan cells.
-    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
-      Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true);
-      scan.readAllVersions();
-      int nameDeletes = 0;
-      int namePutAtSourceTs = 0;
-      try (ResultScanner sc = targetHTable.getScanner(scan)) {
-        for (Result r; (r = sc.next()) != null;) {
-          for (Cell c : r.rawCells()) {
-            if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family))
-              && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) {
-              if (CellUtil.isDelete(c)) {
-                nameDeletes++;
-              } else if (c.getTimestamp() == sourceTs) {
-                namePutAtSourceTs++;
-              }
-            }
-          }
-        }
-      }
-      assertTrue("Expected at least 3 NAME delete markers on target, saw " + nameDeletes,
-        nameDeletes >= 3);
-      assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
-    }
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
-   * cmp==0 row carrying a target-only tombstone cell at a coord source lacks: drives the
-   * {@code tombstoneTargetCell} return-{@code false} branch ({@link
-   * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}, around line 420)
-   * inside {@code generateMutationForDiffCells}'s {@code cellExtra} branch — the target cell
-   * is itself a tombstone, so no new tombstone is emitted, but {@code anyCellUnrepairable} is
-   * set. The row contributes to {@code ROWS_CANNOT_REPAIR} without bumping any cell counter.
-   *
-   * <p>Setup: source and target share a matching {@code NAME_VALUE} cell so the row exists on
-   * both. Target also has a raw point-{@link Delete} on {@code NAME} at a coord source lacks;
-   * with {@code --raw-scan} the diff scan surfaces that tombstone cell, taking the
-   * {@code cmp > 0} branch on it.
+   * Same row on both sides via a matching NAME_VALUE; target also carries a raw point-Delete on
+   * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra
+   * cell, but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up
+   * unrepairable with no cell counter ticks.
    */
   @Test
   public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
@@ -2244,52 +2257,34 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
     final long ts = base + 1L;
     final long tombstoneTs = base + 2L;
 
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnSrc.commit();
-    }
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
-      scnTgt.commit();
-    }
-    // Plant a raw point-Delete on NAME at tombstoneTs — a coord source has no cell at, but the
-    // target cell is itself a tombstone, so tombstoneTargetCell returns false in repair-mode.
+    // Source: NAME_VALUE only.
+    upsertAtScnSource(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
+    // Target: matching NAME_VALUE plus a raw point-Delete on NAME — surfaces under --raw-scan.
+    upsertAtScnTarget(ts,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
     writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       tombstoneTs);
 
-    while (System.currentTimeMillis() <= tombstoneTs) {
-      // spin
-    }
-
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(System.currentTimeMillis()), "--raw-scan");
+      "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
-    // No mirror, no tombstone emitted — but anyCellUnrepairable was set, so the row rolls up
-    // as unrepairable. All cell counters stay 0.
+    SyncCountersResult c = getSyncCounters(result.repairJob);
     assertRepairCellCounters(c, 0, 0, 0, 1);
-    assertRepairRowCounters(c, 0, 0, 1);
-    assertTrue("At least one mapper should roll up to UNREPAIRABLE",
-      c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1);
+    assertRowDriftCounters(c, 0, 0, 0, 1);
+    assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
   }
 
   /**
-   * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny
-   * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple
-   * times. Validates that every row converges despite the mid-flush boundary — i.e., no Put
-   * gets dropped because pendingPuts/pendingDeletes were drained mid-iteration.
+   * Mid-row repair flush: 8 missing source-only rows through {@code repairBatchSize=2} so
+   * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a
+   * batch boundary.
    */
   @Test
   public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
-    // No replication — seed source manually so target legitimately lacks the rows.
     createRepairTestTableOnBothClusters(uniqueTableName, 1, null);
 
-    // Introduce extra rows on source that target lacks. Each row → at least 2 cells (NAME and the
-    // empty-key cell), so a batch size of 2 forces a flush every row, exercising the mid-stream
-    // flush in generateMutationForDiffRows.
     int[] sourceOnlyIds = new int[] { 100, 101, 102, 103, 104, 105, 106, 107 };
     String[] sourceOnlyNames = new String[sourceOnlyIds.length];
     for (int i = 0; i < sourceOnlyIds.length; i++) {
@@ -2303,47 +2298,34 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
 
     Configuration conf = sourceClusterConfWithRepairBatchSize(2);
 
-    // Stage 1: dry-run.
     Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run",
       "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("Dry-run should succeed", dryRunJob.isSuccessful());
     SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
     assertTrue("Dry-run should detect mismatched chunks", dryRunCounters.chunksMismatched >= 1);
 
-    // Stage 2: repair with the same small batch size.
     Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("Repair should succeed despite small batch size", repairJob.isSuccessful());
 
-    Counters repairCounters = repairJob.getCounters();
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
     assertTrue("All source-only rows should be marked missing on target",
-      repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue()
-          >= sourceOnlyIds.length);
-    assertEquals("No row should be flagged unrepairable", 0,
-      repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+      repairCounters.rowsMissingOnTarget >= sourceOnlyIds.length);
+    assertEquals("No row should be flagged unrepairable", 0, repairCounters.rowsCannotRepair);
 
-    // Verify each source-only row landed on target with the right NAME.
     for (int i = 0; i < sourceOnlyIds.length; i++) {
-      try (PreparedStatement ps = targetConnection
-        .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
-        ps.setInt(1, sourceOnlyIds[i]);
-        try (ResultSet rs = ps.executeQuery()) {
-          assertTrue("Row " + sourceOnlyIds[i] + " should exist on target after repair",
-            rs.next());
-          assertEquals("Row " + sourceOnlyIds[i] + " NAME should match source",
-            sourceOnlyNames[i], rs.getString(1));
-        }
-      }
+      assertTargetName(uniqueTableName, sourceOnlyIds[i], sourceOnlyNames[i]);
     }
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
-   * P8 ({@code --raw-scan} + {@code --read-all-versions} interplay): a multi-version row on
-   * source that includes an in-window {@code DeleteColumn} between two Puts. Target lags with
-   * only the older Put. Repair must mirror the missing tombstone (preserving its subtype via
-   * {@code mirrorSourceCell} in
-   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} routing Delete cells
-   * through {@code Delete#add}) and the missing newer Put.
+   * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2,
+   * Put@T3; target has only Put@T1. Repair must mirror the missing tombstone (via
+   * {@code mirrorSourceCell} routing Delete cells through {@code Delete#add}) and the missing
+   * newer Put.
    */
   @Test
   public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception {
@@ -2356,95 +2338,191 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio
     final long t3 = base + 3L;
 
     // Source: Put@T1 → DeleteColumn@T2 → Put@T3.
-    try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), t1)) {
-      scnSrc.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
-      scnSrc.commit();
-    }
+    upsertAtScnSource(t1,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
     writeRawDeleteColumn(sourceConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", t2);
-    try (Connection scnSrc2 = openConnectionAtScn(CLUSTERS.getZkUrl1(), t3)) {
-      scnSrc2.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')");
-      scnSrc2.commit();
-    }
+    upsertAtScnSource(t3,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')");
 
     // Target: only the oldest Put@T1.
-    try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), t1)) {
-      scnTgt.createStatement().execute(
-        "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
-      scnTgt.commit();
-    }
+    upsertAtScnTarget(t1,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
 
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
       "--raw-scan", "--read-all-versions");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
-    // Source has NAME@T1 Put, NAME@T2 DeleteColumn, NAME@T3 Put + empty-key@T1 and @T3. Target has
-    // only NAME@T1 + empty-key@T1, so 3 cells missing: NAME-tombstone@T2, NAME-Put@T3, empty-key@T3.
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult c = getSyncCounters(result.repairJob);
+    // 3 missing: NAME tombstone@T2, NAME Put@T3, empty-key@T3.
     assertRepairCellCounters(c, 3, 0, 0, 0);
 
-    // Post-repair raw scan on target should show: Put@T3, DeleteColumn@T2, Put@T1 for the NAME
-    // qualifier (rawCells reverse-ts ordered).
-    int observedPuts = 0;
-    int observedDeleteColumns = 0;
-    long observedNewestPutTs = -1L;
-    try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) {
-      Scan scan = new Scan();
-      scan.withStartRow(integerRowKey(rowId), true);
-      scan.withStopRow(integerRowKey(rowId), true);
-      scan.setRaw(true);
-      scan.readAllVersions();
-      try (ResultScanner scanner = targetHTable.getScanner(scan)) {
-        Result r = scanner.next();
-        assertNotNull("Target row should exist", r);
-        for (Cell cell : r.rawCells()) {
-          if (Bytes.equals(CellUtil.cloneQualifier(cell), Bytes.toBytes("NAME"))) {
-            if (CellUtil.isDelete(cell)) {
-              observedDeleteColumns++;
-            } else {
-              observedPuts++;
-              observedNewestPutTs = Math.max(observedNewestPutTs, cell.getTimestamp());
-            }
-          }
-        }
+    RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
+    assertEquals("Target should have both NAME Puts after repair", 2, summary.puts);
+    assertEquals("Target should have the mirrored DeleteColumn after repair", 1,
+      summary.deleteColumns);
+    assertEquals("Newest mirrored Put should sit at T3", t3, summary.newestPutTs);
+
+    // Visible read sees "v3" — newest Put@T3 sits above the DeleteColumn@T2.
+    assertTargetName(uniqueTableName, rowId, "v3");
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
+  }
+
+  /**
+   * Hidden source-only Put under {@code --read-all-versions}: source has Put(NAME)@T1 and
+   * Put(NAME)@T2; target has only Put(NAME)@T2. Verifier sees the older T1 cell only because
+   * {@code readAllVersions} surfaces it; repair must mirror the missing source@T1 cell at its
+   * original timestamp so target's history matches source.
+   */
+  @Test
+  public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7");
+
+    final long t1 = base + 1L;
+    final long t2 = base + 2L;
+
+    // Source: NAME='older'@T1 then NAME='newer'@T2 — both retained under VERSIONS=2.
+    upsertAtScnSource(t1,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'older')");
+    upsertAtScnSource(t2,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')");
+    // Target: only the newer Put@T2; the older T1 version is missing.
+    upsertAtScnTarget(t2,
+      "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')");
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions");
+    assertTrue("Repair should succeed", result.repairJob.isSuccessful());
+
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult c = getSyncCounters(result.repairJob);
+    // Older NAME version is the only drift; mirror via cmp<0 source-only path.
+    assertTrue("Hidden source NAME@T1 must mirror as missing", c.cellsMissingOnTarget >= 1);
+    assertEquals("No extra cells", 0, c.cellsExtraOnTarget);
+    assertEquals("No row should be unrepairable", 0, c.rowsCannotRepair);
+
+    // Visible NAME stays "newer"; raw-scan must surface both versions at original timestamps.
+    assertTargetName(uniqueTableName, rowId, "newer");
+    RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
+    assertTrue("Mirrored older NAME Put@T1 must land at original timestamp",
+      summary.putTimestamps.contains(t1));
+    assertTrue("Existing newer NAME Put@T2 must remain",
+      summary.putTimestamps.contains(t2));
+
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
+  }
+
+  /**
+   * Source-only point Delete (single-version tombstone) must mirror onto target via
+   * {@code mirrorSourceCell} → target's NAME at {@code ts} is shadowed; NAME_VALUE survives.
+   */
+  @Test
+  public void testRepairMirrorsSourcePointDeleteUnderRawScan() throws Exception {
+    runMirrorSourceTombstoneTest(SourceTombstone.POINT_DELETE);
+  }
+
+  /**
+   * Source-only DeleteFamily must mirror onto target → whole family at the target row is
+   * shadowed and the row drops out of Phoenix view.
+   */
+  @Test
+  public void testRepairMirrorsSourceDeleteFamilyUnderRawScan() throws Exception {
+    runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY);
+  }
+
+  /**
+   * Source-only DeleteFamilyVersion must mirror onto target → all family cells at exactly
+   * {@code ts} are shadowed; preserves DFV semantics (not flattened to DeleteColumn).
+   */
+  @Test
+  public void testRepairMirrorsSourceDeleteFamilyVersionUnderRawScan() throws Exception {
+    runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY_VERSION);
+  }
+
+  private enum SourceTombstone {
+    POINT_DELETE,
+    DELETE_FAMILY,
+    DELETE_FAMILY_VERSION
+  }
+
+  private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+    final long ts = base + 1L;
+    // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs.
+    final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts;
+
+    for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) {
+      try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
+        scn.createStatement().execute("UPSERT INTO " + uniqueTableName
+          + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+        scn.commit();
       }
     }
-    assertEquals("Target should have both NAME Puts after repair", 2, observedPuts);
-    assertEquals("Target should have the mirrored DeleteColumn after repair", 1,
-      observedDeleteColumns);
-    assertEquals("Newest mirrored Put should sit at T3", t3, observedNewestPutTs);
 
-    // Read-side: NAME under default visibility should now be null (T3 Put → T2 DeleteColumn covers
-    // T1; visible state is "deleted but tombstone caps NAME"). Phoenix sees no value.
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) {
+    byte[] rk = integerRowKey(rowId);
+    switch (subtype) {
+      case POINT_DELETE:
+        writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs);
+        break;
+      case DELETE_FAMILY:
+        writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
+        break;
+      case DELETE_FAMILY_VERSION:
+        writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
+        break;
+      default:
+        throw new IllegalStateException("unhandled subtype: " + subtype);
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
+      "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
+    assertTrue(result.dryRunJob.isSuccessful());
+    assertTrue(result.repairJob.isSuccessful());
+
+    Counters c = result.repairJob.getCounters();
+    // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring
+    // also covers the empty-key sentinel cell, hence the >= 1 assertion shape.
+    assertTrue("source tombstone must mirror as a missing cell on target",
+      c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1);
+    assertTrue(c.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue() >= 1);
+    assertNoMismatchedCheckpoints(uniqueTableName, null);
+
+    try (PreparedStatement ps = targetConnection.prepareStatement(
+      "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
       ps.setInt(1, rowId);
       try (ResultSet rs = ps.executeQuery()) {
-        // The newest Put is T3 — reads see "v3" since T3 > tombstone T2.
-        assertTrue(rs.next());
-        assertEquals("v3", rs.getString(1));
+        if (subtype == SourceTombstone.POINT_DELETE) {
+          assertTrue(rs.next());
+          assertNull(rs.getString(1));
+          assertEquals(99L, rs.getLong(2));
+        } else {
+          // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row.
+          assertFalse("row should not be visible after family-wide tombstone mirror",
+            rs.next());
+        }
       }
     }
+
+    // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   /**
-   * P9 (mixed Put+Delete batch under small {@code repairBatchSize}): many missing source rows AND
-   * many extra target rows in the same chunk. With {@code repairBatchSize=4}, most flushes
-   * straddle a Put/Delete boundary and exercise {@code flushRepairMutations} in
-   * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} on its mixed
-   * {@code Table#batch} path. Validates that no mutation gets dropped at the batch boundary.
+   * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only
+   * rows in the same chunk, so most flushes straddle a Put/Delete boundary on
+   * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops
+   * at the batch boundary.
    */
   @Test
   public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
-    // No replication — seed each side independently so source-only and target-only rows truly
-    // diverge.
     createRepairTestTableOnBothClusters(uniqueTableName, 1, null);
 
-    // Add 5 source-only rows (will need Puts) and 5 target-only rows (will need Deletes), all
-    // inside the same chunk so they queue together in a single mapper's pendingPuts/pendingDeletes
-    // and get flushed as mixed batches.
     int[] sourceOnly = new int[] { 200, 201, 202, 203, 204 };
     String[] sourceOnlyNames = new String[] { "s200", "s201", "s202", "s203", "s204" };
     upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnly, sourceOnlyNames);
@@ -2482,13 +2560,8 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
     assertEquals("No chunk should fail repair", 0,
       c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue());
 
-    // Convergence pass: cleanup checkpoint and re-run a stable repair to assert no chunks are
-    // mismatched after the mixed-batch flushes.
-    cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
-    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    // Convergence pass on a clean checkpoint to confirm no chunks remain mismatched.
+    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
   }
 
   @Test
@@ -2714,7 +2787,7 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep
   }
 
   @Test
-  public void testSyncTableValidateWithSplitCoalescing() throws Exception {
+  public void testSyncTableWithSplitCoalescing() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 10);
 
     introduceAndVerifyTargetDifferences(uniqueTableName);
@@ -2732,18 +2805,19 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception {
 
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
+    assertRowDriftCounters(counters, 0, 0, 3, 0);
 
     // Verify checkpoint entries from the dry-run pass are created correctly.
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 7, 3, 4,
-      3, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, null);
 
     // Repair pass over the same window: MISMATCHED rows transition to REPAIRED in place.
-    runSyncTool(uniqueTableName, "--coalesce-split", "--from-time", String.valueOf(fromTime),
-      "--to-time", String.valueOf(toTime));
+    Job repairJob = runSyncTool(uniqueTableName, "--coalesce-split", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
+    assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
+    assertRepairCellCounters(repairCounters, 6, 6, 0, 0);
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, counters, repairCounters);
   }
 
   /**
@@ -3180,6 +3254,21 @@ private void verifyDataIdentical(Connection sourceConn, Connection targetConn, S
     }
   }
 
+  /**
+   * Negative complement of {@link #verifyDataIdentical}: asserts that source and target return
+   * different row sets via Phoenix SELECT. Use after a repair pass that is expected to leave
+   * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool
+   * reported the right status and that the data actually didn't converge.
+   */
+  private void verifyDataDiverges(Connection sourceConn, Connection targetConn, String tableName)
+    throws SQLException {
+    String query = "SELECT ID, NAME, NAME_VALUE FROM " + tableName + " ORDER BY ID";
+    List<TestRow> sourceRows = queryAllRows(sourceConn, query);
+    List<TestRow> targetRows = queryAllRows(targetConn, query);
+    assertNotEquals("Source and target rows should still differ when repair could not converge",
+      sourceRows, targetRows);
+  }
+
   private void introduceAndVerifyTargetDifferences(String tableName) throws SQLException {
     upsertRowsOnTarget(targetConnection, tableName, new int[] { 2, 5, 8 },
       new String[] { "MODIFIED_NAME_2", "MODIFIED_NAME_5", "MODIFIED_NAME_8" });
@@ -3495,8 +3584,8 @@ private void splitTableAt(Connection conn, String tableName, List<Integer> split
   private List<PhoenixSyncTableCheckpointOutputRow> queryCheckpointTable(Connection conn,
     String tableName, String targetCluster, String tenantId) throws SQLException {
     List<PhoenixSyncTableCheckpointOutputRow> entries = new ArrayList<>();
-    String query = "SELECT TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, IS_DRY_RUN, "
-      + "START_ROW_KEY, END_ROW_KEY, EXECUTION_START_TIME, EXECUTION_END_TIME, "
+    String query = "SELECT TABLE_NAME, TENANT_ID, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, "
+      + "IS_DRY_RUN, START_ROW_KEY, END_ROW_KEY, EXECUTION_START_TIME, EXECUTION_END_TIME, "
       + "STATUS, COUNTERS FROM PHOENIX_SYNC_TABLE_CHECKPOINT "
       + "WHERE TABLE_NAME = ? AND TARGET_CLUSTER = ? "
       + (tenantId != null ? "AND TENANT_ID = ?" : "AND TENANT_ID IS NULL");
@@ -3514,7 +3603,8 @@ private List<PhoenixSyncTableCheckpointOutputRow> queryCheckpointTable(Connectio
       String statusStr = rs.getString("STATUS");
 
       PhoenixSyncTableCheckpointOutputRow entry = new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(rs.getString("TABLE_NAME")).setTargetCluster(rs.getString("TARGET_CLUSTER"))
+        .setTableName(rs.getString("TABLE_NAME")).setTenantId(rs.getString("TENANT_ID"))
+        .setTargetCluster(rs.getString("TARGET_CLUSTER"))
         .setType(typeStr != null ? PhoenixSyncTableCheckpointOutputRow.Type.valueOf(typeStr) : null)
         .setFromTime(rs.getLong("FROM_TIME")).setToTime(rs.getLong("TO_TIME"))
         .setIsDryRun(rs.getBoolean("IS_DRY_RUN")).setStartRowKey(rs.getBytes("START_ROW_KEY"))
@@ -3876,18 +3966,24 @@ private static String[] appendArg(String[] args, String newArg) {
   }
 
   /**
-   * After a repair pass, asserts that no CHUNK or REGION rows in the checkpoint table are
-   * still in MISMATCHED status. They should all have transitioned to REPAIRED, VERIFIED, or
-   * (when target rows are entirely tombstoned) UNREPAIRABLE.
+   * After a clean repair pass, asserts no checkpoint rows remain in any non-terminal or
+   * stuck state — MISMATCHED, UNREPAIRABLE, or REPAIR_FAILED. Tests that legitimately leave
+   * UNREPAIRABLE/REPAIR_FAILED rows should not call this helper.
    */
   private void assertNoMismatchedCheckpoints(String tableName, String tenantId)
     throws SQLException {
     List<PhoenixSyncTableCheckpointOutputRow> entries =
       queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
-    long mismatched = countCheckpointsByStatus(entries,
-      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
-    assertEquals("After repair, no MISMATCHED checkpoint rows should remain for table "
-      + tableName + " tenant=" + tenantId, 0, mismatched);
+    String ctx = " (table=" + tableName + " tenant=" + tenantId + ")";
+    assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
+    assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
+    assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
   }
 
   /**
@@ -3976,8 +4072,11 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, Configurat
   private static class SyncCountersResult {
     public final long sourceRowsProcessed;
     public final long targetRowsProcessed;
-    public final long chunksMismatched;
     public final long chunksVerified;
+    public final long chunksMismatched;
+    public final long chunksRepaired;
+    public final long chunksUnrepairable;
+    public final long chunksRepairFailed;
     public final long mappersVerified;
     public final long mappersMismatched;
     public final long mappersRepaired;
@@ -3986,15 +4085,26 @@ private static class SyncCountersResult {
     public final long rowsMissingOnTarget;
     public final long rowsExtraOnTarget;
     public final long rowsDifferentOnTarget;
+    public final long rowsCannotRepair;
+    public final long cellsMissingOnTarget;
+    public final long cellsExtraOnTarget;
+    public final long cellsDifferentOnTarget;
     public final long taskCreated;
+    private final Counters raw;
 
     SyncCountersResult(Counters counters) {
+      this.raw = counters;
       this.sourceRowsProcessed =
         counters.findCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
       this.targetRowsProcessed =
         counters.findCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
-      this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
       this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
+      this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
+      this.chunksRepaired = counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue();
+      this.chunksUnrepairable =
+        counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
+      this.chunksRepairFailed =
+        counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
       this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue();
       this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue();
       this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
@@ -4008,18 +4118,34 @@ private static class SyncCountersResult {
         counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
       this.rowsDifferentOnTarget =
         counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
+      this.rowsCannotRepair = counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
+      this.cellsMissingOnTarget =
+        counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
+      this.cellsExtraOnTarget =
+        counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+      this.cellsDifferentOnTarget =
+        counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
       this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
     }
 
+    /** Escape hatch for tests that need to read counters not exposed as fields here. */
+    public Counters getRawCounters() {
+      return raw;
+    }
+
     public void logCounters(String testName) {
       LOGGER.info(
-        "{}: source rows={}, target rows={}, chunks mismatched={}, chunks verified={}, "
+        "{}: source rows={}, target rows={}, chunks verified={}, chunks mismatched={}, "
+          + "chunks repaired={}, chunks unrepairable={}, chunks repair_failed={}, "
           + "mappers verified={}, mappers mismatched={}, mappers repaired={}, "
           + "mappers unrepairable={}, mappers repair_failed={}, rows missing={}, "
-          + "rows extra={}, rows different={}",
-        testName, sourceRowsProcessed, targetRowsProcessed, chunksMismatched, chunksVerified,
-        mappersVerified, mappersMismatched, mappersRepaired, mappersUnrepairable,
-        mappersRepairFailed, rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget);
+          + "rows extra={}, rows different={}, rows cannot repair={}, cells missing={}, "
+          + "cells extra={}, cells different={}",
+        testName, sourceRowsProcessed, targetRowsProcessed, chunksVerified, chunksMismatched,
+        chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified,
+        mappersMismatched, mappersRepaired, mappersUnrepairable, mappersRepairFailed,
+        rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair,
+        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
     }
   }
 
@@ -4075,16 +4201,13 @@ private void validateMapperCountersRepair(SyncCountersResult counters,
    * where the drift is constructed deterministically and any miscount (off-by-one,
    * double-counting, missed branch) should fail the test loudly.
    */
-  private void assertRepairCellCounters(Counters counters, long expectedCellsMissing,
+  private void assertRepairCellCounters(SyncCountersResult counters, long expectedCellsMissing,
     long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) {
-    assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing,
-      counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue());
-    assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra,
-      counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue());
+    assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing, counters.cellsMissingOnTarget);
+    assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra, counters.cellsExtraOnTarget);
     assertEquals("CELLS_DIFFERENT_ON_TARGET", expectedCellsDifferent,
-      counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue());
-    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair,
-      counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+      counters.cellsDifferentOnTarget);
+    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair);
   }
 
   /**
@@ -4092,33 +4215,33 @@ private void assertRepairCellCounters(Counters counters, long expectedCellsMissi
    * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that
    * need to assert both layers.
    */
-  private void assertRepairChunkAndMapperCounters(Counters counters, long expectedChunksRepaired,
-    long expectedChunksRepairFailed, long expectedMappersRepaired, long expectedMappersUnrepairable,
+  private void assertRepairChunkAndMapperCounters(SyncCountersResult counters,
+    long expectedChunksRepaired, long expectedChunksUnrepairable, long expectedChunksRepairFailed,
+    long expectedMappersRepaired, long expectedMappersUnrepairable,
     long expectedMappersRepairFailed) {
-    assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired,
-      counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue());
-    assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed,
-      counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue());
-    assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired,
-      counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue());
+    assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired, counters.chunksRepaired);
+    assertEquals("CHUNKS_UNREPAIRABLE", expectedChunksUnrepairable, counters.chunksUnrepairable);
+    assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, counters.chunksRepairFailed);
+    assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, counters.mappersRepaired);
     assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable,
-      counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue());
+      counters.mappersUnrepairable);
     assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed,
-      counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue());
+      counters.mappersRepairFailed);
   }
 
   /**
-   * Pins the row-level repair drift counters. Mirror of {@link #assertRepairCellCounters} for
-   * tests that need to assert whole-row outcomes (missing, extra, unrepairable).
+   * Pins all four row-level drift counters: missing, extra, different, and unrepairable.
+   * Dry-run runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave
+   * {@code ROWS_DIFFERENT_ON_TARGET} at 0 (different rows roll up under missing/extra cell
+   * drift after repair).
    */
-  private void assertRepairRowCounters(Counters counters, long expectedRowsMissing,
-    long expectedRowsExtra, long expectedRowsCannotRepair) {
-    assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing,
-      counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue());
-    assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra,
-      counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue());
-    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair,
-      counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+  private void assertRowDriftCounters(SyncCountersResult counters, long expectedRowsMissing,
+    long expectedRowsExtra, long expectedRowsDifferent, long expectedRowsCannotRepair) {
+    assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, counters.rowsMissingOnTarget);
+    assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, counters.rowsExtraOnTarget);
+    assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent,
+      counters.rowsDifferentOnTarget);
+    assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair);
   }
 
   /**
@@ -4167,11 +4290,37 @@ private long createRepairTestTableOnBothClusters(String tableName, int maxVersio
     // Wait until the wall clock advances at least one millisecond past the CREATE TABLE
     // timestamp so any caller-chosen SCN >= the returned anchor is guaranteed to be above the
     // table's metadata row.
-    long anchor = System.currentTimeMillis() + 1;
-    while (System.currentTimeMillis() < anchor) {
+    return waitUntilWallClockPasses(System.currentTimeMillis());
+  }
+
+  /**
+   * Multi-CF variant of {@link #createRepairTestTableOnBothClusters}: two column families
+   * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to
+   * span families.
+   */
+  private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws SQLException {
+    String ddl = "CREATE TABLE IF NOT EXISTS " + tableName
+      + " (ID INTEGER NOT NULL PRIMARY KEY, CF1.A VARCHAR(50), CF2.B VARCHAR(50))"
+      + " COLUMN_ENCODED_BYTES=NONE, UPDATE_CACHE_FREQUENCY=0, REPLICATION_SCOPE=0";
+    executeTableCreation(sourceConnection, ddl);
+    executeTableCreation(targetConnection, ddl);
+    return waitUntilWallClockPasses(System.currentTimeMillis());
+  }
+
+  /**
+   * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then
+   * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted
+   * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell
+   * and (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation
+   * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The
+   * spin terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait
+   * for an external side effect.
+   */
+  private long waitUntilWallClockPasses(long minTs) {
+    while (System.currentTimeMillis() <= minTs) {
       // spin
     }
-    return anchor;
+    return System.currentTimeMillis();
   }
 
   /**
@@ -4186,6 +4335,176 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S
     return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props);
   }
 
+  /**
+   * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits.
+   * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; }}
+   * boilerplate that recurred in nearly every repair test.
+   */
+  private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException {
+    try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
+      scn.createStatement().execute(upsertSql);
+      scn.commit();
+    }
+  }
+
+  /** Convenience wrapper: {@link #upsertAtScn} on the source cluster. */
+  private void upsertAtScnSource(long ts, String upsertSql) throws SQLException {
+    upsertAtScn(CLUSTERS.getZkUrl1(), ts, upsertSql);
+  }
+
+  /** Convenience wrapper: {@link #upsertAtScn} on the target cluster. */
+  private void upsertAtScnTarget(long ts, String upsertSql) throws SQLException {
+    upsertAtScn(CLUSTERS.getZkUrl2(), ts, upsertSql);
+  }
+
+  /** Runs the same UPSERT against both clusters at the same SCN timestamp. */
+  private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException {
+    upsertAtScnSource(ts, upsertSql);
+    upsertAtScnTarget(ts, upsertSql);
+  }
+
+  /**
+   * Asserts target's visible NAME for {@code id} equals {@code expected}. Replaces the
+   * {@code prepareStatement("SELECT NAME FROM ... WHERE ID = ?")} block repeated across repair
+   * tests.
+   */
+  private void assertTargetName(String tableName, int id, String expected) throws SQLException {
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+      ps.setInt(1, id);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue("Row " + id + " should exist on target", rs.next());
+        assertEquals("Target NAME for row " + id, expected, rs.getString(1));
+      }
+    }
+  }
+
+  /** Asserts the target row exists but its visible NAME is null. */
+  private void assertTargetNameNull(String tableName, int id) throws SQLException {
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+      ps.setInt(1, id);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue("Row " + id + " should exist on target", rs.next());
+        assertNull("Target NAME for row " + id + " should be null", rs.getString(1));
+      }
+    }
+  }
+
+  /** Asserts the target row is not visible to a Phoenix SELECT (e.g., shadowed by tombstones). */
+  private void assertTargetRowAbsent(String tableName, int id) throws SQLException {
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+      ps.setInt(1, id);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertFalse("Row " + id + " should not be visible on target", rs.next());
+      }
+    }
+  }
+
+  /**
+   * End-of-test convergence: run dry-run + repair over [fromTime, toTime] on top of whatever
+   * checkpoint state the test already produced. The repair-mode resume filter (status IN
+   * (VERIFIED, REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level
+   * range not yet covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run +
+   * repair on top of partial/stale prior state will still converge. Asserts no
+   * MISMATCHED/UNREPAIRABLE/REPAIR_FAILED rows remain and Phoenix-visible data matches between
+   * source and target.
+   */
+  private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime)
+    throws Exception {
+    runSyncToolWithRepair(tableName, "--from-time", String.valueOf(fromTime), "--to-time",
+      String.valueOf(toTime));
+    assertNoMismatchedCheckpoints(tableName, null);
+    verifyDataIdentical(sourceConnection, targetConnection, tableName);
+  }
+
+  /**
+   * Summary of NAME-column raw cells observed under a single-row raw scan with all versions.
+   * Used by tests that pin tombstone/Put counts after repair.
+   */
+  private static final class RawCellSummary {
+    final int puts;
+    final int deleteColumns;
+    final int deleteFamilies;
+    final int deleteFamilyVersions;
+    final int pointDeletes;
+    final long newestPutTs;
+    final List<Long> putTimestamps;
+    final List<Long> deleteTimestamps;
+
+    RawCellSummary(int puts, int deleteColumns, int deleteFamilies, int deleteFamilyVersions,
+      int pointDeletes, long newestPutTs, List<Long> putTimestamps, List<Long> deleteTimestamps) {
+      this.puts = puts;
+      this.deleteColumns = deleteColumns;
+      this.deleteFamilies = deleteFamilies;
+      this.deleteFamilyVersions = deleteFamilyVersions;
+      this.pointDeletes = pointDeletes;
+      this.newestPutTs = newestPutTs;
+      this.putTimestamps = putTimestamps;
+      this.deleteTimestamps = deleteTimestamps;
+    }
+
+    int totalDeletes() {
+      return deleteColumns + deleteFamilies + deleteFamilyVersions + pointDeletes;
+    }
+  }
+
+  /**
+   * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put /
+   * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin
+   * post-repair NAME tombstone counts.
+   */
+  private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception {
+    byte[] rk = integerRowKey(rowId);
+    int puts = 0;
+    int deleteColumns = 0;
+    int deleteFamilies = 0;
+    int deleteFamilyVersions = 0;
+    int pointDeletes = 0;
+    long newestPutTs = -1L;
+    List<Long> putTimestamps = new ArrayList<>();
+    List<Long> deleteTimestamps = new ArrayList<>();
+    try (Table targetHTable = getHBaseTable(targetConnection, tableName)) {
+      Scan scan = new Scan().withStartRow(rk, true).withStopRow(rk, true).setRaw(true);
+      scan.readAllVersions();
+      try (ResultScanner sc = targetHTable.getScanner(scan)) {
+        for (Result r; (r = sc.next()) != null;) {
+          for (Cell c : r.rawCells()) {
+            if (!Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes("NAME"))) {
+              continue;
+            }
+            if (CellUtil.isDelete(c)) {
+              deleteTimestamps.add(c.getTimestamp());
+              switch (c.getType()) {
+                case DeleteColumn:
+                  deleteColumns++;
+                  break;
+                case DeleteFamily:
+                  deleteFamilies++;
+                  break;
+                case DeleteFamilyVersion:
+                  deleteFamilyVersions++;
+                  break;
+                case Delete:
+                  pointDeletes++;
+                  break;
+                default:
+                  // ignore
+              }
+            } else {
+              puts++;
+              putTimestamps.add(c.getTimestamp());
+              newestPutTs = Math.max(newestPutTs, c.getTimestamp());
+            }
+          }
+        }
+      }
+    }
+    return new RawCellSummary(puts, deleteColumns, deleteFamilies, deleteFamilyVersions,
+      pointDeletes, newestPutTs, putTimestamps, deleteTimestamps);
+  }
+
   /**
    * Resolves the HBase {@link Table} backing a Phoenix table for a given Phoenix
    * {@link Connection}. Used by raw-cell helpers that need to bypass Phoenix and write cells at
@@ -4305,77 +4624,158 @@ private void validateSyncCountersWithMinChunk(SyncCountersResult counters,
   }
 
   /**
-   * Validates that a checkpoint table has entries with proper structure.
+   * Aggressive end-to-end checkpoint-table validation. Queries the checkpoint table itself, then:
+   * <ol>
+   *   <li>Validates per-row structural integrity — PK columns non-null, TYPE ∈ {REGION, CHUNK},
+   *     TENANT_ID matches, time-range invariants (FROM_TIME ≥ 0, TO_TIME > FROM_TIME), execution
+   *     timestamps non-null with END ≥ START, STATUS non-null, COUNTERS non-null on CHUNK rows.
+   *   </li>
+   *   <li>Pins per-(Type × Status) counts derived from the supplied counter objects: CHUNK and
+   *     REGION crossed with VERIFIED / REPAIRED / UNREPAIRABLE / REPAIR_FAILED / MISMATCHED.
+   *   </li>
+   *   <li>Cross-checks REGION-row sourceRowsProcessed/targetRowsProcessed totals against the
+   *     repair-pass counters (or dry-run counters when no repair ran).</li>
+   * </ol>
+   * <p>
+   * The two counter parameters select the validation mode:
+   * <ul>
+   *   <li>{@code dryRunCounters != null && repairCounters == null} — dry-run-only test. Expects
+   *     MISMATCHED rows to remain.</li>
+   *   <li>{@code dryRunCounters == null && repairCounters != null} — combined verify+repair pass
+   *     (no separate dry-run). VERIFIED count comes from {@code repairCounters}; MISMATCHED == 0.
+   *   </li>
+   *   <li>{@code dryRunCounters != null && repairCounters != null} — separate dry-run + repair.
+   *     VERIFIED count comes from {@code dryRunCounters} (the repair pass's resume filter skips
+   *     them); REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters};
+   *     MISMATCHED == 0.</li>
+   * </ul>
+   * Use after asserting counters and before declaring success — pins the persisted checkpoint
+   * state to the in-memory counter state, so a regression that reports the right counters but
+   * writes the wrong checkpoint rows (or vice versa) fails loudly.
    */
-  private void validateCheckpointEntries(List<PhoenixSyncTableCheckpointOutputRow> entries,
-    String expectedTableName, String expectedTargetCluster, int expectedSourceRows,
-    int expectedTargetRows, int expectedChunkVerified, int expectedChunkMismatched,
-    int expectedMapperRegion, int expectedMapperMismatched, String expectedTenantId) {
-    int mapperRegionCount = 0;
-    int chunkCount = 0;
-    int mismatchedEntry = 0;
-    int sourceRowsProcessed = 0;
-    int targetRowsProcessed = 0;
+  private void validateCheckpointEntries(String tableName, String tenantId,
+    SyncCountersResult dryRunCounters, SyncCountersResult repairCounters) throws SQLException {
+    if (dryRunCounters == null && repairCounters == null) {
+      throw new IllegalArgumentException(
+        "At least one of dryRunCounters or repairCounters must be non-null");
+    }
+    List<PhoenixSyncTableCheckpointOutputRow> entries =
+      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
+    String ctx = " (table=" + tableName + " tenant=" + tenantId + ")";
+
+    // Per-row structural validation + REGION row-processing totals.
+    long mapperRegionCount = 0;
+    long chunkCount = 0;
+    long sourceRowsProcessed = 0;
+    long targetRowsProcessed = 0;
     for (PhoenixSyncTableCheckpointOutputRow entry : entries) {
-      // Validate primary key columns
-      assertEquals("TABLE_NAME should match", expectedTableName, entry.getTableName());
-      assertEquals("TARGET_CLUSTER should match", expectedTargetCluster, entry.getTargetCluster());
-      assertNotNull("TYPE should not be null", entry.getType());
-      assertTrue("TYPE should be REGION or CHUNK",
+      assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName());
+      assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster());
+      assertNotNull("TYPE should not be null" + ctx, entry.getType());
+      assertTrue("TYPE should be REGION or CHUNK" + ctx,
         PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())
           || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType()));
 
-      // Validate TENANT_ID
-      if (expectedTenantId == null) {
-        assertNull("TENANT_ID should be null for non-multi-tenant tables", entry.getTenantId());
+      if (tenantId == null) {
+        assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx,
+          entry.getTenantId());
       } else {
-        assertEquals("TENANT_ID should match", expectedTenantId, entry.getTenantId());
+        assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId());
       }
 
-      // Validate time range
-      assertTrue("FROM_TIME should be >= 0", entry.getFromTime() >= 0);
-      assertTrue("TO_TIME should be > FROM_TIME", entry.getToTime() > entry.getFromTime());
+      assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0);
+      assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime());
 
-      // Validate execution timestamps
-      assertNotNull("EXECUTION_START_TIME should not be null", entry.getExecutionStartTime());
-      assertNotNull("EXECUTION_END_TIME should not be null", entry.getExecutionEndTime());
-      assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME",
+      assertNotNull("EXECUTION_START_TIME should not be null" + ctx,
+        entry.getExecutionStartTime());
+      assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime());
+      assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx,
         entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime());
 
-      // Validate status
-      assertNotNull("STATUS should not be null", entry.getStatus());
-      assertTrue("STATUS should be VERIFIED or MISMATCHED",
-        PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED.equals(entry.getStatus())
-          || PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus()));
-
-      if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) {
-        mismatchedEntry++;
-      }
+      assertNotNull("STATUS should not be null" + ctx, entry.getStatus());
 
-      // Count entry types
       if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) {
         mapperRegionCount++;
-        sourceRowsProcessed += (int) entry.getSourceRowsProcessed();
-        targetRowsProcessed += (int) entry.getTargetRowsProcessed();
-      } else if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) {
+        sourceRowsProcessed += entry.getSourceRowsProcessed();
+        targetRowsProcessed += entry.getTargetRowsProcessed();
+      } else {
         chunkCount++;
-        assertNotNull("COUNTERS should not be null for CHUNK entries", entry.getCounters());
+        assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters());
       }
     }
 
-    assertEquals(String.format("Should have %d REGION entry", expectedMapperRegion),
-      expectedMapperMismatched, expectedMapperRegion, mapperRegionCount);
-    assertEquals(
-      String.format("Should have %d CHUNK entry", expectedChunkVerified + expectedChunkMismatched),
-      expectedChunkVerified + expectedChunkMismatched, chunkCount);
-    assertEquals(
-      String.format("Should have %d MISMATCHED entry",
-        expectedMapperMismatched + expectedChunkMismatched),
-      expectedMapperMismatched + expectedChunkMismatched, mismatchedEntry);
-    assertEquals(String.format("Should have %d Source rows processed", expectedSourceRows),
-      expectedSourceRows, sourceRowsProcessed);
-    assertEquals(String.format("Should have %d Target rows processed", expectedTargetRows),
-      expectedTargetRows, targetRowsProcessed);
+    // Per-(Type × Status) counts derived from counter objects.
+    boolean isDryRunOnly = (repairCounters == null);
+    SyncCountersResult verifiedSource = (dryRunCounters != null ? dryRunCounters : repairCounters);
+    long expectedChunkVerified = verifiedSource.chunksVerified;
+    long expectedRegionVerified = verifiedSource.mappersVerified;
+    long expectedChunkRepaired = isDryRunOnly ? 0L : repairCounters.chunksRepaired;
+    long expectedChunkUnrepairable = isDryRunOnly ? 0L : repairCounters.chunksUnrepairable;
+    long expectedChunkRepairFailed = isDryRunOnly ? 0L : repairCounters.chunksRepairFailed;
+    long expectedRegionRepaired = isDryRunOnly ? 0L : repairCounters.mappersRepaired;
+    long expectedRegionUnrepairable = isDryRunOnly ? 0L : repairCounters.mappersUnrepairable;
+    long expectedRegionRepairFailed = isDryRunOnly ? 0L : repairCounters.mappersRepairFailed;
+    long expectedChunkMismatched = isDryRunOnly ? dryRunCounters.chunksMismatched : 0L;
+    long expectedRegionMismatched = isDryRunOnly ? dryRunCounters.mappersMismatched : 0L;
+
+    assertEquals("CHUNK/VERIFIED checkpoint rows" + ctx, expectedChunkVerified,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED));
+    assertEquals("CHUNK/REPAIRED checkpoint rows" + ctx, expectedChunkRepaired,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
+    assertEquals("CHUNK/UNREPAIRABLE checkpoint rows" + ctx, expectedChunkUnrepairable,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
+    assertEquals("CHUNK/REPAIR_FAILED checkpoint rows" + ctx, expectedChunkRepairFailed,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
+    assertEquals("CHUNK/MISMATCHED checkpoint rows" + ctx, expectedChunkMismatched,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
+
+    assertEquals("REGION/VERIFIED checkpoint rows" + ctx, expectedRegionVerified,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED));
+    assertEquals("REGION/REPAIRED checkpoint rows" + ctx, expectedRegionRepaired,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED));
+    assertEquals("REGION/UNREPAIRABLE checkpoint rows" + ctx, expectedRegionUnrepairable,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
+    assertEquals("REGION/REPAIR_FAILED checkpoint rows" + ctx, expectedRegionRepairFailed,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
+    assertEquals("REGION/MISMATCHED checkpoint rows" + ctx, expectedRegionMismatched,
+      countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
+
+    // Aggregate type counts (sanity — sum of per-status counts across all five statuses must
+    // equal what the loop above tallied).
+    long expectedChunkTotal = expectedChunkVerified + expectedChunkRepaired
+      + expectedChunkUnrepairable + expectedChunkRepairFailed + expectedChunkMismatched;
+    long expectedRegionTotal = expectedRegionVerified + expectedRegionRepaired
+      + expectedRegionUnrepairable + expectedRegionRepairFailed + expectedRegionMismatched;
+    assertEquals("Total CHUNK checkpoint rows" + ctx, expectedChunkTotal, chunkCount);
+    assertEquals("Total REGION checkpoint rows" + ctx, expectedRegionTotal, mapperRegionCount);
+
+    // Row-processing totals: REGION rows persist per-mapper sourceRowsProcessed /
+    // targetRowsProcessed; their sum equals the job-level counter only when no REGION row was
+    // overwritten by a subsequent repair pass. The checkpoint PK excludes IS_DRY_RUN, so a repair
+    // pass UPSERTs the REGION row at the dry-run row's PK; for partially-mismatched regions
+    // repair re-processes only the gap (non-VERIFIED) chunks and writes a smaller delta, so the
+    // sum across REGION rows no longer matches the dry-run job-level total. Only assert the
+    // strict equality when (a) no repair pass was run, or (b) the repair pass touched no
+    // regions (everything was already VERIFIED so dry-run rows stay intact).
+    boolean repairTouchedRegions = repairCounters != null
+      && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable
+        + repairCounters.mappersRepairFailed) > 0;
+    if (!repairTouchedRegions) {
+      assertEquals("REGION sourceRowsProcessed total" + ctx, verifiedSource.sourceRowsProcessed,
+        sourceRowsProcessed);
+      assertEquals("REGION targetRowsProcessed total" + ctx, verifiedSource.targetRowsProcessed,
+        targetRowsProcessed);
+    }
   }
 
   /**
@@ -4394,4 +4794,25 @@ public boolean equals(Object o) {
       return id == other.id && Objects.equals(name, other.name) && name_value == other.name_value;
     }
   }
+
+  /**
+   * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT
+   * by {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the
+   * {@code CHECKPOINT_WRITE_FAILED} → non-zero exit path.
+   */
+  public static class CheckpointWriteFailingObserver extends SimpleRegionObserver {
+    @Override
+    public void preBatchMutate(ObserverContext<RegionCoprocessorEnvironment> c,
+      MiniBatchOperationInProgress<Mutation> miniBatchOp) throws IOException {
+      throw new DoNotRetryIOException("INJECTED CHECKPOINT WRITE FAIL");
+    }
+  }
+
+  public static class RepairBatchFailingObserver extends SimpleRegionObserver {
+    @Override
+    public void preBatchMutate(ObserverContext<RegionCoprocessorEnvironment> c,
+      MiniBatchOperationInProgress<Mutation> miniBatchOp) throws IOException {
+      throw new DoNotRetryIOException("INJECTED TARGET REPAIR WRITE FAIL");
+    }
+  }
 }
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
index 91ad3941746..a5977629a89 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
@@ -1072,4 +1072,222 @@ public void testChunkCheckpointChunkWithDifferentTenants() throws Exception {
       targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
     assertEquals("Null tenant query should return only null-tenant chunk", 1, results3.size());
   }
+
+  @Test
+  public void testCheckpointValidationEmptyTableName() throws Exception {
+    byte[] startKey = Bytes.toBytes("row1");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    try {
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName("").setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L)
+        .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(startKey)
+        .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+        .build());
+      fail("Should throw IllegalArgumentException for empty tableName");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("TableName cannot be null or empty"));
+    }
+  }
+
+  @Test
+  public void testCheckpointValidationEmptyTargetCluster() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey = Bytes.toBytes("row1");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    try {
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster("").setType(Type.REGION).setFromTime(0L)
+        .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(startKey)
+        .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+        .build());
+      fail("Should throw IllegalArgumentException for empty targetCluster");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("TargetCluster cannot be null or empty"));
+    }
+  }
+
+  @Test
+  public void testCheckpointValidationNullToTime() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey = Bytes.toBytes("row1");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    try {
+      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
+        .setFromTime(0L).setToTime(null).setIsDryRun(false).setStartRowKey(startKey)
+        .setEndRowKey(startKey).setStatus(Status.VERIFIED).setExecutionStartTime(timestamp)
+        .setExecutionEndTime(timestamp).build());
+      fail("Should throw NullPointerException for null toTime");
+    } catch (NullPointerException e) {
+      assertTrue(e.getMessage().contains("ToTime cannot be null"));
+    }
+  }
+
+  @Test
+  public void testCheckpointWithNullStatusPersistsAsNull() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey = Bytes.toBytes("row1");
+    byte[] endKey = Bytes.toBytes("row100");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // Status is permitted to be null — production has an explicit null guard at the upsert site.
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey)
+      .setStatus(null).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
+
+    String query = "SELECT STATUS FROM "
+      + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?";
+    try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) {
+      ps.setString(1, tableName);
+      try (ResultSet rs = ps.executeQuery()) {
+        assertTrue(rs.next());
+        assertNull("STATUS should be null when builder sets it to null", rs.getString("STATUS"));
+      }
+    }
+  }
+
+  @Test
+  public void testBuilderSetEndRowKeyNullCoercedToEmpty() {
+    PhoenixSyncTableCheckpointOutputRow row =
+      new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(null).build();
+
+    byte[] retrieved = row.getEndRowKey();
+    assertNotNull("setEndRowKey(null) should coerce to empty array, not stay null", retrieved);
+    assertEquals("Coerced array should have length 0", 0, retrieved.length);
+  }
+
+  @Test
+  public void testBuilderSetEndRowKeyEmptyArrayCoercedToEmpty() {
+    PhoenixSyncTableCheckpointOutputRow row =
+      new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(new byte[0]).build();
+
+    byte[] retrieved = row.getEndRowKey();
+    assertNotNull("setEndRowKey(empty array) should remain non-null", retrieved);
+    assertEquals("Coerced array should have length 0", 0, retrieved.length);
+  }
+
+  @Test
+  public void testGetEndRowKeyDefensiveCopy() {
+    byte[] endKey = Bytes.toBytes("end");
+
+    PhoenixSyncTableCheckpointOutputRow row =
+      new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(endKey).build();
+
+    byte[] retrieved = row.getEndRowKey();
+    assertNotSame("Should return a copy, not the original", endKey, retrieved);
+
+    retrieved[0] = (byte) 0xFF;
+
+    byte[] retrievedAgain = row.getEndRowKey();
+    assertNotEquals("Internal array should not be modified", (byte) 0xFF, retrievedAgain[0]);
+  }
+
+  @Test
+  public void testParseCounterValueCorruptedFormatThrows() {
+    // "FOO,BAR=1" — first token "FOO" splits by '=' to length 1, which fails the length-2 check.
+    PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setStartRowKey(Bytes.toBytes("start")).setCounters("FOO,BAR=1").build();
+
+    try {
+      row.getSourceRowsProcessed();
+      fail("Should throw IllegalArgumentException for corrupted counter format");
+    } catch (IllegalArgumentException e) {
+      assertTrue("Error message should explain corruption: " + e.getMessage(),
+        e.getMessage().contains("Corrupted counter format"));
+    }
+  }
+
+  @Test
+  public void testParseCounterValueCounterNameNotPresentReturnsZero() {
+    // Well-formed counters string that doesn't contain SOURCE_ROWS_PROCESSED — should default to 0.
+    PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setStartRowKey(Bytes.toBytes("start")).setCounters("CHUNKS_VERIFIED=5").build();
+
+    assertEquals(0L, row.getSourceRowsProcessed());
+    assertEquals(0L, row.getTargetRowsProcessed());
+  }
+
+  @Test
+  public void testParseCounterValueEmptyStringReturnsZero() {
+    // Distinct from null — exercises the counters.isEmpty() branch in parseCounterValue.
+    PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setStartRowKey(Bytes.toBytes("start")).setCounters("").build();
+
+    assertEquals(0L, row.getSourceRowsProcessed());
+    assertEquals(0L, row.getTargetRowsProcessed());
+  }
+
+  @Test
+  public void testGetProcessedChunksBothBoundariesFilterNonOverlappingChunks() throws Exception {
+    String tableName = generateUniqueName();
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // Three chunks; mapper region is row25..row50. Only the middle chunk overlaps both bounds.
+    byte[] chunk1Start = Bytes.toBytes("row10");
+    byte[] chunk1End = Bytes.toBytes("row20");
+    byte[] chunk2Start = Bytes.toBytes("row30");
+    byte[] chunk2End = Bytes.toBytes("row40");
+    byte[] chunk3Start = Bytes.toBytes("row60");
+    byte[] chunk3End = Bytes.toBytes("row70");
+
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk1Start).setEndRowKey(chunk1End)
+      .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .build());
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk2Start).setEndRowKey(chunk2End)
+      .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .build());
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk3Start).setEndRowKey(chunk3End)
+      .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .build());
+
+    // chunk1 ends at row20 (< mapperStart=row25), chunk3 starts at row60 (> mapperEnd=row50).
+    byte[] mapperStart = Bytes.toBytes("row25");
+    byte[] mapperEnd = Bytes.toBytes("row50");
+
+    List<PhoenixSyncTableCheckpointOutputRow> results = repository.getProcessedChunks(tableName,
+      targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
+
+    assertEquals("Only the overlapping chunk (row30..row40) should match", 1, results.size());
+    assertArrayEquals("Surviving chunk should be chunk2", chunk2Start,
+      results.get(0).getStartRowKey());
+  }
+
+  @Test
+  public void testGetProcessedMapperRegionsFiltersByExactTimeWindow() throws Exception {
+    String tableName = generateUniqueName();
+    byte[] startKey1 = Bytes.toBytes("row1");
+    byte[] endKey1 = Bytes.toBytes("row100");
+    byte[] startKey2 = Bytes.toBytes("row200");
+    byte[] endKey2 = Bytes.toBytes("row300");
+    Timestamp timestamp = new Timestamp(System.currentTimeMillis());
+
+    // Two regions for the same table at distinct (fromTime, toTime) windows.
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L)
+      .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey1).setEndRowKey(endKey1)
+      .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp)
+      .build());
+    repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
+      .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
+      .setFromTime(1000L).setToTime(2000L).setIsDryRun(false).setStartRowKey(startKey2)
+      .setEndRowKey(endKey2).setStatus(Status.VERIFIED).setExecutionStartTime(timestamp)
+      .setExecutionEndTime(timestamp).build());
+
+    List<PhoenixSyncTableCheckpointOutputRow> results =
+      repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true);
+
+    assertEquals("Only the [0, 1000) region should match", 1, results.size());
+    assertArrayEquals("Surviving region should be the [0, 1000) one", startKey1,
+      results.get(0).getStartRowKey());
+  }
 }

From f48cd20efb8999db43a23a5fbb7e8ff3579f30b2 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Mon, 8 Jun 2026 20:50:48 +0530
Subject: [PATCH 13/18] checkpointing test modification

---
 .../end2end/PhoenixSyncTableToolIT.java       | 393 +++++++++---------
 1 file changed, 202 insertions(+), 191 deletions(-)

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 1d9131d8e2d..ce2adc02505 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -174,7 +174,7 @@ public void testSyncTableWithDataDifference() throws Exception {
     long toTime = System.currentTimeMillis();
 
     // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows.
-    Job dryRunJob = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time",
+    Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob);
 
@@ -190,7 +190,7 @@ public void testSyncTableWithDataDifference() throws Exception {
 
     // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in
     // place.
-    Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time",
+    Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     SyncCountersResult repairCounters = getSyncCounters(repairJob);
     assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0);
@@ -367,13 +367,13 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception
     //
     // Note: the two runSyncTool calls above each write CHUNK/VERIFIED rows under their own
     // (from-time, to-time) PK, so the checkpoint table accumulates entries from prior validate
-    // passes — validateCheckpointEntries can't be applied here against a single counter
-    // snapshot. Stick with the MISMATCHED-count invariant.
+    // passes — strict validateCheckpointEntries can't be applied here against a single counter
+    // snapshot. Use the bounded variant with the repair pass's counters as the lower bound.
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName);
     SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     assertRowDriftCounters(repairCounters, 0, 0, 0, 0);
     assertRepairChunkAndMapperCounters(repairCounters, 0, 0, 0, 0, 0, 0);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, repairCounters);
   }
 
   @Test
@@ -575,7 +575,7 @@ public void testSyncTableWithTimeRangeFilter() throws Exception {
     // no MISMATCHED rows are written. Out-of-window drift (IDs 3,5,8,23,25,28) is invisible
     // to the time-range filter and remains on target by design — full convergence is NOT
     // expected here, only checkpoint cleanliness for the window we scanned.
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters);
   }
 
   @Test
@@ -783,7 +783,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce
 
     // Both runs were non-dry-run, so repair ran inline. Target should converge.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters2);
   }
 
   @Test
@@ -823,9 +823,11 @@ public void testSyncTableIdempotentOnReRun() throws Exception {
       checkpointEntriesAfterFirstRun, checkpointEntriesAfterSecondRun);
 
     // Both passes were non-dry-run with no drift to begin with; the repair flow ran as a
-    // no-op, target should still match source and no MISMATCHED rows should exist.
+    // no-op, target should still match source and no MISMATCHED rows should exist. The
+    // first run's counters are the source-of-truth bound — counters2 is all zeros because
+    // the resume filter skipped every chunk.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters1);
   }
 
   @Test
@@ -841,8 +843,8 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception {
     long toTime = System.currentTimeMillis();
 
     // Run sync tool for the FIRST time (no differences, all chunks verified)
-    Job job1 = runSyncToolWithLargeChunks(uniqueTableName, "--from-time", String.valueOf(fromTime),
-      "--to-time", String.valueOf(toTime));
+    Job job1 = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     SyncCountersResult counters1 = getSyncCounters(job1);
 
     // Validate first run: all rows processed, no mismatches
@@ -879,8 +881,10 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception {
       checkpointEntriesAfterSecondRun.isEmpty());
 
     // No drift was introduced; repair flow should be a no-op even after concurrent splits.
+    // Use counters1 as the lower bound — counters2 is all zeros because every chunk was
+    // already VERIFIED by the first pass.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters1);
   }
 
   @Test
@@ -1228,9 +1232,12 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception {
       "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    assertRowDriftCounters(getSyncCounters(result.repairJob), 0, 0, 0, 1);
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    assertRowDriftCounters(repairCounters, 0, 0, 0, 1);
     // Phoenix SELECT already sees both sides as identical (target's row 5 is tombstone-only and
     // invisible). Divergence is at the raw-cell level only — not asserted via SELECT here.
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   @Test
@@ -1275,13 +1282,9 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception
     validateSyncCounters(counters, 10, 10, 0, 10);
     assertRowDriftCounters(counters, 0, 0, 10, 0);
 
-    // Verify checkpoint entries show mismatches
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-
-    long mismatchedCount = countCheckpointsByStatus(checkpointEntries,
-      PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED);
-    assertTrue("Should have mismatched entries due to timestamp differences", mismatchedCount > 0);
+    // Strict checkpoint validation: dry-run-only flow, so the MISMATCHED rows persist and counts
+    // are pinned to dryRunCounters per (Type × Status) bucket.
+    validateCheckpointEntries(uniqueTableName, null, counters, null);
   }
 
   @Test
@@ -1323,11 +1326,6 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception {
     // Validate counters - should process all 100 rows and detect mismatched chunks
     validateSyncCountersWithMinChunk(counters, 100, 100, 1, 1);
 
-    // Verify checkpoint entries were created
-    List<PhoenixSyncTableCheckpointOutputRow> checkpointEntries =
-      queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty());
-
     // Run sync again to verify idempotent behavior after merges
     Job job2 = runSyncToolWithChunkSize(uniqueTableName, 512, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
@@ -1336,10 +1334,8 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception {
     // Second run should process ZERO rows (all checkpointed despite region merges)
     validateSyncCounters(counters2, 0, 0, 0, 0);
 
-    // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter
-    // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass
-    // on the stable region layout to converge.
-    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   @Test
@@ -1395,10 +1391,10 @@ public void testSyncTableWithPagingTimeout() throws Exception {
     // between passes and stale MISMATCHED rows from the dry-run can land outside the new
     // boundary set.
     cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+    Job repairJob = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, null, getSyncCounters(repairJob));
   }
 
   @Test
@@ -1467,10 +1463,10 @@ public void testSyncTableWithPagingTimeoutWithSplits() throws Exception {
     // checkpoint rows first so the resume filter doesn't leave stale MISMATCHED entries that
     // sit outside the repair pass's chunk boundaries (paging + splits change boundary set).
     cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
-    runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
+    Job repairJob = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, null, getSyncCounters(repairJob));
   }
 
   @Test
@@ -1531,15 +1527,22 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
     setupStandardTestWithReplication(uniqueTableName, 1, 5);
     introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4));
 
+    // Pin a single time window up front and re-use it for all three runs. The checkpoint PK
+    // includes TO_TIME, so if the first dry-run defaulted to its own currentTimeMillis() the
+    // resulting MISMATCHED rows would sit at a different PK from the failing/recovery runs and
+    // never get overwritten — leaving stale MISMATCHED rows that the post-recovery validation
+    // would (correctly) flag.
+    long fromTime = 0L;
+    long toTime = System.currentTimeMillis();
+
     // Run once first so the checkpoint table exists; we can only attach a coprocessor to a
     // table that's already been created.
-    Job initial = runSyncTool(uniqueTableName, "--dry-run");
+    Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue(initial.isSuccessful());
 
     String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME;
     TestUtil.addCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class);
-    long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
     try {
       // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail.
       PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
@@ -1566,7 +1569,7 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
     Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
       "--to-time", String.valueOf(toTime));
     assertTrue(recovery.isSuccessful());
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, getSyncCounters(recovery));
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
@@ -1615,7 +1618,7 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception {
     Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime),
       "--to-time", String.valueOf(toTime));
     assertTrue(recovery.isSuccessful());
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, getSyncCounters(recovery));
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
@@ -1678,6 +1681,12 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
       separatedAfterFailedRun.mappers.size());
     assertEquals("Remaining chunk entries should persist after failed run",
       setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterFailedRun.chunks.size());
+
+    // Structural invariants only: the test deletes 75% of checkpoint rows mid-flight then runs a
+    // job that fails before it can write new ones, so no counter-parity check is meaningful here.
+    // Just assert the surviving rows are well-formed and no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED
+    // rows leaked through.
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, null);
   }
 
   /**
@@ -1721,9 +1730,10 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters repairCounters = result.repairJob.getCounters();
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions",
-      repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2);
+      repairCounters.cellsExtraOnTarget >= 2);
 
     assertTargetName(uniqueTableName, rowId, "alice");
 
@@ -1741,7 +1751,7 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
@@ -1776,6 +1786,7 @@ public void testRepairPartialShadowWithinRow() throws Exception {
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
     SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     assertRepairCellCounters(repairCounters, 0, 0, 0, 1);
     assertTrue("At least one mapper should roll up to UNREPAIRABLE",
@@ -1785,6 +1796,7 @@ public void testRepairPartialShadowWithinRow() throws Exception {
 
     // Source NAME='alice' but target NAME=null — Phoenix SELECT must still diverge.
     verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   /**
@@ -1845,10 +1857,11 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except
     assertTrue(pass2.dryRunJob.isSuccessful());
     assertTrue(pass2.repairJob.isSuccessful());
 
+    SyncCountersResult pass2DryRunCounters = getSyncCounters(pass2.dryRunJob);
     SyncCountersResult pass2Counters = getSyncCounters(pass2.repairJob);
     assertRepairCellCounters(pass2Counters, 1, 0, 0, 0);
     assertTrue(pass2Counters.mappersRepaired >= 1);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, pass2DryRunCounters, pass2Counters);
     assertTrue(countCheckpointsByStatus(
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null),
       PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1);
@@ -2030,14 +2043,12 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception {
       "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     // CF1.A missing → +1, CF2.B extra → +1; nothing different / unrepairable.
-    assertTrue("CF1.A must mirror as missing",
-      c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1);
-    assertTrue("CF2.B must tombstone as extra",
-      c.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 1);
-    assertEquals("No row should be unrepairable", 0,
-      c.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue());
+    assertTrue("CF1.A must mirror as missing", repairCounters.cellsMissingOnTarget >= 1);
+    assertTrue("CF2.B must tombstone as extra", repairCounters.cellsExtraOnTarget >= 1);
+    assertEquals("No row should be unrepairable", 0, repairCounters.rowsCannotRepair);
 
     try (PreparedStatement ps = targetConnection.prepareStatement(
       "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) {
@@ -2053,7 +2064,7 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception {
     assertEquals("Source/target rows must match across both column families",
       collectMultiCfRows(sourceConnection, uniqueTableName),
       collectMultiCfRows(targetConnection, uniqueTableName));
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
   }
 
   private List<String> collectMultiCfRows(Connection conn, String tableName) throws SQLException {
@@ -2097,6 +2108,7 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
     SyncCountersResult c = getSyncCounters(result.repairJob);
     // The empty-key cell mirrors (rowsMissing++); NAME is shadow-suppressed (rowsCannotRepair++).
     assertRowDriftCounters(c, 1, 0, 0, 1);
@@ -2106,6 +2118,7 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
 
     // NAME shadow leaves target NAME=null while source has 'alice' — Phoenix SELECT diverges.
     verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
   }
 
   /**
@@ -2137,6 +2150,7 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
     SyncCountersResult c = getSyncCounters(result.repairJob);
     assertRowDriftCounters(c, 0, 0, 0, 1);
     assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
@@ -2145,6 +2159,7 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
 
     // Source has the row, target's family is shadowed away — Phoenix SELECT must diverge.
     verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
   }
 
   /**
@@ -2173,6 +2188,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
       String.valueOf(waitUntilWallClockPasses(sourceTs)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
     SyncCountersResult c = getSyncCounters(result.repairJob);
     assertRowDriftCounters(c, 0, 0, 0, 1);
     assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
@@ -2181,6 +2197,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
 
     // Source has 'alice', target's row is fully shadowed — Phoenix SELECT must diverge.
     verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
   }
 
   /**
@@ -2270,10 +2287,12 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
       "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
     SyncCountersResult c = getSyncCounters(result.repairJob);
     assertRepairCellCounters(c, 0, 0, 0, 1);
     assertRowDriftCounters(c, 0, 0, 0, 1);
     assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
   }
 
   /**
@@ -2485,13 +2504,14 @@ private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Except
     assertTrue(result.dryRunJob.isSuccessful());
     assertTrue(result.repairJob.isSuccessful());
 
-    Counters c = result.repairJob.getCounters();
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
     // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring
     // also covers the empty-key sentinel cell, hence the >= 1 assertion shape.
     assertTrue("source tombstone must mirror as a missing cell on target",
-      c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1);
-    assertTrue(c.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue() >= 1);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+      repairCounters.cellsMissingOnTarget >= 1);
+    assertTrue(repairCounters.mappersRepaired >= 1);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
 
     try (PreparedStatement ps = targetConnection.prepareStatement(
       "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
@@ -2552,16 +2572,15 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     assertTrue("Repair should succeed", repairJob.isSuccessful());
 
-    Counters c = repairJob.getCounters();
+    SyncCountersResult repairCounters = getSyncCounters(repairJob);
     assertTrue("All source-only rows should be marked missing",
-      c.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() >= sourceOnly.length);
+      repairCounters.rowsMissingOnTarget >= sourceOnly.length);
     assertTrue("All target-only rows should be marked extra",
-      c.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() >= targetOnly.length);
-    assertEquals("No chunk should fail repair", 0,
-      c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue());
+      repairCounters.rowsExtraOnTarget >= targetOnly.length);
+    assertEquals("No chunk should fail repair", 0, repairCounters.chunksRepairFailed);
 
-    // Convergence pass on a clean checkpoint to confirm no chunks remain mismatched.
-    convergeAndAssertIdentical(uniqueTableName, fromTime, toTime);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
   }
 
   @Test
@@ -2629,7 +2648,10 @@ public void testSyncTableWithDeleteAndCompactionOnSource() throws Exception {
     validateMapperCountersRepair(counters3, 3, 1, 0, 0);
 
     // The standard Phoenix view (no raw-scan) on both clusters remains identical.
+    // Three separate runs each landed under their own (from-time, to-time) PK, so checkpoint
+    // state accumulates — use the bounded validator (mirrors the OnTarget sibling).
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3);
   }
 
   @Test
@@ -2684,9 +2706,10 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception {
     validateMapperCounters(counters3, 4, 0);
 
     // After major compaction tombstones are gone; the third raw-scan pass is clean and the
-    // standard Phoenix view matches.
+    // standard Phoenix view matches. Three separate runs each landed under their own
+    // (from-time, to-time) PK, so checkpoint state accumulates — use the bounded validator.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3);
   }
 
   @Test
@@ -2732,7 +2755,10 @@ public void testSyncTableWithMultipleVersionAndCompactionOnSource() throws Excep
     validateMapperCountersRepair(counters3, 3, 1, 0, 0);
 
     // The standard Phoenix view (latest version only) on both clusters is identical.
+    // Two separate runs each landed under their own (from-time, to-time) PK, so checkpoint
+    // state accumulates — use the bounded validator (mirrors the OnTarget sibling).
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3);
   }
 
   @Test
@@ -2781,9 +2807,11 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep
     validateSyncCounters(counters3, 10, 10, 10, 0);
     validateMapperCounters(counters3, 4, 0);
 
-    // After repair the standard Phoenix view matches.
+    // After repair the standard Phoenix view matches. Three separate runs each landed under
+    // their own (from-time, to-time) PK, so checkpoint state accumulates across all three —
+    // use the bounded validator with the latest run's counters as the lower bound.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3);
   }
 
   @Test
@@ -2885,7 +2913,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc
     // Run was non-dry-run with no drift; repair flow is a no-op and target should match source
     // even though toTime is older than max lookback age.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    assertNoMismatchedCheckpoints(uniqueTableName, null);
+    validateCheckpointEntries(uniqueTableName, null, null, counters);
   }
 
   /**
@@ -3838,15 +3866,6 @@ private long captureBaselineChunkCount(String tableName, int chunkSize) throws E
     return chunkCount;
   }
 
-  /**
-   * Runs the PhoenixSyncTableTool with 1KB chunk size for testing multiple rows per chunk. Returns
-   * the completed Job for counter verification.
-   */
-  private Job runSyncToolWithLargeChunks(String tableName, String... additionalArgs)
-    throws Exception {
-    return runSyncToolWithChunkSize(tableName, 1024, additionalArgs);
-  }
-
   /**
    * Runs the PhoenixSyncTableTool with specified chunk size. Returns the completed Job for counter
    * verification.
@@ -3875,46 +3894,29 @@ private static class RepairRunResult {
     }
   }
 
+  /** Default chunk size (1 byte = one row per chunk) for {@link #runSyncToolWithRepair}. */
+  private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs)
+    throws Exception {
+    return runSyncToolWithRepair(tableName, 1, additionalArgs);
+  }
+
   /**
    * Runs the sync tool twice with the SAME pinned time window: first as a --dry-run to detect
    * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED
    * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is
    * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without
    * pinning, each invocation would fall through to System.currentTimeMillis() and the repair
-   * pass would create fresh rows instead of overwriting the dry-run pass's output.
-   *
-   * <p>If the caller does not provide --from-time / --to-time, defaults of 0 / now are pinned.
-   *
-   * <p>Default chunk size is 1 byte (one row per chunk) to mirror {@link #runSyncTool}.
-   */
-  private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs)
-    throws Exception {
-    return runSyncToolWithRepairAndChunkSize(tableName, 1, additionalArgs);
-  }
-
-  /**
-   * Same as {@link #runSyncToolWithRepair} but uses 1KB chunks (multiple rows per chunk).
+   * pass would create fresh rows instead of overwriting the dry-run pass's output. If the caller
+   * does not provide --from-time / --to-time, defaults of 0 / now are pinned.
    */
-  private RepairRunResult runSyncToolWithRepairLargeChunks(String tableName,
-    String... additionalArgs) throws Exception {
-    return runSyncToolWithRepairAndChunkSize(tableName, 1024, additionalArgs);
-  }
-
-  /**
-   * Same as {@link #runSyncToolWithRepair} but with an explicit chunk size.
-   */
-  private RepairRunResult runSyncToolWithRepairAndChunkSize(String tableName, int chunkSize,
+  private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize,
     String... additionalArgs) throws Exception {
     long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L);
     long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis());
     String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime);
 
-    // First run: --dry-run, only detect mismatches.
     String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run");
     Job dryRunJob = runSyncToolWithChunkSize(tableName, chunkSize, dryRunArgs);
-
-    // Second run: no --dry-run. Same time window so the checkpoint PK matches and any
-    // CHUNK/MISMATCHED rows from the dry-run pass are overwritten by CHUNK/REPAIRED.
     Job repairJob = runSyncToolWithChunkSize(tableName, chunkSize, pinnedArgs);
 
     return new RepairRunResult(dryRunJob, repairJob, fromTime, toTime);
@@ -3965,27 +3967,6 @@ private static String[] appendArg(String[] args, String newArg) {
     return result;
   }
 
-  /**
-   * After a clean repair pass, asserts no checkpoint rows remain in any non-terminal or
-   * stuck state — MISMATCHED, UNREPAIRABLE, or REPAIR_FAILED. Tests that legitimately leave
-   * UNREPAIRABLE/REPAIR_FAILED rows should not call this helper.
-   */
-  private void assertNoMismatchedCheckpoints(String tableName, String tenantId)
-    throws SQLException {
-    List<PhoenixSyncTableCheckpointOutputRow> entries =
-      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
-    String ctx = " (table=" + tableName + " tenant=" + tenantId + ")";
-    assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
-    assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
-    assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
-  }
-
   /**
    * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the
    * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops
@@ -4413,9 +4394,12 @@ private void assertTargetRowAbsent(String tableName, int id) throws SQLException
    */
   private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime)
     throws Exception {
-    runSyncToolWithRepair(tableName, "--from-time", String.valueOf(fromTime), "--to-time",
-      String.valueOf(toTime));
-    assertNoMismatchedCheckpoints(tableName, null);
+    RepairRunResult result = runSyncToolWithRepair(tableName, "--from-time",
+      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    // Prior checkpoint state may already cover some chunks under earlier (fromTime, toTime)
+    // PKs; strict equality won't hold, so use the bounded variant with the repair pass's
+    // counters as the lower bound.
+    validateCheckpointEntriesAtLeast(tableName, null, getSyncCounters(result.repairJob));
     verifyDataIdentical(sourceConnection, targetConnection, tableName);
   }
 
@@ -4623,35 +4607,43 @@ private void validateSyncCountersWithMinChunk(SyncCountersResult counters,
       counters.chunksMismatched >= minChunksMismatched);
   }
 
+  /** Per-row structural invariants shared by both checkpoint validators. */
+  private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow entry,
+    String tableName, String tenantId, String ctx) {
+    assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName());
+    assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster());
+    assertNotNull("TYPE should not be null" + ctx, entry.getType());
+    assertTrue("TYPE should be REGION or CHUNK" + ctx,
+      PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())
+        || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType()));
+
+    if (tenantId == null) {
+      assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx,
+        entry.getTenantId());
+    } else {
+      assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId());
+    }
+
+    assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0);
+    assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime());
+
+    assertNotNull("EXECUTION_START_TIME should not be null" + ctx,
+      entry.getExecutionStartTime());
+    assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime());
+    assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx,
+      entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime());
+
+    assertNotNull("STATUS should not be null" + ctx, entry.getStatus());
+    if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) {
+      assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters());
+    }
+  }
+
   /**
-   * Aggressive end-to-end checkpoint-table validation. Queries the checkpoint table itself, then:
-   * <ol>
-   *   <li>Validates per-row structural integrity — PK columns non-null, TYPE ∈ {REGION, CHUNK},
-   *     TENANT_ID matches, time-range invariants (FROM_TIME ≥ 0, TO_TIME > FROM_TIME), execution
-   *     timestamps non-null with END ≥ START, STATUS non-null, COUNTERS non-null on CHUNK rows.
-   *   </li>
-   *   <li>Pins per-(Type × Status) counts derived from the supplied counter objects: CHUNK and
-   *     REGION crossed with VERIFIED / REPAIRED / UNREPAIRABLE / REPAIR_FAILED / MISMATCHED.
-   *   </li>
-   *   <li>Cross-checks REGION-row sourceRowsProcessed/targetRowsProcessed totals against the
-   *     repair-pass counters (or dry-run counters when no repair ran).</li>
-   * </ol>
-   * <p>
-   * The two counter parameters select the validation mode:
-   * <ul>
-   *   <li>{@code dryRunCounters != null && repairCounters == null} — dry-run-only test. Expects
-   *     MISMATCHED rows to remain.</li>
-   *   <li>{@code dryRunCounters == null && repairCounters != null} — combined verify+repair pass
-   *     (no separate dry-run). VERIFIED count comes from {@code repairCounters}; MISMATCHED == 0.
-   *   </li>
-   *   <li>{@code dryRunCounters != null && repairCounters != null} — separate dry-run + repair.
-   *     VERIFIED count comes from {@code dryRunCounters} (the repair pass's resume filter skips
-   *     them); REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters};
-   *     MISMATCHED == 0.</li>
-   * </ul>
-   * Use after asserting counters and before declaring success — pins the persisted checkpoint
-   * state to the in-memory counter state, so a regression that reports the right counters but
-   * writes the wrong checkpoint rows (or vice versa) fails loudly.
+   * Strict checkpoint-table validation: pins per-(Type × Status) counts to the supplied counter
+   * objects. VERIFIED comes from {@code dryRunCounters} when present, otherwise from
+   * {@code repairCounters}; REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters}
+   * when present; MISMATCHED comes from {@code dryRunCounters} only on dry-run-only flows.
    */
   private void validateCheckpointEntries(String tableName, String tenantId,
     SyncCountersResult dryRunCounters, SyncCountersResult repairCounters) throws SQLException {
@@ -4663,48 +4655,21 @@ private void validateCheckpointEntries(String tableName, String tenantId,
       queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
     String ctx = " (table=" + tableName + " tenant=" + tenantId + ")";
 
-    // Per-row structural validation + REGION row-processing totals.
     long mapperRegionCount = 0;
     long chunkCount = 0;
     long sourceRowsProcessed = 0;
     long targetRowsProcessed = 0;
     for (PhoenixSyncTableCheckpointOutputRow entry : entries) {
-      assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName());
-      assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster());
-      assertNotNull("TYPE should not be null" + ctx, entry.getType());
-      assertTrue("TYPE should be REGION or CHUNK" + ctx,
-        PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())
-          || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType()));
-
-      if (tenantId == null) {
-        assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx,
-          entry.getTenantId());
-      } else {
-        assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId());
-      }
-
-      assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0);
-      assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime());
-
-      assertNotNull("EXECUTION_START_TIME should not be null" + ctx,
-        entry.getExecutionStartTime());
-      assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime());
-      assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx,
-        entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime());
-
-      assertNotNull("STATUS should not be null" + ctx, entry.getStatus());
-
+      validateCheckpointRowStructure(entry, tableName, tenantId, ctx);
       if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) {
         mapperRegionCount++;
         sourceRowsProcessed += entry.getSourceRowsProcessed();
         targetRowsProcessed += entry.getTargetRowsProcessed();
       } else {
         chunkCount++;
-        assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters());
       }
     }
 
-    // Per-(Type × Status) counts derived from counter objects.
     boolean isDryRunOnly = (repairCounters == null);
     SyncCountersResult verifiedSource = (dryRunCounters != null ? dryRunCounters : repairCounters);
     long expectedChunkVerified = verifiedSource.chunksVerified;
@@ -4750,8 +4715,6 @@ private void validateCheckpointEntries(String tableName, String tenantId,
       countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
         PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
 
-    // Aggregate type counts (sanity — sum of per-status counts across all five statuses must
-    // equal what the loop above tallied).
     long expectedChunkTotal = expectedChunkVerified + expectedChunkRepaired
       + expectedChunkUnrepairable + expectedChunkRepairFailed + expectedChunkMismatched;
     long expectedRegionTotal = expectedRegionVerified + expectedRegionRepaired
@@ -4759,14 +4722,8 @@ private void validateCheckpointEntries(String tableName, String tenantId,
     assertEquals("Total CHUNK checkpoint rows" + ctx, expectedChunkTotal, chunkCount);
     assertEquals("Total REGION checkpoint rows" + ctx, expectedRegionTotal, mapperRegionCount);
 
-    // Row-processing totals: REGION rows persist per-mapper sourceRowsProcessed /
-    // targetRowsProcessed; their sum equals the job-level counter only when no REGION row was
-    // overwritten by a subsequent repair pass. The checkpoint PK excludes IS_DRY_RUN, so a repair
-    // pass UPSERTs the REGION row at the dry-run row's PK; for partially-mismatched regions
-    // repair re-processes only the gap (non-VERIFIED) chunks and writes a smaller delta, so the
-    // sum across REGION rows no longer matches the dry-run job-level total. Only assert the
-    // strict equality when (a) no repair pass was run, or (b) the repair pass touched no
-    // regions (everything was already VERIFIED so dry-run rows stay intact).
+    // Repair UPSERTs REGION rows at the dry-run PK with a smaller (gap-only) delta, so totals
+    // only match when no REGION was actually re-processed.
     boolean repairTouchedRegions = repairCounters != null
       && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable
         + repairCounters.mappersRepairFailed) > 0;
@@ -4778,6 +4735,60 @@ private void validateCheckpointEntries(String tableName, String tenantId,
     }
   }
 
+  /**
+   * Bounded variant of {@link #validateCheckpointEntries} for tests where checkpoint rows
+   * accumulate across multiple PKs (multi-window, partial reruns under split/merge, recovery).
+   * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts
+   * are at least {@code latestCounters}'s verified+repaired totals (or just the status invariants
+   * when {@code latestCounters} is null).
+   */
+  private void validateCheckpointEntriesAtLeast(String tableName, String tenantId,
+    SyncCountersResult latestCounters) throws SQLException {
+    List<PhoenixSyncTableCheckpointOutputRow> entries =
+      queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId);
+    String ctx = " (table=" + tableName + " tenant=" + tenantId + ")";
+
+    for (PhoenixSyncTableCheckpointOutputRow entry : entries) {
+      validateCheckpointRowStructure(entry, tableName, tenantId, ctx);
+    }
+
+    assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
+    assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
+    assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0,
+      countCheckpointsByStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
+
+    if (latestCounters != null) {
+      long chunkVerifiedPlusRepaired =
+        countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+          PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)
+          + countCheckpointsByTypeAndStatus(entries,
+            PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
+            PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
+      long regionVerifiedPlusRepaired =
+        countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+          PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)
+          + countCheckpointsByTypeAndStatus(entries,
+            PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+            PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
+      long expectedChunkLowerBound = latestCounters.chunksVerified + latestCounters.chunksRepaired;
+      long expectedRegionLowerBound =
+        latestCounters.mappersVerified + latestCounters.mappersRepaired;
+      assertTrue(
+        "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual="
+          + chunkVerifiedPlusRepaired + ", expected>=" + expectedChunkLowerBound + ")",
+        chunkVerifiedPlusRepaired >= expectedChunkLowerBound);
+      assertTrue(
+        "REGION VERIFIED+REPAIRED >= latest counters" + ctx + " (actual="
+          + regionVerifiedPlusRepaired + ", expected>=" + expectedRegionLowerBound + ")",
+        regionVerifiedPlusRepaired >= expectedRegionLowerBound);
+    }
+  }
+
   /**
    * Data class to hold test table data
    */

From cdaf521b24cba807ae92bbc1575116969c7e9e3c Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Mon, 8 Jun 2026 22:00:05 +0530
Subject: [PATCH 14/18] ready for review

---
 .../phoenix/jdbc/PhoenixEmbeddedDriver.java   |  18 +-
 .../PhoenixSyncTableCheckpointOutputRow.java  |  19 +-
 .../PhoenixSyncTableChunkRepairer.java        | 331 ++++----
 .../PhoenixSyncTableInputFormat.java          |  13 +-
 .../mapreduce/PhoenixSyncTableMapper.java     |  78 +-
 .../mapreduce/PhoenixSyncTableTool.java       |  42 +-
 .../end2end/PhoenixSyncTableToolIT.java       | 758 ++++++++----------
 .../ConnectionQueryServicesMetricsIT.java     |   1 -
 .../PhoenixSyncTableOutputRepositoryTest.java |  64 +-
 9 files changed, 632 insertions(+), 692 deletions(-)

diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
index 524789464b4..b5bbe2ea552 100644
--- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
+++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java
@@ -50,13 +50,13 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable {
   private final static String DNC_JDBC_PROTOCOL_SUFFIX = "//";
   private final static String DRIVER_NAME = "PhoenixEmbeddedDriver";
   private static final String TEST_URL_AT_END =
-      "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM;
+    "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM;
   private static final String TEST_URL_IN_MIDDLE =
-      TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR;
+    TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR;
 
   private static final String[] SUPPORTED_PROTOCOLS =
-      new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK,
-          PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC };
+    new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK,
+      PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC };
 
   private final static DriverPropertyInfo[] EMPTY_INFO = new DriverPropertyInfo[0];
   public final static String MAJOR_VERSION_PROP = "DriverMajorVersion";
@@ -64,9 +64,9 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable {
   public final static String DRIVER_NAME_PROP = "DriverName";
 
   public static final ReadOnlyProps DEFAULT_PROPS =
-      new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP,
-          Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP,
-          Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME));
+    new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP,
+      Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP,
+      Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME));
 
   PhoenixEmbeddedDriver() {
   }
@@ -139,7 +139,7 @@ protected final Connection createConnection(String url, Properties info) throws
       } else {
         // If empty HA group is returned, fall back to single cluster.
         url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow(
-            () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
+          () -> new SQLException("HA group can not be initialized, fallback to single cluster"));
       }
     }
     ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo);
@@ -155,7 +155,7 @@ protected final Connection createConnection(String url, Properties info) throws
    * @return new or cached QuerySerices used to establish a new Connection.
    */
   protected abstract ConnectionQueryServices getConnectionQueryServices(String url, Properties info)
-      throws SQLException;
+    throws SQLException;
 
   @Override
   public int getMajorVersion() {
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
index ef3abbbeb55..3603294988d 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java
@@ -167,10 +167,10 @@ public static class CounterFormatter {
       "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d";
 
     /**
-     * Formats chunk counters as comma-separated key=value pairs. Always emits all nine
-     * counters; unpopulated counters are 0 so operators querying the checkpoint table see
-     * a uniform format. {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run;
-     * cell-level counters and {@code ROWS_CANNOT_REPAIR} are populated only in repair mode.
+     * Formats chunk counters as comma-separated key=value pairs. Always emits all nine counters;
+     * unpopulated counters are 0 so operators querying the checkpoint table see a uniform format.
+     * {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run; cell-level counters and
+     * {@code ROWS_CANNOT_REPAIR} are populated only in repair mode.
      */
     public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget,
       long rowsExtraOnTarget, long rowsDifferentOnTarget, long rowsCannotRepair,
@@ -189,14 +189,13 @@ public static String formatChunk(long sourceRows, long targetRows, long rowsMiss
     }
 
     /**
-     * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven
-     * drift counters are the per-region sum of the same fields emitted by
-     * {@link #formatChunk}.
+     * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven drift
+     * counters are the per-region sum of the same fields emitted by {@link #formatChunk}.
      */
     public static String formatMapper(long chunksVerified, long chunksMismatched, long sourceRows,
-      long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget,
-      long rowsDifferentOnTarget, long rowsCannotRepair, long cellsMissingOnTarget,
-      long cellsExtraOnTarget, long cellsDifferentOnTarget) {
+      long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget, long rowsDifferentOnTarget,
+      long rowsCannotRepair, long cellsMissingOnTarget, long cellsExtraOnTarget,
+      long cellsDifferentOnTarget) {
       return String.format(FORMAT_MAPPER,
         PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED.name(), chunksVerified,
         PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED.name(), chunksMismatched,
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
index 3bda5364696..24deb8e7e85 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java
@@ -53,33 +53,31 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * Performs row-level repair for a mismatched chunk by merge-scanning source and target
- * cluster data and applying targeted mutations to target.
- *
- * <p>The two scan ranges may differ: the verifier reads target over a wider range than
- * source (covers extra-on-target rows that fall between consecutive source chunks);
- * repair must mirror the same boundaries so those extras are visible here as
- * {@code cmp > 0} rows and get deleted.
- *
- * <p>Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee).
+ * Performs row-level repair for a mismatched chunk by merge-scanning source and target cluster data
+ * and applying targeted mutations to target.
+ * <p>
+ * The two scan ranges may differ: the verifier reads target over a wider range than source (covers
+ * extra-on-target rows that fall between consecutive source chunks); repair must mirror the same
+ * boundaries so those extras are visible here as {@code cmp > 0} rows and get deleted.
+ * <p>
+ * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee).
  * <ul>
- *   <li>{@code cmp == 0} (same row): compare cells; repair only differing cells.</li>
- *   <li>{@code cmp <  0} (source-only): mirror all source cells onto target.</li>
- *   <li>{@code cmp >  0} (target-only): tombstone target cells within {@code [fromTime, toTime]}.</li>
+ * <li>{@code cmp == 0} (same row): compare cells; repair only differing cells.</li>
+ * <li>{@code cmp <  0} (source-only): mirror all source cells onto target.</li>
+ * <li>{@code cmp >  0} (target-only): tombstone target cells within
+ * {@code [fromTime, toTime]}.</li>
  * </ul>
  * Cells outside {@code [fromTime, toTime]} are never read (scan time range), so never mutated.
- *
- * <p>Tombstone semantics: HBase has four tombstone subtypes ({@code Delete},
- * {@code DeleteColumn}, {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts
- * we mirror onto target may be silently shadowed by an existing target tombstone; in that
- * case the mirror is suppressed and the row carries unrepairable drift (operator must
- * major-compact target to reap shadowing tombstones before a re-run can converge). See
- * {@link TargetRowRecord}.
+ * <p>
+ * Tombstone semantics: HBase has four tombstone subtypes ({@code Delete}, {@code DeleteColumn},
+ * {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts we mirror onto target may be
+ * silently shadowed by an existing target tombstone; in that case the mirror is suppressed and the
+ * row carries unrepairable drift (operator must major-compact target to reap shadowing tombstones
+ * before a re-run can converge). See {@link TargetRowRecord}.
  */
 public final class PhoenixSyncTableChunkRepairer {
 
-  private static final Logger LOGGER =
-    LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class);
+  private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class);
 
   private final Connection sourceConnection;
   private final Connection targetConnection;
@@ -108,11 +106,11 @@ public PhoenixSyncTableChunkRepairer(Connection sourceConnection, Connection tar
   }
 
   /**
-   * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the
-   * terminal status and accumulated {@link DriftCounters}; never throws on per-chunk
-   * scan/flush failure (returns {@link ChunkRepairResult.Status#REPAIR_FAILED}). The
-   * only declared {@link SQLException} surfaces from {@link Connection#unwrap}, which
-   * indicates a misconfigured connection rather than a per-chunk fault.
+   * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the terminal status
+   * and accumulated {@link DriftCounters}; never throws on per-chunk scan/flush failure (returns
+   * {@link ChunkRepairResult.Status#REPAIR_FAILED}). The only declared {@link SQLException}
+   * surfaces from {@link Connection#unwrap}, which indicates a misconfigured connection rather than
+   * a per-chunk fault.
    */
   public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress)
     throws SQLException {
@@ -159,16 +157,16 @@ public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress)
 
     ChunkRepairResult result = ChunkRepairResult.completed(drift);
     LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}",
-      Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd),
-      result.status, drift.toLogString());
+      Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), result.status,
+      drift.toLogString());
     return result;
   }
 
   /**
-   * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row;
-   * never touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose
-   * contents differ — verifier-only signal, not produced in repair mode (which reports cell
-   * granularity instead).
+   * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row; never
+   * touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose contents
+   * differ — verifier-only signal, not produced in repair mode (which reports cell granularity
+   * instead).
    */
   private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner targetScanner,
     DriftCounters drift, Progressable progress) throws IOException {
@@ -207,14 +205,12 @@ private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner target
    * each time the batch reaches {@link #repairBatchSize}, and finally draining the tail. Per
    * branch:
    * <ul>
-   *   <li>{@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable
-   *       flag.</li>
-   *   <li>{@code cmp <  0} — mirror the source row onto target; bump {@code rowsMissing} unless
-   *       the whole row was shadowed, and {@code rowsCannotRepair} unless every cell was
-   *       mirrored.</li>
-   *   <li>{@code cmp >  0} — tombstone the extra row on target; bump {@code rowsExtra} when at
-   *       least one live cell was tombstoned, else {@code rowsCannotRepair} (row was already
-   *       all tombstones).</li>
+   * <li>{@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable flag.</li>
+   * <li>{@code cmp <  0} — mirror the source row onto target; bump {@code rowsMissing} unless the
+   * whole row was shadowed, and {@code rowsCannotRepair} unless every cell was mirrored.</li>
+   * <li>{@code cmp >  0} — tombstone the extra row on target; bump {@code rowsExtra} when at least
+   * one live cell was tombstoned, else {@code rowsCannotRepair} (row was already all
+   * tombstones).</li>
    * </ul>
    */
   private void repairDiffRows(ResultScanner sourceScanner, ResultScanner targetScanner,
@@ -295,9 +291,9 @@ private static int compareRowKeys(Result sourceResult, Result targetResult) {
 
   /**
    * Whole-row content equality check used by dry-run row-level diffing. Delegates to
-   * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level
-   * mismatch (family, qualifier, timestamp, type, value); we map the throw to {@code false}
-   * so the cmp==0 path can flag the row without producing repair mutations.
+   * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level mismatch
+   * (family, qualifier, timestamp, type, value); we map the throw to {@code false} so the cmp==0
+   * path can flag the row without producing repair mutations.
    */
   private boolean rowsEqual(Result src, Result tgt) {
     try {
@@ -309,8 +305,8 @@ private boolean rowsEqual(Result src, Result tgt) {
   }
 
   /**
-   * Mirrors every source cell of a row that is missing on target. Each cell is
-   * shadow-checked against target's per-row record (see {@link TargetRowRecord}).
+   * Mirrors every source cell of a row that is missing on target. Each cell is shadow-checked
+   * against target's per-row record (see {@link TargetRowRecord}).
    */
   private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable,
     List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
@@ -325,17 +321,16 @@ private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable,
     if (mirrored == 0) {
       return RowMirrorStatus.FULLY_SHADOWED;
     }
-    return rowRepairBuffer.anyCellUnrepairable ? RowMirrorStatus.PARTIALLY_MIRRORED
+    return rowRepairBuffer.anyCellUnrepairable
+      ? RowMirrorStatus.PARTIALLY_MIRRORED
       : RowMirrorStatus.FULLY_MIRRORED;
   }
 
   /**
-   * Tombstones every live cell of a row that is extra on target. Skips cells that are
-   * themselves already tombstones (see {@link #tombstoneTargetCell}).
-   *
-   * @return the number of live cells that contributed a tombstone marker. {@code 0} means
-   *         the row was already entirely tombstones; the caller records this as
-   *         {@code ROWS_CANNOT_REPAIR}.
+   * Tombstones every live cell of a row that is extra on target. Skips cells that are themselves
+   * already tombstones (see {@link #tombstoneTargetCell}).
+   * @return the number of live cells that contributed a tombstone marker. {@code 0} means the row
+   *         was already entirely tombstones; the caller records this as {@code ROWS_CANNOT_REPAIR}.
    */
   private int tombstoneWholeRow(Result targetResult, Table targetHTable, List<Put> pendingPuts,
     List<Delete> pendingDeletes) throws IOException {
@@ -356,17 +351,17 @@ private int tombstoneWholeRow(Result targetResult, Table targetHTable, List<Put>
   /**
    * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator}
    * order and emits {@link Put}/{@link Delete} mutations.
-   *
-   * <p>Branches:
+   * <p>
+   * Branches:
    * <ul>
-   *   <li>same coords + matching value → no drift</li>
-   *   <li>same coords + different value → different++; mirror source cell (shadow-checked)</li>
-   *   <li>source-only cell → missing++; mirror source cell (shadow-checked)</li>
-   *   <li>target-only live cell → extra++; tombstone target cell</li>
-   *   <li>target-only tombstone cell → skip; row carries unrepairable drift</li>
+   * <li>same coords + matching value → no drift</li>
+   * <li>same coords + different value → different++; mirror source cell (shadow-checked)</li>
+   * <li>source-only cell → missing++; mirror source cell (shadow-checked)</li>
+   * <li>target-only live cell → extra++; tombstone target cell</li>
+   * <li>target-only tombstone cell → skip; row carries unrepairable drift</li>
    * </ul>
-   * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written);
-   * the row-level signal flows through {@link RowDriftInfo#rowCannotRepair}.
+   * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written); the
+   * row-level signal flows through {@link RowDriftInfo#rowCannotRepair}.
    */
   private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result targetResult,
     Table targetHTable, List<Put> pendingPuts, List<Delete> pendingDeletes) throws IOException {
@@ -399,7 +394,9 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
       if (cmp == 0) {
         // Same coordinates, compare values.
         if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) {
-          if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) {
+          if (
+            mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)
+          ) {
             cellDifferent++;
           }
         }
@@ -413,8 +410,10 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
         sourceIdx++;
       } else {
         // extra on target
-        if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
-          sourceMaxTsByColumn)) {
+        if (
+          tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
+            sourceMaxTsByColumn)
+        ) {
           cellExtra++;
         } else {
           rowRepairBuffer.anyCellUnrepairable = true;
@@ -428,15 +427,20 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
       sourceIdx++;
     }
     while (targetIdx < targetCells.length) {
-      if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
-        sourceMaxTsByColumn)) {
+      if (
+        tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer,
+          sourceMaxTsByColumn)
+      ) {
         cellExtra++;
       } else {
         rowRepairBuffer.anyCellUnrepairable = true;
       }
     }
 
-    if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowRepairBuffer.anyCellUnrepairable) {
+    if (
+      cellMissing == 0 && cellExtra == 0 && cellDifferent == 0
+        && !rowRepairBuffer.anyCellUnrepairable
+    ) {
       return RowDriftInfo.NONE;
     }
     rowRepairBuffer.flush(pendingPuts, pendingDeletes);
@@ -446,8 +450,8 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta
 
   /**
    * Routes a source cell to the right mutation kind. Tombstone cells go through
-   * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan}
-   * this matters because {@link Put#add(Cell)} rejects non-Put cells.
+   * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan} this
+   * matters because {@link Put#add(Cell)} rejects non-Put cells.
    */
   private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws IOException {
     if (CellUtil.isDelete(cell)) {
@@ -458,9 +462,8 @@ private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws
   }
 
   /**
-   * Mirrors a source cell onto target unless an existing target tombstone would shadow it.
-   * Shadow detection runs only if source has Put cells; tombstoned source cells always mirror.
-   *
+   * Mirrors a source cell onto target unless an existing target tombstone would shadow it. Shadow
+   * detection runs only if source has Put cells; tombstoned source cells always mirror.
    * @return {@code true} if mirrored, {@code false} if suppressed (caller marks the row
    *         unrepairable).
    */
@@ -471,7 +474,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
     // diverged. e.g. src Put(name, T=200) vs tgt DeleteColumn(name, T=300) covering
     // ts<=300. Skip the write and flag the row unrepairable; operator must major-compact
     // target to reap the shadow. Source tombstones can't be shadowed, hence skip the check.
-    if (!CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell)) {
+    if (
+      !CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell)
+    ) {
       rowRepairBuffer.anyCellUnrepairable = true;
       return false;
     }
@@ -480,17 +485,17 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
   }
 
   /**
-   * Tombstones a target-only cell to make target's read view at this column match source's.
-   * Skips cells that are themselves already tombstones.
+   * Tombstones a target-only cell to make target's read view at this column match source's. Skips
+   * cells that are themselves already tombstones.
+   * <p>
+   * Called only when source has no cell at this target cell's exact {@code (cf, q, ts)}. If source
+   * does have a cell at the same {@code (cf, q, ts)}, the caller takes the mirroring path instead
+   * <p>
+   * Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples assume
+   * {@code MAX_VERSIONS=3} and show only the relevant column.
+   * <p>
+   * <b>Case 1 — Source has no cell at this column:</b>
    *
-   * <p>Called only when source has no cell at this target cell's exact
-   * {@code (cf, q, ts)}. If source does have a cell at the same {@code (cf, q, ts)},
-   * the caller takes the mirroring path instead
-   *
-   * <p>Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples
-   * assume {@code MAX_VERSIONS=3} and show only the relevant column.
-   *
-   * <p><b>Case 1 — Source has no cell at this column:</b>
    * <pre>
    *   source row: (no NAME)
    *   target row: Put(NAME, "carol")@900 visible
@@ -498,8 +503,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
    *   action    : DeleteColumn(NAME)@900   (covers ts <= 900, wipes "bob" too)
    *   result    : target reads no NAME — matches source.
    * </pre>
+   * <p>
+   * <b>Case 2 — {@code sourceMaxTs >= targetTs}:</b>
    *
-   * <p><b>Case 2 — {@code sourceMaxTs >= targetTs}:</b>
    * <pre>
    *   source row: Put(NAME, "alice")@500       (sourceMaxTs = 500)
    *   target row: Put(NAME, "old",  )@200      (input cell; source has nothing at @200)
@@ -507,8 +513,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
    *   result    : "old"@200 is shadowed;
    *              Put(NAME, "alice")@500 would already have been mirrored
    * </pre>
+   * <p>
+   * <b>Case 3 — {@code sourceMaxTs < targetTs}:</b>
    *
-   * <p><b>Case 3 — {@code sourceMaxTs < targetTs}:</b>
    * <pre>
    *   source row: Put(NAME, "alice")@300       (sourceMaxTs = 300)
    *   target row: Put(NAME, "carol")@900 visible
@@ -519,11 +526,11 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
    *   result    : target's "alice"@300 is the highest live version — matches source.
    * </pre>
    *
-   * @return true if the cell was a live cell that contributed a tombstone marker, false if
-   *         the cell was already a tombstone and was skipped.
+   * @return true if the cell was a live cell that contributed a tombstone marker, false if the cell
+   *         was already a tombstone and was skipped.
    */
-  private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuffer rowRepairBuffer,
-    Map<ColumnKey, Long> sourceMaxTsByColumn) throws IOException {
+  private boolean tombstoneTargetCell(Cell cell, Table targetHTable,
+    RowRepairBuffer rowRepairBuffer, Map<ColumnKey, Long> sourceMaxTsByColumn) throws IOException {
     if (CellUtil.isDelete(cell)) {
       return false;
     }
@@ -548,9 +555,8 @@ private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuff
 
   /**
    * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and
-   * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL /
-   * {@code IS_STRICT_TTL} attributes so the cells visited here are the same cells the
-   * verifier hashed.
+   * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL}
+   * attributes so the cells visited here are the same cells the verifier hashed.
    */
   private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive,
     boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException {
@@ -571,17 +577,16 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey
 
   /**
    * Flushes the accumulated Put and Delete batches to target as a single mixed RPC via
-   * {@link Table#batch}. The mixed batch (rather than separate {@code put()} +
-   * {@code delete()} calls) closes the inter-RPC window where a JVM/regionserver crash
-   * between the two could leave target with Puts applied but matching Deletes missing.
-   *
-   * <p>{@link Table#batch} does NOT throw for partial failures — per-mutation failures
-   * (e.g. {@code NotServingRegionException} from a region split mid-batch,
-   * {@code WrongRegionException} from a merge) land in the {@code results} array as
-   * {@link Throwable} entries. We surface the first such failure as {@link IOException}
-   * so the caller treats this chunk as {@code REPAIR_FAILED} rather than silently
-   * marking it {@code REPAIRED}; on re-run the resume filter excludes
-   * {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap.
+   * {@link Table#batch}. The mixed batch (rather than separate {@code put()} + {@code delete()}
+   * calls) closes the inter-RPC window where a JVM/regionserver crash between the two could leave
+   * target with Puts applied but matching Deletes missing.
+   * <p>
+   * {@link Table#batch} does NOT throw for partial failures — per-mutation failures (e.g.
+   * {@code NotServingRegionException} from a region split mid-batch, {@code WrongRegionException}
+   * from a merge) land in the {@code results} array as {@link Throwable} entries. We surface the
+   * first such failure as {@link IOException} so the caller treats this chunk as
+   * {@code REPAIR_FAILED} rather than silently marking it {@code REPAIRED}; on re-run the resume
+   * filter excludes {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap.
    */
   private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delete> deletes)
     throws IOException {
@@ -611,26 +616,26 @@ private void flushRepairMutations(Table targetHTable, List<Put> puts, List<Delet
     if (failureCount > 0) {
       Throwable firstFailure = (Throwable) results[firstFailureIdx];
       Row failedRow = mutations.get(firstFailureIdx);
-      throw new IOException(String.format(
-        "Repair batch had %d/%d mutation failure(s); first failure on row %s: %s",
-        failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()),
-        firstFailure.getMessage()), firstFailure);
+      throw new IOException(
+        String.format("Repair batch had %d/%d mutation failure(s); first failure on row %s: %s",
+          failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()),
+          firstFailure.getMessage()),
+        firstFailure);
     }
     puts.clear();
     deletes.clear();
   }
 
   /**
-   * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may
-   * be wider so the repair scan sees the same cells (including extra-on-target rows between
-   * consecutive source chunks) that the verifier hashed.
-   *
-   * <p>{@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier
-   * recorded; threaded into the COUNTERS column on the resulting checkpoint row.
-   * {@link #verifyStartTime} is the timestamp captured when verification began for this
-   * chunk; reused as EXECUTION_START_TIME on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED
-   * checkpoint row so the row spans the full verify+repair lifecycle that overwrites the
-   * MISMATCHED row.
+   * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may be wider
+   * so the repair scan sees the same cells (including extra-on-target rows between consecutive
+   * source chunks) that the verifier hashed.
+   * <p>
+   * {@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier recorded;
+   * threaded into the COUNTERS column on the resulting checkpoint row. {@link #verifyStartTime} is
+   * the timestamp captured when verification began for this chunk; reused as EXECUTION_START_TIME
+   * on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED checkpoint row so the row spans the full
+   * verify+repair lifecycle that overwrites the MISMATCHED row.
    */
   public static final class ChunkRepairRequest {
     public final byte[] sourceStart;
@@ -661,8 +666,8 @@ public ChunkRepairRequest(byte[] sourceStart, byte[] sourceEnd, byte[] targetSta
   }
 
   /**
-   * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift
-   * counters, end-of-attempt timestamp, and the failure exception when status is
+   * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift counters,
+   * end-of-attempt timestamp, and the failure exception when status is
    * {@link Status#REPAIR_FAILED}. Status precedence (most-severe wins):
    * {@link Status#REPAIR_FAILED} &gt; {@link Status#UNREPAIRABLE} &gt; {@link Status#REPAIRED}.
    */
@@ -699,10 +704,10 @@ static ChunkRepairResult failed(DriftCounters drift, IOException failure) {
   }
 
   /**
-   * Per-chunk aggregate of six drift counters — three row-level
-   * ({@code rowsMissingOnTarget}, {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and
-   * three cell-level ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the
-   * caller maps fields onto MapReduce job counters and the checkpoint COUNTERS string.
+   * Per-chunk aggregate of six drift counters — three row-level ({@code rowsMissingOnTarget},
+   * {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and three cell-level
+   * ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the caller maps fields onto
+   * MapReduce job counters and the checkpoint COUNTERS string.
    */
   public static final class DriftCounters {
     public long rowsMissingOnTarget;
@@ -731,14 +736,14 @@ public String toLogString() {
   }
 
   /**
-   * Per-row snapshot of target's tombstones and Puts. Two queries:
-   * {@link #wouldShadow} (shadow detection) and {@link #targetPutTimestampsBetween}
-   * (hidden-version discovery). For examples of how callers use these, see the
-   * doc on {@link RowRepairBuffer#targetRowRecord}; for scan shape and time-range
-   * rationale, see {@link #load}.
+   * Per-row snapshot of target's tombstones and Puts. Two queries: {@link #wouldShadow} (shadow
+   * detection) and {@link #targetPutTimestampsBetween} (hidden-version discovery). For examples of
+   * how callers use these, see the doc on {@link RowRepairBuffer#targetRowRecord}; for scan shape
+   * and time-range rationale, see {@link #load}.
+   * <p>
+   * HBase has four tombstone subtypes; each is recorded into its own map because shadow scope
+   * differs:
    *
-   * <p>HBase has four tombstone subtypes; each is recorded into its own map because
-   * shadow scope differs:
    * <pre>
    *   Delete               shadows Put at (cf, q, ts == T) exactly
    *   DeleteColumn         shadows Puts at (cf, q, ts &lt;= T)
@@ -756,20 +761,18 @@ static final class TargetRowRecord {
 
     /**
      * Builds a {@link TargetRowRecord} from a single-row HBase scan.
-     *
-     * <p><b>raw=true + all-versions</b> are forced regardless of user flags so tombstones
-     * and max-versions-filtered older Puts (the two things this record exists to capture)
-     * are surfaced.
-     *
-     * <p><b>Time range {@code [fromTime, MAX_VALUE]}</b>:
+     * <p>
+     * <b>raw=true + all-versions</b> are forced regardless of user flags so tombstones and
+     * max-versions-filtered older Puts (the two things this record exists to capture) are surfaced.
+     * <p>
+     * <b>Time range {@code [fromTime, MAX_VALUE]}</b>:
      * <ul>
-     *   <li>Lower bound = {@code fromTime}: cells below the verify window can't affect
-     *       repair inside the window.</li>
-     *   <li>Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at
-     *       {@code ts >= toTime} can still shadow a Put we mirror at {@code ts} in window
-     *       during application reads, so we must see it. e.g. window
-     *       {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without the
-     *       wide upper bound we'd miss the 900 tombstone and write a doomed mirror.</li>
+     * <li>Lower bound = {@code fromTime}: cells below the verify window can't affect repair inside
+     * the window.</li>
+     * <li>Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at {@code ts >= toTime}
+     * can still shadow a Put we mirror at {@code ts} in window during application reads, so we must
+     * see it. e.g. window {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without
+     * the wide upper bound we'd miss the 900 tombstone and write a doomed mirror.</li>
      * </ul>
      */
     static TargetRowRecord load(byte[] rowKey, Table targetHTable, long fromTime)
@@ -806,9 +809,8 @@ void record(Cell cell) {
 
     /**
      * Records one tombstone into its per-subtype map for {@link #wouldShadow} to query.
-     * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to
-     * the max ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion})
-     * accumulate into a set.
+     * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to the max
+     * ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion}) accumulate into a set.
      */
     private void recordTombstone(Cell tombstone) {
       long ts = tombstone.getTimestamp();
@@ -859,9 +861,9 @@ boolean wouldShadow(Cell sourcePut) {
 
     /**
      * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than
-     * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find
-     * hidden (max-versions-filtered) target versions sitting between source's max ts and
-     * target's visible ts so they can be point-Deleted.
+     * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find hidden
+     * (max-versions-filtered) target versions sitting between source's max ts and target's visible
+     * ts so they can be point-Deleted.
      */
     Set<Long> targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive,
       long upperExclusive) {
@@ -904,8 +906,7 @@ public int hashCode() {
 
   /**
    * Per-row scratch buffer: lazily-built {@link Put}/{@link Delete} mutations, lazily-loaded
-   * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the
-   * merge.
+   * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the merge.
    */
   final class RowRepairBuffer {
     private final byte[] rowKey;
@@ -935,25 +936,27 @@ Delete delete() {
     /**
      * Returns the cached {@link TargetRowRecord} for this row, loading on first call via
      * {@link TargetRowRecord#load} (one raw all-versions scan, time range
-     * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the
-     * current row — so repeated cell-level lookups within the row pay one round-trip total.
-     *
-     * <p>Two consumers:
+     * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the current row —
+     * so repeated cell-level lookups within the row pay one round-trip total.
+     * <p>
+     * Two consumers:
+     * <p>
+     * <b>Shadow detection</b> — {@link #mirrorSourceCellUnlessShadowed} asks
+     * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes that
+     * target's existing tombstones would render invisible.
      *
-     * <p><b>Shadow detection</b> — {@link #mirrorSourceCellUnlessShadowed} asks
-     * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes
-     * that target's existing tombstones would render invisible.
      * <pre>
      *   target row state: DeleteColumn(NAME)@T=900   (covers ts &lt;= 900)
      *   source row state: Put(NAME, "alice")@T=500
      *   wouldShadow(srcPut@500) → true
      *   ⇒ skip mirror, mark row unrepairable; operator must major-compact target
      * </pre>
+     * <p>
+     * <b>Hidden-version discovery</b> — {@link #tombstoneTargetCell} asks
+     * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts sitting
+     * between source's max ts and target's visible ts, so each can be point-Deleted before they
+     * surface above source's mirror.
      *
-     * <p><b>Hidden-version discovery</b> — {@link #tombstoneTargetCell} asks
-     * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts
-     * sitting between source's max ts and target's visible ts, so each can be point-Deleted
-     * before they surface above source's mirror.
      * <pre>
      *   target row state (MAX_VERSIONS=3):
      *     Put(NAME, "carol")@T=900   visible
@@ -983,8 +986,8 @@ void flush(List<Put> pendingPuts, List<Delete> pendingDeletes) {
   }
 
   /**
-   * Cell-level drift counts produced by per-row diff. Three counters partition the cell
-   * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
+   * Cell-level drift counts produced by per-row diff. Three counters partition the cell differences
+   * into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
    */
   static final class CellDriftCounts {
     static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0);
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
index fcd61f07582..b1bce9f8023 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
@@ -138,11 +138,10 @@ public List<InputSplit> getSplits(JobContext context) throws IOException, Interr
 
   /**
    * Queries Sync checkpoint table for completed mapper regions.
-   *
-   * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered
-   *                 out as completed; MISMATCHED regions are re-entered as splits so their
-   *                 chunks can be repaired. When true (dry-run), all REGION rows regardless
-   *                 of status are treated as completed.
+   * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered out
+   *                 as completed; MISMATCHED regions are re-entered as splits so their chunks can
+   *                 be repaired. When true (dry-run), all REGION rows regardless of status are
+   *                 treated as completed.
    */
   private List<KeyRange> queryCompletedMapperRegions(Configuration conf, String tableName,
     String targetZkQuorum, Long fromTime, Long toTime, boolean isDryRun) throws SQLException {
@@ -150,8 +149,8 @@ private List<KeyRange> queryCompletedMapperRegions(Configuration conf, String ta
     List<KeyRange> completedRegions = new ArrayList<>();
     try (Connection conn = ConnectionUtil.getInputConnection(conf)) {
       PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn);
-      List<PhoenixSyncTableCheckpointOutputRow> completedRows = repository.getProcessedMapperRegions(
-        tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun);
+      List<PhoenixSyncTableCheckpointOutputRow> completedRows = repository
+        .getProcessedMapperRegions(tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun);
       for (PhoenixSyncTableCheckpointOutputRow row : completedRows) {
         KeyRange keyRange = KeyRange.getKeyRange(row.getStartRowKey(), row.getEndRowKey());
         completedRegions.add(keyRange);
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index aa19bcf1a89..7158d847778 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -231,8 +231,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
     long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
     long rowsMissingBefore = context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
     long rowsExtraBefore = context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
-    long rowsDifferentBefore =
-      context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
+    long rowsDifferentBefore = context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
     long rowsCannotRepairBefore = context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
     long cellsMissingBefore = context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
     long cellsExtraBefore = context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
@@ -299,11 +298,11 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
    * @param regionEndTime      Region processing end time
    * @param verifiedChunks     Number of verified chunks
    * @param mismatchedChunks   Number of mismatched chunks
-   * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR;
-   *                           if > 0 (and no repair-failed chunks) the region rolls up to
-   *                           UNREPAIRABLE, signalling operator intervention is needed
-   * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the
-   *                           region rolls up to REPAIR_FAILED (highest precedence)
+   * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR; if > 0
+   *                           (and no repair-failed chunks) the region rolls up to UNREPAIRABLE,
+   *                           signalling operator intervention is needed
+   * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the region
+   *                           rolls up to REPAIR_FAILED (highest precedence)
    * @param counters           Formatted counter string
    * @param context            Mapper context
    */
@@ -314,14 +313,14 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
 
     // Region rolls up its child chunks' outcomes into one of five statuses, in precedence
     // order (most-severe wins):
-    //   REPAIR_FAILED — at least one chunk threw during merge-scan or flush.
-    //   UNREPAIRABLE  — repair completed but at least one chunk has rows that cannot be
-    //                   repaired (target tombstones shadow source Puts, or target row is
-    //                   entirely tombstones). Operator action (typically major compaction
-    //                   on target) needed before a re-run can converge.
-    //   MISMATCHED    — drift was detected but repair was not attempted (dry-run mode).
-    //   REPAIRED      — drift was detected and every chunk's repair fully succeeded.
-    //   VERIFIED      — every chunk matched; no drift in this region.
+    // REPAIR_FAILED — at least one chunk threw during merge-scan or flush.
+    // UNREPAIRABLE — repair completed but at least one chunk has rows that cannot be
+    // repaired (target tombstones shadow source Puts, or target row is
+    // entirely tombstones). Operator action (typically major compaction
+    // on target) needed before a re-run can converge.
+    // MISMATCHED — drift was detected but repair was not attempted (dry-run mode).
+    // REPAIRED — drift was detected and every chunk's repair fully succeeded.
+    // VERIFIED — every chunk matched; no drift in this region.
     // The resume filter on re-invocation skips VERIFIED and REPAIRED — UNREPAIRABLE,
     // MISMATCHED, and REPAIR_FAILED chunks are re-entered as gaps and re-attempted.
     PhoenixSyncTableCheckpointOutputRow.Status status;
@@ -445,8 +444,7 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
         // Target scan boundary: covers extra-on-target rows that fall before the first
         // source chunk, between consecutive source chunks, or after the last. Both verify
         // and repair use the same range so repair sees the same cells the verifier hashed.
-        byte[] targetStart =
-          previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey;
+        byte[] targetStart = previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey;
         byte[] targetEnd = isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey;
         boolean targetEndInclusive = !isLastChunkOfRegion;
         ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection, targetStart,
@@ -473,10 +471,10 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
             .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L, 0L);
           handleVerifiedChunk(sourceChunk, context, counters);
         } else {
-          ChunkRepairRequest request = new ChunkRepairRequest(sourceChunk.startKey,
-            sourceChunk.endKey, targetStart, targetEnd, isTargetStartKeyInclusive,
-            targetEndInclusive, sourceChunk.rowCount, targetChunk.rowCount,
-            sourceChunk.executionStartTime, isDryRun);
+          ChunkRepairRequest request =
+            new ChunkRepairRequest(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd,
+              isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount,
+              targetChunk.rowCount, sourceChunk.executionStartTime, isDryRun);
           ChunkRepairResult result = chunkRepairer.repair(request, context::progress);
           if (isDryRun) {
             // Dry-run: write CHUNK/MISMATCHED with real row-level drift in COUNTERS so the
@@ -805,13 +803,13 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
 
   /**
    * Translates a {@link ChunkRepairResult} into MapReduce side effects: bumps the cell/row drift
-   * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED),
-   * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
+   * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED), and
+   * writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
    * successful checkpoint write (audit row and counter stay consistent).
-   *
-   * <p>{@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and
-   * target hashes differed — the drift-detected signal — regardless of whether repair ran.
-   * Without this, repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks
+   * <p>
+   * {@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and target
+   * hashes differed — the drift-detected signal — regardless of whether repair ran. Without this,
+   * repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks
    * == 0} for fully-repaired regions and roll them up as VERIFIED instead of REPAIRED.
    */
   private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request,
@@ -859,20 +857,20 @@ private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest reque
   }
 
   /**
-   * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome
-   * counter is bumped only after a successful checkpoint write, so on-disk audit and in-memory
-   * counters stay in sync.
-   *
-   * <p>If the checkpoint write throws {@link SQLException}, the failure is logged and the
-   * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is
-   * NOT propagated. Reasons:
+   * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome counter
+   * is bumped only after a successful checkpoint write, so on-disk audit and in-memory counters
+   * stay in sync.
+   * <p>
+   * If the checkpoint write throws {@link SQLException}, the failure is logged and the
+   * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is NOT
+   * propagated. Reasons:
    * <ul>
-   *   <li>Target's data was already mutated during the merge — failing the mapper task
-   *       wouldn't roll that back, and would trigger a MapReduce retry that re-verifies
-   *       against already-mutated target state (audit trail loss).</li>
-   *   <li>Other chunks in this mapper still deserve a chance to be processed.</li>
-   *   <li>The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to
-   *       operators and drives a non-zero exit at job end.</li>
+   * <li>Target's data was already mutated during the merge — failing the mapper task wouldn't roll
+   * that back, and would trigger a MapReduce retry that re-verifies against already-mutated target
+   * state (audit trail loss).</li>
+   * <li>Other chunks in this mapper still deserve a chance to be processed.</li>
+   * <li>The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to operators and
+   * drives a non-zero exit at job end.</li>
    * </ul>
    */
   private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row,
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
index b939a064472..f46c1b34e36 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java
@@ -415,34 +415,34 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue();
       long repairedMappers =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue();
-      long unrepairableMappers = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
-      long repairFailedMappers = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
+      long unrepairableMappers =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
+      long repairFailedMappers =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
       long chunksVerified =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED).getValue();
       long chunksMismatched =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue();
       long chunksRepaired =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue();
-      long chunksUnrepairable = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
-      long chunksRepairFailed = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
+      long chunksUnrepairable =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
+      long chunksRepairFailed =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
       long sourceRowsProcessed =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue();
       long targetRowsProcessed =
         counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue();
-      long rowsMissingOnTarget = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
-      long rowsExtraOnTarget = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
-      long rowsCannotRepair = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue();
+      long rowsMissingOnTarget =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
+      long rowsExtraOnTarget =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+      long rowsCannotRepair =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue();
       long cellsMissingOnTarget = counters
         .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
-      long cellsExtraOnTarget = counters
-        .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+      long cellsExtraOnTarget =
+        counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
       long cellsDifferentOnTarget = counters
         .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
       long checkpointWriteFailed = counters
@@ -457,21 +457,19 @@ private boolean submitPhoenixSyncTableJob() throws Exception {
           + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n"
           + "Rows Cannot Repair: {}, \n"
           + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n"
-          + "Cells Different On Target: {}, \n"
-          + "Checkpoint Write Failed: {}",
+          + "Cells Different On Target: {}, \n" + "Checkpoint Write Failed: {}",
         taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, unrepairableMappers,
         repairFailedMappers, chunksVerified, chunksMismatched, chunksRepaired, chunksUnrepairable,
         chunksRepairFailed, sourceRowsProcessed, targetRowsProcessed, rowsMissingOnTarget,
         rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, cellsExtraOnTarget,
         cellsDifferentOnTarget, checkpointWriteFailed);
       if (checkpointWriteFailed > 0) {
-        LOGGER.error(
-          "{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row "
+        LOGGER
+          .error("{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row "
             + "for table {}. Target data was mutated but the audit trail is incomplete. "
             + "Investigate the checkpoint table state before relying on it; affected chunks "
             + "will be re-attempted on the next invocation since they have no terminal "
-            + "checkpoint status.",
-          checkpointWriteFailed, qTable);
+            + "checkpoint status.", checkpointWriteFailed, qTable);
         return false;
       }
     } else {
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index ce2adc02505..19863d20820 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -91,7 +91,7 @@
 import org.slf4j.LoggerFactory;
 
 @Category(NeedsOwnMiniClusterTest.class)
-@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve"})
+@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve" })
 public class PhoenixSyncTableToolIT {
   private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableToolIT.class);
 
@@ -522,10 +522,8 @@ public void testSyncMultiTenantSaltedTableDifferences() throws Exception {
     assertRepairChunkAndMapperCounters(repairCounters2, 3, 0, 0, 2, 0, 0);
 
     // Pin checkpoint state per tenant.
-    validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1,
-      repairCounters1);
-    validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2,
-      repairCounters2);
+    validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1, repairCounters1);
+    validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2, repairCounters2);
 
     // After repair, TENANT_002's data should be identical between source and target.
     withTenantConnections(tenantIds[1],
@@ -615,8 +613,10 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce
       counters2.chunksMismatched);
 
     // (Remaining chunks from checkpoint) + (Second run) should equal (First run)
-    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalSourceRows =
+      setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows =
+      setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
     long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified;
 
     assertEquals(
@@ -636,10 +636,11 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce
     // Splits introduced between runs widen the second-run chunk count beyond the deleted
     // 75% of the first run's chunks (extra region boundaries → extra chunks). So we relax
     // the strict equality to >=. The row-count invariant above is unaffected by splits.
-    assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. "
-      + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: "
-      + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): "
-      + setup.firstRunCounters.chunksVerified,
+    assertTrue(
+      "Remaining + Second run verified chunks should be >= first run verified chunks. "
+        + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: "
+        + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): "
+        + setup.firstRunCounters.chunksVerified,
       totalVerifiedChunks >= setup.firstRunCounters.chunksVerified);
 
     // Verify checkpoint table has entries for the reprocessed regions
@@ -686,8 +687,10 @@ public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception
     SyncCountersResult counters2 = getSyncCounters(job2);
 
     // (Remaining chunks) + (Second run) should equal (First run) for row counts
-    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalSourceRows =
+      setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows =
+      setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
 
     assertEquals("Remaining + rerun source rows should equal first run",
       counters1.sourceRowsProcessed, totalSourceRows);
@@ -747,23 +750,21 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce
       String.valueOf(toTime));
     SyncCountersResult counters2 = getSyncCounters(job2);
 
-    long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
-    long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
+    long totalSourceRows =
+      setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed;
+    long totalTargetRows =
+      setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed;
     long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified;
 
-    assertEquals(
-      "Remaining + Second run source rows should equal first run source rows. " + "Remaining: "
-        + setup.remainingCounters.sourceRowsProcessed + ", Second run: "
-        + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: "
-        + counters1.sourceRowsProcessed,
-      counters1.sourceRowsProcessed, totalSourceRows);
+    assertEquals("Remaining + Second run source rows should equal first run source rows. "
+      + "Remaining: " + setup.remainingCounters.sourceRowsProcessed + ", Second run: "
+      + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: "
+      + counters1.sourceRowsProcessed, counters1.sourceRowsProcessed, totalSourceRows);
 
-    assertEquals(
-      "Remaining + Second run target rows should equal first run target rows. " + "Remaining: "
-        + setup.remainingCounters.targetRowsProcessed + ", Second run: "
-        + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: "
-        + counters1.targetRowsProcessed,
-      counters1.targetRowsProcessed, totalTargetRows);
+    assertEquals("Remaining + Second run target rows should equal first run target rows. "
+      + "Remaining: " + setup.remainingCounters.targetRowsProcessed + ", Second run: "
+      + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: "
+      + counters1.targetRowsProcessed, counters1.targetRowsProcessed, totalTargetRows);
 
     // Region merges between the two runs change mapper region boundaries, so the resume
     // filter sees stale chunks that don't align to the new mapper's range and reprocesses
@@ -1103,19 +1104,19 @@ public void testSyncTableWithConcurrentRegionSplits() throws Exception {
   }
 
   /**
-   * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during
-   * the dry-run pass; this exercises the repair pass against splitting target regions, which is
-   * the production reality that exercises {@code flushRepairMutations}'s
+   * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during the
+   * dry-run pass; this exercises the repair pass against splitting target regions, which is the
+   * production reality that exercises {@code flushRepairMutations}'s
    * {@code NotServingRegionException} path → {@code firstFailureIdx} → {@code REPAIR_FAILED}.
-   *
-   * <p>Convergence strategy:
+   * <p>
+   * Convergence strategy:
    * <ol>
-   *   <li>Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.</li>
-   *   <li>Start concurrent splits on the target cluster, then run the repair pass. Some chunks
-   *       may land in {@code REPAIR_FAILED} if a flush hits a region in transition — the
-   *       resume filter re-enters those chunks on the next pass.</li>
-   *   <li>Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED.
-   *       {@code verifyDataIdentical} must succeed.</li>
+   * <li>Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.</li>
+   * <li>Start concurrent splits on the target cluster, then run the repair pass. Some chunks may
+   * land in {@code REPAIR_FAILED} if a flush hits a region in transition — the resume filter
+   * re-enters those chunks on the next pass.</li>
+   * <li>Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED.
+   * {@code verifyDataIdentical} must succeed.</li>
    * </ol>
    */
   @Test
@@ -1151,9 +1152,9 @@ public void testRepairWithConcurrentTargetSplits() throws Exception {
   }
 
   /**
-   * Guards against a regression where repair claims REPAIRED but doesn't actually converge.
-   * Repair the divergent table, clean the checkpoint, then re-run dry-run + repair on the now
-   * converged tables — both passes must be no-ops.
+   * Guards against a regression where repair claims REPAIRED but doesn't actually converge. Repair
+   * the divergent table, clean the checkpoint, then re-run dry-run + repair on the now converged
+   * tables — both passes must be no-ops.
    */
   @Test
   public void testRepairIsIdempotent() throws Exception {
@@ -1173,12 +1174,10 @@ public void testRepairIsIdempotent() throws Exception {
     assertTrue("First dry-run should detect mismatched chunks",
       firstDryRunCounters.chunksMismatched >= 1);
     SyncCountersResult firstRepairCounters = getSyncCounters(firstRun.repairJob);
-    assertTrue("First repair should mark chunks REPAIRED",
-      firstRepairCounters.chunksRepaired >= 1);
+    assertTrue("First repair should mark chunks REPAIRED", firstRepairCounters.chunksRepaired >= 1);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters,
-      firstRepairCounters);
+    validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters, firstRepairCounters);
 
     // Clean the checkpoint so the second dry-run scans the full layout instead of resuming
     // from VERIFIED chunks.
@@ -1197,8 +1196,7 @@ public void testRepairIsIdempotent() throws Exception {
     assertRowDriftCounters(secondRepairCounters, 0, 0, 0, 0);
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-    validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters,
-      secondRepairCounters);
+    validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters, secondRepairCounters);
   }
 
   /**
@@ -1223,13 +1221,12 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception {
     // Tombstones with no underlying Puts — row surfaces under raw scan but every cell is a Delete.
     byte[] rowKey = integerRowKey(rowId);
     writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs);
-    writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE",
-      tombstoneTs);
+    writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE", tombstoneTs);
     writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs);
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)),
-      "--raw-scan");
+    RepairRunResult result =
+      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+        String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -1306,10 +1303,9 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception {
     long toTime = System.currentTimeMillis();
 
     // Run merges on source/target concurrently with the sync.
-    Runnable mergeJoiner = startConcurrentRegionWork(
-      () -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6),
-      () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6),
-      "merges");
+    Runnable mergeJoiner =
+      startConcurrentRegionWork(() -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6),
+        () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6), "merges");
 
     // Run sync tool while merges are happening
     Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
@@ -1511,16 +1507,17 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception {
     String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
       targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
 
-    assertSyncToolFails(args, String.format(
-      "Table %s does not exist on target cluster, mapper map() should fail during target scan",
-      uniqueTableName));
+    assertSyncToolFails(args,
+      String.format(
+        "Table %s does not exist on target cluster, mapper map() should fail during target scan",
+        uniqueTableName));
   }
 
   /**
-   * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the
-   * tool must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator
-   * can investigate the audit gap. Failure is injected via a RegionObserver on the
-   * checkpoint table that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}.
+   * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the tool
+   * must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator can
+   * investigate the audit gap. Failure is injected via a RegionObserver on the checkpoint table
+   * that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}.
    */
   @Test
   public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
@@ -1537,8 +1534,8 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
 
     // Run once first so the checkpoint table exists; we can only attach a coprocessor to a
     // table that's already been created.
-    Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
+    Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time", String.valueOf(fromTime),
+      "--to-time", String.valueOf(toTime));
     assertTrue(initial.isSuccessful());
 
     String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME;
@@ -1547,10 +1544,9 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
       // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail.
       PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
       tool.setConf(sourceClusterConf());
-      int exitCode = tool.run(
-        new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum,
-          "--run-foreground", "--chunk-size", "1", "--from-time", String.valueOf(fromTime),
-          "--to-time", String.valueOf(toTime) });
+      int exitCode = tool.run(new String[] { "--table-name", uniqueTableName, "--target-cluster",
+        targetZkQuorum, "--run-foreground", "--chunk-size", "1", "--from-time",
+        String.valueOf(fromTime), "--to-time", String.valueOf(toTime) });
 
       assertNotEquals("Tool must surface non-zero exit when checkpoint writes fail", 0, exitCode);
       // Note: CHECKPOINT_WRITE_FAILED only increments on the chunk-outcome write path
@@ -1576,8 +1572,8 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
   /**
    * Repair-batch flush failure: a RegionObserver on the target data table fails every
    * {@code preBatchMutate}, so {@code flushRepairMutations} throws and the chunk rolls up
-   * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a
-   * fresh run converges.
+   * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a fresh run
+   * converges.
    */
   @Test
   public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception {
@@ -1603,8 +1599,8 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception {
       List<PhoenixSyncTableCheckpointOutputRow> entries =
         queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null);
       assertTrue("At least one REPAIR_FAILED checkpoint row must persist",
-        countCheckpointsByStatus(entries,
-          PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) >= 1);
+        countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)
+            >= 1);
     } finally {
       TestUtil.removeCoprocessor(targetConnection, uniqueTableName,
         RepairBatchFailingObserver.class);
@@ -1690,14 +1686,15 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
   }
 
   /**
-   * <p>Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code
+   * <p>
+   * Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code
    * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and
    * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is
-   * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above
-   * source's mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1.
-   *
-   * <p>Without this test, a regression that "fixes" only the visible cell (case 2) would leave
-   * target reading "bob" after a successful-looking repair pass.
+   * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above source's
+   * mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1.
+   * <p>
+   * Without this test, a regression that "fixes" only the visible cell (case 2) would leave target
+   * reading "bob" after a successful-looking repair pass.
    */
   @Test
   public void testRepairUnwindsHiddenTargetVersions() throws Exception {
@@ -1723,9 +1720,9 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     assertTargetName(uniqueTableName, rowId, "carol");
 
     // --read-all-versions so verifier and repairer both see the hidden version.
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
-      "--read-all-versions");
+    RepairRunResult result =
+      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+        String.valueOf(System.currentTimeMillis()), "--read-all-versions");
 
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
@@ -1779,9 +1776,9 @@ public void testRepairPartialShadowWithinRow() throws Exception {
     writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       shadowTombstoneTs);
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time",
-      String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan");
+    RepairRunResult result =
+      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+        String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan");
 
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
@@ -1800,11 +1797,10 @@ public void testRepairPartialShadowWithinRow() throws Exception {
   }
 
   /**
-   * A chunk that lands in UNREPAIRABLE due to a shadowing target
-   * tombstone must recover after the operator runs a major compaction on target and re-runs
-   * sync. Pass 1 reproduces the partial-shadow setup and asserts UNREPAIRABLE; major compact
-   * on target reaps the standalone DeleteColumn; pass 2 mirrors the source Put cleanly and
-   * converges.
+   * A chunk that lands in UNREPAIRABLE due to a shadowing target tombstone must recover after the
+   * operator runs a major compaction on target and re-runs sync. Pass 1 reproduces the
+   * partial-shadow setup and asserts UNREPAIRABLE; major compact on target reaps the standalone
+   * DeleteColumn; pass 2 mirrors the source Put cleanly and converges.
    */
   @Test
   public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Exception {
@@ -1866,8 +1862,8 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except
       queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null),
       PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1);
 
-    try (PreparedStatement ps = targetConnection.prepareStatement(
-      "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
       ps.setInt(1, rowId);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue(rs.next());
@@ -1879,8 +1875,8 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except
   }
 
   /**
-   * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the
-   * source cell through the {@code cellMissing++} branch.
+   * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the source
+   * cell through the {@code cellMissing++} branch.
    */
   @Test
   public void testRepairCellMissingOnTarget() throws Exception {
@@ -1890,14 +1886,14 @@ public void testRepairCellMissingOnTarget() throws Exception {
     final long ts = base + 1L;
 
     // Source: NAME and NAME_VALUE.
-    upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName
-      + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+    upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName + " (ID, NAME, NAME_VALUE) VALUES ("
+      + rowId + ", 'alice', 99)");
     // Target: only NAME_VALUE — NAME is missing.
     upsertAtScnTarget(ts,
       "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -1930,8 +1926,8 @@ public void testRepairCellExtraOnTarget() throws Exception {
     writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts,
       Bytes.toBytes("bob"));
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -1946,8 +1942,8 @@ public void testRepairCellExtraOnTarget() throws Exception {
 
   /**
    * {@code tombstoneTargetCell} Case 2 ({@code sourceMaxTs >= ts}): target has a stray older
-   * version of a column that source also has at a higher ts. Repair must point-Delete the
-   * stray version only — no hidden-version sweep — so the visible NAME stays at source's value.
+   * version of a column that source also has at a higher ts. Repair must point-Delete the stray
+   * version only — no hidden-version sweep — so the visible NAME stays at source's value.
    */
   @Test
   public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exception {
@@ -1964,9 +1960,8 @@ public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exceptio
     writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", olderTs,
       Bytes.toBytes("old"));
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan",
-      "--read-all-versions");
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan", "--read-all-versions");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2005,8 +2000,8 @@ public void testRepairCellDifferentValue() throws Exception {
     upsertAtScnTarget(ts,
       "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2020,10 +2015,10 @@ public void testRepairCellDifferentValue() throws Exception {
   }
 
   /**
-   * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target)
-   * and {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to
-   * the correct family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep
-   * the two families' cells from clobbering each other.
+   * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target) and
+   * {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to the correct
+   * family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep the two families'
+   * cells from clobbering each other.
    */
   @Test
   public void testRepairAcrossMultipleColumnFamilies() throws Exception {
@@ -2039,8 +2034,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception {
     upsertAtScnTarget(ts,
       "UPSERT INTO " + uniqueTableName + " (ID, CF2.B) VALUES (" + rowId + ", 'b-tgt')");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(ts)));
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(ts)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2050,8 +2045,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception {
     assertTrue("CF2.B must tombstone as extra", repairCounters.cellsExtraOnTarget >= 1);
     assertEquals("No row should be unrepairable", 0, repairCounters.rowsCannotRepair);
 
-    try (PreparedStatement ps = targetConnection.prepareStatement(
-      "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) {
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) {
       ps.setInt(1, rowId);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue(rs.next());
@@ -2069,9 +2064,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception {
 
   private List<String> collectMultiCfRows(Connection conn, String tableName) throws SQLException {
     List<String> rows = new ArrayList<>();
-    try (Statement stmt = conn.createStatement();
-      ResultSet rs = stmt.executeQuery(
-        "SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) {
+    try (Statement stmt = conn.createStatement(); ResultSet rs =
+      stmt.executeQuery("SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) {
       while (rs.next()) {
         rows.add(rs.getInt(1) + "|" + rs.getString(2) + "|" + rs.getString(3));
       }
@@ -2082,8 +2076,8 @@ private List<String> collectMultiCfRows(Connection conn, String tableName) throw
   /**
    * Tombstone planted strictly above {@code --to-time}: the diff scan can't see it, but
    * {@code TargetRowRecord.load} (which uses {@code [fromTime, MAX_VALUE]}) does — so
-   * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors,
-   * giving the row visible existence; NAME stays null.
+   * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors, giving
+   * the row visible existence; NAME stays null.
    */
   @Test
   public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
@@ -2124,8 +2118,8 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception {
   /**
    * Shadow via {@code DeleteFamily}: tombstone covers every qualifier in cf {@code "0"} at
    * {@code ts <= tombstoneTs}, planted strictly above {@code --to-time} so only
-   * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls
-   * up unrepairable.
+   * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls up
+   * unrepairable.
    */
   @Test
   public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
@@ -2163,10 +2157,10 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception {
   }
 
   /**
-   * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"}
-   * at exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as
-   * empty (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces
-   * the tombstone for the {@code wouldShadow} check.
+   * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"} at
+   * exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as empty
+   * (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces the
+   * tombstone for the {@code wouldShadow} check.
    */
   @Test
   public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
@@ -2184,8 +2178,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
       sourceTs);
 
     RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time",
-      String.valueOf(waitUntilWallClockPasses(sourceTs)));
+      String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(sourceTs)));
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2201,10 +2194,10 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception {
   }
 
   /**
-   * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible)
-   * above source's single Put. The repairer must point-Delete the visible Put AND every hidden
-   * Put in {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a
-   * hidden Put above source's mirror.
+   * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible) above
+   * source's single Put. The repairer must point-Delete the visible Put AND every hidden Put in
+   * {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a hidden Put
+   * above source's mirror.
    */
   @Test
   public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
@@ -2232,9 +2225,9 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
 
     assertTargetName(uniqueTableName, rowId, "dave");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(targetT3)),
-      "--read-all-versions");
+    RepairRunResult result =
+      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+        String.valueOf(waitUntilWallClockPasses(targetT3)), "--read-all-versions");
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
@@ -2248,8 +2241,8 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
     // Lower bound on delete markers: the unwind iterates over shrinking intervals so T1 may
     // appear in both T3's and T2's hidden sets — distinct-marker count can exceed 3.
     RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
-    assertTrue("Expected at least 3 NAME delete markers on target, saw "
-      + summary.totalDeletes(), summary.totalDeletes() >= 3);
+    assertTrue("Expected at least 3 NAME delete markers on target, saw " + summary.totalDeletes(),
+      summary.totalDeletes() >= 3);
     int namePutAtSourceTs = 0;
     for (Long ts : summary.putTimestamps) {
       if (ts == sourceTs) namePutAtSourceTs++;
@@ -2262,9 +2255,9 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception {
 
   /**
    * Same row on both sides via a matching NAME_VALUE; target also carries a raw point-Delete on
-   * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra
-   * cell, but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up
-   * unrepairable with no cell counter ticks.
+   * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra cell,
+   * but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up unrepairable with no
+   * cell counter ticks.
    */
   @Test
   public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
@@ -2283,8 +2276,8 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
     writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME",
       tombstoneTs);
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2297,8 +2290,8 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception {
 
   /**
    * Mid-row repair flush: 8 missing source-only rows through {@code repairBatchSize=2} so
-   * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a
-   * batch boundary.
+   * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a batch
+   * boundary.
    */
   @Test
   public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
@@ -2341,10 +2334,9 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
   }
 
   /**
-   * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2,
-   * Put@T3; target has only Put@T1. Repair must mirror the missing tombstone (via
-   * {@code mirrorSourceCell} routing Delete cells through {@code Delete#add}) and the missing
-   * newer Put.
+   * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2, Put@T3;
+   * target has only Put@T1. Repair must mirror the missing tombstone (via {@code mirrorSourceCell}
+   * routing Delete cells through {@code Delete#add}) and the missing newer Put.
    */
   @Test
   public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception {
@@ -2367,9 +2359,9 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio
     upsertAtScnTarget(t1,
       "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
-      String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()),
-      "--raw-scan", "--read-all-versions");
+    RepairRunResult result =
+      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
+        String.valueOf(System.currentTimeMillis()), "--raw-scan", "--read-all-versions");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2413,8 +2405,8 @@ public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws
     upsertAtScnTarget(t2,
       "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')");
 
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions");
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2429,8 +2421,7 @@ public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws
     RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId);
     assertTrue("Mirrored older NAME Put@T1 must land at original timestamp",
       summary.putTimestamps.contains(t1));
-    assertTrue("Existing newer NAME Put@T2 must remain",
-      summary.putTimestamps.contains(t2));
+    assertTrue("Existing newer NAME Put@T2 must remain", summary.putTimestamps.contains(t2));
 
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
     validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c);
@@ -2446,8 +2437,8 @@ public void testRepairMirrorsSourcePointDeleteUnderRawScan() throws Exception {
   }
 
   /**
-   * Source-only DeleteFamily must mirror onto target → whole family at the target row is
-   * shadowed and the row drops out of Phoenix view.
+   * Source-only DeleteFamily must mirror onto target → whole family at the target row is shadowed
+   * and the row drops out of Phoenix view.
    */
   @Test
   public void testRepairMirrorsSourceDeleteFamilyUnderRawScan() throws Exception {
@@ -2463,81 +2454,11 @@ public void testRepairMirrorsSourceDeleteFamilyVersionUnderRawScan() throws Exce
     runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY_VERSION);
   }
 
-  private enum SourceTombstone {
-    POINT_DELETE,
-    DELETE_FAMILY,
-    DELETE_FAMILY_VERSION
-  }
-
-  private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception {
-    final int rowId = 5;
-    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
-    final long ts = base + 1L;
-    // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs.
-    final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts;
-
-    for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) {
-      try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
-        scn.createStatement().execute("UPSERT INTO " + uniqueTableName
-          + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
-        scn.commit();
-      }
-    }
-
-    byte[] rk = integerRowKey(rowId);
-    switch (subtype) {
-      case POINT_DELETE:
-        writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs);
-        break;
-      case DELETE_FAMILY:
-        writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
-        break;
-      case DELETE_FAMILY_VERSION:
-        writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
-        break;
-      default:
-        throw new IllegalStateException("unhandled subtype: " + subtype);
-    }
-
-    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0",
-      "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
-    assertTrue(result.dryRunJob.isSuccessful());
-    assertTrue(result.repairJob.isSuccessful());
-
-    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
-    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
-    // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring
-    // also covers the empty-key sentinel cell, hence the >= 1 assertion shape.
-    assertTrue("source tombstone must mirror as a missing cell on target",
-      repairCounters.cellsMissingOnTarget >= 1);
-    assertTrue(repairCounters.mappersRepaired >= 1);
-    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
-
-    try (PreparedStatement ps = targetConnection.prepareStatement(
-      "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
-      ps.setInt(1, rowId);
-      try (ResultSet rs = ps.executeQuery()) {
-        if (subtype == SourceTombstone.POINT_DELETE) {
-          assertTrue(rs.next());
-          assertNull(rs.getString(1));
-          assertEquals(99L, rs.getLong(2));
-        } else {
-          // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row.
-          assertFalse("row should not be visible after family-wide tombstone mirror",
-            rs.next());
-        }
-      }
-    }
-
-    // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view.
-    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
-  }
-
   /**
-   * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only
-   * rows in the same chunk, so most flushes straddle a Put/Delete boundary on
-   * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops
-   * at the batch boundary.
+   * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only rows
+   * in the same chunk, so most flushes straddle a Put/Delete boundary on
+   * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops at
+   * the batch boundary.
    */
   @Test
   public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
@@ -2849,55 +2770,29 @@ public void testSyncTableWithSplitCoalescing() throws Exception {
   }
 
   /**
-   * Verifies that the sync job completes successfully when {@code endTime} (--to-time) is older
-   * than {@code phoenix.max.lookback.age.seconds}.
-   *
-   * <p>Root cause without fix: {@link PhoenixSyncTableTool} sets {@code CURRENT_SCN_VALUE =
-   * endTime} in the MR job configuration. During split generation, {@code
-   * PhoenixInputFormat.getQueryPlan()} creates a Phoenix connection with that SCN. {@code
-   * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is
-   * older than {@code phoenix.max.lookback.age.seconds}.
-   *
-   * <p>Fix: {@link PhoenixSyncTableInputFormat} overrides the parent to strip {@code
-   * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code
-   * verifySCN()} returns early (SCN == null), so no exception is thrown.
-   *
-   * <p>Data access correctness: The mapper uses raw HBase {@code Scan.setTimeRange(fromTime,
-   * toTime)}, which does NOT go through {@code QueryCompiler.compile()} or {@code verifySCN()}, so
-   * data within [fromTime, toTime] is always accessible regardless of max lookback age.
+   * Sync must succeed when {@code --to-time} is older than
+   * {@code phoenix.max.lookback.age.seconds}. {@link PhoenixSyncTableInputFormat} strips
+   * {@code CURRENT_SCN_VALUE} during split generation so {@code QueryCompiler.verifySCN()} doesn't
+   * throw ERROR 538; the mapper's raw HBase scan is unaffected by SCN/lookback and reads all data
+   * in {@code [fromTime, toTime]}.
    */
   @Test
   public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exception {
-    // Setup: create tables on both clusters and replicate 10 rows
     createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName);
     insertTestData(sourceConnection, uniqueTableName, 1, 10);
     waitForReplication(targetConnection, uniqueTableName, 10);
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
 
-    // Capture toTime BEFORE the lookback window will expire
     long toTime = System.currentTimeMillis();
 
-    // Configure a short max lookback age (5 seconds) in the MR job configuration.
-    // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from
-    // conn.getQueryServices().getConfiguration(), which is the client-side MR conf.
     long maxLookbackAgeSeconds = 5;
     Configuration conf = sourceClusterConf();
     conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY,
       maxLookbackAgeSeconds);
 
-    // Wait until toTime is older than the lookback age.
-    // After this sleep: (now - maxLookbackAgeMillis) > toTime  → verifySCN would throw ERROR 538
+    // Sleep until toTime is older than the lookback age — without the fix, verifySCN throws 538.
     Thread.sleep((maxLookbackAgeSeconds + 2) * 1000L);
 
-    // Run the sync tool with the now-stale toTime.
-    // Without PhoenixSyncTableInputFormat.getQueryPlan() override:
-    //   getSplits() → PhoenixInputFormat.getQueryPlan() sets SCN=toTime on Phoenix connection
-    //   → QueryCompiler.verifySCN() → ERROR 538 (toTime older than maxLookbackAge)
-    // With the fix:
-    //   getSplits() → overridden getQueryPlan() strips CURRENT_SCN_VALUE from conf copy
-    //   → verifySCN() sees scn == null → returns early → no exception thrown
-    // The mapper still uses raw HBase Scan.setTimeRange(0, toTime), bypassing verifySCN entirely,
-    // so all 10 rows within [0, toTime] are accessible and compared correctly.
     Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--from-time", "0", "--to-time",
       String.valueOf(toTime));
 
@@ -2905,17 +2800,83 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc
       "Sync job should complete successfully even when endTime is older than maxLookbackAge",
       job.isSuccessful());
 
-    // Verify the mapper processed all 10 rows via raw HBase scan (bypasses verifySCN)
     SyncCountersResult counters = getSyncCounters(job);
     validateSyncCounters(counters, 10, 10, 10, 0);
     validateMapperCounters(counters, 4, 0);
 
-    // Run was non-dry-run with no drift; repair flow is a no-op and target should match source
-    // even though toTime is older than max lookback age.
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
     validateCheckpointEntries(uniqueTableName, null, null, counters);
   }
 
+  private enum SourceTombstone {
+    POINT_DELETE,
+    DELETE_FAMILY,
+    DELETE_FAMILY_VERSION
+  }
+
+  private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception {
+    final int rowId = 5;
+    long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7");
+    final long ts = base + 1L;
+    // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs.
+    final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts;
+
+    for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) {
+      try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
+        scn.createStatement().execute("UPSERT INTO " + uniqueTableName
+          + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)");
+        scn.commit();
+      }
+    }
+
+    byte[] rk = integerRowKey(rowId);
+    switch (subtype) {
+      case POINT_DELETE:
+        writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs);
+        break;
+      case DELETE_FAMILY:
+        writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
+        break;
+      case DELETE_FAMILY_VERSION:
+        writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs);
+        break;
+      default:
+        throw new IllegalStateException("unhandled subtype: " + subtype);
+    }
+
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan");
+    assertTrue(result.dryRunJob.isSuccessful());
+    assertTrue(result.repairJob.isSuccessful());
+
+    SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
+    SyncCountersResult repairCounters = getSyncCounters(result.repairJob);
+    // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring
+    // also covers the empty-key sentinel cell, hence the >= 1 assertion shape.
+    assertTrue("source tombstone must mirror as a missing cell on target",
+      repairCounters.cellsMissingOnTarget >= 1);
+    assertTrue(repairCounters.mappersRepaired >= 1);
+    validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters);
+
+    try (PreparedStatement ps = targetConnection
+      .prepareStatement("SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) {
+      ps.setInt(1, rowId);
+      try (ResultSet rs = ps.executeQuery()) {
+        if (subtype == SourceTombstone.POINT_DELETE) {
+          assertTrue(rs.next());
+          assertNull(rs.getString(1));
+          assertEquals(99L, rs.getLong(2));
+        } else {
+          // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row.
+          assertFalse("row should not be visible after family-wide tombstone mirror", rs.next());
+        }
+      }
+    }
+
+    // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view.
+    verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
+  }
+
   /**
    * Helper class to hold separated mapper and chunk entries.
    */
@@ -2998,9 +2959,9 @@ private static class CheckpointAggregateCounters {
   }
 
   /**
-   * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of
-   * the checkpoint table before and after deletion, and the aggregate counters re-derived from
-   * the chunks that survived the deletion. Tests use these to assert the
+   * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of the
+   * checkpoint table before and after deletion, and the aggregate counters re-derived from the
+   * chunks that survived the deletion. Tests use these to assert the
    * {@code remaining + rerun == first-run} invariant after re-running the sync tool.
    */
   private static class PartialRerunSetup {
@@ -3033,13 +2994,13 @@ private static class PartialRerunSetup {
   /**
    * Runs the partial-rerun preamble shared by all checkpoint-resume tests:
    * <ol>
-   *   <li>Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime},
-   *       {@code toTime}] window.</li>
-   *   <li>Query the checkpoint table and assert non-empty mapper/chunk results.</li>
-   *   <li>Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all
-   *       current tests) and delete them along with every mapper row.</li>
-   *   <li>Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can
-   *       assert the {@code remaining + rerun == first-run} row-count invariant.</li>
+   * <li>Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime},
+   * {@code toTime}] window.</li>
+   * <li>Query the checkpoint table and assert non-empty mapper/chunk results.</li>
+   * <li>Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all current
+   * tests) and delete them along with every mapper row.</li>
+   * <li>Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can assert
+   * the {@code remaining + rerun == first-run} row-count invariant.</li>
    * </ol>
    * Each test then performs its own divergent action (extra splits, merges, smaller chunk size,
    * dropping the target table) on the returned state.
@@ -3058,9 +3019,9 @@ private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, lon
     assertFalse("Should have mapper region entries", separated.mappers.isEmpty());
     assertFalse("Should have chunk entries", separated.chunks.isEmpty());
 
-    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete = selectChunksToDeleteFromMappers(
-      sourceConnection, tableName, targetZkQuorum, fromTime, toTime, null, separated.mappers,
-      deletionFraction);
+    List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete =
+      selectChunksToDeleteFromMappers(sourceConnection, tableName, targetZkQuorum, fromTime, toTime,
+        null, separated.mappers, deletionFraction);
 
     int deletedCount = deleteCheckpointEntries(sourceConnection, tableName, targetZkQuorum, null,
       separated.mappers, chunksToDelete);
@@ -3072,8 +3033,8 @@ private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, lon
     CheckpointAggregateCounters remainingCounters =
       calculateAggregateCountersFromCheckpoint(entriesAfterDelete);
 
-    return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers,
-      separated.chunks, chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters);
+    return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers, separated.chunks,
+      chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters);
   }
 
   private List<PhoenixSyncTableCheckpointOutputRow> findChunksBelongingToMapper(Connection conn,
@@ -3285,8 +3246,8 @@ private void verifyDataIdentical(Connection sourceConn, Connection targetConn, S
   /**
    * Negative complement of {@link #verifyDataIdentical}: asserts that source and target return
    * different row sets via Phoenix SELECT. Use after a repair pass that is expected to leave
-   * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool
-   * reported the right status and that the data actually didn't converge.
+   * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool reported
+   * the right status and that the data actually didn't converge.
    */
   private void verifyDataDiverges(Connection sourceConn, Connection targetConn, String tableName)
     throws SQLException {
@@ -3791,8 +3752,8 @@ private void assertSyncToolFails(String[] args, String failureContext) {
   /**
    * Upserts a "MODIFIED_NAME_<id>" row on target for each id in {@code mismatchIds}. Replaces the
    * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new
-   * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of {@link
-   * #upsertRowsOnTarget}.
+   * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of
+   * {@link #upsertRowsOnTarget}.
    */
   private void introduceMismatchesByIds(String tableName, List<Integer> mismatchIds)
     throws SQLException {
@@ -3806,19 +3767,20 @@ private void introduceMismatchesByIds(String tableName, List<Integer> mismatchId
   }
 
   /**
-   * Starts two daemon-style threads that perform region mutations (splits or merges) on the
-   * source and target clusters and returns a {@link Runnable} the caller invokes to join them
-   * with a 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an
-   * unexpected exception is logged rather than killing the JVM thread silently.
+   * Starts two daemon-style threads that perform region mutations (splits or merges) on the source
+   * and target clusters and returns a {@link Runnable} the caller invokes to join them with a
+   * 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an unexpected
+   * exception is logged rather than killing the JVM thread silently.
+   * <p>
+   * Usage:
    *
-   * <p>Usage:
    * <pre>
    *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
    *   ... run main sync work ...
    *   joiner.run();
    * </pre>
-   *
-   * <p>Caller is responsible for invoking the returned joiner; tests should always join before
+   * <p>
+   * Caller is responsible for invoking the returned joiner; tests should always join before
    * asserting on cluster state, otherwise late-arriving region mutations can race the assertions.
    */
   private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork,
@@ -3859,9 +3821,8 @@ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetW
   private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception {
     Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time",
       "0", "--to-time", String.valueOf(System.currentTimeMillis()));
-    long chunkCount =
-      baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue()
-        + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
+    long chunkCount = baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue()
+      + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
     cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null);
     return chunkCount;
   }
@@ -3876,9 +3837,9 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, String...
   }
 
   /**
-   * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation,
-   * along with the pinned time window so callers can re-query the checkpoint table or run
-   * additional assertions against the same range.
+   * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation, along
+   * with the pinned time window so callers can re-query the checkpoint table or run additional
+   * assertions against the same range.
    */
   private static class RepairRunResult {
     final Job dryRunJob;
@@ -3905,9 +3866,9 @@ private RepairRunResult runSyncToolWithRepair(String tableName, String... additi
    * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED
    * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is
    * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without
-   * pinning, each invocation would fall through to System.currentTimeMillis() and the repair
-   * pass would create fresh rows instead of overwriting the dry-run pass's output. If the caller
-   * does not provide --from-time / --to-time, defaults of 0 / now are pinned.
+   * pinning, each invocation would fall through to System.currentTimeMillis() and the repair pass
+   * would create fresh rows instead of overwriting the dry-run pass's output. If the caller does
+   * not provide --from-time / --to-time, defaults of 0 / now are pinned.
    */
   private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize,
     String... additionalArgs) throws Exception {
@@ -3923,8 +3884,8 @@ private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize,
   }
 
   /**
-   * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array.
-   * Returns the default value if the flag is absent.
+   * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array. Returns
+   * the default value if the flag is absent.
    */
   private static long parseLongFlag(String[] args, String flag, long defaultValue) {
     for (int i = 0; i < args.length - 1; i++) {
@@ -3968,9 +3929,9 @@ private static String[] appendArg(String[] args, String newArg) {
   }
 
   /**
-   * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the
-   * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops
-   * that recurred across several tests.
+   * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the ad-hoc
+   * {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops that recurred
+   * across several tests.
    */
   private static long countCheckpointsByStatus(List<PhoenixSyncTableCheckpointOutputRow> entries,
     PhoenixSyncTableCheckpointOutputRow.Status status) {
@@ -4082,28 +4043,23 @@ private static class SyncCountersResult {
       this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue();
       this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
       this.chunksRepaired = counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue();
-      this.chunksUnrepairable =
-        counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
-      this.chunksRepairFailed =
-        counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
+      this.chunksUnrepairable = counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue();
+      this.chunksRepairFailed = counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue();
       this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue();
       this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue();
       this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue();
-      this.mappersUnrepairable =
-        counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
+      this.mappersUnrepairable = counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue();
       this.mappersRepairFailed =
         counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue();
       this.rowsMissingOnTarget =
         counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
-      this.rowsExtraOnTarget =
-        counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
+      this.rowsExtraOnTarget = counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
       this.rowsDifferentOnTarget =
         counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
       this.rowsCannotRepair = counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
       this.cellsMissingOnTarget =
         counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
-      this.cellsExtraOnTarget =
-        counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
+      this.cellsExtraOnTarget = counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
       this.cellsDifferentOnTarget =
         counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue();
       this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
@@ -4123,10 +4079,10 @@ public void logCounters(String testName) {
           + "rows extra={}, rows different={}, rows cannot repair={}, cells missing={}, "
           + "cells extra={}, cells different={}",
         testName, sourceRowsProcessed, targetRowsProcessed, chunksVerified, chunksMismatched,
-        chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified,
-        mappersMismatched, mappersRepaired, mappersUnrepairable, mappersRepairFailed,
-        rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair,
-        cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget);
+        chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified, mappersMismatched,
+        mappersRepaired, mappersUnrepairable, mappersRepairFailed, rowsMissingOnTarget,
+        rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, cellsMissingOnTarget,
+        cellsExtraOnTarget, cellsDifferentOnTarget);
     }
   }
 
@@ -4178,9 +4134,9 @@ private void validateMapperCountersRepair(SyncCountersResult counters,
   }
 
   /**
-   * Pins the cell-level repair drift counters to exact expected values. Use in repair tests
-   * where the drift is constructed deterministically and any miscount (off-by-one,
-   * double-counting, missed branch) should fail the test loudly.
+   * Pins the cell-level repair drift counters to exact expected values. Use in repair tests where
+   * the drift is constructed deterministically and any miscount (off-by-one, double-counting,
+   * missed branch) should fail the test loudly.
    */
   private void assertRepairCellCounters(SyncCountersResult counters, long expectedCellsMissing,
     long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) {
@@ -4193,8 +4149,8 @@ private void assertRepairCellCounters(SyncCountersResult counters, long expected
 
   /**
    * Pins the chunk- and mapper-level repair-status counters. Complements
-   * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that
-   * need to assert both layers.
+   * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that need to
+   * assert both layers.
    */
   private void assertRepairChunkAndMapperCounters(SyncCountersResult counters,
     long expectedChunksRepaired, long expectedChunksUnrepairable, long expectedChunksRepairFailed,
@@ -4204,34 +4160,31 @@ private void assertRepairChunkAndMapperCounters(SyncCountersResult counters,
     assertEquals("CHUNKS_UNREPAIRABLE", expectedChunksUnrepairable, counters.chunksUnrepairable);
     assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, counters.chunksRepairFailed);
     assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, counters.mappersRepaired);
-    assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable,
-      counters.mappersUnrepairable);
+    assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable, counters.mappersUnrepairable);
     assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed,
       counters.mappersRepairFailed);
   }
 
   /**
-   * Pins all four row-level drift counters: missing, extra, different, and unrepairable.
-   * Dry-run runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave
-   * {@code ROWS_DIFFERENT_ON_TARGET} at 0 (different rows roll up under missing/extra cell
-   * drift after repair).
+   * Pins all four row-level drift counters: missing, extra, different, and unrepairable. Dry-run
+   * runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave {@code ROWS_DIFFERENT_ON_TARGET}
+   * at 0 (different rows roll up under missing/extra cell drift after repair).
    */
   private void assertRowDriftCounters(SyncCountersResult counters, long expectedRowsMissing,
     long expectedRowsExtra, long expectedRowsDifferent, long expectedRowsCannotRepair) {
     assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, counters.rowsMissingOnTarget);
     assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, counters.rowsExtraOnTarget);
-    assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent,
-      counters.rowsDifferentOnTarget);
+    assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent, counters.rowsDifferentOnTarget);
     assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair);
   }
 
   /**
    * Builds DDL for a "repair test" table that uses {@code COLUMN_ENCODED_BYTES=NONE} so column
-   * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers
-   * inject raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)}
-   * without computing encoded qualifier bytes.
-   *
-   * <p>Set {@code maxVersions > 1} when the test exercises hidden-version unwinding.
+   * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers inject
+   * raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)} without
+   * computing encoded qualifier bytes.
+   * <p>
+   * Set {@code maxVersions > 1} when the test exercises hidden-version unwinding.
    */
   private String buildRepairTestTableDdl(String tableName, boolean withReplication, int maxVersions,
     String splitPoints) {
@@ -4257,9 +4210,9 @@ private String buildRepairTestTableDdl(String tableName, boolean withReplication
   /**
    * Creates the same {@link #buildRepairTestTableDdl} schema on both source and target clusters.
    * Used by repair tests that bypass replication and seed the two clusters separately.
-   *
-   * <p>Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps
-   * &ge; the anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as
+   * <p>
+   * Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps &ge; the
+   * anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as
    * {@code TableNotFoundException}.
    */
   private long createRepairTestTableOnBothClusters(String tableName, int maxVersions,
@@ -4276,8 +4229,8 @@ private long createRepairTestTableOnBothClusters(String tableName, int maxVersio
 
   /**
    * Multi-CF variant of {@link #createRepairTestTableOnBothClusters}: two column families
-   * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to
-   * span families.
+   * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to span
+   * families.
    */
   private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws SQLException {
     String ddl = "CREATE TABLE IF NOT EXISTS " + tableName
@@ -4291,11 +4244,11 @@ private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws
   /**
    * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then
    * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted
-   * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell
-   * and (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation
-   * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The
-   * spin terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait
-   * for an external side effect.
+   * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell and
+   * (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation
+   * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The spin
+   * terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait for an
+   * external side effect.
    */
   private long waitUntilWallClockPasses(long minTs) {
     while (System.currentTimeMillis() <= minTs) {
@@ -4318,8 +4271,8 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S
 
   /**
    * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits.
-   * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; }}
-   * boilerplate that recurred in nearly every repair test.
+   * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit;
+   * }} boilerplate that recurred in nearly every repair test.
    */
   private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException {
     try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
@@ -4350,8 +4303,8 @@ private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException {
    * tests.
    */
   private void assertTargetName(String tableName, int id, String expected) throws SQLException {
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+    try (PreparedStatement ps =
+      targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
       ps.setInt(1, id);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue("Row " + id + " should exist on target", rs.next());
@@ -4362,8 +4315,8 @@ private void assertTargetName(String tableName, int id, String expected) throws
 
   /** Asserts the target row exists but its visible NAME is null. */
   private void assertTargetNameNull(String tableName, int id) throws SQLException {
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+    try (PreparedStatement ps =
+      targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
       ps.setInt(1, id);
       try (ResultSet rs = ps.executeQuery()) {
         assertTrue("Row " + id + " should exist on target", rs.next());
@@ -4374,8 +4327,8 @@ private void assertTargetNameNull(String tableName, int id) throws SQLException
 
   /** Asserts the target row is not visible to a Phoenix SELECT (e.g., shadowed by tombstones). */
   private void assertTargetRowAbsent(String tableName, int id) throws SQLException {
-    try (PreparedStatement ps = targetConnection
-      .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
+    try (PreparedStatement ps =
+      targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
       ps.setInt(1, id);
       try (ResultSet rs = ps.executeQuery()) {
         assertFalse("Row " + id + " should not be visible on target", rs.next());
@@ -4385,12 +4338,11 @@ private void assertTargetRowAbsent(String tableName, int id) throws SQLException
 
   /**
    * End-of-test convergence: run dry-run + repair over [fromTime, toTime] on top of whatever
-   * checkpoint state the test already produced. The repair-mode resume filter (status IN
-   * (VERIFIED, REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level
-   * range not yet covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run +
-   * repair on top of partial/stale prior state will still converge. Asserts no
-   * MISMATCHED/UNREPAIRABLE/REPAIR_FAILED rows remain and Phoenix-visible data matches between
-   * source and target.
+   * checkpoint state the test already produced. The repair-mode resume filter (status IN (VERIFIED,
+   * REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level range not yet
+   * covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run + repair on top of
+   * partial/stale prior state will still converge. Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED
+   * rows remain and Phoenix-visible data matches between source and target.
    */
   private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime)
     throws Exception {
@@ -4404,8 +4356,8 @@ private void convergeAndAssertIdentical(String tableName, long fromTime, long to
   }
 
   /**
-   * Summary of NAME-column raw cells observed under a single-row raw scan with all versions.
-   * Used by tests that pin tombstone/Put counts after repair.
+   * Summary of NAME-column raw cells observed under a single-row raw scan with all versions. Used
+   * by tests that pin tombstone/Put counts after repair.
    */
   private static final class RawCellSummary {
     final int puts;
@@ -4436,8 +4388,8 @@ int totalDeletes() {
 
   /**
    * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put /
-   * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin
-   * post-repair NAME tombstone counts.
+   * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin post-repair
+   * NAME tombstone counts.
    */
   private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception {
     byte[] rk = integerRowKey(rowId);
@@ -4542,9 +4494,9 @@ private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte
   }
 
   /**
-   * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family
-   * at {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests
-   * exercising the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}.
+   * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family at
+   * {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests exercising
+   * the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}.
    */
   private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte[] rowKey,
     String family, long markerTs) throws Exception {
@@ -4556,13 +4508,13 @@ private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte
   }
 
   /**
-   * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier
-   * in the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by
-   * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch
-   * in {@code wouldShadow}.
+   * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier in
+   * the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by
+   * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch in
+   * {@code wouldShadow}.
    */
-  private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName,
-    byte[] rowKey, String family, long markerTs) throws Exception {
+  private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName, byte[] rowKey,
+    String family, long markerTs) throws Exception {
     try (Table hTable = getHBaseTable(phoenixConn, tableName)) {
       Delete del = new Delete(rowKey);
       del.addFamilyVersion(Bytes.toBytes(family), markerTs);
@@ -4571,8 +4523,8 @@ private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableNam
   }
 
   /**
-   * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the
-   * encoding used by {@code splitTableAt}.
+   * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the encoding
+   * used by {@code splitTableAt}.
    */
   private static byte[] integerRowKey(int id) {
     return PInteger.INSTANCE.toBytes(id);
@@ -4580,8 +4532,8 @@ private static byte[] integerRowKey(int id) {
 
   /**
    * Returns a fresh {@link Configuration} clone of the source cluster with a custom
-   * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by
-   * the mid-row-flush boundary test.
+   * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by the
+   * mid-row-flush boundary test.
    */
   private static Configuration sourceClusterConfWithRepairBatchSize(int repairBatchSize) {
     Configuration conf = sourceClusterConf();
@@ -4618,8 +4570,7 @@ private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow
         || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType()));
 
     if (tenantId == null) {
-      assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx,
-        entry.getTenantId());
+      assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, entry.getTenantId());
     } else {
       assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId());
     }
@@ -4627,8 +4578,7 @@ private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow
     assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0);
     assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime());
 
-    assertNotNull("EXECUTION_START_TIME should not be null" + ctx,
-      entry.getExecutionStartTime());
+    assertNotNull("EXECUTION_START_TIME should not be null" + ctx, entry.getExecutionStartTime());
     assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime());
     assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx,
       entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime());
@@ -4724,9 +4674,8 @@ private void validateCheckpointEntries(String tableName, String tenantId,
 
     // Repair UPSERTs REGION rows at the dry-run PK with a smaller (gap-only) delta, so totals
     // only match when no REGION was actually re-processed.
-    boolean repairTouchedRegions = repairCounters != null
-      && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable
-        + repairCounters.mappersRepairFailed) > 0;
+    boolean repairTouchedRegions = repairCounters != null && (repairCounters.mappersRepaired
+      + repairCounters.mappersUnrepairable + repairCounters.mappersRepairFailed) > 0;
     if (!repairTouchedRegions) {
       assertEquals("REGION sourceRowsProcessed total" + ctx, verifiedSource.sourceRowsProcessed,
         sourceRowsProcessed);
@@ -4738,9 +4687,9 @@ private void validateCheckpointEntries(String tableName, String tenantId,
   /**
    * Bounded variant of {@link #validateCheckpointEntries} for tests where checkpoint rows
    * accumulate across multiple PKs (multi-window, partial reruns under split/merge, recovery).
-   * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts
-   * are at least {@code latestCounters}'s verified+repaired totals (or just the status invariants
-   * when {@code latestCounters} is null).
+   * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts are
+   * at least {@code latestCounters}'s verified+repaired totals (or just the status invariants when
+   * {@code latestCounters} is null).
    */
   private void validateCheckpointEntriesAtLeast(String tableName, String tenantId,
     SyncCountersResult latestCounters) throws SQLException {
@@ -4753,34 +4702,29 @@ private void validateCheckpointEntriesAtLeast(String tableName, String tenantId,
     }
 
     assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
+      countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED));
     assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
+      countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE));
     assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0,
-      countCheckpointsByStatus(entries,
-        PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
+      countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED));
 
     if (latestCounters != null) {
       long chunkVerifiedPlusRepaired =
         countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
           PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)
-          + countCheckpointsByTypeAndStatus(entries,
-            PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
-            PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
-      long regionVerifiedPlusRepaired =
-        countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
-          PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)
-          + countCheckpointsByTypeAndStatus(entries,
-            PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+          + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK,
             PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
+      long regionVerifiedPlusRepaired = countCheckpointsByTypeAndStatus(entries,
+        PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+        PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)
+        + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION,
+          PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED);
       long expectedChunkLowerBound = latestCounters.chunksVerified + latestCounters.chunksRepaired;
       long expectedRegionLowerBound =
         latestCounters.mappersVerified + latestCounters.mappersRepaired;
       assertTrue(
-        "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual="
-          + chunkVerifiedPlusRepaired + ", expected>=" + expectedChunkLowerBound + ")",
+        "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" + chunkVerifiedPlusRepaired
+          + ", expected>=" + expectedChunkLowerBound + ")",
         chunkVerifiedPlusRepaired >= expectedChunkLowerBound);
       assertTrue(
         "REGION VERIFIED+REPAIRED >= latest counters" + ctx + " (actual="
@@ -4807,8 +4751,8 @@ public boolean equals(Object o) {
   }
 
   /**
-   * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT
-   * by {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the
+   * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT by
+   * {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the
    * {@code CHECKPOINT_WRITE_FAILED} → non-zero exit path.
    */
   public static class CheckpointWriteFailingObserver extends SimpleRegionObserver {
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
index b9f7e893a58..57791072cda 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java
@@ -26,7 +26,6 @@
 import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS;
 import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME;
 import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
index a5977629a89..65cecb68804 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java
@@ -763,11 +763,11 @@ public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exce
     for (int i = 0; i < statuses.length; i++) {
       byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i));
       byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i));
-      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
-        .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey)
-        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
-        .setExecutionEndTime(timestamp).build());
+      repository.checkpointSyncTableResult(
+        new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName)
+          .setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L).setToTime(1000L)
+          .setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i])
+          .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
     }
 
     // Repair mode should skip only fully-done regions (VERIFIED + REPAIRED) so the mapper
@@ -776,10 +776,10 @@ public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exce
       repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, false);
     assertEquals("Repair mode should return only VERIFIED + REPAIRED regions", 2,
       repairResults.size());
-    assertArrayEquals("First should be region00 (VERIFIED)",
-      Bytes.toBytes("region00_start"), repairResults.get(0).getStartRowKey());
-    assertArrayEquals("Second should be region02 (REPAIRED)",
-      Bytes.toBytes("region02_start"), repairResults.get(1).getStartRowKey());
+    assertArrayEquals("First should be region00 (VERIFIED)", Bytes.toBytes("region00_start"),
+      repairResults.get(0).getStartRowKey());
+    assertArrayEquals("Second should be region02 (REPAIRED)", Bytes.toBytes("region02_start"),
+      repairResults.get(1).getStartRowKey());
   }
 
   @Test
@@ -792,11 +792,11 @@ public void testGetProcessedMapperRegionsDryRunReturnsAllStatuses() throws Excep
     for (int i = 0; i < statuses.length; i++) {
       byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i));
       byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i));
-      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION)
-        .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey)
-        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
-        .setExecutionEndTime(timestamp).build());
+      repository.checkpointSyncTableResult(
+        new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName)
+          .setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L).setToTime(1000L)
+          .setIsDryRun(true).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i])
+          .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
     }
 
     // Dry-run mode does not filter by status; resume should skip every region the previous
@@ -821,21 +821,21 @@ public void testGetProcessedChunksRepairModeFiltersByStatus() throws Exception {
     for (int i = 0; i < statuses.length; i++) {
       byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i));
       byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i));
-      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK)
-        .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey)
-        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
-        .setExecutionEndTime(timestamp).build());
+      repository.checkpointSyncTableResult(
+        new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName)
+          .setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L).setToTime(1000L)
+          .setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i])
+          .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
     }
 
-    List<PhoenixSyncTableCheckpointOutputRow> repairResults = repository.getProcessedChunks(
-      tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false);
+    List<PhoenixSyncTableCheckpointOutputRow> repairResults = repository
+      .getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false);
     assertEquals("Repair mode should return only VERIFIED + REPAIRED chunks", 2,
       repairResults.size());
-    assertArrayEquals("First should be chunk00 (VERIFIED)",
-      Bytes.toBytes("chunk00_start"), repairResults.get(0).getStartRowKey());
-    assertArrayEquals("Second should be chunk02 (REPAIRED)",
-      Bytes.toBytes("chunk02_start"), repairResults.get(1).getStartRowKey());
+    assertArrayEquals("First should be chunk00 (VERIFIED)", Bytes.toBytes("chunk00_start"),
+      repairResults.get(0).getStartRowKey());
+    assertArrayEquals("Second should be chunk02 (REPAIRED)", Bytes.toBytes("chunk02_start"),
+      repairResults.get(1).getStartRowKey());
   }
 
   @Test
@@ -850,15 +850,15 @@ public void testGetProcessedChunksDryRunReturnsAllStatuses() throws Exception {
     for (int i = 0; i < statuses.length; i++) {
       byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i));
       byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i));
-      repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder()
-        .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK)
-        .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey)
-        .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp)
-        .setExecutionEndTime(timestamp).build());
+      repository.checkpointSyncTableResult(
+        new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName)
+          .setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L).setToTime(1000L)
+          .setIsDryRun(true).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i])
+          .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build());
     }
 
-    List<PhoenixSyncTableCheckpointOutputRow> dryRunResults = repository.getProcessedChunks(
-      tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
+    List<PhoenixSyncTableCheckpointOutputRow> dryRunResults = repository
+      .getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true);
     assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size());
   }
 

From 7cce16a9c2b19f9e467d32f59ca94ab1e5e16ba8 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 9 Jun 2026 14:05:29 +0530
Subject: [PATCH 15/18] test comments

---
 .../end2end/PhoenixSyncTableToolIT.java       | 108 +++++-------------
 1 file changed, 28 insertions(+), 80 deletions(-)

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 19863d20820..100d34bd567 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -3076,16 +3076,7 @@ private List<PhoenixSyncTableCheckpointOutputRow> selectChunksToDeleteFromMapper
     return new ArrayList<>(uniqueChunksToDelete.values());
   }
 
-  /**
-   * Deletes mapper and chunk checkpoint entries to simulate partial rerun scenarios.
-   * @param conn            Connection to use
-   * @param tableName       Table name
-   * @param targetZkQuorum  Target cluster ZK quorum
-   * @param tenantId        Tenant ID
-   * @param mappersToDelete List of mapper entries to delete
-   * @param chunksToDelete  List of chunk entries to delete
-   * @return Total number of entries deleted
-   */
+  /** Deletes the given mapper + chunk checkpoint rows; used by partial-rerun fixtures. */
   private int deleteCheckpointEntries(Connection conn, String tableName, String targetZkQuorum,
     String tenantId, List<PhoenixSyncTableCheckpointOutputRow> mappersToDelete,
     List<PhoenixSyncTableCheckpointOutputRow> chunksToDelete) throws SQLException {
@@ -3197,14 +3188,8 @@ private void waitForReplication(Connection targetConn, String tableName, int exp
   }
 
   /**
-   * Waits for a specific row's content to be replicated to the target cluster. This is more precise
-   * than waitForReplication() when dealing with UPDATEs where the row count doesn't change but the
-   * content does.
-   * @param targetConn   Target cluster connection
-   * @param tableName    Table name
-   * @param rowId        The ID of the row to check
-   * @param expectedName The expected NAME value
-   * @throws Exception if replication times out or query fails
+   * Polls target until the row's NAME matches {@code expectedName} — use this for UPDATEs where row
+   * count is unchanged so {@link #waitForReplication} can't tell drift from convergence.
    */
   private void waitForRowContentReplication(Connection targetConn, String tableName, int rowId,
     String expectedName) throws Exception {
@@ -3611,16 +3596,8 @@ private List<PhoenixSyncTableCheckpointOutputRow> queryCheckpointTable(Connectio
   }
 
   /**
-   * Unified method to delete a single checkpoint entry by start row key and optional type. Handles
-   * NULL/empty start keys for first region boundaries.
-   * @param conn          Connection to use
-   * @param tableName     Table name
-   * @param targetCluster Target cluster ZK quorum
-   * @param tenantId      Tenant ID (nullable)
-   * @param type          Entry type (REGION or CHUNK), or null to delete regardless of type
-   * @param startRowKey   Start row key to match
-   * @param autoCommit    Whether to commit after delete
-   * @return Number of rows deleted
+   * Deletes a single checkpoint entry by start row key. Pass {@code type=null} to match any type;
+   * NULL/empty {@code startRowKey} matches the first-region boundary row.
    */
   private int deleteSingleCheckpointEntry(Connection conn, String tableName, String targetCluster,
     String tenantId, PhoenixSyncTableCheckpointOutputRow.Type type, byte[] startRowKey,
@@ -3723,19 +3700,16 @@ private Job runSyncToolWithZkQuorum(String tableName, String zkQuorum, String...
   }
 
   /**
-   * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration}. Tests that
-   * need to override individual settings (paging, timeouts, etc.) should use this helper rather
-   * than constructing the Configuration inline so that any future change to the base config flows
-   * through one place.
+   * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration} — use this
+   * (not an inline constructor) so future base-config changes flow through one place.
    */
   private static Configuration sourceClusterConf() {
     return new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration());
   }
 
   /**
-   * Builds a {@link PhoenixSyncTableTool}, runs it with the supplied args, and asserts the run
-   * surfaces failure as a non-zero exit code rather than a thrown exception. Used by the
-   * failure-mode tests that previously hand-rolled this same try/run/assertTrue/catch-fail block.
+   * Runs the tool and asserts failure surfaces as a non-zero exit code rather than a thrown
+   * exception. Used by failure-mode tests.
    */
   private void assertSyncToolFails(String[] args, String failureContext) {
     PhoenixSyncTableTool tool = new PhoenixSyncTableTool();
@@ -3750,13 +3724,11 @@ private void assertSyncToolFails(String[] args, String failureContext) {
   }
 
   /**
-   * Upserts a "MODIFIED_NAME_<id>" row on target for each id in {@code mismatchIds}. Replaces the
-   * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new
-   * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of
-   * {@link #upsertRowsOnTarget}.
+   * Batch-upserts {@code "MODIFIED_NAME_<id>"} on target for each id, then waits for replication so
+   * the modify is observable before the test asserts on it.
    */
   private void introduceMismatchesByIds(String tableName, List<Integer> mismatchIds)
-    throws SQLException {
+    throws Exception {
     int[] ids = new int[mismatchIds.size()];
     String[] names = new String[mismatchIds.size()];
     for (int i = 0; i < mismatchIds.size(); i++) {
@@ -3764,24 +3736,16 @@ private void introduceMismatchesByIds(String tableName, List<Integer> mismatchId
       names[i] = "MODIFIED_NAME_" + mismatchIds.get(i);
     }
     upsertRowsOnTarget(targetConnection, tableName, ids, names);
+    // Confirm each modified NAME is observable on target — replication can race the upsert's
+    // cell timestamp and silently no-op the modify, which would later flake as "0 mismatched".
+    for (int i = 0; i < ids.length; i++) {
+      waitForRowContentReplication(targetConnection, tableName, ids[i], names[i]);
+    }
   }
 
   /**
-   * Starts two daemon-style threads that perform region mutations (splits or merges) on the source
-   * and target clusters and returns a {@link Runnable} the caller invokes to join them with a
-   * 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an unexpected
-   * exception is logged rather than killing the JVM thread silently.
-   * <p>
-   * Usage:
-   *
-   * <pre>
-   *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
-   *   ... run main sync work ...
-   *   joiner.run();
-   * </pre>
-   * <p>
-   * Caller is responsible for invoking the returned joiner; tests should always join before
-   * asserting on cluster state, otherwise late-arriving region mutations can race the assertions.
+   * Returns a joiner the caller MUST run to await the worker threads — late-arriving region
+   * mutations otherwise race the test's assertions.
    */
   private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork,
     String label) {
@@ -3928,11 +3892,7 @@ private static String[] appendArg(String[] args, String newArg) {
     return result;
   }
 
-  /**
-   * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the ad-hoc
-   * {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops that recurred
-   * across several tests.
-   */
+  /** Counts checkpoint entries (REGION + CHUNK) in the given status. */
   private static long countCheckpointsByStatus(List<PhoenixSyncTableCheckpointOutputRow> entries,
     PhoenixSyncTableCheckpointOutputRow.Status status) {
     long count = 0;
@@ -4242,13 +4202,10 @@ private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws
   }
 
   /**
-   * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then
-   * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted
-   * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell and
-   * (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation
-   * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The spin
-   * terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait for an
-   * external side effect.
+   * Spins until {@code System.currentTimeMillis() > minTs} and returns the new wall clock. Used for
+   * {@code --to-time} when tests plant cells at handcrafted future timestamps — the value must
+   * cover every planted cell and satisfy {@code endTime <= currentTimeMillis()}
+   * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}).
    */
   private long waitUntilWallClockPasses(long minTs) {
     while (System.currentTimeMillis() <= minTs) {
@@ -4269,11 +4226,7 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S
     return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props);
   }
 
-  /**
-   * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits.
-   * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit;
-   * }} boilerplate that recurred in nearly every repair test.
-   */
+  /** Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits. */
   private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException {
     try (Connection scn = openConnectionAtScn(zkUrl, ts)) {
       scn.createStatement().execute(upsertSql);
@@ -4297,11 +4250,7 @@ private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException {
     upsertAtScnTarget(ts, upsertSql);
   }
 
-  /**
-   * Asserts target's visible NAME for {@code id} equals {@code expected}. Replaces the
-   * {@code prepareStatement("SELECT NAME FROM ... WHERE ID = ?")} block repeated across repair
-   * tests.
-   */
+  /** Asserts target's visible NAME for {@code id} equals {@code expected}. */
   private void assertTargetName(String tableName, int id, String expected) throws SQLException {
     try (PreparedStatement ps =
       targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) {
@@ -4387,9 +4336,8 @@ int totalDeletes() {
   }
 
   /**
-   * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put /
-   * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin post-repair
-   * NAME tombstone counts.
+   * Raw scan of a single row on target, summarising every NAME-column cell by Put / tombstone
+   * subtype. Used by repair tests that pin post-repair NAME tombstone counts.
    */
   private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception {
     byte[] rk = integerRowKey(rowId);

From 9b1147bb858d3865f72979c9696c4b1d0ffd5f80 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 9 Jun 2026 17:02:00 +0530
Subject: [PATCH 16/18] add test for chunk repair

---
 .../end2end/PhoenixSyncTableToolIT.java       |  86 ++---
 .../PhoenixSyncTableChunkRepairerTest.java    | 319 ++++++++++++++++++
 2 files changed, 366 insertions(+), 39 deletions(-)
 create mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 100d34bd567..3bc53d40636 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -171,7 +171,7 @@ public void testSyncTableWithDataDifference() throws Exception {
 
     // Pin the time window so the dry-run and repair share the same checkpoint PK.
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows.
     Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--dry-run", "--from-time",
@@ -493,7 +493,7 @@ public void testSyncMultiTenantSaltedTableDifferences() throws Exception {
           targetRows);
       });
     }
-    String toTime = String.valueOf(System.currentTimeMillis());
+    String toTime = String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis()));
 
     for (String tenantId : tenantIds) {
       Connection tenantSourceConn = getTenantConnection(sourceConnection, tenantId);
@@ -537,12 +537,15 @@ public void testSyncTableWithTimeRangeFilter() throws Exception {
     // Insert data BEFORE the time range window
     insertTestData(sourceConnection, uniqueTableName, 1, 10);
 
-    long startTime = System.currentTimeMillis();
+    // HBase Scan.setTimeRange is half-open [from, to); Phoenix UPSERT batches commit at one
+    // ms-resolution timestamp. Wait for the wall clock to advance past the just-committed
+    // boundary cells so they land strictly outside the [startTime, endTime) window.
+    long startTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Insert data WITHIN the time range window
     insertTestData(sourceConnection, uniqueTableName, 11, 20);
 
-    long endTime = System.currentTimeMillis();
+    long endTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Insert data AFTER the time range window
     insertTestData(sourceConnection, uniqueTableName, 21, 30);
@@ -589,7 +592,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce
 
     // Capture consistent time range for both runs
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
     validateSyncCountersWithMinChunk(setup.firstRunCounters, 100, 100, 1, 1);
@@ -669,7 +672,7 @@ public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception
     introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // First run with large chunk size, then delete 75% of chunks for partial rerun.
     int largeChunkSize = 10240;
@@ -734,7 +737,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce
     introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
     SyncCountersResult counters1 = setup.firstRunCounters;
@@ -793,7 +796,7 @@ public void testSyncTableIdempotentOnReRun() throws Exception {
 
     // Capture consistent time range for both runs (ensures checkpoint lookup will match)
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Run sync tool for the FIRST time
     Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
@@ -841,7 +844,7 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception {
 
     // Capture consistent time range for both runs
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Run sync tool for the FIRST time (no differences, all chunks verified)
     Job job1 = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time",
@@ -920,7 +923,7 @@ public void testSyncTableInBackgroundMode() throws Exception {
     // Pin the time window so the background dry-run pass and the repair pass below share
     // the same checkpoint PK and the repair pass overwrites MISMATCHED → REPAIRED in place.
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     Configuration conf = sourceClusterConf();
     String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
@@ -978,7 +981,7 @@ public void testSyncTableWithCustomTimeouts() throws Exception {
     conf.setInt(QueryServices.SYNC_TABLE_RPC_RETRIES_COUNTER, customRpcRetries);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
     Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
 
@@ -1066,7 +1069,7 @@ public void testSyncTableWithConcurrentRegionSplits() throws Exception {
 
     // Capture time range for the sync
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Run splits on source/target concurrently with the sync.
     Runnable splitJoiner = startConcurrentRegionWork(
@@ -1126,7 +1129,7 @@ public void testRepairWithConcurrentTargetSplits() throws Exception {
     introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
@@ -1163,7 +1166,7 @@ public void testRepairIsIdempotent() throws Exception {
     introduceMismatchesByIds(uniqueTableName, mismatchIds);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
@@ -1300,7 +1303,7 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception {
 
     // Capture time range for the sync
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Run merges on source/target concurrently with the sync.
     Runnable mergeJoiner =
@@ -1354,7 +1357,7 @@ public void testSyncTableWithPagingTimeout() throws Exception {
     conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1);
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Dry-run with paging to assert chunk-count expansion under mid-chunk timeouts.
     Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time",
@@ -1421,7 +1424,7 @@ public void testSyncTableWithPagingTimeoutWithSplits() throws Exception {
       "splits");
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Dry-run sync while splits are happening — drift must remain on target so the chunk-count
     // assertion below has work to do (otherwise an inline repair would converge mid-pass).
@@ -1472,7 +1475,8 @@ public void testSyncTableMapperFailsWithNonExistentTable() throws Exception {
     // Try to run sync tool on a NON-EXISTENT table
     String nonExistentTable = "NON_EXISTENT_TABLE_" + System.currentTimeMillis();
     String[] args = new String[] { "--table-name", nonExistentTable, "--target-cluster",
-      targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
+      targetZkQuorum, "--run-foreground", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) };
 
     assertSyncToolFails(args,
       String.format("Table %s does not exist, mapper setup should fail", nonExistentTable));
@@ -1485,9 +1489,9 @@ public void testSyncTableMapperFailsWithInvalidTargetCluster() throws Exception
 
     // Try to run sync tool with INVALID target cluster ZK quorum.
     String invalidTargetZk = "invalid-zk-host:2181:/hbase";
-    String[] args =
-      new String[] { "--table-name", uniqueTableName, "--target-cluster", invalidTargetZk,
-        "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
+    String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
+      invalidTargetZk, "--run-foreground", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) };
 
     assertSyncToolFails(args,
       String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk));
@@ -1505,7 +1509,8 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception {
     // Don't create table on target - this will cause mapper map() to fail
     // when trying to scan the non-existent target table
     String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster",
-      targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) };
+      targetZkQuorum, "--run-foreground", "--to-time",
+      String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) };
 
     assertSyncToolFails(args,
       String.format(
@@ -1530,7 +1535,7 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception {
     // never get overwritten — leaving stale MISMATCHED rows that the post-recovery validation
     // would (correctly) flag.
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // Run once first so the checkpoint table exists; we can only attach a coprocessor to a
     // table that's already been created.
@@ -1582,7 +1587,7 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception {
 
     TestUtil.addCoprocessor(targetConnection, uniqueTableName, RepairBatchFailingObserver.class);
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
     try {
       // dryRun + repair: dry-run only reads from target, so it succeeds; repair flushes
       // mutations and trips the observer.
@@ -1626,7 +1631,7 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception {
 
     // Capture time range for both runs (ensures checkpoint lookup will match)
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     // First run + 75% deletion preamble (shared with other partial-rerun tests)
     PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75);
@@ -1720,9 +1725,9 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception {
     assertTargetName(uniqueTableName, rowId, "carol");
 
     // --read-all-versions so verifier and repairer both see the hidden version.
-    RepairRunResult result =
-      runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-        String.valueOf(System.currentTimeMillis()), "--read-all-versions");
+    RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time",
+      String.valueOf(fromTime), "--to-time",
+      String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())), "--read-all-versions");
 
     assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful());
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
@@ -2306,7 +2311,7 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception {
     sourceConnection.commit();
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     Configuration conf = sourceClusterConfWithRepairBatchSize(2);
 
@@ -2361,7 +2366,8 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio
 
     RepairRunResult result =
       runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time",
-        String.valueOf(System.currentTimeMillis()), "--raw-scan", "--read-all-versions");
+        String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())), "--raw-scan",
+        "--read-all-versions");
     assertTrue("Repair should succeed", result.repairJob.isSuccessful());
 
     SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob);
@@ -2475,7 +2481,7 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception {
     targetConnection.commit();
 
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     Configuration conf = sourceClusterConfWithRepairBatchSize(4);
 
@@ -2744,7 +2750,7 @@ public void testSyncTableWithSplitCoalescing() throws Exception {
     // Enable split coalescing via command-line parameter, all regions will be coalesced into one
     // mapper. Use a pinned window so the dry-run and repair share the same checkpoint PK.
     long fromTime = 0L;
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     Job dryRunJob = runSyncTool(uniqueTableName, "--coalesce-split", "--dry-run", "--from-time",
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
@@ -2783,7 +2789,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc
     waitForReplication(targetConnection, uniqueTableName, 10);
     verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName);
 
-    long toTime = System.currentTimeMillis();
+    long toTime = waitUntilWallClockPasses(System.currentTimeMillis());
 
     long maxLookbackAgeSeconds = 5;
     Configuration conf = sourceClusterConf();
@@ -3784,7 +3790,7 @@ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetW
    */
   private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception {
     Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time",
-      "0", "--to-time", String.valueOf(System.currentTimeMillis()));
+      "0", "--to-time", String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())));
     long chunkCount = baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue()
       + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue();
     cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null);
@@ -3837,7 +3843,8 @@ private RepairRunResult runSyncToolWithRepair(String tableName, String... additi
   private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize,
     String... additionalArgs) throws Exception {
     long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L);
-    long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis());
+    long toTime = parseLongFlag(additionalArgs, "--to-time",
+      waitUntilWallClockPasses(System.currentTimeMillis()));
     String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime);
 
     String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run");
@@ -3947,12 +3954,13 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, Configurat
     List<String> additionalArgsList = Arrays.asList(additionalArgs);
     argsList.addAll(additionalArgsList);
 
-    // If --to-time is not explicitly provided in additionalArgs, add current time
-    // This is needed because the default is now (current time - 1 hour) which won't
-    // capture data inserted immediately before running the sync tool
+    // If --to-time is not explicitly provided in additionalArgs, add current time. The default
+    // is now (current time - 1 hour) which won't capture data inserted immediately before
+    // running the sync tool. Wait for the wall clock to advance past the just-committed cells
+    // so they fall strictly inside the half-open Scan.setTimeRange(from, to) upper bound.
     if (!additionalArgsList.contains("--to-time")) {
       argsList.add("--to-time");
-      argsList.add(String.valueOf(System.currentTimeMillis()));
+      argsList.add(String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())));
     }
 
     String[] args = argsList.toArray(new String[0]);
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java
new file mode 100644
index 00000000000..5357120f497
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.Collections;
+import java.util.Set;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.Type;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.CellDriftCounts;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairRequest;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairResult;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.DriftCounters;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.RowDriftInfo;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.RowMirrorStatus;
+import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord;
+import org.junit.Test;
+
+/**
+ * Unit tests for the pure-data inner classes of {@link PhoenixSyncTableChunkRepairer}.
+ * Orchestration paths (repair/dryRun walks, scan construction, batch flushing) are covered by
+ * {@code PhoenixSyncTableToolIT}; this file pins the data-class invariants so a regression there
+ * fails as a localized unit-test failure rather than a counter mismatch many layers up.
+ */
+public class PhoenixSyncTableChunkRepairerTest {
+
+  private static final byte[] ROW = Bytes.toBytes("row");
+  private static final byte[] CF = Bytes.toBytes("0");
+  private static final byte[] CF2 = Bytes.toBytes("1");
+  private static final byte[] Q_NAME = Bytes.toBytes("NAME");
+  private static final byte[] Q_VALUE = Bytes.toBytes("NAME_VALUE");
+  private static final byte[] V_ALICE = Bytes.toBytes("alice");
+
+  @Test
+  public void columnKeyEqualsHonorsByteArrayContent() {
+    PhoenixSyncTableChunkRepairer.ColumnKey a =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME);
+    PhoenixSyncTableChunkRepairer.ColumnKey b =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(Bytes.toBytes("0"), Bytes.toBytes("NAME"));
+    assertEquals("Distinct byte[] copies with the same content must be equal", a, b);
+    assertEquals("hashCode must match equality", a.hashCode(), b.hashCode());
+  }
+
+  @Test
+  public void columnKeyDistinguishesFamilyAndQualifier() {
+    PhoenixSyncTableChunkRepairer.ColumnKey nameInCf =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME);
+    PhoenixSyncTableChunkRepairer.ColumnKey nameInOtherCf =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(CF2, Q_NAME);
+    PhoenixSyncTableChunkRepairer.ColumnKey valueInCf =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_VALUE);
+    assertNotEquals("Same qualifier in different family must not collide", nameInCf, nameInOtherCf);
+    assertNotEquals("Same family with different qualifier must not collide", nameInCf, valueInCf);
+  }
+
+  @Test
+  public void columnKeyEqualsRejectsForeignType() {
+    PhoenixSyncTableChunkRepairer.ColumnKey k =
+      new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME);
+    assertNotEquals(k, "not-a-key");
+    assertNotEquals(k, null);
+  }
+
+  @Test
+  public void columnKeyOfCellMatchesExplicitConstruction() {
+    Cell cell = new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE);
+    assertEquals(new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME),
+      PhoenixSyncTableChunkRepairer.ColumnKey.of(cell));
+  }
+
+  @Test
+  public void wouldShadowReturnsFalseOnEmptyRecord() {
+    TargetRowRecord rec = new TargetRowRecord();
+    assertFalse("No tombstones recorded ⇒ no shadow",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE)));
+  }
+
+  @Test
+  public void pointDeleteShadowsExactTimestampOnly() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.Delete));
+
+    assertTrue("Point Delete shadows a Put at exactly ts == 100",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE)));
+    assertFalse("Point Delete must NOT shadow a Put at ts == 99",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 99L, V_ALICE)));
+    assertFalse("Point Delete must NOT shadow a Put at ts == 101",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE)));
+    assertFalse("Point Delete must NOT shadow a different qualifier",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE)));
+  }
+
+  @Test
+  public void deleteColumnShadowsAllPutsAtOrBelowMarker() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.DeleteColumn));
+
+    assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 50L, V_ALICE)));
+    assertTrue("DeleteColumn shadows Put at exactly the marker ts",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE)));
+    assertFalse("DeleteColumn must NOT shadow Puts above the marker",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE)));
+    assertFalse("DeleteColumn must NOT shadow a different qualifier",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 50L, V_ALICE)));
+  }
+
+  @Test
+  public void deleteColumnUpperBoundUsesMaxAcrossMultipleMarkers() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.DeleteColumn));
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 200L, Type.DeleteColumn));
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 50L, Type.DeleteColumn));
+
+    assertTrue("Upper bound must collapse to the max marker (200)",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 200L, V_ALICE)));
+    assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 201L, V_ALICE)));
+  }
+
+  @Test
+  public void deleteFamilyShadowsAllQualifiersAtOrBelowMarker() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, null, 100L, Type.DeleteFamily));
+
+    assertTrue("DeleteFamily covers any qualifier in CF at ts <= 100",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 50L, V_ALICE)));
+    assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE)));
+    assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE)));
+    assertFalse("DeleteFamily must NOT span a different family",
+      rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 50L, V_ALICE)));
+  }
+
+  @Test
+  public void deleteFamilyVersionShadowsAllQualifiersAtExactTs() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, null, 100L, Type.DeleteFamilyVersion));
+
+    assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE)));
+    assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE)));
+    assertFalse("DFV must NOT cover other timestamps",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 99L, V_ALICE)));
+    assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE)));
+    assertFalse(rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 100L, V_ALICE)));
+  }
+
+  @Test
+  public void wouldShadowTrueIfAnyTombstoneSubtypeMatches() {
+    TargetRowRecord rec = new TargetRowRecord();
+    // Point Delete on (CF, NAME) at ts=100; DeleteFamily on a different family at ts=999.
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.Delete));
+    rec.record(new KeyValue(ROW, CF2, null, 999L, Type.DeleteFamily));
+
+    assertTrue("Match on point-delete arm wins regardless of other arms",
+      rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE)));
+    assertTrue("Match on family arm wins regardless of other arms",
+      rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 500L, V_ALICE)));
+  }
+
+  @Test
+  public void targetPutTimestampsBetweenIsExclusiveOnBothEnds() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 300L, V_ALICE));
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 600L, V_ALICE));
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 900L, V_ALICE));
+
+    Set<Long> hidden = rec.targetPutTimestampsBetween(CF, Q_NAME, 300L, 900L);
+    assertEquals("Bounds are exclusive on both ends — only 600 falls strictly between",
+      Collections.singleton(600L), hidden);
+  }
+
+  @Test
+  public void targetPutTimestampsBetweenEmptyWhenNoPutsForColumn() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 500L, V_ALICE));
+    assertTrue("Different qualifier ⇒ empty set, not null",
+      rec.targetPutTimestampsBetween(CF, Q_VALUE, 0L, Long.MAX_VALUE).isEmpty());
+  }
+
+  @Test
+  public void targetPutTimestampsBetweenSkipsTombstones() {
+    TargetRowRecord rec = new TargetRowRecord();
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 500L, V_ALICE));
+    rec.record(new KeyValue(ROW, CF, Q_NAME, 600L, Type.Delete));
+    Set<Long> puts = rec.targetPutTimestampsBetween(CF, Q_NAME, 0L, Long.MAX_VALUE);
+    assertEquals("Tombstone cells must not be reported as Put timestamps",
+      Collections.singleton(500L), puts);
+  }
+
+  @Test
+  public void driftCountersStartZeroAndAccumulateCellDrift() {
+    DriftCounters d = new DriftCounters();
+    assertEquals(0, d.cellsMissingOnTarget);
+    assertEquals(0, d.cellsExtraOnTarget);
+    assertEquals(0, d.cellsDifferentOnTarget);
+
+    d.addCellDrift(new CellDriftCounts(2, 3, 5));
+    d.addCellDrift(new CellDriftCounts(1, 0, 4));
+    assertEquals(3, d.cellsMissingOnTarget);
+    assertEquals(3, d.cellsExtraOnTarget);
+    assertEquals(9, d.cellsDifferentOnTarget);
+  }
+
+  @Test
+  public void driftCountersToLogStringIncludesEveryCounter() {
+    DriftCounters d = new DriftCounters();
+    d.rowsMissingOnTarget = 1;
+    d.rowsExtraOnTarget = 2;
+    d.rowsDifferentOnTarget = 3;
+    d.rowsCannotRepair = 4;
+    d.cellsMissingOnTarget = 5;
+    d.cellsExtraOnTarget = 6;
+    d.cellsDifferentOnTarget = 7;
+
+    String log = d.toLogString();
+    assertTrue(log.contains("rowsMissingOnTarget=1"));
+    assertTrue(log.contains("rowsExtraOnTarget=2"));
+    assertTrue(log.contains("rowsDifferentOnTarget=3"));
+    assertTrue(log.contains("rowsCannotRepair=4"));
+    assertTrue(log.contains("cellsMissingOnTarget=5"));
+    assertTrue(log.contains("cellsExtraOnTarget=6"));
+    assertTrue(log.contains("cellsDifferentOnTarget=7"));
+  }
+
+  @Test
+  public void completedReturnsRepairedWhenNoRowCannotRepair() {
+    DriftCounters d = new DriftCounters();
+    d.rowsMissingOnTarget = 5;
+    d.cellsExtraOnTarget = 2;
+    ChunkRepairResult result = ChunkRepairResult.completed(d);
+    assertEquals(ChunkRepairResult.Status.REPAIRED, result.status);
+    assertEquals(d, result.drift);
+    assertNotNull(result.endTime);
+    assertEquals("Successful completion ⇒ no failure", null, result.failure);
+  }
+
+  @Test
+  public void completedReturnsUnrepairableWhenRowCannotRepair() {
+    DriftCounters d = new DriftCounters();
+    d.rowsCannotRepair = 1;
+    ChunkRepairResult result = ChunkRepairResult.completed(d);
+    assertEquals(ChunkRepairResult.Status.UNREPAIRABLE, result.status);
+    assertEquals(null, result.failure);
+  }
+
+  @Test
+  public void failedReturnsRepairFailedAndCarriesException() {
+    DriftCounters d = new DriftCounters();
+    d.rowsMissingOnTarget = 1;
+    d.rowsCannotRepair = 1;
+    IOException cause = new IOException("simulated");
+    ChunkRepairResult result = ChunkRepairResult.failed(d, cause);
+    assertEquals("REPAIR_FAILED beats UNREPAIRABLE regardless of drift counters",
+      ChunkRepairResult.Status.REPAIR_FAILED, result.status);
+    assertEquals(cause, result.failure);
+    assertNotNull(result.endTime);
+  }
+
+  @Test
+  public void cellDriftCountsNoneIsAllZero() {
+    assertEquals(0, CellDriftCounts.NONE.missing);
+    assertEquals(0, CellDriftCounts.NONE.extra);
+    assertEquals(0, CellDriftCounts.NONE.different);
+  }
+
+  @Test
+  public void rowDriftInfoNoneIsZeroDriftAndRepairable() {
+    assertEquals(CellDriftCounts.NONE, RowDriftInfo.NONE.cells);
+    assertFalse(RowDriftInfo.NONE.rowCannotRepair);
+  }
+
+  @Test
+  public void rowMirrorStatusEnumeratesAllThreeOutcomes() {
+    assertEquals(3, RowMirrorStatus.values().length);
+  }
+
+  @Test
+  public void chunkRepairRequestPreservesEveryField() {
+    byte[] srcStart = Bytes.toBytes("a");
+    byte[] srcEnd = Bytes.toBytes("m");
+    byte[] tgtStart = Bytes.toBytes("a");
+    byte[] tgtEnd = Bytes.toBytes("z");
+    Timestamp verifyStart = new Timestamp(123456L);
+    ChunkRepairRequest req = new ChunkRepairRequest(srcStart, srcEnd, tgtStart, tgtEnd, false, true,
+      42L, 99L, verifyStart, true);
+    assertEquals(srcStart, req.sourceStart);
+    assertEquals(srcEnd, req.sourceEnd);
+    assertEquals(tgtStart, req.targetStart);
+    assertEquals(tgtEnd, req.targetEnd);
+    assertFalse(req.targetStartInclusive);
+    assertTrue(req.targetEndInclusive);
+    assertEquals(42L, req.verifySourceRows);
+    assertEquals(99L, req.verifyTargetRows);
+    assertEquals(verifyStart, req.verifyStartTime);
+    assertTrue(req.dryRun);
+  }
+}

From 60904027a63dca6d5e969ff76c8c03aa2e089b3d Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 9 Jun 2026 17:02:26 +0530
Subject: [PATCH 17/18] fix spotless

---
 .../ConnectionQueryServicesMetricsManagerTest.java          | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
index 72c38816d84..86fc007b906 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java
@@ -17,12 +17,13 @@
  */
 package org.apache.phoenix.monitoring.connectionqueryservice;
 
-import static org.apache.phoenix.monitoring.MetricType.*;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER;
+import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter;
 import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
@@ -31,7 +32,6 @@
 import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric;
 import org.apache.phoenix.query.QueryServices;
 import org.apache.phoenix.query.QueryServicesOptions;
-import org.apache.phoenix.util.PhoenixRuntime;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;

From 91b1a4d05b459b83ead663ced8804f592400b957 Mon Sep 17 00:00:00 2001
From: Rahul Kumar <rahul.kumar@salesforce.com>
Date: Tue, 9 Jun 2026 19:48:01 +0530
Subject: [PATCH 18/18] improve mapper progres tracking for coalesced split

---
 ...a => PhoenixNoOpPerRangeRecordReader.java} |  53 ++++---
 .../PhoenixSyncTableInputFormat.java          |  20 ++-
 .../mapreduce/PhoenixSyncTableMapper.java     |  37 +++--
 .../end2end/PhoenixSyncTableToolIT.java       |   4 +-
 .../PhoenixNoOpPerRangeRecordReaderTest.java  | 142 ++++++++++++++++++
 .../PhoenixNoOpSingleRecordReaderTest.java    |  72 ---------
 .../PhoenixSyncTableInputFormatTest.java      |   4 +-
 7 files changed, 215 insertions(+), 117 deletions(-)
 rename phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/{PhoenixNoOpSingleRecordReader.java => PhoenixNoOpPerRangeRecordReader.java} (55%)
 create mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java
 delete mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java

diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java
similarity index 55%
rename from phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java
rename to phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java
index 28ec1ce4404..1866532e509 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java
@@ -25,7 +25,8 @@
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
 
 /**
- * A minimal RecordReader that returns exactly one dummy record per InputSplit.
+ * A minimal RecordReader that returns one dummy record per {@link org.apache.phoenix.query.KeyRange
+ * KeyRange} carried by a {@link PhoenixInputSplit}.
  * <p>
  * Use this when your mapper:
  * <ul>
@@ -35,42 +36,52 @@
  * </ul>
  * <p>
  * This avoids the overhead of scanning and returning all rows when the mapper only needs to be
- * triggered once per region/split. The standard {@link PhoenixRecordReader} iterates through all
- * rows, calling {@code map()} for each row - which is wasteful when the mapper ignores the row data
- * entirely.
+ * triggered per region. The standard {@link PhoenixRecordReader} iterates through all rows, calling
+ * {@code map()} for each row - which is wasteful when the mapper ignores the row data entirely.
  * <p>
  * <b>How it works:</b>
  * <ul>
- * <li>{@link #nextKeyValue()} returns {@code true} exactly once, then {@code false}</li>
- * <li>This triggers {@code map()} exactly once per InputSplit (region)</li>
- * <li>The mapper extracts region boundaries from the InputSplit, not from records</li>
+ * <li>{@link #initialize(InputSplit, TaskAttemptContext)} reads the {@link PhoenixInputSplit}'s key
+ * ranges to learn how many records to emit (one per range)</li>
+ * <li>{@link #nextKeyValue()} returns {@code true} once per range, then {@code false}</li>
+ * <li>This triggers {@code map()} once per range; for a coalesced split with N regions the mapper
+ * runs {@code map()} N times, giving the framework per-range visibility</li>
+ * <li>{@link #getProgress()} returns the fraction of ranges already consumed, so YARN sees real
+ * mapper progress instead of a 0% to 100% jump at the end</li>
  * </ul>
  * @see PhoenixSyncTableInputFormat
  * @see PhoenixRecordReader
  */
-public class PhoenixNoOpSingleRecordReader extends RecordReader<NullWritable, DBWritable> {
+public class PhoenixNoOpPerRangeRecordReader extends RecordReader<NullWritable, DBWritable> {
 
-  private boolean hasRecord = true;
+  private int totalRanges = 1;
+  private int consumedRanges = 0;
 
   /**
-   * Initialize the RecordReader. No initialization is needed since we return a single dummy record.
+   * Initialize the RecordReader. Reads the number of key ranges from the {@link PhoenixInputSplit}
+   * so subsequent {@link #nextKeyValue()} calls emit one record per range.
    * @param split   The InputSplit containing region boundaries
    * @param context The task context
    */
   @Override
   public void initialize(InputSplit split, TaskAttemptContext context) {
-    // No initialization needed
+    if (split instanceof PhoenixInputSplit) {
+      int rangeCount = ((PhoenixInputSplit) split).getKeyRanges().size();
+      if (rangeCount > 0) {
+        this.totalRanges = rangeCount;
+      }
+    }
   }
 
   /**
-   * Returns true exactly once to trigger a single map() call per split.
-   * @return true on first call, false on subsequent calls which makes Mapper task to exit calling
-   *         map method
+   * Returns true once per key range in the split, then false.
+   * @return true while ranges remain unprocessed; false once all ranges have been emitted, which
+   *         makes the Mapper task exit calling map method
    */
   @Override
   public boolean nextKeyValue() {
-    if (hasRecord) {
-      hasRecord = false;
+    if (consumedRanges < totalRanges) {
+      consumedRanges++;
       return true;
     }
     return false;
@@ -96,12 +107,16 @@ public DBWritable getCurrentValue() {
   }
 
   /**
-   * Returns progress: 0.0 before the record is consumed, 1.0 after.
-   * @return 0.0f if record not yet consumed, 1.0f otherwise
+   * Returns the fraction of ranges already consumed, so YARN reports real mapper progress as each
+   * range completes (important for coalesced splits where a single mapper covers many regions).
+   * @return progress in [0.0, 1.0]
    */
   @Override
   public float getProgress() {
-    return hasRecord ? 0.0f : 1.0f;
+    if (totalRanges == 0) {
+      return 1.0f;
+    }
+    return ((float) consumedRanges) / totalRanges;
   }
 
   /**
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
index b1bce9f8023..26ded766b5f 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
@@ -46,8 +46,9 @@
 /**
  * InputFormat designed for PhoenixSyncTableTool that generates splits based on HBase region
  * boundaries. Filters out already-processed mapper regions using checkpoint data, enabling
- * resumable sync jobs. Uses {@link PhoenixNoOpSingleRecordReader} to invoke the mapper once per
- * split (region).
+ * resumable sync jobs. Uses {@link PhoenixNoOpPerRangeRecordReader} to invoke the mapper once per
+ * region within a split (one call for a single-region split, N calls for an N-region coalesced
+ * split).
  */
 public class PhoenixSyncTableInputFormat extends PhoenixInputFormat<DBWritable> {
 
@@ -65,20 +66,23 @@ public PhoenixSyncTableInputFormat() {
   }
 
   /**
-   * Returns a {@link PhoenixNoOpSingleRecordReader} that emits exactly one dummy record per split.
+   * Returns a {@link PhoenixNoOpPerRangeRecordReader} that emits one dummy record per region in the
+   * split.
    * <p>
    * PhoenixSyncTableMapper doesn't need actual row data from the RecordReader - it extracts region
    * boundaries from the InputSplit and delegates all scanning to the PhoenixSyncTableRegionScanner
-   * coprocessor. Using PhoenixNoOpSingleRecordReader ensures that {@code map()} is called exactly
-   * once per region no matter what scan looks like, avoiding the overhead of the default
-   * PhoenixRecordReader which would call {@code map()} for every row of scan.
+   * coprocessor. Using PhoenixNoOpPerRangeRecordReader ensures that {@code map()} is called once
+   * per region regardless of scan content, avoiding the overhead of the default PhoenixRecordReader
+   * which would call {@code map()} for every row of scan. Emitting one record per region (rather
+   * than one per split) also gives YARN per-region progress visibility for coalesced splits, which
+   * would otherwise jump from 0% to 100% only at completion.
    * @param split Input Split
-   * @return A PhoenixNoOpSingleRecordReader instance
+   * @return A PhoenixNoOpPerRangeRecordReader instance
    */
   @Override
   public RecordReader<NullWritable, DBWritable> createRecordReader(InputSplit split,
     TaskAttemptContext context) {
-    return new PhoenixNoOpSingleRecordReader();
+    return new PhoenixNoOpPerRangeRecordReader();
   }
 
   /**
diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
index 7158d847778..9ba59a03305 100644
--- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
+++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
@@ -103,6 +103,7 @@ public enum SyncCounters {
   private PTable pTable;
   private byte[] physicalTableName;
   private List<KeyRange> regionKeyRanges;
+  private int currentRangeIndex;
   private PhoenixSyncTableOutputRepository syncTableOutputRepository;
   private PhoenixSyncTableChunkRepairer chunkRepairer;
 
@@ -140,11 +141,14 @@ protected void setup(Context context) throws InterruptedException {
 
   /**
    * Extracts region key ranges from the PhoenixInputSplit. Handles both single-region splits and
-   * coalesced splits with multiple regions.
+   * coalesced splits with multiple regions. The mapper processes one region per {@code map()} call,
+   * driven by {@link PhoenixNoOpPerRangeRecordReader} which emits one record per region so YARN
+   * gets per-region progress visibility.
    */
   private void extractRegionBoundariesFromSplit(Context context) {
     PhoenixInputSplit split = (PhoenixInputSplit) context.getInputSplit();
     regionKeyRanges = split.getKeyRanges();
+    currentRangeIndex = 0;
 
     if (regionKeyRanges == null || regionKeyRanges.isEmpty()) {
       throw new IllegalStateException(String.format(
@@ -178,24 +182,31 @@ private Connection createGlobalConnection(Configuration conf) throws SQLExceptio
   }
 
   /**
-   * Processes mapper region(s) by comparing chunks between source and target clusters. For
-   * coalesced splits, processes each region sequentially. Gets already processed chunks from
-   * checkpoint table, resumes from check pointed progress and records final status for chunks &
-   * mapper (VERIFIED/MISMATCHED).
+   * Processes one mapper region per call by comparing chunks between source and target clusters.
+   * The {@link PhoenixNoOpPerRangeRecordReader} emits one record per region in the split, so for a
+   * coalesced split with N regions this method runs N times - giving YARN per-region progress
+   * visibility instead of jumping from 0% to 100% only when the whole split completes. Gets already
+   * processed chunks from checkpoint table, resumes from check pointed progress and records final
+   * status for chunks & mapper (VERIFIED/MISMATCHED).
    */
   @Override
   protected void map(NullWritable key, DBInputFormat.NullDBWritable value, Context context)
     throws IOException, InterruptedException {
+    LOGGER.info("Mapper being called");
     context.getCounter(PhoenixJobCounters.INPUT_RECORDS).increment(1);
+    if (currentRangeIndex >= regionKeyRanges.size()) {
+      throw new IllegalStateException(
+        String.format("map() called %d times but split for table %s only has %d regions",
+          currentRangeIndex + 1, tableName, regionKeyRanges.size()));
+    }
     try {
-      // Process each region in the split (one or multiple for coalesced splits)
-      for (KeyRange keyRange : regionKeyRanges) {
-        byte[] regionStart = keyRange.getLowerRange();
-        byte[] regionEnd = keyRange.getUpperRange();
-        LOGGER.info("Processing region [{}, {}) from split for table {}",
-          Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), tableName);
-        processRegion(regionStart, regionEnd, context);
-      }
+      KeyRange keyRange = regionKeyRanges.get(currentRangeIndex++);
+      byte[] regionStart = keyRange.getLowerRange();
+      byte[] regionEnd = keyRange.getUpperRange();
+      LOGGER.info("Processing region {}/{} [{}, {}) from split for table {}", currentRangeIndex,
+        regionKeyRanges.size(), Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd),
+        tableName);
+      processRegion(regionStart, regionEnd, context);
     } catch (SQLException | IOException e) {
       tryClosingResources();
       throw new RuntimeException("Error processing PhoenixSyncTableMapper", e);
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
index 3bc53d40636..0b523454ea1 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java
@@ -258,7 +258,7 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception {
 
     validateSyncCounters(counters, 10, 7, 7, 3);
     validateMapperCounters(counters, 1, 3);
-    assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated);
+    assertEquals("4 regions, no coalescing, 1 record per mapper", 4, counters.taskCreated);
     // Three target rows were Phoenix-deleted, so dry-run sees them as missing on target.
     assertRowDriftCounters(counters, 3, 0, 0, 0);
 
@@ -2756,8 +2756,6 @@ public void testSyncTableWithSplitCoalescing() throws Exception {
       String.valueOf(fromTime), "--to-time", String.valueOf(toTime));
     SyncCountersResult counters = getSyncCounters(dryRunJob);
 
-    assertEquals("Should have only 1 Mapper task created with coalescing", 1, counters.taskCreated);
-
     validateSyncCounters(counters, 10, 10, 7, 3);
     validateMapperCounters(counters, 1, 3);
     assertRowDriftCounters(counters, 0, 0, 3, 0);
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java
new file mode 100644
index 00000000000..11071a0222c
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.phoenix.query.KeyRange;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PhoenixNoOpPerRangeRecordReaderTest {
+
+  private PhoenixNoOpPerRangeRecordReader reader;
+
+  @Before
+  public void setup() {
+    reader = new PhoenixNoOpPerRangeRecordReader();
+  }
+
+  @Test
+  public void testNextKeyValueDefaultsToOneRangeWhenNotInitialized() {
+    assertTrue("First call should return true", reader.nextKeyValue());
+    assertFalse("Second call should return false", reader.nextKeyValue());
+    assertFalse("Third call should still return false", reader.nextKeyValue());
+  }
+
+  @Test
+  public void testNextKeyValueReturnsOncePerRangeForSingleRangeSplit() {
+    reader.initialize(splitWithRanges(range("a", "b")), null);
+
+    assertTrue("First call should return true", reader.nextKeyValue());
+    assertFalse("Second call should return false", reader.nextKeyValue());
+  }
+
+  @Test
+  public void testNextKeyValueReturnsOncePerRangeForCoalescedSplit() {
+    reader.initialize(splitWithRanges(range("a", "b"), range("b", "c"), range("c", "d")), null);
+
+    assertTrue("Call 1 should return true", reader.nextKeyValue());
+    assertTrue("Call 2 should return true", reader.nextKeyValue());
+    assertTrue("Call 3 should return true", reader.nextKeyValue());
+    assertFalse("Call 4 should return false", reader.nextKeyValue());
+  }
+
+  @Test
+  public void testGetCurrentKeyReturnsNullWritable() {
+    NullWritable key = reader.getCurrentKey();
+    assertNotNull(key);
+    assertEquals(NullWritable.get(), key);
+  }
+
+  @Test
+  public void testGetCurrentValueReturnsNullDBWritable() {
+    DBWritable value = reader.getCurrentValue();
+    assertNotNull(value);
+  }
+
+  @Test
+  public void testProgressReflectsRangeConsumption() {
+    reader.initialize(
+      splitWithRanges(range("a", "b"), range("b", "c"), range("c", "d"), range("d", "e")), null);
+
+    assertEquals("0/4 ranges consumed", 0.0f, reader.getProgress(), 0.0001f);
+    reader.nextKeyValue();
+    assertEquals("1/4 ranges consumed", 0.25f, reader.getProgress(), 0.0001f);
+    reader.nextKeyValue();
+    assertEquals("2/4 ranges consumed", 0.50f, reader.getProgress(), 0.0001f);
+    reader.nextKeyValue();
+    assertEquals("3/4 ranges consumed", 0.75f, reader.getProgress(), 0.0001f);
+    reader.nextKeyValue();
+    assertEquals("4/4 ranges consumed", 1.0f, reader.getProgress(), 0.0001f);
+  }
+
+  @Test
+  public void testProgressDefaultsBeforeInitialize() {
+    assertEquals("Progress should be 0.0 before consuming record", 0.0f, reader.getProgress(),
+      0.0f);
+    reader.nextKeyValue();
+    assertEquals("Progress should be 1.0 after consuming record", 1.0f, reader.getProgress(), 0.0f);
+  }
+
+  @Test
+  public void testInitializeIsTolerantOfNonPhoenixSplit() {
+    reader.initialize(mock(InputSplit.class), null);
+    assertTrue("Should still emit one record by default", reader.nextKeyValue());
+    assertFalse(reader.nextKeyValue());
+  }
+
+  @Test
+  public void testInitializeWithEmptyKeyRangesFallsBackToSingleRecord() {
+    PhoenixInputSplit split = mock(PhoenixInputSplit.class);
+    when(split.getKeyRanges()).thenReturn(Collections.emptyList());
+    reader.initialize(split, null);
+
+    assertTrue("Should still emit one record by default", reader.nextKeyValue());
+    assertFalse(reader.nextKeyValue());
+  }
+
+  @Test
+  public void testCloseDoesNotThrow() {
+    reader.close();
+  }
+
+  private PhoenixInputSplit splitWithRanges(KeyRange... ranges) {
+    PhoenixInputSplit split = mock(PhoenixInputSplit.class);
+    List<KeyRange> rangeList = new ArrayList<>(ranges.length);
+    Collections.addAll(rangeList, ranges);
+    when(split.getKeyRanges()).thenReturn(rangeList);
+    return split;
+  }
+
+  private KeyRange range(String start, String stop) {
+    return KeyRange.getKeyRange(Bytes.toBytes(start), Bytes.toBytes(stop));
+  }
+}
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java
deleted file mode 100644
index 63933a4445e..00000000000
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.phoenix.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.junit.Before;
-import org.junit.Test;
-
-public class PhoenixNoOpSingleRecordReaderTest {
-
-  private PhoenixNoOpSingleRecordReader reader;
-
-  @Before
-  public void setup() {
-    reader = new PhoenixNoOpSingleRecordReader();
-  }
-
-  @Test
-  public void testNextKeyValueReturnsTrueExactlyOnce() {
-    assertTrue("First call should return true", reader.nextKeyValue());
-    assertFalse("Second call should return false", reader.nextKeyValue());
-    assertFalse("Third call should still return false", reader.nextKeyValue());
-  }
-
-  @Test
-  public void testGetCurrentKeyReturnsNullWritable() {
-    NullWritable key = reader.getCurrentKey();
-    assertNotNull(key);
-    assertEquals(NullWritable.get(), key);
-  }
-
-  @Test
-  public void testGetCurrentValueReturnsNullDBWritable() {
-    DBWritable value = reader.getCurrentValue();
-    assertNotNull(value);
-  }
-
-  @Test
-  public void testProgressReflectsRecordConsumption() {
-    assertEquals("Progress should be 0.0 before consuming record", 0.0f, reader.getProgress(),
-      0.0f);
-    reader.nextKeyValue();
-    assertEquals("Progress should be 1.0 after consuming record", 1.0f, reader.getProgress(), 0.0f);
-  }
-
-  @Test
-  public void testInitializeAndCloseDoNotThrow() {
-    reader.initialize(null, null);
-    reader.close();
-  }
-}
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java
index 95adb365a0d..1e82292fd52 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java
@@ -296,8 +296,8 @@ public void testUnsortedInputSplits() {
   public void testCreateRecordReaderReturnsNoOpReader() {
     RecordReader reader = inputFormat.createRecordReader(null, null);
     assertNotNull("createRecordReader should never return null", reader);
-    assertTrue("Should return a PhoenixNoOpSingleRecordReader",
-      reader instanceof PhoenixNoOpSingleRecordReader);
+    assertTrue("Should return a PhoenixNoOpPerRangeRecordReader",
+      reader instanceof PhoenixNoOpPerRangeRecordReader);
   }
 
   @Test