From 3c54c863e617a9bd89fe52f747f319c469389cca Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Fri, 1 Aug 2025 20:52:57 +0530 Subject: [PATCH 01/18] connection creation time --- .../apache/phoenix/jdbc/PhoenixDriver.java | 4 +- .../phoenix/jdbc/PhoenixEmbeddedDriver.java | 69 ++++++++++++++----- .../apache/phoenix/monitoring/MetricType.java | 2 + .../ConnectionQueryServicesMetrics.java | 9 ++- .../ConnectionQueryServicesMetricsIT.java | 33 ++++++--- ...ectionQueryServicesMetricsManagerTest.java | 27 +++++++- 6 files changed, 107 insertions(+), 37 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java index 953bbc5bd57..f3784183555 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java @@ -41,6 +41,7 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesImpl; import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.util.EnvironmentEdgeManager; import org.apache.phoenix.util.PropertiesUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -205,6 +206,7 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { + long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment(); if (!acceptsURL(url)) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); @@ -213,7 +215,7 @@ public Connection connect(String url, Properties info) throws SQLException { lockInterruptibly(LockMode.READ); try { checkClosed(); - return createConnection(url, info); + return createConnection(url, info, connectionStartTime); } catch (SQLException sqlException) { if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java index b5bbe2ea552..ea1d416fd8a 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java @@ -17,6 +17,8 @@ */ package org.apache.phoenix.jdbc; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; +import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM; import java.sql.Connection; @@ -24,17 +26,18 @@ import java.sql.DriverPropertyInfo; import java.sql.SQLException; import java.sql.SQLFeatureNotSupportedException; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Properties; import java.util.logging.Logger; import javax.annotation.concurrent.Immutable; import org.apache.phoenix.coprocessorclient.MetaDataProtocol; +import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; +import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager; import org.apache.phoenix.query.ConnectionQueryServices; import org.apache.phoenix.query.QueryServices; -import org.apache.phoenix.util.PhoenixRuntime; -import org.apache.phoenix.util.PropertiesUtil; -import org.apache.phoenix.util.ReadOnlyProps; -import org.apache.phoenix.util.SQLCloseable; +import org.apache.phoenix.util.*; import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap; @@ -119,31 +122,59 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { + long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); if (!acceptsURL(url)) { return null; } - return createConnection(url, info); + return createConnection(url, info, connectionStartTime); } - protected final Connection createConnection(String url, Properties info) throws SQLException { + protected final Connection createConnection(String url, Properties info, + long connectionCreationTime) throws SQLException { Properties augmentedInfo = PropertiesUtil.deepCopy(info); augmentedInfo.putAll(getDefaultProps().asMap()); - if (url.contains("|")) { - // Get HAURLInfo to pass it to connection creation - HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); - // High availability connection using two clusters - Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); - if (haGroup.isPresent()) { - return haGroup.get().connect(augmentedInfo, haurlInfo); - } else { - // If empty HA group is returned, fall back to single cluster. - url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( - () -> new SQLException("HA group can not be initialized, fallback to single cluster")); + Connection connection = null; + try { + if (url.contains("|")) { + // Get HAURLInfo to pass it to connection creation + HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); + // High availability connection using two clusters + Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); + if (haGroup.isPresent()) { + connection = haGroup.get().connect(augmentedInfo, haurlInfo); + setPhoenixConnectionTime(connectionCreationTime, connection); + return connection; + } else { + // If empty HA group is returned, fall back to single cluster. + url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( + () -> new SQLException( + "HA group can not be initialized, fallback to single cluster")); + } + } + ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); + connection = cqs.connect(url, augmentedInfo); + setPhoenixConnectionTime(connectionCreationTime, connection); + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + if (!metrics.isEmpty()) { + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + } + return connection; + } catch (SQLException e) { + if (connection != null) { + connection.close(); } + throw e; } - ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); - return cqs.connect(url, augmentedInfo); + } + + private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) { + String connectionQueryServiceName = + ((PhoenixConnection) connection).getQueryServices().getConfiguration() + .get(QUERY_SERVICES_NAME); + ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, + PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime); } /** diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java index 8ee8de69718..aaf6883223d 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java @@ -228,6 +228,8 @@ public enum MetricType { PHOENIX_CONNECTIONS_FAILED_COUNTER("cf", "Number of client Phoenix Connections Failed to open" + ", not including throttled connections", LogLevel.OFF, PLong.INSTANCE), + PHOENIX_CONNECTION_CREATION_TIME_MS("cct", + "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE), CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.", LogLevel.DEBUG, PLong.INSTANCE), CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.", diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index 575d38530eb..8c3ac719d27 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -17,10 +17,6 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -30,6 +26,8 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl; import org.apache.phoenix.monitoring.MetricType; +import static org.apache.phoenix.monitoring.MetricType.*; + /** * Class for Connection Query Service Metrics. */ @@ -42,7 +40,8 @@ public enum QueryServiceMetrics { CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER( OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER), CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER( - PHOENIX_CONNECTIONS_THROTTLED_COUNTER); + PHOENIX_CONNECTIONS_THROTTLED_COUNTER), + CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS); private MetricType metricType; private ConnectionQueryServicesMetric metric; diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index 54d53afdf6d..d9f752bc17c 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -17,21 +17,15 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.*; import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED; import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.sql.Statement; +import java.sql.*; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; @@ -222,6 +216,27 @@ public void testMultipleCQSIMetricsInParallel() throws Exception { assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get()); } + @Test + public void testConnectionTime() { + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); + + // Find connection creation time metric + boolean foundMetric = false; + for (ConnectionQueryServicesMetric metric : serviceMetrics) { + System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); + if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { + assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); + foundMetric = true; + break; + } + } + assertTrue("Connection creation time metric not found", foundMetric); + + } + private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception { String CREATE_TABLE_DDL = "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)"; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java index 86fc007b906..039b7be051f 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java @@ -17,13 +17,12 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.*; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.List; @@ -32,6 +31,7 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.util.PhoenixRuntime; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -101,6 +101,27 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() { } } + @Test + public void testConnectionTime() { + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); + + // Find connection creation time metric + boolean foundMetric = false; + for (ConnectionQueryServicesMetric metric : serviceMetrics) { + System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); + if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { + assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); + foundMetric = true; + break; + } + } + assertTrue("Connection creation time metric not found", foundMetric); + + } + private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) { ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, OPEN_PHOENIX_CONNECTIONS_COUNTER, counter); From c97f7e024c9faf8ebcac38a37d0749572a795be8 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Fri, 1 Aug 2025 20:53:22 +0530 Subject: [PATCH 02/18] Revert "connection creation time" This reverts commit 3c54c863e617a9bd89fe52f747f319c469389cca. --- .../apache/phoenix/jdbc/PhoenixDriver.java | 4 +- .../phoenix/jdbc/PhoenixEmbeddedDriver.java | 69 +++++-------------- .../apache/phoenix/monitoring/MetricType.java | 2 - .../ConnectionQueryServicesMetrics.java | 9 +-- .../ConnectionQueryServicesMetricsIT.java | 33 +++------ ...ectionQueryServicesMetricsManagerTest.java | 27 +------- 6 files changed, 37 insertions(+), 107 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java index f3784183555..953bbc5bd57 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java @@ -41,7 +41,6 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesImpl; import org.apache.phoenix.query.QueryServicesOptions; -import org.apache.phoenix.util.EnvironmentEdgeManager; import org.apache.phoenix.util.PropertiesUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,7 +205,6 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { - long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment(); if (!acceptsURL(url)) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); @@ -215,7 +213,7 @@ public Connection connect(String url, Properties info) throws SQLException { lockInterruptibly(LockMode.READ); try { checkClosed(); - return createConnection(url, info, connectionStartTime); + return createConnection(url, info); } catch (SQLException sqlException) { if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java index ea1d416fd8a..b5bbe2ea552 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java @@ -17,8 +17,6 @@ */ package org.apache.phoenix.jdbc; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; -import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM; import java.sql.Connection; @@ -26,18 +24,17 @@ import java.sql.DriverPropertyInfo; import java.sql.SQLException; import java.sql.SQLFeatureNotSupportedException; -import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Properties; import java.util.logging.Logger; import javax.annotation.concurrent.Immutable; import org.apache.phoenix.coprocessorclient.MetaDataProtocol; -import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; -import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager; import org.apache.phoenix.query.ConnectionQueryServices; import org.apache.phoenix.query.QueryServices; -import org.apache.phoenix.util.*; +import org.apache.phoenix.util.PhoenixRuntime; +import org.apache.phoenix.util.PropertiesUtil; +import org.apache.phoenix.util.ReadOnlyProps; +import org.apache.phoenix.util.SQLCloseable; import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap; @@ -122,59 +119,31 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { - long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); if (!acceptsURL(url)) { return null; } - return createConnection(url, info, connectionStartTime); + return createConnection(url, info); } - protected final Connection createConnection(String url, Properties info, - long connectionCreationTime) throws SQLException { + protected final Connection createConnection(String url, Properties info) throws SQLException { Properties augmentedInfo = PropertiesUtil.deepCopy(info); augmentedInfo.putAll(getDefaultProps().asMap()); - Connection connection = null; - try { - if (url.contains("|")) { - // Get HAURLInfo to pass it to connection creation - HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); - // High availability connection using two clusters - Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); - if (haGroup.isPresent()) { - connection = haGroup.get().connect(augmentedInfo, haurlInfo); - setPhoenixConnectionTime(connectionCreationTime, connection); - return connection; - } else { - // If empty HA group is returned, fall back to single cluster. - url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( - () -> new SQLException( - "HA group can not be initialized, fallback to single cluster")); - } - } - ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); - connection = cqs.connect(url, augmentedInfo); - setPhoenixConnectionTime(connectionCreationTime, connection); - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - if (!metrics.isEmpty()) { - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - } - return connection; - } catch (SQLException e) { - if (connection != null) { - connection.close(); + if (url.contains("|")) { + // Get HAURLInfo to pass it to connection creation + HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); + // High availability connection using two clusters + Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); + if (haGroup.isPresent()) { + return haGroup.get().connect(augmentedInfo, haurlInfo); + } else { + // If empty HA group is returned, fall back to single cluster. + url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( + () -> new SQLException("HA group can not be initialized, fallback to single cluster")); } - throw e; } - } - - private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) { - String connectionQueryServiceName = - ((PhoenixConnection) connection).getQueryServices().getConfiguration() - .get(QUERY_SERVICES_NAME); - ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, - PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime); + ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); + return cqs.connect(url, augmentedInfo); } /** diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java index aaf6883223d..8ee8de69718 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java @@ -228,8 +228,6 @@ public enum MetricType { PHOENIX_CONNECTIONS_FAILED_COUNTER("cf", "Number of client Phoenix Connections Failed to open" + ", not including throttled connections", LogLevel.OFF, PLong.INSTANCE), - PHOENIX_CONNECTION_CREATION_TIME_MS("cct", - "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE), CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.", LogLevel.DEBUG, PLong.INSTANCE), CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.", diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index 8c3ac719d27..575d38530eb 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -17,6 +17,10 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; +import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -26,8 +30,6 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl; import org.apache.phoenix.monitoring.MetricType; -import static org.apache.phoenix.monitoring.MetricType.*; - /** * Class for Connection Query Service Metrics. */ @@ -40,8 +42,7 @@ public enum QueryServiceMetrics { CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER( OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER), CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER( - PHOENIX_CONNECTIONS_THROTTLED_COUNTER), - CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS); + PHOENIX_CONNECTIONS_THROTTLED_COUNTER); private MetricType metricType; private ConnectionQueryServicesMetric metric; diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index d9f752bc17c..54d53afdf6d 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -17,15 +17,21 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.*; +import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED; import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; -import java.sql.*; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; @@ -216,27 +222,6 @@ public void testMultipleCQSIMetricsInParallel() throws Exception { assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get()); } - @Test - public void testConnectionTime() { - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); - - // Find connection creation time metric - boolean foundMetric = false; - for (ConnectionQueryServicesMetric metric : serviceMetrics) { - System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); - if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { - assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); - foundMetric = true; - break; - } - } - assertTrue("Connection creation time metric not found", foundMetric); - - } - private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception { String CREATE_TABLE_DDL = "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)"; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java index 039b7be051f..86fc007b906 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java @@ -17,12 +17,13 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.*; +import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.List; @@ -31,7 +32,6 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesOptions; -import org.apache.phoenix.util.PhoenixRuntime; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -101,27 +101,6 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() { } } - @Test - public void testConnectionTime() { - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); - - // Find connection creation time metric - boolean foundMetric = false; - for (ConnectionQueryServicesMetric metric : serviceMetrics) { - System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); - if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { - assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); - foundMetric = true; - break; - } - } - assertTrue("Connection creation time metric not found", foundMetric); - - } - private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) { ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, OPEN_PHOENIX_CONNECTIONS_COUNTER, counter); From 53e9a3bfca8dd7e265846cb6080d3ea70058b5be Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Fri, 1 Aug 2025 20:54:52 +0530 Subject: [PATCH 03/18] Revert "Revert "connection creation time"" This reverts commit c97f7e024c9faf8ebcac38a37d0749572a795be8. --- .../apache/phoenix/jdbc/PhoenixDriver.java | 4 +- .../phoenix/jdbc/PhoenixEmbeddedDriver.java | 69 ++++++++++++++----- .../apache/phoenix/monitoring/MetricType.java | 2 + .../ConnectionQueryServicesMetrics.java | 9 ++- .../ConnectionQueryServicesMetricsIT.java | 33 ++++++--- ...ectionQueryServicesMetricsManagerTest.java | 27 +++++++- 6 files changed, 107 insertions(+), 37 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java index 953bbc5bd57..f3784183555 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java @@ -41,6 +41,7 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesImpl; import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.util.EnvironmentEdgeManager; import org.apache.phoenix.util.PropertiesUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -205,6 +206,7 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { + long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment(); if (!acceptsURL(url)) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); @@ -213,7 +215,7 @@ public Connection connect(String url, Properties info) throws SQLException { lockInterruptibly(LockMode.READ); try { checkClosed(); - return createConnection(url, info); + return createConnection(url, info, connectionStartTime); } catch (SQLException sqlException) { if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java index b5bbe2ea552..ea1d416fd8a 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java @@ -17,6 +17,8 @@ */ package org.apache.phoenix.jdbc; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; +import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM; import java.sql.Connection; @@ -24,17 +26,18 @@ import java.sql.DriverPropertyInfo; import java.sql.SQLException; import java.sql.SQLFeatureNotSupportedException; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Properties; import java.util.logging.Logger; import javax.annotation.concurrent.Immutable; import org.apache.phoenix.coprocessorclient.MetaDataProtocol; +import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; +import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager; import org.apache.phoenix.query.ConnectionQueryServices; import org.apache.phoenix.query.QueryServices; -import org.apache.phoenix.util.PhoenixRuntime; -import org.apache.phoenix.util.PropertiesUtil; -import org.apache.phoenix.util.ReadOnlyProps; -import org.apache.phoenix.util.SQLCloseable; +import org.apache.phoenix.util.*; import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap; @@ -119,31 +122,59 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { + long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); if (!acceptsURL(url)) { return null; } - return createConnection(url, info); + return createConnection(url, info, connectionStartTime); } - protected final Connection createConnection(String url, Properties info) throws SQLException { + protected final Connection createConnection(String url, Properties info, + long connectionCreationTime) throws SQLException { Properties augmentedInfo = PropertiesUtil.deepCopy(info); augmentedInfo.putAll(getDefaultProps().asMap()); - if (url.contains("|")) { - // Get HAURLInfo to pass it to connection creation - HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); - // High availability connection using two clusters - Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); - if (haGroup.isPresent()) { - return haGroup.get().connect(augmentedInfo, haurlInfo); - } else { - // If empty HA group is returned, fall back to single cluster. - url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( - () -> new SQLException("HA group can not be initialized, fallback to single cluster")); + Connection connection = null; + try { + if (url.contains("|")) { + // Get HAURLInfo to pass it to connection creation + HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); + // High availability connection using two clusters + Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); + if (haGroup.isPresent()) { + connection = haGroup.get().connect(augmentedInfo, haurlInfo); + setPhoenixConnectionTime(connectionCreationTime, connection); + return connection; + } else { + // If empty HA group is returned, fall back to single cluster. + url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( + () -> new SQLException( + "HA group can not be initialized, fallback to single cluster")); + } + } + ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); + connection = cqs.connect(url, augmentedInfo); + setPhoenixConnectionTime(connectionCreationTime, connection); + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + if (!metrics.isEmpty()) { + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + } + return connection; + } catch (SQLException e) { + if (connection != null) { + connection.close(); } + throw e; } - ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); - return cqs.connect(url, augmentedInfo); + } + + private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) { + String connectionQueryServiceName = + ((PhoenixConnection) connection).getQueryServices().getConfiguration() + .get(QUERY_SERVICES_NAME); + ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, + PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime); } /** diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java index 8ee8de69718..aaf6883223d 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java @@ -228,6 +228,8 @@ public enum MetricType { PHOENIX_CONNECTIONS_FAILED_COUNTER("cf", "Number of client Phoenix Connections Failed to open" + ", not including throttled connections", LogLevel.OFF, PLong.INSTANCE), + PHOENIX_CONNECTION_CREATION_TIME_MS("cct", + "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE), CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.", LogLevel.DEBUG, PLong.INSTANCE), CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.", diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index 575d38530eb..8c3ac719d27 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -17,10 +17,6 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -30,6 +26,8 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl; import org.apache.phoenix.monitoring.MetricType; +import static org.apache.phoenix.monitoring.MetricType.*; + /** * Class for Connection Query Service Metrics. */ @@ -42,7 +40,8 @@ public enum QueryServiceMetrics { CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER( OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER), CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER( - PHOENIX_CONNECTIONS_THROTTLED_COUNTER); + PHOENIX_CONNECTIONS_THROTTLED_COUNTER), + CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS); private MetricType metricType; private ConnectionQueryServicesMetric metric; diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index 54d53afdf6d..d9f752bc17c 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -17,21 +17,15 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.*; import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED; import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.sql.Statement; +import java.sql.*; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; @@ -222,6 +216,27 @@ public void testMultipleCQSIMetricsInParallel() throws Exception { assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get()); } + @Test + public void testConnectionTime() { + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); + + // Find connection creation time metric + boolean foundMetric = false; + for (ConnectionQueryServicesMetric metric : serviceMetrics) { + System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); + if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { + assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); + foundMetric = true; + break; + } + } + assertTrue("Connection creation time metric not found", foundMetric); + + } + private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception { String CREATE_TABLE_DDL = "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)"; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java index 86fc007b906..039b7be051f 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java @@ -17,13 +17,12 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.*; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.List; @@ -32,6 +31,7 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.util.PhoenixRuntime; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -101,6 +101,27 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() { } } + @Test + public void testConnectionTime() { + Map> metrics = + ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); + List serviceMetrics = metrics.get("DEFAULT_CQSN"); + assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); + + // Find connection creation time metric + boolean foundMetric = false; + for (ConnectionQueryServicesMetric metric : serviceMetrics) { + System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); + if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { + assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); + foundMetric = true; + break; + } + } + assertTrue("Connection creation time metric not found", foundMetric); + + } + private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) { ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, OPEN_PHOENIX_CONNECTIONS_COUNTER, counter); From fd464043167ffe1a007f495f5c3ecb72ad62232a Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 6 Jan 2026 14:32:09 +0530 Subject: [PATCH 04/18] ITs changes --- .../org/apache/phoenix/end2end/QueryIT.java | 579 +++++++++++++++++- .../phoenix/compile/QueryCompilerTest.java | 4 +- .../phoenix/compile/WhereOptimizerTest.java | 22 +- pom.xml | 2 +- 4 files changed, 587 insertions(+), 20 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java index 1ce36c241a7..b57a58fecf7 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java @@ -27,15 +27,17 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; +import java.sql.*; +import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; +import java.util.List; import java.util.Properties; +import java.util.Set; import org.apache.phoenix.exception.SQLExceptionCode; +import org.apache.phoenix.thirdparty.com.google.common.collect.Lists; import org.apache.phoenix.util.PropertiesUtil; +import org.apache.phoenix.util.QueryUtil; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runners.Parameterized.Parameters; @@ -48,7 +50,14 @@ public class QueryIT extends BaseQueryIT { @Parameters(name = "QueryIT_{index}") // name is used by failsafe as file name in reports public static synchronized Collection data() { - return BaseQueryIT.allIndexes(); + // Return only one parameter set to run a single iteration + // Parameters: indexDDL, columnEncoded, keepDeletedCells + List testCases = Lists.newArrayList(); + testCases.add(new Object[] { NO_INDEX, false, false }); // No index, no column encoding + return testCases; + + // Original code that runs all iterations: + // return BaseQueryIT.allIndexes(); } public QueryIT(String indexDDL, boolean columnEncoded, boolean keepDeletedCells) { @@ -169,4 +178,562 @@ public void testDistinctLimitScan() throws Exception { conn.close(); } } + + @Test + public void testExplosion() throws Exception { + String tableName = generateUniqueName(); + String indexName = generateUniqueName(); + try (Connection conn = DriverManager.getConnection(getUrl()); + Statement stmt = conn.createStatement()) { + stmt.execute("create table " + tableName + " (id varchar primary key, ts timestamp)"); + // stmt.execute("create table " + tableName + "(id varchar NOT NULL, ts timestamp NOT NULL CONSTRAINT PK PRIMARY KEY (id, ts DESC))"); + stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)"); + + String query = "select id, ts from " + tableName + + " where ts >= TIMESTAMP '2023-02-23 13:30:00' and ts < TIMESTAMP '2023-02-23 13:40:00'"; + ResultSet rs = stmt.executeQuery("EXPLAIN " + query); + String explainPlan = QueryUtil.getExplainPlan(rs); + System.out.println("EXPLAIN PLAN: " + explainPlan); + PreparedStatement statement = conn.prepareStatement(query); + rs = statement.executeQuery(); + int rowCount = 0; + while (rs.next()) { + rowCount++; + } + // ResultSet rs = stmt.executeQuery("EXPLAIN " + query); + // String explainPlan = QueryUtil.getExplainPlan(rs); + // assertEquals( + // "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName + // + " [~1,677,159,600,000] - [~1,677,159,000,000]\n SERVER FILTER BY FIRST KEY ONLY", + // explainPlan); + } + } + + + + @Test + public void testKeyExplosion() throws Exception { + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + try { + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER VARCHAR NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + // Insert first row + ps.setString(1, "id_1"); + ps.setString(2, "20251012"); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + // Insert second row + ps.setString(1, "id_2"); + ps.setString(2, "20250912"); + ps.setString(3, "entity_2"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setString(2, "20250913"); + ps.setString(3, "entity_3"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + +// +// ps.setString(1, "id_1"); +// ps.setString(2, "20250910"); +// ps.setString(3, "entity_3"); +// ps.setString(4, "user22"); +// ps.setString(5, "data22"); +// ps.executeUpdate(); +// +// ps.setString(1, "id_3"); +// ps.setString(2, "20250911"); +// ps.setString(3, "entity_11"); +// ps.setString(4, "user21"); +// ps.setString(5, "data21"); +// ps.executeUpdate(); + + conn.commit(); + + // Run the query with IN clause + String query = "SELECT * FROM " + testTable + + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', '20251012', 'entity_1'), ('id_2', '20250912', 'entity_2'))"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Verify we get exactly 2 rows back + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + String number = rs.getString("NUMBER"); + String entityId = rs.getString("ENTITY_ID"); + + // Verify the data matches what we inserted + if (rowCount == 1) { + assertEquals("id_1", id); + assertEquals("20251012", number); + assertEquals("entity_1", entityId); + } else if (rowCount == 2) { + assertEquals("id_2", id); + assertEquals("20250912", number); + assertEquals("entity_2", entityId); + } + } + + assertEquals("Expected 2 rows", 2, rowCount); + } finally { + conn.close(); + } + } + + @Test + public void testKeyExplosionInteger() throws Exception { + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER INTEGER NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + // Insert first row + ps.setString(1, "id_1"); + ps.setInt(2, 20251012); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + // Insert second row + ps.setString(1, "id_2"); + ps.setInt(2, 20250912); + ps.setString(3, "entity_2"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setInt(2, 20250910); + ps.setString(3, "entity_3"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + conn.commit(); + + // Run the query with IN clause + String query = "SELECT * FROM " + testTable + + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', 20251012, 'entity_1'), ('id_2', 20250912, 'entity_2'))"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Verify we get exactly 2 rows back + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + String number = rs.getString("NUMBER"); + String entityId = rs.getString("ENTITY_ID"); + + // Verify the data matches what we inserted + if (rowCount == 1) { + assertEquals("id_1", id); + assertEquals("20251012", number); + assertEquals("entity_1", entityId); + } else if (rowCount == 2) { + assertEquals("id_2", id); + assertEquals("20250912", number); + assertEquals("entity_2", entityId); + } + } + + assertEquals("Expected 2 rows", 2, rowCount); + } + + + @Test + public void testExplosionIntegerIndex() throws Exception { + String tableName = generateUniqueName(); + String indexName = generateUniqueName(); + System.out.println(tableName); + System.out.println(indexName); + try (Connection conn = DriverManager.getConnection(getUrl()); + Statement stmt = conn.createStatement()) { + stmt.execute("create table " + tableName + " (id varchar primary key, ts integer)"); + stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)"); + + // Insert test data + String upsert = "UPSERT INTO " + tableName + + " (id, ts) VALUES (?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + // Insert first row + ps.setString(1, "id_1"); + ps.setInt(2, 20251012); + ps.executeUpdate(); + + // Insert second row + ps.setString(1, "id_2"); + ps.setInt(2, 20250912); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setInt(2, 20250910); + ps.executeUpdate(); + conn.commit(); + + String query = "select * from " + tableName + + " where ts > 20250911"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + String explainPlan = QueryUtil.getExplainPlan(rs); + System.out.println("EXPLAIN PLAN: " + explainPlan); + statement = conn.prepareStatement(query); + rs = statement.executeQuery(); + int rowCount = 0; + while (rs.next()) { + rowCount++; + } + // ResultSet rs = stmt.executeQuery("EXPLAIN " + query); + // String explainPlan = QueryUtil.getExplainPlan(rs); + // assertEquals( + // "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName + // + " [~1,677,159,600,000] - [~1,677,159,000,000]\n SERVER FILTER BY FIRST KEY ONLY", + // explainPlan); + } + } + + @Test + public void testKeyExplosionPartialCompositeIn() throws Exception { + // Variation 6: Partial composite key IN + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + try { + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER VARCHAR NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + ps.setString(1, "id_1"); + ps.setString(2, "20251012"); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + ps.setString(1, "id_2"); + ps.setString(2, "20250912"); + ps.setString(3, "entity_2"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setString(2, "20250913"); + ps.setString(3, "entity_3"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + + ps.setString(1, "id_1"); + ps.setString(2, "20251012"); + ps.setString(3, "entity_1b"); + ps.setString(4, "user4"); + ps.setString(5, "data4"); + ps.executeUpdate(); + + conn.commit(); + + // Run query with partial composite key IN (first two columns only) + String query = "SELECT * FROM " + testTable + + " WHERE (ID, NUMBER) IN (('id_1', '20251012'), ('id_2', '20250912'))"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + String number = rs.getString("NUMBER"); + // Verify the combinations + assertTrue("Unexpected row", + (id.equals("id_1") && number.equals("20251012")) || + (id.equals("id_2") && number.equals("20250912"))); + } + + assertEquals("Expected 3 rows", 3, rowCount); + } finally { + conn.close(); + } + } + + @Test + public void testKeyExplosionMixedAndOr() throws Exception { + // Variation 8: Mixed AND/OR with ranges on DESC column + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + try { + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER VARCHAR NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + ps.setString(1, "id_1"); + ps.setString(2, "20251012"); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + ps.setString(1, "id_1"); + ps.setString(2, "20250910"); + ps.setString(3, "entity_1b"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_2"); + ps.setString(2, "20251011"); + ps.setString(3, "entity_2"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setString(2, "20250913"); + ps.setString(3, "entity_3"); + ps.setString(4, "user4"); + ps.setString(5, "data4"); + ps.executeUpdate(); + + conn.commit(); + + // Run query with mixed AND/OR conditions + String query = "SELECT * FROM " + testTable + + " WHERE (ID = 'id_1' AND NUMBER > '20250911') OR (ID = 'id_2' AND NUMBER <= '20251012')"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011 + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + String number = rs.getString("NUMBER"); + // Verify expected combinations + assertTrue("Unexpected row", + (id.equals("id_1") && number.equals("20251012")) || + (id.equals("id_2") && number.equals("20251011"))); + } + + assertEquals("Expected 2 rows", 2, rowCount); + } finally { + conn.close(); + } + } + + @Test + public void testKeyExplosionPartialCompositeInInteger() throws Exception { + // Variation 6: Partial composite key IN (INTEGER type) + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + try { + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER INTEGER NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + ps.setString(1, "id_1"); + ps.setInt(2, 20251012); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + ps.setString(1, "id_2"); + ps.setInt(2, 20250912); + ps.setString(3, "entity_2"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setInt(2, 20250913); + ps.setString(3, "entity_3"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + + ps.setString(1, "id_1"); + ps.setInt(2, 20251012); + ps.setString(3, "entity_1b"); + ps.setString(4, "user4"); + ps.setString(5, "data4"); + ps.executeUpdate(); + + conn.commit(); + + // Run query with partial composite key IN (first two columns only) + String query = "SELECT * FROM " + testTable + + " WHERE (ID, NUMBER) IN (('id_1', 20251012), ('id_2', 20250912))"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + int number = rs.getInt("NUMBER"); + // Verify the combinations + assertTrue("Unexpected row", + (id.equals("id_1") && number == 20251012) || + (id.equals("id_2") && number == 20250912)); + } + + assertEquals("Expected 3 rows", 3, rowCount); + } finally { + conn.close(); + } + } + + @Test + public void testKeyExplosionMixedAndOrInteger() throws Exception { + // Variation 8: Mixed AND/OR with ranges on DESC column (INTEGER type) + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + String testTable = generateUniqueName(); + try { + // Create table with DESC ordering on NUMBER column + String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" + + "ID CHAR(15) NOT NULL, " + + "NUMBER INTEGER NOT NULL, " + + "ENTITY_ID VARCHAR NOT NULL, " + + "CREATED_BY VARCHAR, " + + "DATA VARCHAR " + + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; + conn.createStatement().execute(createTableDDL); + + // Insert test data + String upsert = "UPSERT INTO " + testTable + + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(upsert); + + ps.setString(1, "id_1"); + ps.setInt(2, 20251012); + ps.setString(3, "entity_1"); + ps.setString(4, "user1"); + ps.setString(5, "data1"); + ps.executeUpdate(); + + ps.setString(1, "id_1"); + ps.setInt(2, 20250910); + ps.setString(3, "entity_1b"); + ps.setString(4, "user2"); + ps.setString(5, "data2"); + ps.executeUpdate(); + + ps.setString(1, "id_2"); + ps.setInt(2, 20251011); + ps.setString(3, "entity_2"); + ps.setString(4, "user3"); + ps.setString(5, "data3"); + ps.executeUpdate(); + + ps.setString(1, "id_3"); + ps.setInt(2, 20250913); + ps.setString(3, "entity_3"); + ps.setString(4, "user4"); + ps.setString(5, "data4"); + ps.executeUpdate(); + + conn.commit(); + + // Run query with mixed AND/OR conditions + String query = "SELECT * FROM " + testTable + + " WHERE (ID = 'id_1' AND NUMBER > 20250911) OR (ID = 'id_2' AND NUMBER <= 20251012)"; + PreparedStatement statement = conn.prepareStatement(query); + ResultSet rs = statement.executeQuery(); + + // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011 + int rowCount = 0; + while (rs.next()) { + rowCount++; + String id = rs.getString("ID"); + int number = rs.getInt("NUMBER"); + // Verify expected combinations + assertTrue("Unexpected row", + (id.equals("id_1") && number == 20251012) || + (id.equals("id_2") && number == 20251011)); + } + + assertEquals("Expected 2 rows", 2, rowCount); + } finally { + conn.close(); + } + } + + + } diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java index 6ecfc7ff011..2820afe411b 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java @@ -7202,8 +7202,8 @@ public void testReverseVarLengthRange6916() throws Exception { String openQry = "select * from " + tableName + " where k > 'a' and k<'aaa'"; Scan openScan = getOptimizedQueryPlan(openQry, Collections.emptyList()).getContext().getScan(); - assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow())); - assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow())); +// assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow())); +// assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow())); ResultSet rs = stmt.executeQuery("EXPLAIN " + openQry); String explainPlan = QueryUtil.getExplainPlan(rs); assertEquals(explainExpected, explainPlan); diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java index 33f616f189e..f6c7d82716d 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java @@ -1280,8 +1280,8 @@ public void testLikeExpressionWithDescOrder() throws SQLException { byte[] invStopRow = new byte[startRow.length]; SortOrder.invert(stopRow, 0, invStopRow, 0, stopRow.length); - assertArrayEquals(invStopRow, lowerRange); - assertArrayEquals(invStartRow, upperRange); + assertArrayEquals(startRow, lowerRange); + assertArrayEquals(stopRow, upperRange); assertFalse(lowerInclusive); assertTrue(upperInclusive); @@ -3241,15 +3241,15 @@ public void testLastPkColumnIsVariableLengthAndDescBug5307() throws Exception { + "where (OBJ.OBJECT_ID, OBJ.OBJECT_VERSION) in (('obj1', '2222'),('obj2', '1111'),('obj3', '1111'))"; queryPlan = TestUtil.getOptimizeQueryPlan(conn, sql); scan = queryPlan.getContext().getScan(); - FilterList filterList = (FilterList) scan.getFilter(); - assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL); - assertEquals(filterList.getFilters().size(), 2); - assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter); - assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter); - RowKeyComparisonFilter rowKeyComparisonFilter = - (RowKeyComparisonFilter) filterList.getFilters().get(1); - assertEquals(rowKeyComparisonFilter.toString(), - "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')"); +// FilterList filterList = (FilterList) scan.getFilter(); +// assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL); +// assertEquals(filterList.getFilters().size(), 2); +// assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter); +// assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter); +// RowKeyComparisonFilter rowKeyComparisonFilter = +// (RowKeyComparisonFilter) filterList.getFilters().get(1); +// assertEquals(rowKeyComparisonFilter.toString(), +// "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')"); assertTrue(queryPlan.getContext().getScanRanges().isPointLookup()); assertArrayEquals(startKey, scan.getStartRow()); diff --git a/pom.xml b/pom.xml index 3114d5e4a9d..ccae93600f6 100644 --- a/pom.xml +++ b/pom.xml @@ -93,7 +93,7 @@ true - 2.18.4.1 + 2.14.1 4.1.126.Final 3.5.2 From 58ef6a91e76025f8940e66ccfad2156412ef1fa8 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 6 Jan 2026 14:32:32 +0530 Subject: [PATCH 05/18] Revert "ITs changes" This reverts commit fd464043167ffe1a007f495f5c3ecb72ad62232a. --- .../org/apache/phoenix/end2end/QueryIT.java | 579 +----------------- .../phoenix/compile/QueryCompilerTest.java | 4 +- .../phoenix/compile/WhereOptimizerTest.java | 22 +- pom.xml | 2 +- 4 files changed, 20 insertions(+), 587 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java index b57a58fecf7..1ce36c241a7 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryIT.java @@ -27,17 +27,15 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.sql.*; -import java.util.Arrays; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Collection; -import java.util.HashSet; -import java.util.List; import java.util.Properties; -import java.util.Set; import org.apache.phoenix.exception.SQLExceptionCode; -import org.apache.phoenix.thirdparty.com.google.common.collect.Lists; import org.apache.phoenix.util.PropertiesUtil; -import org.apache.phoenix.util.QueryUtil; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runners.Parameterized.Parameters; @@ -50,14 +48,7 @@ public class QueryIT extends BaseQueryIT { @Parameters(name = "QueryIT_{index}") // name is used by failsafe as file name in reports public static synchronized Collection data() { - // Return only one parameter set to run a single iteration - // Parameters: indexDDL, columnEncoded, keepDeletedCells - List testCases = Lists.newArrayList(); - testCases.add(new Object[] { NO_INDEX, false, false }); // No index, no column encoding - return testCases; - - // Original code that runs all iterations: - // return BaseQueryIT.allIndexes(); + return BaseQueryIT.allIndexes(); } public QueryIT(String indexDDL, boolean columnEncoded, boolean keepDeletedCells) { @@ -178,562 +169,4 @@ public void testDistinctLimitScan() throws Exception { conn.close(); } } - - @Test - public void testExplosion() throws Exception { - String tableName = generateUniqueName(); - String indexName = generateUniqueName(); - try (Connection conn = DriverManager.getConnection(getUrl()); - Statement stmt = conn.createStatement()) { - stmt.execute("create table " + tableName + " (id varchar primary key, ts timestamp)"); - // stmt.execute("create table " + tableName + "(id varchar NOT NULL, ts timestamp NOT NULL CONSTRAINT PK PRIMARY KEY (id, ts DESC))"); - stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)"); - - String query = "select id, ts from " + tableName - + " where ts >= TIMESTAMP '2023-02-23 13:30:00' and ts < TIMESTAMP '2023-02-23 13:40:00'"; - ResultSet rs = stmt.executeQuery("EXPLAIN " + query); - String explainPlan = QueryUtil.getExplainPlan(rs); - System.out.println("EXPLAIN PLAN: " + explainPlan); - PreparedStatement statement = conn.prepareStatement(query); - rs = statement.executeQuery(); - int rowCount = 0; - while (rs.next()) { - rowCount++; - } - // ResultSet rs = stmt.executeQuery("EXPLAIN " + query); - // String explainPlan = QueryUtil.getExplainPlan(rs); - // assertEquals( - // "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName - // + " [~1,677,159,600,000] - [~1,677,159,000,000]\n SERVER FILTER BY FIRST KEY ONLY", - // explainPlan); - } - } - - - - @Test - public void testKeyExplosion() throws Exception { - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - try { - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER VARCHAR NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - // Insert first row - ps.setString(1, "id_1"); - ps.setString(2, "20251012"); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - // Insert second row - ps.setString(1, "id_2"); - ps.setString(2, "20250912"); - ps.setString(3, "entity_2"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setString(2, "20250913"); - ps.setString(3, "entity_3"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - -// -// ps.setString(1, "id_1"); -// ps.setString(2, "20250910"); -// ps.setString(3, "entity_3"); -// ps.setString(4, "user22"); -// ps.setString(5, "data22"); -// ps.executeUpdate(); -// -// ps.setString(1, "id_3"); -// ps.setString(2, "20250911"); -// ps.setString(3, "entity_11"); -// ps.setString(4, "user21"); -// ps.setString(5, "data21"); -// ps.executeUpdate(); - - conn.commit(); - - // Run the query with IN clause - String query = "SELECT * FROM " + testTable - + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', '20251012', 'entity_1'), ('id_2', '20250912', 'entity_2'))"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Verify we get exactly 2 rows back - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - String number = rs.getString("NUMBER"); - String entityId = rs.getString("ENTITY_ID"); - - // Verify the data matches what we inserted - if (rowCount == 1) { - assertEquals("id_1", id); - assertEquals("20251012", number); - assertEquals("entity_1", entityId); - } else if (rowCount == 2) { - assertEquals("id_2", id); - assertEquals("20250912", number); - assertEquals("entity_2", entityId); - } - } - - assertEquals("Expected 2 rows", 2, rowCount); - } finally { - conn.close(); - } - } - - @Test - public void testKeyExplosionInteger() throws Exception { - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER INTEGER NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - // Insert first row - ps.setString(1, "id_1"); - ps.setInt(2, 20251012); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - // Insert second row - ps.setString(1, "id_2"); - ps.setInt(2, 20250912); - ps.setString(3, "entity_2"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setInt(2, 20250910); - ps.setString(3, "entity_3"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - conn.commit(); - - // Run the query with IN clause - String query = "SELECT * FROM " + testTable - + " WHERE (ID, NUMBER, ENTITY_ID) IN (('id_1', 20251012, 'entity_1'), ('id_2', 20250912, 'entity_2'))"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Verify we get exactly 2 rows back - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - String number = rs.getString("NUMBER"); - String entityId = rs.getString("ENTITY_ID"); - - // Verify the data matches what we inserted - if (rowCount == 1) { - assertEquals("id_1", id); - assertEquals("20251012", number); - assertEquals("entity_1", entityId); - } else if (rowCount == 2) { - assertEquals("id_2", id); - assertEquals("20250912", number); - assertEquals("entity_2", entityId); - } - } - - assertEquals("Expected 2 rows", 2, rowCount); - } - - - @Test - public void testExplosionIntegerIndex() throws Exception { - String tableName = generateUniqueName(); - String indexName = generateUniqueName(); - System.out.println(tableName); - System.out.println(indexName); - try (Connection conn = DriverManager.getConnection(getUrl()); - Statement stmt = conn.createStatement()) { - stmt.execute("create table " + tableName + " (id varchar primary key, ts integer)"); - stmt.execute("create index " + indexName + " on " + tableName + "(ts desc)"); - - // Insert test data - String upsert = "UPSERT INTO " + tableName - + " (id, ts) VALUES (?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - // Insert first row - ps.setString(1, "id_1"); - ps.setInt(2, 20251012); - ps.executeUpdate(); - - // Insert second row - ps.setString(1, "id_2"); - ps.setInt(2, 20250912); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setInt(2, 20250910); - ps.executeUpdate(); - conn.commit(); - - String query = "select * from " + tableName - + " where ts > 20250911"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - String explainPlan = QueryUtil.getExplainPlan(rs); - System.out.println("EXPLAIN PLAN: " + explainPlan); - statement = conn.prepareStatement(query); - rs = statement.executeQuery(); - int rowCount = 0; - while (rs.next()) { - rowCount++; - } - // ResultSet rs = stmt.executeQuery("EXPLAIN " + query); - // String explainPlan = QueryUtil.getExplainPlan(rs); - // assertEquals( - // "CLIENT PARALLEL 1-WAY RANGE SCAN OVER " + indexName - // + " [~1,677,159,600,000] - [~1,677,159,000,000]\n SERVER FILTER BY FIRST KEY ONLY", - // explainPlan); - } - } - - @Test - public void testKeyExplosionPartialCompositeIn() throws Exception { - // Variation 6: Partial composite key IN - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - try { - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER VARCHAR NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - ps.setString(1, "id_1"); - ps.setString(2, "20251012"); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - ps.setString(1, "id_2"); - ps.setString(2, "20250912"); - ps.setString(3, "entity_2"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setString(2, "20250913"); - ps.setString(3, "entity_3"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - - ps.setString(1, "id_1"); - ps.setString(2, "20251012"); - ps.setString(3, "entity_1b"); - ps.setString(4, "user4"); - ps.setString(5, "data4"); - ps.executeUpdate(); - - conn.commit(); - - // Run query with partial composite key IN (first two columns only) - String query = "SELECT * FROM " + testTable - + " WHERE (ID, NUMBER) IN (('id_1', '20251012'), ('id_2', '20250912'))"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - String number = rs.getString("NUMBER"); - // Verify the combinations - assertTrue("Unexpected row", - (id.equals("id_1") && number.equals("20251012")) || - (id.equals("id_2") && number.equals("20250912"))); - } - - assertEquals("Expected 3 rows", 3, rowCount); - } finally { - conn.close(); - } - } - - @Test - public void testKeyExplosionMixedAndOr() throws Exception { - // Variation 8: Mixed AND/OR with ranges on DESC column - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - try { - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER VARCHAR NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - ps.setString(1, "id_1"); - ps.setString(2, "20251012"); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - ps.setString(1, "id_1"); - ps.setString(2, "20250910"); - ps.setString(3, "entity_1b"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_2"); - ps.setString(2, "20251011"); - ps.setString(3, "entity_2"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setString(2, "20250913"); - ps.setString(3, "entity_3"); - ps.setString(4, "user4"); - ps.setString(5, "data4"); - ps.executeUpdate(); - - conn.commit(); - - // Run query with mixed AND/OR conditions - String query = "SELECT * FROM " + testTable - + " WHERE (ID = 'id_1' AND NUMBER > '20250911') OR (ID = 'id_2' AND NUMBER <= '20251012')"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011 - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - String number = rs.getString("NUMBER"); - // Verify expected combinations - assertTrue("Unexpected row", - (id.equals("id_1") && number.equals("20251012")) || - (id.equals("id_2") && number.equals("20251011"))); - } - - assertEquals("Expected 2 rows", 2, rowCount); - } finally { - conn.close(); - } - } - - @Test - public void testKeyExplosionPartialCompositeInInteger() throws Exception { - // Variation 6: Partial composite key IN (INTEGER type) - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - try { - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER INTEGER NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - ps.setString(1, "id_1"); - ps.setInt(2, 20251012); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - ps.setString(1, "id_2"); - ps.setInt(2, 20250912); - ps.setString(3, "entity_2"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setInt(2, 20250913); - ps.setString(3, "entity_3"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - - ps.setString(1, "id_1"); - ps.setInt(2, 20251012); - ps.setString(3, "entity_1b"); - ps.setString(4, "user4"); - ps.setString(5, "data4"); - ps.executeUpdate(); - - conn.commit(); - - // Run query with partial composite key IN (first two columns only) - String query = "SELECT * FROM " + testTable - + " WHERE (ID, NUMBER) IN (('id_1', 20251012), ('id_2', 20250912))"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Should return 3 rows: id_1 with 2 ENTITY_IDs at same NUMBER, id_2 with 1 ENTITY_ID - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - int number = rs.getInt("NUMBER"); - // Verify the combinations - assertTrue("Unexpected row", - (id.equals("id_1") && number == 20251012) || - (id.equals("id_2") && number == 20250912)); - } - - assertEquals("Expected 3 rows", 3, rowCount); - } finally { - conn.close(); - } - } - - @Test - public void testKeyExplosionMixedAndOrInteger() throws Exception { - // Variation 8: Mixed AND/OR with ranges on DESC column (INTEGER type) - Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); - Connection conn = DriverManager.getConnection(getUrl(), props); - String testTable = generateUniqueName(); - try { - // Create table with DESC ordering on NUMBER column - String createTableDDL = "CREATE TABLE IF NOT EXISTS " + testTable + " (" - + "ID CHAR(15) NOT NULL, " - + "NUMBER INTEGER NOT NULL, " - + "ENTITY_ID VARCHAR NOT NULL, " - + "CREATED_BY VARCHAR, " - + "DATA VARCHAR " - + "CONSTRAINT PK PRIMARY KEY (ID, NUMBER DESC, ENTITY_ID))"; - conn.createStatement().execute(createTableDDL); - - // Insert test data - String upsert = "UPSERT INTO " + testTable - + " (ID, NUMBER, ENTITY_ID, CREATED_BY, DATA) VALUES (?, ?, ?, ?, ?)"; - PreparedStatement ps = conn.prepareStatement(upsert); - - ps.setString(1, "id_1"); - ps.setInt(2, 20251012); - ps.setString(3, "entity_1"); - ps.setString(4, "user1"); - ps.setString(5, "data1"); - ps.executeUpdate(); - - ps.setString(1, "id_1"); - ps.setInt(2, 20250910); - ps.setString(3, "entity_1b"); - ps.setString(4, "user2"); - ps.setString(5, "data2"); - ps.executeUpdate(); - - ps.setString(1, "id_2"); - ps.setInt(2, 20251011); - ps.setString(3, "entity_2"); - ps.setString(4, "user3"); - ps.setString(5, "data3"); - ps.executeUpdate(); - - ps.setString(1, "id_3"); - ps.setInt(2, 20250913); - ps.setString(3, "entity_3"); - ps.setString(4, "user4"); - ps.setString(5, "data4"); - ps.executeUpdate(); - - conn.commit(); - - // Run query with mixed AND/OR conditions - String query = "SELECT * FROM " + testTable - + " WHERE (ID = 'id_1' AND NUMBER > 20250911) OR (ID = 'id_2' AND NUMBER <= 20251012)"; - PreparedStatement statement = conn.prepareStatement(query); - ResultSet rs = statement.executeQuery(); - - // Should return: id_1 with NUMBER=20251012, and id_2 with NUMBER=20251011 - int rowCount = 0; - while (rs.next()) { - rowCount++; - String id = rs.getString("ID"); - int number = rs.getInt("NUMBER"); - // Verify expected combinations - assertTrue("Unexpected row", - (id.equals("id_1") && number == 20251012) || - (id.equals("id_2") && number == 20251011)); - } - - assertEquals("Expected 2 rows", 2, rowCount); - } finally { - conn.close(); - } - } - - - } diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java index 2820afe411b..6ecfc7ff011 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java @@ -7202,8 +7202,8 @@ public void testReverseVarLengthRange6916() throws Exception { String openQry = "select * from " + tableName + " where k > 'a' and k<'aaa'"; Scan openScan = getOptimizedQueryPlan(openQry, Collections.emptyList()).getContext().getScan(); -// assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow())); -// assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow())); + assertEquals("\\x9E\\x9E\\x9F\\x00", Bytes.toStringBinary(openScan.getStartRow())); + assertEquals("\\x9E\\xFF", Bytes.toStringBinary(openScan.getStopRow())); ResultSet rs = stmt.executeQuery("EXPLAIN " + openQry); String explainPlan = QueryUtil.getExplainPlan(rs); assertEquals(explainExpected, explainPlan); diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java index f6c7d82716d..33f616f189e 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java @@ -1280,8 +1280,8 @@ public void testLikeExpressionWithDescOrder() throws SQLException { byte[] invStopRow = new byte[startRow.length]; SortOrder.invert(stopRow, 0, invStopRow, 0, stopRow.length); - assertArrayEquals(startRow, lowerRange); - assertArrayEquals(stopRow, upperRange); + assertArrayEquals(invStopRow, lowerRange); + assertArrayEquals(invStartRow, upperRange); assertFalse(lowerInclusive); assertTrue(upperInclusive); @@ -3241,15 +3241,15 @@ public void testLastPkColumnIsVariableLengthAndDescBug5307() throws Exception { + "where (OBJ.OBJECT_ID, OBJ.OBJECT_VERSION) in (('obj1', '2222'),('obj2', '1111'),('obj3', '1111'))"; queryPlan = TestUtil.getOptimizeQueryPlan(conn, sql); scan = queryPlan.getContext().getScan(); -// FilterList filterList = (FilterList) scan.getFilter(); -// assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL); -// assertEquals(filterList.getFilters().size(), 2); -// assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter); -// assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter); -// RowKeyComparisonFilter rowKeyComparisonFilter = -// (RowKeyComparisonFilter) filterList.getFilters().get(1); -// assertEquals(rowKeyComparisonFilter.toString(), -// "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')"); + FilterList filterList = (FilterList) scan.getFilter(); + assertTrue(filterList.getOperator() == Operator.MUST_PASS_ALL); + assertEquals(filterList.getFilters().size(), 2); + assertTrue(filterList.getFilters().get(0) instanceof SkipScanFilter); + assertTrue(filterList.getFilters().get(1) instanceof RowKeyComparisonFilter); + RowKeyComparisonFilter rowKeyComparisonFilter = + (RowKeyComparisonFilter) filterList.getFilters().get(1); + assertEquals(rowKeyComparisonFilter.toString(), + "(OBJECT_ID, OBJECT_VERSION) IN (X'6f626a3100cdcdcdcd',X'6f626a3200cececece',X'6f626a3300cececece')"); assertTrue(queryPlan.getContext().getScanRanges().isPointLookup()); assertArrayEquals(startKey, scan.getStartRow()); diff --git a/pom.xml b/pom.xml index ccae93600f6..3114d5e4a9d 100644 --- a/pom.xml +++ b/pom.xml @@ -93,7 +93,7 @@ true - 2.14.1 + 2.18.4.1 4.1.126.Final 3.5.2 From 16cc33f6de94e757dd861ef14e93bb0b74707b6a Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Sat, 9 May 2026 15:48:57 +0530 Subject: [PATCH 06/18] Repair phase implementation --- .../PhoenixSyncTableCheckpointOutputRow.java | 26 +- .../PhoenixSyncTableInputFormat.java | 16 +- .../mapreduce/PhoenixSyncTableMapper.java | 430 ++++++++++++++++-- .../PhoenixSyncTableOutputRepository.java | 20 +- .../mapreduce/PhoenixSyncTableTool.java | 38 +- .../end2end/PhoenixSyncTableToolIT.java | 132 +++++- .../ConnectionQueryServicesMetricsIT.java | 2 + .../PhoenixSyncTableOutputRepositoryTest.java | 42 +- 8 files changed, 623 insertions(+), 83 deletions(-) diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java index c848f2efaff..49cc9c4e4a0 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java @@ -37,7 +37,9 @@ public enum Type { public enum Status { VERIFIED, - MISMATCHED + MISMATCHED, + REPAIRED, + REPAIR_FAILED } private String tableName; @@ -156,19 +158,27 @@ public long getTargetRowsProcessed() { * contract to ensure consistency between formatting (in mapper) and parsing (in tests). */ public static class CounterFormatter { - private static final String FORMAT_CHUNK = "%s=%d,%s=%d"; + private static final String FORMAT_CHUNK = "%s=%d,%s=%d,%s=%d,%s=%d"; private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d"; /** - * Formats chunk counters as comma-separated key=value pairs. - * @param sourceRows Source rows processed - * @param targetRows Target rows processed - * @return Formatted string: "SOURCE_ROWS_PROCESSED=123,TARGET_ROWS_PROCESSED=456" + * Formats chunk counters as comma-separated key=value pairs. Always emits all four + * counters; for verify-only chunks (no repair) {@code rowsPut} and {@code rowsDeleted} + * are 0 so operators querying the checkpoint table see a uniform format. + * @param sourceRows Source rows processed + * @param targetRows Target rows processed + * @param rowsPut Rows put to target during repair (0 if not repaired) + * @param rowsDeleted Rows deleted from target during repair (0 if not repaired) + * @return Formatted string: "SOURCE_ROWS_PROCESSED=...,TARGET_ROWS_PROCESSED=..., + * ROWS_PUT_TO_TARGET=...,ROWS_DELETED_FROM_TARGET=..." */ - public static String formatChunk(long sourceRows, long targetRows) { + public static String formatChunk(long sourceRows, long targetRows, long rowsPut, + long rowsDeleted) { return String.format(FORMAT_CHUNK, PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows, - PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows); + PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows, + PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET.name(), rowsPut, + PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET.name(), rowsDeleted); } /** diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java index b2dd739c0c3..fcd61f07582 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java @@ -100,10 +100,11 @@ public List getSplits(JobContext context) throws IOException, Interr } LOGGER.info("Total splits generated {} of table {} for PhoenixSyncTable ", allSplits.size(), tableName); + boolean isDryRun = PhoenixSyncTableTool.getPhoenixSyncTableDryRun(conf); List completedRegions; try { completedRegions = - queryCompletedMapperRegions(conf, tableName, targetZkQuorum, fromTime, toTime); + queryCompletedMapperRegions(conf, tableName, targetZkQuorum, fromTime, toTime, isDryRun); } catch (SQLException e) { throw new RuntimeException(e); } @@ -136,16 +137,21 @@ public List getSplits(JobContext context) throws IOException, Interr } /** - * Queries Sync checkpoint table for completed mapper regions + * Queries Sync checkpoint table for completed mapper regions. + * + * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered + * out as completed; MISMATCHED regions are re-entered as splits so their + * chunks can be repaired. When true (dry-run), all REGION rows regardless + * of status are treated as completed. */ private List queryCompletedMapperRegions(Configuration conf, String tableName, - String targetZkQuorum, Long fromTime, Long toTime) throws SQLException { + String targetZkQuorum, Long fromTime, Long toTime, boolean isDryRun) throws SQLException { String tenantId = PhoenixConfigurationUtil.getTenantId(conf); List completedRegions = new ArrayList<>(); try (Connection conn = ConnectionUtil.getInputConnection(conf)) { PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn); - List completedRows = - repository.getProcessedMapperRegions(tableName, targetZkQuorum, fromTime, toTime, tenantId); + List completedRows = repository.getProcessedMapperRegions( + tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun); for (PhoenixSyncTableCheckpointOutputRow row : completedRows) { KeyRange keyRange = KeyRange.getKeyRange(row.getStartRowKey(), row.getEndRowKey()); completedRegions.add(keyRange); diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index 65e932ae78b..9c3349464b0 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -29,8 +29,11 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; @@ -65,10 +68,16 @@ public class PhoenixSyncTableMapper public enum SyncCounters { MAPPERS_VERIFIED, MAPPERS_MISMATCHED, + MAPPERS_REPAIRED, + MAPPERS_REPAIR_FAILED, CHUNKS_VERIFIED, CHUNKS_MISMATCHED, + CHUNKS_REPAIRED, + CHUNKS_REPAIR_FAILED, SOURCE_ROWS_PROCESSED, - TARGET_ROWS_PROCESSED + TARGET_ROWS_PROCESSED, + ROWS_PUT_TO_TARGET, + ROWS_DELETED_FROM_TARGET } private String tableName; @@ -80,6 +89,7 @@ public enum SyncCounters { private long chunkSizeBytes; private boolean isRawScan; private boolean isReadAllVersions; + private int repairBatchSize; private Configuration conf; private Connection sourceConnection; private Connection targetConnection; @@ -103,6 +113,7 @@ protected void setup(Context context) throws InterruptedException { chunkSizeBytes = PhoenixSyncTableTool.getPhoenixSyncTableChunkSizeBytes(conf); isRawScan = PhoenixSyncTableTool.getPhoenixSyncTableRawScan(conf); isReadAllVersions = PhoenixSyncTableTool.getPhoenixSyncTableReadAllVersions(conf); + repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf); extractRegionBoundariesFromSplit(context); sourceConnection = ConnectionUtil.getInputConnection(conf); pTable = sourceConnection.unwrap(PhoenixConnection.class).getTable(tableName); @@ -153,7 +164,6 @@ private void connectToTargetCluster() throws SQLException, IOException { private Connection createGlobalConnection(Configuration conf) throws SQLException { Configuration globalConf = new Configuration(conf); globalConf.unset(PhoenixConfigurationUtil.MAPREDUCE_TENANT_ID); - globalConf.unset(PhoenixRuntime.CURRENT_SCN_ATTRIB); return ConnectionUtil.getInputConnection(globalConf); } @@ -176,7 +186,7 @@ protected void map(NullWritable key, DBInputFormat.NullDBWritable value, Context Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), tableName); processRegion(regionStart, regionEnd, context); } - } catch (SQLException e) { + } catch (SQLException | IOException e) { tryClosingResources(); throw new RuntimeException("Error processing PhoenixSyncTableMapper", e); } @@ -196,7 +206,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context // Get processed chunks for this specific region List processedChunks = syncTableOutputRepository.getProcessedChunks(tableName, targetZkQuorum, fromTime, toTime, - tenantId, regionStart, regionEnd); + tenantId, regionStart, regionEnd, isDryRun); // Calculate unprocessed ranges within this region List unprocessedRanges = @@ -205,6 +215,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context // Track counters before processing this region long verifiedBefore = context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); long mismatchedBefore = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); + long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue(); @@ -221,6 +232,8 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue() - verifiedBefore; long mismatchedChunks = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue() - mismatchedBefore; + long repairFailedChunks = + context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() - repairFailedBefore; long sourceRowsProcessed = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue() - sourceRowsBefore; long targetRowsProcessed = @@ -231,7 +244,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed); if (sourceRowsProcessed > 0) { recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks, - mismatchedChunks, counters, context); + mismatchedChunks, repairFailedChunks, counters, context); } else { LOGGER.info( "No rows pending to process. All region boundaries are covered for startKey:{}, endKey: {}", @@ -242,39 +255,62 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context /** * Records region completion by updating counters, recording checkpoint, and logging result. * Consolidates all region completion logic to eliminate duplication. - * @param regionStart Region start key - * @param regionEnd Region end key - * @param regionStartTime Region processing start time - * @param regionEndTime Region processing end time - * @param verifiedChunks Number of verified chunks - * @param mismatchedChunks Number of mismatched chunks - * @param counters Formatted counter string - * @param context Mapper context + * @param regionStart Region start key + * @param regionEnd Region end key + * @param regionStartTime Region processing start time + * @param regionEndTime Region processing end time + * @param verifiedChunks Number of verified chunks + * @param mismatchedChunks Number of mismatched chunks + * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the + * region rolls up to MISMATCHED (drift remains, re-run will retry) + * @param counters Formatted counter string + * @param context Mapper context */ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd, Timestamp regionStartTime, Timestamp regionEndTime, long verifiedChunks, long mismatchedChunks, - String counters, Context context) throws SQLException { + long repairFailedChunks, String counters, Context context) throws SQLException { - boolean isVerified = mismatchedChunks == 0; - PhoenixSyncTableCheckpointOutputRow.Status status = isVerified - ? PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED - : PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED; + // Region rolls up its child chunks' outcomes into one of four statuses: + // VERIFIED — every chunk matched; no drift in this region. + // MISMATCHED — drift was detected but repair was not attempted (dry-run mode). + // REPAIRED — drift was detected and every chunk's repair succeeded. + // REPAIR_FAILED — drift was detected, repair was attempted, and at least one chunk + // threw during merge-scan or flush. The failed chunks remain as + // CHUNK/REPAIR_FAILED rows; a re-run will re-attempt them via the + // Phase 2 STATUS-IN filter that excludes REPAIR_FAILED. + PhoenixSyncTableCheckpointOutputRow.Status status; + SyncCounters mapperCounter; + if (mismatchedChunks == 0) { + status = PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED; + mapperCounter = SyncCounters.MAPPERS_VERIFIED; + } else if (isDryRun) { + status = PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED; + mapperCounter = SyncCounters.MAPPERS_MISMATCHED; + } else if (repairFailedChunks == 0) { + status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; + mapperCounter = SyncCounters.MAPPERS_REPAIRED; + } else { + status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED; + mapperCounter = SyncCounters.MAPPERS_REPAIR_FAILED; + } - context.getCounter(isVerified ? SyncCounters.MAPPERS_VERIFIED : SyncCounters.MAPPERS_MISMATCHED) - .increment(1); + context.getCounter(mapperCounter).increment(1); recordRegionCheckpoint(regionStart, regionEnd, status, regionStartTime, regionEndTime, counters); String logMessage = String.format( - "PhoenixSyncTable region [%s, %s) completed with %s: %d verified chunks, %d mismatched chunks", + "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, %d repair-failed", Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), - isVerified ? "verified" : "mismatch", verifiedChunks, mismatchedChunks); + status.name().toLowerCase(), verifiedChunks, mismatchedChunks, repairFailedChunks); - if (isVerified) { - LOGGER.info(logMessage); - } else { + if ( + status == PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED + || status == PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED + ) { LOGGER.warn(logMessage); + } else { + LOGGER.info(logMessage); } } @@ -354,10 +390,15 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd, if (nextSourceChunk == null) { isLastChunkOfRegion = true; } - ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection, - previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey, - isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey, isTargetStartKeyInclusive, - !isLastChunkOfRegion); + // Target scan boundary: covers extra-on-target rows that fall before the first + // source chunk, between consecutive source chunks, or after the last. Both verify + // and repair use the same range so repair sees the same cells the verifier hashed. + byte[] targetStart = + previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey; + byte[] targetEnd = isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey; + boolean targetEndInclusive = !isLastChunkOfRegion; + ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection, targetStart, + targetEnd, isTargetStartKeyInclusive, targetEndInclusive); context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).increment(sourceChunk.rowCount); context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).increment(targetChunk.rowCount); boolean matched = MessageDigest.isEqual(sourceChunk.hash, targetChunk.hash); @@ -367,22 +408,25 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd, + "isTargetEndKeyInclusive: {}, isFirstChunkOfRegion: {}, isLastChunkOfRegion: {}." + "Chunk comparison source {}, {}. Key range passed to target chunk: {}, {}." + "target chunk returned {}, {}: source={} rows, target={} rows, matched={}", - isSourceStartKeyInclusive, isTargetStartKeyInclusive, !isLastChunkOfRegion, + isSourceStartKeyInclusive, isTargetStartKeyInclusive, targetEndInclusive, previousSourceChunk == null, isLastChunkOfRegion, Bytes.toStringBinary(sourceChunk.startKey), Bytes.toStringBinary(sourceChunk.endKey), - Bytes.toStringBinary( - previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey), - Bytes.toStringBinary(isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey), + Bytes.toStringBinary(targetStart), Bytes.toStringBinary(targetEnd), Bytes.toStringBinary(targetChunk.startKey), Bytes.toStringBinary(targetChunk.endKey), sourceChunk.rowCount, targetChunk.rowCount, matched); } sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis()); String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatChunk(sourceChunk.rowCount, targetChunk.rowCount); + .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L); if (matched) { handleVerifiedChunk(sourceChunk, context, counters); } else { handleMismatchedChunk(sourceChunk, context, counters); + if (!isDryRun) { + repairChunk(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd, + isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount, + targetChunk.rowCount, sourceChunk.executionStartTime, context); + } } previousSourceChunk = sourceChunk; sourceChunk = nextSourceChunk; @@ -556,11 +600,13 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk, } /** - * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all - * cell versions based on command-line options. + * Builds the common Scan shape used by both verification and repair: same key range, + * inclusivity, time window, raw-scan, and all-versions semantics. Callers layer on their + * own caching, limits, and coprocessor attributes. Keeping the base shared guarantees that + * the cells visited by repair are exactly the cells the verifier hashed. */ - private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, - boolean isEndKeyInclusive, boolean isTargetScan) throws IOException { + private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, + boolean isEndKeyInclusive) throws IOException { Scan scan = new Scan(); scan.withStartRow(startKey, isStartKeyInclusive); scan.withStopRow(endKey, isEndKeyInclusive); @@ -570,6 +616,16 @@ private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyI } scan.setCacheBlocks(false); scan.setTimeRange(fromTime, toTime); + return scan; + } + + /** + * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all + * cell versions based on command-line options. + */ + private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, + boolean isEndKeyInclusive, boolean isTargetScan) throws IOException { + Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive); // Set limit and caching to 1 for sequential partial digest retrieval from target. // Enables digest continuation: each target chunk's digest feeds into the next until scanning // completes @@ -691,6 +747,304 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart, return Bytes.compareTo(processedChunks.get(0).getStartRowKey(), mapperRegionStart) > 0; } + /** + * Builds a row-level HBase scan for repair. Differs from {@link #createChunkScan} in that it + * does NOT set {@code SYNC_TABLE_CHUNK_FORMATION} or {@code SYNC_TABLE_CHUNK_SIZE_BYTES}, so + * the scanner returns actual {@link Result} rows rather than coprocessor chunk metadata. + * Shares the {@link #createBaseScan} core (time range, raw-scan, all-versions, inclusivity) + * with verification so the cells visited here are the same cells that produced the chunk + * hash. Adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL} attributes. + */ + private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, + boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException { + Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive); + scan.setCaching(1000); + ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn); + scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES); + return scan; + } + + /** + * Returns true if both Results carry identical cell arrays. Walks the two arrays in + * lock-step using {@link CellComparator} for coordinate ordering and {@link CellUtil#matchingValue} + * for value equality (CellComparator does not compare values). Both scanners use the same + * configuration on byte-ordered storage, so cells come back in canonical KV order. + */ + private boolean rowCellsEqual(Result sourceResult, Result targetResult) { + Cell[] sourceCells = sourceResult.rawCells(); + Cell[] targetCells = targetResult.rawCells(); + if (sourceCells.length != targetCells.length) { + return false; + } + CellComparator comparator = CellComparator.getInstance(); + for (int i = 0; i < sourceCells.length; i++) { + if ( + comparator.compare(sourceCells[i], targetCells[i]) != 0 + || !CellUtil.matchingValue(sourceCells[i], targetCells[i]) + ) { + return false; + } + } + return true; + } + + /** + * Builds a Put preserving the original cell timestamps from the source row. Used when an + * entire row is missing on target. + */ + private Put buildPutFromResult(Result result) throws IOException { + Put put = new Put(result.getRow()); + for (Cell cell : result.rawCells()) { + put.add(cell); + } + return put; + } + + /** + * Builds a Delete for every cell in the target row. The repair scan's time range + * {@code [fromTime, toTime]} guarantees the cells we read here are within the sync window; + * cells outside the window are never read, therefore never deleted. + */ + private Delete buildDeleteFromResult(Result result) { + Delete delete = new Delete(result.getRow()); + for (Cell cell : result.rawCells()) { + delete.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), + cell.getTimestamp()); + } + return delete; + } + + /** + * Computes cell-level diffs for a row that exists on both clusters but with a different + * cell set. Cells present in source but missing from target are added to a single Put for + * the row; cells present in target but missing from source are added to a single Delete. + */ + private void addRepairMutations(Result sourceResult, Result targetResult, + List pendingPuts, List pendingDeletes) throws IOException { + byte[] rowKey = sourceResult.getRow(); + Cell[] sourceCells = sourceResult.rawCells(); + Cell[] targetCells = targetResult.rawCells(); + CellComparator comparator = CellComparator.getInstance(); + + Put put = null; + Delete delete = null; + + int sourceIdx = 0; + int targetIdx = 0; + while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) { + Cell sourceCell = sourceCells[sourceIdx]; + Cell targetCell = targetCells[targetIdx]; + int cmp = comparator.compare(sourceCell, targetCell); + if (cmp == 0) { + // Same coordinates (family/qualifier/timestamp/type) on both sides. CellComparator + // does NOT compare values, so check value separately. If values differ, Put the + // source cell — it overwrites the target cell at the same timestamp. + if (!CellUtil.matchingValue(sourceCell, targetCell)) { + if (put == null) { + put = new Put(rowKey); + } + put.add(sourceCell); + } + sourceIdx++; + targetIdx++; + } else if (cmp < 0) { + // Source-only cell, Put on target. + if (put == null) { + put = new Put(rowKey); + } + put.add(sourceCell); + sourceIdx++; + } else { + // Target-only cell, Delete from target. + if (delete == null) { + delete = new Delete(rowKey); + } + delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell), + targetCell.getTimestamp()); + targetIdx++; + } + } + // Tail: any remaining source cells are source-only. + while (sourceIdx < sourceCells.length) { + if (put == null) { + put = new Put(rowKey); + } + put.add(sourceCells[sourceIdx++]); + } + // Tail: any remaining target cells are target-only. + while (targetIdx < targetCells.length) { + Cell targetCell = targetCells[targetIdx++]; + if (delete == null) { + delete = new Delete(rowKey); + } + delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell), + targetCell.getTimestamp()); + } + + if (put != null) { + pendingPuts.add(put); + } + if (delete != null) { + pendingDeletes.add(delete); + } + } + + /** + * Flushes the accumulated Put and Delete batches to the target HTable and clears both + * lists. Called every {@code repairBatchSize} rows and once more at the end of a chunk. + */ + private void flushRepairMutations(Table targetHTable, List puts, List deletes) + throws IOException { + if (!puts.isEmpty()) { + targetHTable.put(puts); + puts.clear(); + } + if (!deletes.isEmpty()) { + targetHTable.delete(deletes); + deletes.clear(); + } + } + + /** + * Performs row-level repair for a mismatched chunk by merge-scanning source and target + * cluster data and applying targeted mutations to target. The two scan ranges may differ: + * the verifier reads target over a wider range than source (covers extra-on-target rows + * that fall between consecutive source chunks); repair must mirror the same boundaries so + * those extras are visible here as {@code cmp > 0} rows and get deleted. + * + * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee). + * cmp == 0 (same row): compare cells; repair if different. + * cmp < 0 (source-only): Put all source cells. + * cmp > 0 (target-only): Delete target cells within [fromTime, toTime]. + * + * Cells outside [fromTime, toTime] are never read (scan time range), so never mutated. + * + * Only called when isDryRun == false. + * + * @param sourceStart Source chunk start key (also the checkpoint PK) — inclusive + * @param sourceEnd Source chunk end key (also the checkpoint PK) — inclusive + * @param targetStart Target scan start (matches verifier-side boundary) + * @param targetEnd Target scan end (matches verifier-side boundary) + * @param targetStartInclusive Inclusivity of target scan start — matches verify side + * @param targetEndInclusive Inclusivity of target scan end — matches verify side + * @param verifyStartTime When the verify pass began for this chunk; reused as the + * REPAIRED row's START_TIME so the row spans the full + * verify+repair lifecycle that overwrites the MISMATCHED row. + */ + private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart, + byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive, + long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context) + throws IOException, SQLException { + long rowsPut = 0; + long rowsDeleted = 0; + + LOGGER.info( + "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}", + Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), + targetStartInclusive ? "[" : "(", Bytes.toStringBinary(targetStart), + Bytes.toStringBinary(targetEnd), targetEndInclusive ? "]" : ")", tableName); + + PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class); + PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class); + + Scan sourceScan = createRepairScan(sourceStart, sourceEnd, true, true, sourcePhoenixConn); + Scan targetScan = createRepairScan(targetStart, targetEnd, targetStartInclusive, + targetEndInclusive, targetPhoenixConn); + + List pendingPuts = new ArrayList<>(); + List pendingDeletes = new ArrayList<>(); + + try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName); + Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName); + ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan); + ResultScanner targetScanner = targetHTable.getScanner(targetScan)) { + + Result sourceResult = sourceScanner.next(); + Result targetResult = targetScanner.next(); + + while (sourceResult != null || targetResult != null) { + int cmp; + if (sourceResult == null) { + cmp = 1; + } else if (targetResult == null) { + cmp = -1; + } else { + cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow()); + } + + if (cmp == 0) { + // Same row key on both clusters — diff at cell level and repair only if cells differ. + if (!rowCellsEqual(sourceResult, targetResult)) { + addRepairMutations(sourceResult, targetResult, pendingPuts, pendingDeletes); + rowsPut++; + } + sourceResult = sourceScanner.next(); + targetResult = targetScanner.next(); + } else if (cmp < 0) { + // Source-only row (target is missing it, or target has advanced past) — Put it on target. + pendingPuts.add(buildPutFromResult(sourceResult)); + rowsPut++; + sourceResult = sourceScanner.next(); + } else { + // Target-only row (source has no such row in this range) — Delete it from target. + pendingDeletes.add(buildDeleteFromResult(targetResult)); + rowsDeleted++; + targetResult = targetScanner.next(); + } + + if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) { + flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); + } + context.progress(); + } + flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); + } catch (IOException e) { + // Per-chunk fault isolation. Mark this chunk REPAIR_FAILED, increment the counter, + // and return so the mapper continues with the next chunk. Phase 2's STATUS filter + // (VERIFIED, REPAIRED) excludes REPAIR_FAILED, so a re-run will re-attempt this chunk + // as an unprocessed gap. + LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}", + Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName, + e.getMessage(), e); + context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).increment(1); + + Timestamp failedAt = new Timestamp(System.currentTimeMillis()); + // Capture partial progress in the COUNTERS column for triage. + String failedCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter + .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted); + syncTableOutputRepository + .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetZkQuorum) + .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) + .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun) + .setStartRowKey(sourceStart).setEndRowKey(sourceEnd) + .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) + .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt) + .setCounters(failedCounters).build()); + return; + } + + context.getCounter(SyncCounters.ROWS_PUT_TO_TARGET).increment(rowsPut); + context.getCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).increment(rowsDeleted); + context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1); + + Timestamp repairEndTime = new Timestamp(System.currentTimeMillis()); + String repairCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter + .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted); + + syncTableOutputRepository + .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetZkQuorum) + .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) + .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart) + .setEndRowKey(sourceEnd).setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) + .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime) + .setCounters(repairCounters).build()); + + LOGGER.info("Completed repair for chunk source=[{}, {}]: rowsPut={}, rowsDeleted={}", + Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), rowsPut, rowsDeleted); + } + @Override protected void cleanup(Context context) throws IOException, InterruptedException { tryClosingResources(); diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java index fddfea32ddd..29176683c2a 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepository.java @@ -120,16 +120,18 @@ public void checkpointSyncTableResult(PhoenixSyncTableCheckpointOutputRow row) * @param fromTime Start timestamp * @param toTime End timestamp * @param tenantId Tenant ID + * @param isDryRun When false (repair mode) * @return List of completed mapper regions */ public List getProcessedMapperRegions(String tableName, - String targetCluster, Long fromTime, Long toTime, String tenantId) throws SQLException { + String targetCluster, Long fromTime, Long toTime, String tenantId, boolean isDryRun) + throws SQLException { StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("SELECT START_ROW_KEY, END_ROW_KEY FROM ") .append(SYNC_TABLE_CHECKPOINT_TABLE_NAME) .append(" WHERE TABLE_NAME = ? AND TARGET_CLUSTER = ?") - .append(" AND TYPE = ? AND FROM_TIME = ? AND TO_TIME = ? "); + .append(" AND TYPE = 'REGION' AND FROM_TIME = ? AND TO_TIME = ? "); // Conditionally build TENANT_ID clause based on whether tenantId is null if (tenantId == null) { @@ -138,6 +140,11 @@ public List getProcessedMapperRegions(Strin queryBuilder.append(" AND TENANT_ID = ?"); } + // In repair mode: only skip regions that are fully done (VERIFIED or REPAIRED). + if (!isDryRun) { + queryBuilder.append(" AND STATUS IN ('VERIFIED', 'REPAIRED')"); + } + queryBuilder.append( " ORDER BY TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY"); @@ -146,7 +153,6 @@ public List getProcessedMapperRegions(Strin int paramIndex = 1; ps.setString(paramIndex++, tableName); ps.setString(paramIndex++, targetCluster); - ps.setString(paramIndex++, Type.REGION.name()); ps.setLong(paramIndex++, fromTime); ps.setLong(paramIndex++, toTime); // Only bind tenantId parameter if non-null @@ -175,11 +181,12 @@ public List getProcessedMapperRegions(Strin * @param tenantId Tenant ID * @param mapperRegionStart Mapper region start key * @param mapperRegionEnd Mapper region end key + * @param isDryRun When false (repair mode) * @return List of processed chunks in the region */ public List getProcessedChunks(String tableName, String targetCluster, Long fromTime, Long toTime, String tenantId, byte[] mapperRegionStart, - byte[] mapperRegionEnd) throws SQLException { + byte[] mapperRegionEnd, boolean isDryRun) throws SQLException { StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("SELECT START_ROW_KEY, END_ROW_KEY FROM " + SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ? AND TARGET_CLUSTER = ? " @@ -192,6 +199,11 @@ public List getProcessedChunks(String table queryBuilder.append(" AND TENANT_ID = ?"); } + // In repair mode: only skip chunks that are fully done (VERIFIED or REPAIRED). + if (!isDryRun) { + queryBuilder.append(" AND STATUS IN ('VERIFIED', 'REPAIRED')"); + } + // Check if mapper region boundaries are non-empty (i.e., NOT first/last regions) // Only add boundary conditions for non-empty boundaries boolean hasEndBoundary = mapperRegionEnd != null && mapperRegionEnd.length > 0; diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java index 80eacde25e7..7cb3f034138 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java @@ -134,6 +134,9 @@ public class PhoenixSyncTableTool extends Configured implements Tool { public static final String PHOENIX_SYNC_TABLE_RAW_SCAN = "phoenix.sync.table.raw.scan"; public static final String PHOENIX_SYNC_TABLE_READ_ALL_VERSIONS = "phoenix.sync.table.read.all.versions"; + public static final String PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE = + "phoenix.sync.table.repair.batch.size"; + public static final int DEFAULT_PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE = 1000; private String schemaName; private String tableName; @@ -233,7 +236,6 @@ private void setPhoenixSyncTableToolConfiguration(Configuration configuration) { if (tenantId != null) { PhoenixConfigurationUtil.setTenantId(configuration, tenantId); } - PhoenixConfigurationUtil.setCurrentScnValue(configuration, endTime); configuration .setBooleanIfUnset(PhoenixConfigurationUtil.MAPREDUCE_RANDOMIZE_MAPPER_EXECUTION_ORDER, true); } @@ -411,20 +413,35 @@ private boolean submitPhoenixSyncTableJob() throws Exception { counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_VERIFIED).getValue(); long mismatchedMappers = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue(); + long repairedMappers = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue(); + long repairFailedMappers = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); long chunksVerified = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED).getValue(); long chunksMismatched = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue(); + long chunksRepaired = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue(); + long chunksRepairFailed = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsProcessed = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsProcessed = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue(); + long rowsPutToTarget = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET).getValue(); + long rowsDeletedFromTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET).getValue(); LOGGER.info( "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n" - + "Mismatched Mappers: {}, \n Chunks Verified: {}, \n" - + "Chunks Mismatched: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}", - taskCreated, verifiedMappers, mismatchedMappers, chunksVerified, chunksMismatched, - sourceRowsProcessed, targetRowsProcessed); + + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n" + + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n" + + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n" + + "Rows Put To Target: {}, \n Rows Deleted From Target: {}", + taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers, + chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed, + targetRowsProcessed, rowsPutToTarget, rowsDeletedFromTarget); } else { LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed " + "during initialization", qTable); @@ -558,6 +575,17 @@ public static boolean getPhoenixSyncTableSplitCoalescing(Configuration conf) { DEFAULT_PHOENIX_SYNC_TABLE_SPLIT_COALESCING); } + public static void setPhoenixSyncTableRepairBatchSize(Configuration conf, int batchSize) { + Preconditions.checkNotNull(conf); + conf.setInt(PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE, batchSize); + } + + public static int getPhoenixSyncTableRepairBatchSize(Configuration conf) { + Preconditions.checkNotNull(conf); + return conf.getInt(PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE, + DEFAULT_PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE); + } + public Job getJob() { return job; } diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 0661973dbe7..57321b4ce9f 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -55,6 +55,7 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskCounter; +import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants; import org.apache.phoenix.jdbc.HighAvailabilityTestingUtility.HBaseTestingUtilityPair; import org.apache.phoenix.jdbc.PhoenixConnection; import org.apache.phoenix.jdbc.PhoenixDriver; @@ -155,7 +156,17 @@ public void testSyncTableValidateWithDataDifference() throws Exception { introduceAndVerifyTargetDifferences(uniqueTableName); - Job job = runSyncToolWithLargeChunks(uniqueTableName); + // Pin the time window so both runs share the same checkpoint PK + // (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY). + // Without this, runSyncToolWithLargeChunks would assign a fresh System.currentTimeMillis() + // to --to-time on each call and the repair pass would append new rows instead of + // overwriting the dry-run pass's MISMATCHED rows. + String fromTime = "0"; + String toTime = String.valueOf(System.currentTimeMillis()); + + // First run: --dry-run, only detect mismatches. + Job job = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", fromTime, + "--to-time", toTime); SyncCountersResult counters = getSyncCounters(job); validateSyncCounters(counters, 10, 10, 1, 3); @@ -166,6 +177,60 @@ public void testSyncTableValidateWithDataDifference() throws Exception { queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4, 3, null); + + // Second run: no --dry-run, repair the mismatched chunks. Same time window so the + // PK matches and CHUNK/REPAIRED overwrites CHUNK/MISMATCHED in place. + Job repairJob = + runSyncToolWithLargeChunks(uniqueTableName, "--from-time", fromTime, "--to-time", toTime); + Counters repairCounters = repairJob.getCounters(); + + // The repair re-verifies the previously MISMATCHED chunks (excluded by Phase 2 filter) + // and now repairs them, producing CHUNK/REPAIRED + REGION/REPAIRED checkpoint rows. + long chunksRepaired = repairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue(); + long chunksRepairFailed = + repairCounters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); + long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); + long mappersRepairFailed = + repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); + long rowsPutToTarget = repairCounters.findCounter(SyncCounters.ROWS_PUT_TO_TARGET).getValue(); + long rowsDeletedFromTarget = + repairCounters.findCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).getValue(); + assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired); + assertEquals("No chunk repair should fail", 0, chunksRepairFailed); + assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired); + assertEquals("No mapper repair should fail", 0, mappersRepairFailed); + assertEquals("Three rows on target should be Put back to source values", 3, rowsPutToTarget); + assertEquals("No rows should be deleted from target (only value diffs, not extras)", 0, + rowsDeletedFromTarget); + + // Target rows should now match source. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + + // Checkpoint table now has 3 CHUNK/REPAIRED + 3 REGION/REPAIRED in addition to the + // VERIFIED rows. Phase 2's STATUS-IN filter caused the MISMATCHED rows to be re-entered + // as gaps and repair overwrote them in place. + List postRepairEntries = + queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + int chunkRepairedRows = 0; + int regionRepairedRows = 0; + int mismatchedRows = 0; + for (PhoenixSyncTableCheckpointOutputRow entry : postRepairEntries) { + if (PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED.equals(entry.getStatus())) { + if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) { + chunkRepairedRows++; + } else if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) { + regionRepairedRows++; + } + } else if ( + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus()) + ) { + mismatchedRows++; + } + } + assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, chunkRepairedRows); + assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, regionRepairedRows); + assertEquals("MISMATCHED rows should be overwritten in place — none should remain", 0, + mismatchedRows); } @Test @@ -1771,6 +1836,69 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception { 3, null); } + /** + * Verifies that the sync job completes successfully when {@code endTime} (--to-time) is older + * than {@code phoenix.max.lookback.age.seconds}. + * + *

Root cause without fix: {@link PhoenixSyncTableTool} sets {@code CURRENT_SCN_VALUE = + * endTime} in the MR job configuration. During split generation, {@code + * PhoenixInputFormat.getQueryPlan()} creates a Phoenix connection with that SCN. {@code + * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is + * older than {@code phoenix.max.lookback.age.seconds}. + * + *

Fix: {@link PhoenixSyncTableInputFormat#getQueryPlan} overrides the parent to strip {@code + * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code + * verifySCN()} returns early (SCN == null), so no exception is thrown. + * + *

Data access correctness: The mapper uses raw HBase {@code Scan.setTimeRange(fromTime, + * toTime)}, which does NOT go through {@code QueryCompiler.compile()} or {@code verifySCN()}, so + * data within [fromTime, toTime] is always accessible regardless of max lookback age. + */ + @Test + public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exception { + // Setup: create tables on both clusters and replicate 10 rows + createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); + insertTestData(sourceConnection, uniqueTableName, 1, 10); + waitForReplication(targetConnection, uniqueTableName, 10); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + + // Capture toTime BEFORE the lookback window will expire + long toTime = System.currentTimeMillis(); + + // Configure a short max lookback age (5 seconds) in the MR job configuration. + // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from + // conn.getQueryServices().getConfiguration(), which is the client-side MR conf. + long maxLookbackAgeSeconds = 5; + Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY, + maxLookbackAgeSeconds); + + // Wait until toTime is older than the lookback age. + // After this sleep: (now - maxLookbackAgeMillis) > toTime → verifySCN would throw ERROR 538 + Thread.sleep((maxLookbackAgeSeconds + 2) * 1000L); + + // Run the sync tool with the now-stale toTime. + // Without PhoenixSyncTableInputFormat.getQueryPlan() override: + // getSplits() → PhoenixInputFormat.getQueryPlan() sets SCN=toTime on Phoenix connection + // → QueryCompiler.verifySCN() → ERROR 538 (toTime older than maxLookbackAge) + // With the fix: + // getSplits() → overridden getQueryPlan() strips CURRENT_SCN_VALUE from conf copy + // → verifySCN() sees scn == null → returns early → no exception thrown + // The mapper still uses raw HBase Scan.setTimeRange(0, toTime), bypassing verifySCN entirely, + // so all 10 rows within [0, toTime] are accessible and compared correctly. + Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--from-time", "0", "--to-time", + String.valueOf(toTime)); + + assertTrue( + "Sync job should complete successfully even when endTime is older than maxLookbackAge", + job.isSuccessful()); + + // Verify the mapper processed all 10 rows via raw HBase scan (bypasses verifySCN) + SyncCountersResult counters = getSyncCounters(job); + validateSyncCounters(counters, 10, 10, 10, 0); + validateMapperCounters(counters, 4, 0); + } + /** * Helper class to hold separated mapper and chunk entries. */ @@ -1857,7 +1985,7 @@ private List findChunksBelongingToMapper(Co PhoenixSyncTableCheckpointOutputRow mapper) throws SQLException { PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn); return repository.getProcessedChunks(tableName, targetCluster, fromTime, toTime, tenantId, - mapper.getStartRowKey(), mapper.getEndRowKey()); + mapper.getStartRowKey(), mapper.getEndRowKey(), true); } /** diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index 3b7fec50569..673b3f0cbc0 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -20,12 +20,14 @@ import static org.apache.phoenix.hbase.index.IndexRegionObserver.PHOENIX_INDEX_CDC_CONSUMER_ENABLED; import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED; import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java index dfdaabecd27..2c2b05c2c6b 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java @@ -148,7 +148,7 @@ public void testCheckpointMapperRegionVerified() throws Exception { .setCounters("SOURCE_ROWS_PROCESSED=10,TARGET_ROWS_PROCESSED=10").build()); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals(1, results.size()); assertArrayEquals(startKey, results.get(0).getStartRowKey()); @@ -171,7 +171,7 @@ public void testCheckpointChunkVerified() throws Exception { .build()); List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertFalse("Should find chunk within mapper region", results.isEmpty()); } @@ -199,7 +199,7 @@ public void testCheckpointWithEmptyStartKey() throws Exception { .build()); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals(2, results.size()); // Phoenix returns null for empty byte arrays in primary key columns @@ -222,7 +222,7 @@ public void testCheckpointWithNullEndKey() throws Exception { .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals(1, results.size()); } @@ -348,7 +348,7 @@ public void testCheckpointUpsertBehavior() throws Exception { // Verify only one row exists with updated values List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals("Should have only one row after upsert", 1, results.size()); } @@ -358,7 +358,7 @@ public void testGetProcessedMapperRegionsEmpty() throws Exception { String tableName = generateUniqueName(); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals(0, results.size()); } @@ -386,7 +386,7 @@ public void testGetProcessedMapperRegionsBoth() throws Exception { .build()); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals(2, results.size()); // Verify ORDER BY START_ROW_KEY: row1 should come before row50 @@ -416,7 +416,7 @@ public void testGetProcessedMapperRegionsFiltersChunks() throws Exception { .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals("Should only return REGION entries", 1, results.size()); } @@ -437,7 +437,7 @@ public void testGetProcessedMapperRegionsFiltersTimeRange() throws Exception { // Query with different time range List results = - repository.getProcessedMapperRegions(tableName, targetCluster, 2000L, 3000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 2000L, 3000L, null, true); assertEquals("Should not find entry with different time range", 0, results.size()); } @@ -449,7 +449,7 @@ public void testGetProcessedChunksEmpty() throws Exception { byte[] mapperEnd = Bytes.toBytes("row100"); List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals(0, results.size()); } @@ -479,7 +479,7 @@ public void testGetProcessedChunksBothStatuses() throws Exception { .build()); List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals(2, results.size()); // Verify ORDER BY START_ROW_KEY: row10 should come before row30 @@ -512,7 +512,7 @@ public void testGetProcessedChunksFiltersMapperRegions() throws Exception { .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals("Should only return CHUNK entries", 1, results.size()); } @@ -532,7 +532,7 @@ public void testGetProcessedChunksWithNoBoundaries() throws Exception { // Query with no boundaries (entire table) List results = - repository.getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, null, null); + repository.getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, null, null, true); assertEquals(1, results.size()); } @@ -575,7 +575,7 @@ public void testGetProcessedChunksWithOnlyEndBoundary() throws Exception { byte[] mapperEnd = Bytes.toBytes("row50"); List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); // Should return chunk1 and chunk2 (both have startKey <= row50) // Should NOT return chunk3 (startKey row60 > row50) @@ -622,7 +622,7 @@ public void testGetProcessedChunksWithOnlyStartBoundary() throws Exception { byte[] mapperEnd = new byte[0]; // Empty end key (last region) List results = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); // Should return chunk2 and chunk3 (both have endKey >= row30) // Should NOT return chunk1 (endKey row20 < row30) @@ -784,17 +784,17 @@ public void testCheckpointMapperRegionWithTenantId() throws Exception { // Query for TENANT_001 - should return only TENANT_001's checkpoint List results1 = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId1); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId1, true); assertEquals("TENANT_001 should have 1 checkpoint", 1, results1.size()); // Query for TENANT_002 - should return only TENANT_002's checkpoint List results2 = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId2); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, tenantId2, true); assertEquals("TENANT_002 should have 1 checkpoint", 1, results2.size()); // Query for null tenant - should return only null-tenant checkpoint (tenant isolation) List results3 = - repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null); + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); assertEquals("Null tenant query should return only null-tenant checkpoint", 1, results3.size()); } @@ -832,17 +832,17 @@ public void testChunkCheckpointChunkWithDifferentTenants() throws Exception { // Query for TENANT_001 chunks List results1 = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, tenantId1, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, tenantId1, mapperStart, mapperEnd, true); assertEquals("TENANT_001 should have 1 chunk", 1, results1.size()); // Query for TENANT_002 chunks List results2 = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, tenantId2, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, tenantId2, mapperStart, mapperEnd, true); assertEquals("TENANT_002 should have 1 chunk", 1, results2.size()); // Query for null tenant - should return only null-tenant chunk (tenant isolation) List results3 = repository.getProcessedChunks(tableName, - targetCluster, 0L, 1000L, null, mapperStart, mapperEnd); + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals("Null tenant query should return only null-tenant chunk", 1, results3.size()); } } From de96b293fae1578caa6175a931355561e4e76e3d Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Thu, 14 May 2026 10:04:13 +0530 Subject: [PATCH 07/18] checkpoint before implmenting repair fix for source/target with same ts as put/delete --- .../PhoenixSyncTableCheckpointOutputRow.java | 56 ++- .../mapreduce/PhoenixSyncTableMapper.java | 335 ++++++++++++------ .../mapreduce/PhoenixSyncTableTool.java | 24 +- .../end2end/PhoenixSyncTableToolIT.java | 35 +- 4 files changed, 320 insertions(+), 130 deletions(-) diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java index 49cc9c4e4a0..d261d524574 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java @@ -158,27 +158,55 @@ public long getTargetRowsProcessed() { * contract to ensure consistency between formatting (in mapper) and parsing (in tests). */ public static class CounterFormatter { - private static final String FORMAT_CHUNK = "%s=%d,%s=%d,%s=%d,%s=%d"; + private static final String FORMAT_CHUNK = + "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d"; private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d"; /** - * Formats chunk counters as comma-separated key=value pairs. Always emits all four - * counters; for verify-only chunks (no repair) {@code rowsPut} and {@code rowsDeleted} - * are 0 so operators querying the checkpoint table see a uniform format. - * @param sourceRows Source rows processed - * @param targetRows Target rows processed - * @param rowsPut Rows put to target during repair (0 if not repaired) - * @param rowsDeleted Rows deleted from target during repair (0 if not repaired) - * @return Formatted string: "SOURCE_ROWS_PROCESSED=...,TARGET_ROWS_PROCESSED=..., - * ROWS_PUT_TO_TARGET=...,ROWS_DELETED_FROM_TARGET=..." + * Formats chunk counters as comma-separated key=value pairs. Always emits all eight + * counters; for verify-only chunks (no repair) the six drift counters are 0 so + * operators querying the checkpoint table see a uniform format. + * + * Drift signals partition into two layers: + * row-level — whole row missing or extra on target, or unrepairable (target row is + * entirely tombstones — HBase cannot remove tombstones, only major + * compaction does) + * cell-level — for rows present on both clusters, individual cells missing, extra, + * or differing in value at matching coordinates + * The two layers are disjoint per row: a row drift case contributes only to row + * counters; a cell drift case contributes only to cell counters. + * + * @param sourceRows Source rows processed + * @param targetRows Target rows processed + * @param rowsMissingOnTarget Rows present on source but missing on target (0 if not + * repaired) + * @param rowsExtraOnTarget Rows present on target but missing on source whose live + * cells repair tombstoned (0 if not repaired) + * @param rowsCannotRepair Rows present on target but missing on source whose contents + * are entirely tombstones — repair cannot act on them and an + * operator-driven major compaction is required to make the + * verifier converge under {@code --raw-scan} + * @param cellsMissingOnTarget Cells (across rows present on both sides) that source had + * at coordinates target lacked (0 if not repaired) + * @param cellsExtraOnTarget Cells (across rows present on both sides) that target had + * at coordinates source lacked (0 if not repaired) + * @param cellsDifferentOnTarget Cells (across rows present on both sides) at matching + * coordinates whose values differed (0 if not repaired) + * @return Formatted string with all eight counters */ - public static String formatChunk(long sourceRows, long targetRows, long rowsPut, - long rowsDeleted) { + public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget, + long rowsExtraOnTarget, long rowsCannotRepair, long cellsMissingOnTarget, + long cellsExtraOnTarget, long cellsDifferentOnTarget) { return String.format(FORMAT_CHUNK, PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows, PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows, - PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET.name(), rowsPut, - PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET.name(), rowsDeleted); + PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair, + PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget, + PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget, + PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET.name(), + cellsDifferentOnTarget); } /** diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index 9c3349464b0..1818f97ef1c 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -49,7 +49,6 @@ import org.apache.phoenix.query.KeyRange; import org.apache.phoenix.schema.PTable; import org.apache.phoenix.util.MetaDataUtil; -import org.apache.phoenix.util.PhoenixRuntime; import org.apache.phoenix.util.SHA256DigestUtil; import org.apache.phoenix.util.ScanUtil; import org.slf4j.Logger; @@ -76,8 +75,12 @@ public enum SyncCounters { CHUNKS_REPAIR_FAILED, SOURCE_ROWS_PROCESSED, TARGET_ROWS_PROCESSED, - ROWS_PUT_TO_TARGET, - ROWS_DELETED_FROM_TARGET + ROWS_MISSING_ON_TARGET, + ROWS_EXTRA_ON_TARGET, + ROWS_CANNOT_REPAIR, + CELLS_MISSING_ON_TARGET, + CELLS_EXTRA_ON_TARGET, + CELLS_DIFFERENT_ON_TARGET } private String tableName; @@ -417,7 +420,7 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd, } sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis()); String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L); + .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L); if (matched) { handleVerifiedChunk(sourceChunk, context, counters); } else { @@ -765,128 +768,245 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey } /** - * Returns true if both Results carry identical cell arrays. Walks the two arrays in - * lock-step using {@link CellComparator} for coordinate ordering and {@link CellUtil#matchingValue} - * for value equality (CellComparator does not compare values). Both scanners use the same - * configuration on byte-ordered storage, so cells come back in canonical KV order. + * Lazily-built per-row Put and Delete mutations. Each field is created on first use so a + * row that needs only Puts produces no Delete (and vice versa); a row that needs no + * mutation at all produces neither. After construction, callers append the produced + * mutations to the pending batches via {@link #flush(List, List)}. */ - private boolean rowCellsEqual(Result sourceResult, Result targetResult) { - Cell[] sourceCells = sourceResult.rawCells(); - Cell[] targetCells = targetResult.rawCells(); - if (sourceCells.length != targetCells.length) { - return false; + private static final class RowRepairMutations { + private final byte[] rowKey; + Put put; + Delete delete; + + RowRepairMutations(byte[] rowKey) { + this.rowKey = rowKey; } - CellComparator comparator = CellComparator.getInstance(); - for (int i = 0; i < sourceCells.length; i++) { - if ( - comparator.compare(sourceCells[i], targetCells[i]) != 0 - || !CellUtil.matchingValue(sourceCells[i], targetCells[i]) - ) { - return false; + + Put put() { + if (put == null) { + put = new Put(rowKey); + } + return put; + } + + Delete delete() { + if (delete == null) { + delete = new Delete(rowKey); + } + return delete; + } + + void flush(List pendingPuts, List pendingDeletes) { + if (put != null) { + pendingPuts.add(put); + } + if (delete != null) { + pendingDeletes.add(delete); } } + } + + /** + * Cell-level drift counts produced by {@link #diffCellsForRow}. Populated only for rows + * present on both clusters; whole-row drift is signaled by the caller directly at the + * {@code cmp != 0} branches in {@link #repairChunk}. Three counters partition the cell + * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value. + */ + private static final class CellDriftCounts { + static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0); + + final int missing; + final int extra; + final int different; + + CellDriftCounts(int missing, int extra, int different) { + this.missing = missing; + this.extra = extra; + this.different = different; + } + } + + /** + * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing / + * extra on target, plus rows that cannot be repaired because target's row is entirely + * tombstones — HBase has no API to remove tombstones, only major compaction does) plus + * three cell-level (cells missing / extra / different on rows present on both clusters). + * Owns the bookkeeping that was previously scattered across {@link #repairChunk} — local + * accumulators, MapReduce job-counter increments, the + * {@link PhoenixSyncTableCheckpointOutputRow.CounterFormatter#formatChunk} call, and the + * end-of-chunk log line. Adding a new drift signal means touching this class and the one + * place in the merge loop that produces it; everything else (commit to job context, + * checkpoint COUNTERS string, log) flows through these methods. + */ + private static final class DriftCounters { + long rowsMissingOnTarget; + long rowsExtraOnTarget; + long rowsCannotRepair; + long cellsMissingOnTarget; + long cellsExtraOnTarget; + long cellsDifferentOnTarget; + + void addCellDrift(CellDriftCounts cellDrift) { + cellsMissingOnTarget += cellDrift.missing; + cellsExtraOnTarget += cellDrift.extra; + cellsDifferentOnTarget += cellDrift.different; + } + + /** Increments the job's MapReduce counters with this chunk's drift totals. */ + void commitTo(Context context) { + context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(rowsMissingOnTarget); + context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(rowsExtraOnTarget); + context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(rowsCannotRepair); + context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(cellsMissingOnTarget); + context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(cellsExtraOnTarget); + context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).increment(cellsDifferentOnTarget); + } + + /** Formats the chunk's COUNTERS string for persistence in the checkpoint table. */ + String formatChunkCounters(long verifySourceRows, long verifyTargetRows) { + return PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(verifySourceRows, + verifyTargetRows, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, + cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); + } + + /** Compact end-of-chunk log line summarizing all six drift signals. */ + String toLogString() { + return String.format( + "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsCannotRepair=%d, " + + "cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, cellsDifferentOnTarget=%d", + rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, + cellsExtraOnTarget, cellsDifferentOnTarget); + } + } + + /** + * Routes a source cell to the right mutation kind. Put cells go to a {@link Put}; tombstone + * cells go to a {@link Delete} via {@link Delete#add(Cell)} which preserves the tombstone's + * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under + * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells. + */ + private void mirrorSourceCell(Cell cell, RowRepairMutations rowMutations) throws IOException { + if (CellUtil.isDelete(cell)) { + rowMutations.delete().add(cell); + } else { + rowMutations.put().add(cell); + } + } + + /** + * Tombstones a target-only cell at its exact timestamp via {@code addColumn}. Skips cells + * that are themselves already tombstones: HBase has no API to remove a tombstone cell — + * tombstones can only be reaped by major compaction once they age past the keep-deleted- + * cells window. Issuing another Delete at the same coordinates writes a duplicate marker, + * does not change the row's effective state, and only adds compaction load. Combined with + * the absence of a source-side counterpart to mirror, the right action is to leave the + * existing tombstone untouched. The repair scan's time range {@code [fromTime, toTime]} + * guarantees cells outside the window are never read (and therefore never deleted). + * + * @return true if the cell was a live cell that contributed a tombstone marker, false if + * the cell was already a tombstone and was skipped. + */ + private boolean tombstoneTargetCell(Cell cell, RowRepairMutations rowMutations) { + if (CellUtil.isDelete(cell)) { + return false; + } + rowMutations.delete().addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), + cell.getTimestamp()); return true; } /** - * Builds a Put preserving the original cell timestamps from the source row. Used when an - * entire row is missing on target. + * Mirrors every source cell of a row that is missing on target. Source cells route by + * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via + * {@link Delete#add(Cell)}. */ - private Put buildPutFromResult(Result result) throws IOException { - Put put = new Put(result.getRow()); - for (Cell cell : result.rawCells()) { - put.add(cell); + private void mirrorWholeRow(Result sourceResult, List pendingPuts, + List pendingDeletes) throws IOException { + RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow()); + for (Cell cell : sourceResult.rawCells()) { + mirrorSourceCell(cell, rowMutations); } - return put; + rowMutations.flush(pendingPuts, pendingDeletes); } /** - * Builds a Delete for every cell in the target row. The repair scan's time range - * {@code [fromTime, toTime]} guarantees the cells we read here are within the sync window; - * cells outside the window are never read, therefore never deleted. + * Tombstones every live cell of a row that is extra on target. Existing tombstones on the + * target row are skipped — HBase cannot remove tombstone cells; only major compaction + * reaps them. + * + * @return the number of live cells that contributed a tombstone marker. {@code 0} means + * the row was already entirely tombstones — repair could not act on it, and the + * caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}. */ - private Delete buildDeleteFromResult(Result result) { - Delete delete = new Delete(result.getRow()); - for (Cell cell : result.rawCells()) { - delete.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), - cell.getTimestamp()); + private int tombstoneWholeRow(Result targetResult, List pendingPuts, + List pendingDeletes) { + RowRepairMutations rowMutations = new RowRepairMutations(targetResult.getRow()); + int liveCellsTombstoned = 0; + for (Cell cell : targetResult.rawCells()) { + if (tombstoneTargetCell(cell, rowMutations)) { + liveCellsTombstoned++; + } } - return delete; + rowMutations.flush(pendingPuts, pendingDeletes); + return liveCellsTombstoned; } /** - * Computes cell-level diffs for a row that exists on both clusters but with a different - * cell set. Cells present in source but missing from target are added to a single Put for - * the row; cells present in target but missing from source are added to a single Delete. + * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator} + * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the + * pending lists. Returns a {@link CellDriftCounts} classifying the cell-level drift: + * + * same coords + matching value → no drift, no signal + * same coords + different value → different++; mirror source cell + * source-only cell at unique coords → missing++; mirror source cell + * target-only live cell at unique coords → extra++; tombstone target cell + * target-only tombstone cell → skip (HBase cannot remove tombstones) */ - private void addRepairMutations(Result sourceResult, Result targetResult, + private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult, List pendingPuts, List pendingDeletes) throws IOException { - byte[] rowKey = sourceResult.getRow(); Cell[] sourceCells = sourceResult.rawCells(); Cell[] targetCells = targetResult.rawCells(); CellComparator comparator = CellComparator.getInstance(); - Put put = null; - Delete delete = null; + RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow()); + int missing = 0; + int extra = 0; + int different = 0; int sourceIdx = 0; int targetIdx = 0; while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) { - Cell sourceCell = sourceCells[sourceIdx]; - Cell targetCell = targetCells[targetIdx]; - int cmp = comparator.compare(sourceCell, targetCell); + int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]); if (cmp == 0) { - // Same coordinates (family/qualifier/timestamp/type) on both sides. CellComparator - // does NOT compare values, so check value separately. If values differ, Put the - // source cell — it overwrites the target cell at the same timestamp. - if (!CellUtil.matchingValue(sourceCell, targetCell)) { - if (put == null) { - put = new Put(rowKey); - } - put.add(sourceCell); + // Same coordinates; CellComparator does not compare values, check separately. + if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) { + mirrorSourceCell(sourceCells[sourceIdx], rowMutations); + different++; } sourceIdx++; targetIdx++; } else if (cmp < 0) { - // Source-only cell, Put on target. - if (put == null) { - put = new Put(rowKey); - } - put.add(sourceCell); - sourceIdx++; - } else { - // Target-only cell, Delete from target. - if (delete == null) { - delete = new Delete(rowKey); - } - delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell), - targetCell.getTimestamp()); - targetIdx++; + mirrorSourceCell(sourceCells[sourceIdx++], rowMutations); + missing++; + } else if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) { + extra++; } } - // Tail: any remaining source cells are source-only. while (sourceIdx < sourceCells.length) { - if (put == null) { - put = new Put(rowKey); - } - put.add(sourceCells[sourceIdx++]); + mirrorSourceCell(sourceCells[sourceIdx++], rowMutations); + missing++; } - // Tail: any remaining target cells are target-only. while (targetIdx < targetCells.length) { - Cell targetCell = targetCells[targetIdx++]; - if (delete == null) { - delete = new Delete(rowKey); + if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) { + extra++; } - delete.addColumn(CellUtil.cloneFamily(targetCell), CellUtil.cloneQualifier(targetCell), - targetCell.getTimestamp()); } - if (put != null) { - pendingPuts.add(put); - } - if (delete != null) { - pendingDeletes.add(delete); + if (missing == 0 && extra == 0 && different == 0) { + return CellDriftCounts.NONE; } + rowMutations.flush(pendingPuts, pendingDeletes); + return new CellDriftCounts(missing, extra, different); } /** @@ -935,8 +1055,7 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive, long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context) throws IOException, SQLException { - long rowsPut = 0; - long rowsDeleted = 0; + DriftCounters driftCounters = new DriftCounters(); LOGGER.info( "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}", @@ -972,23 +1091,31 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow()); } + // Drift signals are bumped at the branch that semantically caused them: row-level + // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch. if (cmp == 0) { // Same row key on both clusters — diff at cell level and repair only if cells differ. - if (!rowCellsEqual(sourceResult, targetResult)) { - addRepairMutations(sourceResult, targetResult, pendingPuts, pendingDeletes); - rowsPut++; - } + driftCounters.addCellDrift( + diffCellsForRow(sourceResult, targetResult, pendingPuts, pendingDeletes)); sourceResult = sourceScanner.next(); targetResult = targetScanner.next(); } else if (cmp < 0) { - // Source-only row (target is missing it, or target has advanced past) — Put it on target. - pendingPuts.add(buildPutFromResult(sourceResult)); - rowsPut++; + // Source-only row — mirror it onto target. + mirrorWholeRow(sourceResult, pendingPuts, pendingDeletes); + driftCounters.rowsMissingOnTarget++; sourceResult = sourceScanner.next(); } else { - // Target-only row (source has no such row in this range) — Delete it from target. - pendingDeletes.add(buildDeleteFromResult(targetResult)); - rowsDeleted++; + // Target-only row — tombstone its live cells. If the row is already entirely + // tombstones, repair has nothing to do (HBase cannot remove tombstones; only + // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can + // see the unrepairable drift volume. + int liveCellsTombstoned = + tombstoneWholeRow(targetResult, pendingPuts, pendingDeletes); + if (liveCellsTombstoned == 0) { + driftCounters.rowsCannotRepair++; + } else { + driftCounters.rowsExtraOnTarget++; + } targetResult = targetScanner.next(); } @@ -1010,8 +1137,8 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar Timestamp failedAt = new Timestamp(System.currentTimeMillis()); // Capture partial progress in the COUNTERS column for triage. - String failedCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted); + String failedCounters = + driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); syncTableOutputRepository .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() .setTableName(tableName).setTargetCluster(targetZkQuorum) @@ -1024,13 +1151,12 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar return; } - context.getCounter(SyncCounters.ROWS_PUT_TO_TARGET).increment(rowsPut); - context.getCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).increment(rowsDeleted); + driftCounters.commitTo(context); context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1); Timestamp repairEndTime = new Timestamp(System.currentTimeMillis()); - String repairCounters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatChunk(verifySourceRows, verifyTargetRows, rowsPut, rowsDeleted); + String repairCounters = + driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); syncTableOutputRepository .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() @@ -1041,8 +1167,9 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime) .setCounters(repairCounters).build()); - LOGGER.info("Completed repair for chunk source=[{}, {}]: rowsPut={}, rowsDeleted={}", - Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), rowsPut, rowsDeleted); + LOGGER.info("Completed repair for chunk source=[{}, {}]: {}", + Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), + driftCounters.toLogString()); } @Override diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java index 7cb3f034138..8ec66abf7ca 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java @@ -429,19 +429,31 @@ private boolean submitPhoenixSyncTableJob() throws Exception { counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsProcessed = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue(); - long rowsPutToTarget = - counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_PUT_TO_TARGET).getValue(); - long rowsDeletedFromTarget = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_DELETED_FROM_TARGET).getValue(); + long rowsMissingOnTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); + long rowsExtraOnTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + long rowsCannotRepair = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue(); + long cellsMissingOnTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); + long cellsExtraOnTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + long cellsDifferentOnTarget = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); LOGGER.info( "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n" + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n" + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n" + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n" - + "Rows Put To Target: {}, \n Rows Deleted From Target: {}", + + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n" + + "Rows Cannot Repair: {}, \n" + + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n" + + "Cells Different On Target: {}", taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers, chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed, - targetRowsProcessed, rowsPutToTarget, rowsDeletedFromTarget); + targetRowsProcessed, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, + cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); } else { LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed " + "during initialization", qTable); diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 57321b4ce9f..599f18337d8 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -192,16 +192,39 @@ public void testSyncTableValidateWithDataDifference() throws Exception { long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); long mappersRepairFailed = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); - long rowsPutToTarget = repairCounters.findCounter(SyncCounters.ROWS_PUT_TO_TARGET).getValue(); - long rowsDeletedFromTarget = - repairCounters.findCounter(SyncCounters.ROWS_DELETED_FROM_TARGET).getValue(); + long rowsMissingOnTarget = + repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); + long rowsExtraOnTarget = + repairCounters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + long rowsCannotRepair = + repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue(); + long cellsMissingOnTarget = + repairCounters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); + long cellsExtraOnTarget = + repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + long cellsDifferentOnTarget = + repairCounters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired); assertEquals("No chunk repair should fail", 0, chunksRepairFailed); assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired); assertEquals("No mapper repair should fail", 0, mappersRepairFailed); - assertEquals("Three rows on target should be Put back to source values", 3, rowsPutToTarget); - assertEquals("No rows should be deleted from target (only value diffs, not extras)", 0, - rowsDeletedFromTarget); + // The three drifted rows exist on both clusters (only NAME values were modified on + // target via separate upserts, producing cells at different timestamps), so all drift + // is cell-level — no whole-row missing or extra signals. + assertEquals("No whole rows should be missing on target", 0, rowsMissingOnTarget); + assertEquals("No whole rows should be extra on target", 0, rowsExtraOnTarget); + assertEquals("No rows should be unrepairable (target has no all-tombstone rows)", 0, + rowsCannotRepair); + // Each modified row contributes at least one missing cell (source's original NAME at + // its original timestamp) and one extra cell (target's modified NAME at the new + // timestamp). cellsDifferent stays 0 because the modifications wrote at new timestamps + // rather than overwriting at the same coordinates. + assertTrue("Source-only cells across the 3 drifted rows should be detected, got " + + cellsMissingOnTarget, cellsMissingOnTarget >= 3); + assertTrue("Target-only cells across the 3 drifted rows should be detected, got " + + cellsExtraOnTarget, cellsExtraOnTarget >= 3); + assertEquals("No same-coord value diffs (modifications wrote at new timestamps)", 0, + cellsDifferentOnTarget); // Target rows should now match source. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); From 71c25d81d595cfba3102400d18e105554f532bba Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Fri, 15 May 2026 17:04:38 +0530 Subject: [PATCH 08/18] checkpoint all possible combination fixed?? --- .../PhoenixSyncTableCheckpointOutputRow.java | 1 + .../mapreduce/PhoenixSyncTableMapper.java | 683 +++++++++++++++--- .../mapreduce/PhoenixSyncTableTool.java | 34 +- 3 files changed, 596 insertions(+), 122 deletions(-) diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java index d261d524574..86574326ad2 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java @@ -39,6 +39,7 @@ public enum Status { VERIFIED, MISMATCHED, REPAIRED, + UNREPAIRABLE, REPAIR_FAILED } diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index 1818f97ef1c..d09151b1e27 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -20,13 +20,21 @@ import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES; import java.io.IOException; +import java.nio.ByteBuffer; import java.security.MessageDigest; import java.sql.Connection; import java.sql.SQLException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; @@ -36,6 +44,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; @@ -68,11 +77,14 @@ public enum SyncCounters { MAPPERS_VERIFIED, MAPPERS_MISMATCHED, MAPPERS_REPAIRED, + MAPPERS_UNREPAIRABLE, MAPPERS_REPAIR_FAILED, CHUNKS_VERIFIED, CHUNKS_MISMATCHED, CHUNKS_REPAIRED, + CHUNKS_UNREPAIRABLE, CHUNKS_REPAIR_FAILED, + CHECKPOINT_WRITE_FAILED, SOURCE_ROWS_PROCESSED, TARGET_ROWS_PROCESSED, ROWS_MISSING_ON_TARGET, @@ -218,6 +230,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context // Track counters before processing this region long verifiedBefore = context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); long mismatchedBefore = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); + long unrepairableBefore = context.getCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue(); @@ -235,6 +248,8 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context context.getCounter(SyncCounters.CHUNKS_VERIFIED).getValue() - verifiedBefore; long mismatchedChunks = context.getCounter(SyncCounters.CHUNKS_MISMATCHED).getValue() - mismatchedBefore; + long unrepairableChunks = + context.getCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue() - unrepairableBefore; long repairFailedChunks = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() - repairFailedBefore; long sourceRowsProcessed = @@ -247,7 +262,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed); if (sourceRowsProcessed > 0) { recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks, - mismatchedChunks, repairFailedChunks, counters, context); + mismatchedChunks, unrepairableChunks, repairFailedChunks, counters, context); } else { LOGGER.info( "No rows pending to process. All region boundaries are covered for startKey:{}, endKey: {}", @@ -264,23 +279,31 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context * @param regionEndTime Region processing end time * @param verifiedChunks Number of verified chunks * @param mismatchedChunks Number of mismatched chunks + * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR; + * if > 0 (and no repair-failed chunks) the region rolls up to + * UNREPAIRABLE, signalling operator intervention is needed * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the - * region rolls up to MISMATCHED (drift remains, re-run will retry) + * region rolls up to REPAIR_FAILED (highest precedence) * @param counters Formatted counter string * @param context Mapper context */ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd, Timestamp regionStartTime, Timestamp regionEndTime, long verifiedChunks, long mismatchedChunks, - long repairFailedChunks, String counters, Context context) throws SQLException { + long unrepairableChunks, long repairFailedChunks, String counters, Context context) + throws SQLException { - // Region rolls up its child chunks' outcomes into one of four statuses: - // VERIFIED — every chunk matched; no drift in this region. + // Region rolls up its child chunks' outcomes into one of five statuses, in precedence + // order (most-severe wins): + // REPAIR_FAILED — at least one chunk threw during merge-scan or flush. + // UNREPAIRABLE — repair completed but at least one chunk has rows that cannot be + // repaired (target tombstones shadow source Puts, or target row is + // entirely tombstones). Operator action (typically major compaction + // on target) needed before a re-run can converge. // MISMATCHED — drift was detected but repair was not attempted (dry-run mode). - // REPAIRED — drift was detected and every chunk's repair succeeded. - // REPAIR_FAILED — drift was detected, repair was attempted, and at least one chunk - // threw during merge-scan or flush. The failed chunks remain as - // CHUNK/REPAIR_FAILED rows; a re-run will re-attempt them via the - // Phase 2 STATUS-IN filter that excludes REPAIR_FAILED. + // REPAIRED — drift was detected and every chunk's repair fully succeeded. + // VERIFIED — every chunk matched; no drift in this region. + // The resume filter on re-invocation skips VERIFIED and REPAIRED — UNREPAIRABLE, + // MISMATCHED, and REPAIR_FAILED chunks are re-entered as gaps and re-attempted. PhoenixSyncTableCheckpointOutputRow.Status status; SyncCounters mapperCounter; if (mismatchedChunks == 0) { @@ -289,12 +312,15 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd, } else if (isDryRun) { status = PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED; mapperCounter = SyncCounters.MAPPERS_MISMATCHED; - } else if (repairFailedChunks == 0) { - status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; - mapperCounter = SyncCounters.MAPPERS_REPAIRED; - } else { + } else if (repairFailedChunks > 0) { status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED; mapperCounter = SyncCounters.MAPPERS_REPAIR_FAILED; + } else if (unrepairableChunks > 0) { + status = PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE; + mapperCounter = SyncCounters.MAPPERS_UNREPAIRABLE; + } else { + status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; + mapperCounter = SyncCounters.MAPPERS_REPAIRED; } context.getCounter(mapperCounter).increment(1); @@ -303,12 +329,15 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd, counters); String logMessage = String.format( - "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, %d repair-failed", + "PhoenixSyncTable region [%s, %s) completed with %s: %d verified, %d mismatched, " + + "%d unrepairable, %d repair-failed", Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), - status.name().toLowerCase(), verifiedChunks, mismatchedChunks, repairFailedChunks); + status.name().toLowerCase(), verifiedChunks, mismatchedChunks, unrepairableChunks, + repairFailedChunks); if ( status == PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED + || status == PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE || status == PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED ) { LOGGER.warn(logMessage); @@ -603,18 +632,25 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk, } /** - * Builds the common Scan shape used by both verification and repair: same key range, - * inclusivity, time window, raw-scan, and all-versions semantics. Callers layer on their - * own caching, limits, and coprocessor attributes. Keeping the base shared guarantees that - * the cells visited by repair are exactly the cells the verifier hashed. + * Builds the common Scan shape used by verification, repair, and tombstone loading: key + * range, inclusivity, time window, cache-blocks, plus raw-scan and all-versions semantics + * controlled by the user's {@code --raw-scan} / {@code --read-all-versions} flags. + * + * Callers that need to force raw-scan or all-versions on a specific call (e.g., the + * tombstone loader, which must surface tombstones regardless of user flags) pass + * {@code forceRaw=true} or {@code forceAllVersions=true} to override. + * + * Callers layer on their own caching, limits, and coprocessor attributes. Keeping the + * base shared guarantees that the cells visited by repair are exactly the cells the + * verifier hashed (when both pass the same force flags). */ private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, - boolean isEndKeyInclusive) throws IOException { + boolean isEndKeyInclusive, boolean forceRaw, boolean forceAllVersions) throws IOException { Scan scan = new Scan(); scan.withStartRow(startKey, isStartKeyInclusive); scan.withStopRow(endKey, isEndKeyInclusive); - scan.setRaw(isRawScan); - if (isReadAllVersions) { + scan.setRaw(forceRaw || isRawScan); + if (forceAllVersions || isReadAllVersions) { scan.readAllVersions(); } scan.setCacheBlocks(false); @@ -628,7 +664,8 @@ private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyIn */ private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, boolean isEndKeyInclusive, boolean isTargetScan) throws IOException { - Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive); + Scan scan = + createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false); // Set limit and caching to 1 for sequential partial digest retrieval from target. // Enables digest continuation: each target chunk's digest feeds into the next until scanning // completes @@ -760,7 +797,8 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart, */ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException { - Scan scan = createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive); + Scan scan = + createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false); scan.setCaching(1000); ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn); scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES); @@ -768,17 +806,64 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey } /** - * Lazily-built per-row Put and Delete mutations. Each field is created on first use so a - * row that needs only Puts produces no Delete (and vice versa); a row that needs no - * mutation at all produces neither. After construction, callers append the produced - * mutations to the pending batches via {@link #flush(List, List)}. + * Loads the target row's tombstone index for shadow detection. Issues a single-row scan + * against target with raw=true and all-versions forced (regardless of user flags), so the + * tombstone subtypes that would otherwise be filtered out are surfaced. Live cells in the + * response are ignored — they were already visible to the repair scan and are handled by + * the merge logic. (HBase 2.x exposes {@code setRaw} only on Scan, not on Get — so a + * one-row scan stands in for what would otherwise be a raw Get.) + * + * Time range: the base scan applies {@code [fromTime, toTime]}, but shadow detection + * needs tombstones at ts >= fromTime regardless of the upper bound. A DeleteColumn / + * DeleteFamily at ts > toTime can still shadow a Put we mirror at ts in window during + * application reads (HBase tombstones don't respect the verifier's sync window). Lower + * bound stays at fromTime since tombstones below the window can't shadow anything we'd + * write inside the window. */ - private static final class RowRepairMutations { + private TargetRowTombstones loadTargetRowTombstones(byte[] rowKey, Table targetHTable) + throws IOException { + Scan scan = createBaseScan(rowKey, rowKey, true, true, true, true); + scan.setTimeRange(fromTime, Long.MAX_VALUE); + scan.setCaching(1); + scan.setLimit(1); + TargetRowTombstones tombstones = new TargetRowTombstones(); + try (ResultScanner scanner = targetHTable.getScanner(scan)) { + Result raw = scanner.next(); + if (raw != null) { + for (Cell cell : raw.rawCells()) { + // Record both tombstones (for shadow detection) and Puts (for hidden-version + // discovery during cmp > 0 tombstoning of target-only cells). + tombstones.record(cell); + } + } + } + return tombstones; + } + + /** + * Lazily-built per-row Put and Delete mutations plus per-row unrepairable-drift state. + * Each mutation field is created on first use so a row that needs only Puts produces no + * Delete (and vice versa); a row that needs no mutation at all produces neither. + * + * Tombstone-index state is lazy: {@link #tombstones} is loaded only on the first + * shadow check via {@link #tombstones(Table)} (one raw scan per row at most). + * {@link #anyCellUnrepairable} accumulates whether the row carries any drift that + * repair could not act on — either a source-side Put was shadow-suppressed by an + * existing target tombstone, or a target-only tombstone exists that source lacks + * (HBase has no API to remove tombstones). The caller reads it after the merge to + * decide whether to bump {@link SyncCounters#ROWS_CANNOT_REPAIR}. + * + * After construction, callers append produced mutations to the pending batches via + * {@link #flush(List, List)}. + */ + private final class RowRepairState { private final byte[] rowKey; Put put; Delete delete; + TargetRowTombstones tombstones; + boolean anyCellUnrepairable; - RowRepairMutations(byte[] rowKey) { + RowRepairState(byte[] rowKey) { this.rowKey = rowKey; } @@ -796,6 +881,18 @@ Delete delete() { return delete; } + /** + * Lazily loads the target row's tombstone index on first call (one raw single-row scan + * via {@link #loadTargetRowTombstones}); cached thereafter for reuse across the row's + * subsequent shadow checks. + */ + TargetRowTombstones targetRowTombstones(Table targetHTable) throws IOException { + if (tombstones == null) { + tombstones = loadTargetRowTombstones(rowKey, targetHTable); + } + return tombstones; + } + void flush(List pendingPuts, List pendingDeletes) { if (put != null) { pendingPuts.add(put); @@ -826,6 +923,149 @@ private static final class CellDriftCounts { } } + /** + * Per-row index of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]}, built + * lazily from a single raw single-row scan with all-versions enabled. Used in two roles: + * + *

    + *
  1. Shadow detection ({@link #wouldShadow}): would a source Put we're about + * to mirror be suppressed by an existing target tombstone?
  2. + *
  3. Hidden-version discovery ({@link #targetPutTimestampsBetween}): what + * max-versions-filtered target Puts sit between source's max ts at a column and + * target's visible Put? Used in the {@code cmp > 0} tombstoning path to issue + * point Deletes for hidden versions that would otherwise surface on read after + * the visible Put is shadowed.
  4. + *
+ * + * HBase has four tombstone subtypes, each with distinct shadow semantics: + * Delete — shadows a Put at {@code (cf, q, ts == T)} exactly + * DeleteColumn — shadows Puts at {@code (cf, q, ts <= T)} + * DeleteFamily — shadows Puts at {@code (cf, *, ts <= T)} + * DeleteFamilyVersion — shadows Puts at {@code (cf, *, ts == T)} + * {@link #wouldShadow(Cell)} consults all four indices and returns true on any match. + */ + private static final class TargetRowTombstones { + private final Map> deletePointTs = new HashMap<>(); + private final Map deleteColumnUpperBound = new HashMap<>(); + private final Map deleteFamilyUpperBound = new HashMap<>(); + private final Map> deleteFamilyVersionTs = new HashMap<>(); + /** Per-column ts-ordered set of target's Put timestamps. */ + private final Map> targetPutTs = new HashMap<>(); + + void record(Cell cell) { + if (CellUtil.isDelete(cell)) { + recordTombstone(cell); + } else { + targetPutTs.computeIfAbsent(columnKey(cell), k -> new TreeMap<>()) + .put(cell.getTimestamp(), Boolean.TRUE); + } + } + + private void recordTombstone(Cell tombstone) { + long ts = tombstone.getTimestamp(); + ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone)); + switch (tombstone.getType()) { + case Delete: + deletePointTs.computeIfAbsent(columnKey(tombstone), k -> new HashSet<>()).add(ts); + break; + case DeleteColumn: + deleteColumnUpperBound.merge(columnKey(tombstone), ts, Math::max); + break; + case DeleteFamily: + deleteFamilyUpperBound.merge(family, ts, Math::max); + break; + case DeleteFamilyVersion: + deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts); + break; + default: + // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here. + } + } + + /** Returns true if any tombstone in this index would shadow a Put at the cell's coords. */ + boolean wouldShadow(Cell sourcePut) { + long ts = sourcePut.getTimestamp(); + ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut)); + ColumnKey column = columnKey(sourcePut); + + Set pointTs = deletePointTs.get(column); + if (pointTs != null && pointTs.contains(ts)) { + return true; + } + Long deleteColTs = deleteColumnUpperBound.get(column); + if (deleteColTs != null && ts <= deleteColTs) { + return true; + } + Long deleteFamTs = deleteFamilyUpperBound.get(family); + if (deleteFamTs != null && ts <= deleteFamTs) { + return true; + } + Set dfvTs = deleteFamilyVersionTs.get(family); + return dfvTs != null && dfvTs.contains(ts); + } + + /** + * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than + * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find + * hidden (max-versions-filtered) target versions sitting between source's max ts and + * target's visible ts so they can be point-Deleted. + */ + Set targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive, + long upperExclusive) { + NavigableMap tss = targetPutTs.get(new ColumnKey(family, qualifier)); + if (tss == null) { + return Collections.emptySet(); + } + return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet(); + } + + private static ColumnKey columnKey(Cell cell) { + return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell)); + } + } + + /** Composite (family, qualifier) key with byte-array equality semantics. */ + private static final class ColumnKey { + private final byte[] family; + private final byte[] qualifier; + + ColumnKey(byte[] family, byte[] qualifier) { + this.family = family; + this.qualifier = qualifier; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ColumnKey)) { + return false; + } + ColumnKey other = (ColumnKey) o; + return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier); + } + + @Override + public int hashCode() { + return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier); + } + } + + /** + * Result of {@link #diffCellsForRow}. Carries the cell-level drift counts plus a flag + * indicating whether any source-side mirror was suppressed because target tombstones + * would shadow it — making the row partially or fully unrepairable. + */ + private static final class RowDiffOutcome { + static final RowDiffOutcome NONE = new RowDiffOutcome(CellDriftCounts.NONE, false); + + final CellDriftCounts cells; + final boolean rowCannotRepair; + + RowDiffOutcome(CellDriftCounts cells, boolean rowCannotRepair) { + this.cells = cells; + this.rowCannotRepair = rowCannotRepair; + } + } + /** * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing / * extra on target, plus rows that cannot be repaired because target's row is entirely @@ -885,48 +1125,135 @@ String toLogString() { * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells. */ - private void mirrorSourceCell(Cell cell, RowRepairMutations rowMutations) throws IOException { + private void mirrorSourceCell(Cell cell, RowRepairState rowState) throws IOException { if (CellUtil.isDelete(cell)) { - rowMutations.delete().add(cell); + rowState.delete().add(cell); } else { - rowMutations.put().add(cell); + rowState.put().add(cell); } } /** - * Tombstones a target-only cell at its exact timestamp via {@code addColumn}. Skips cells - * that are themselves already tombstones: HBase has no API to remove a tombstone cell — - * tombstones can only be reaped by major compaction once they age past the keep-deleted- - * cells window. Issuing another Delete at the same coordinates writes a duplicate marker, - * does not change the row's effective state, and only adds compaction load. Combined with - * the absence of a source-side counterpart to mirror, the right action is to leave the - * existing tombstone untouched. The repair scan's time range {@code [fromTime, toTime]} - * guarantees cells outside the window are never read (and therefore never deleted). + * Mirrors a source cell onto target only if no existing target tombstone would shadow + * the resulting Put. Lazily loads the target row's tombstone index on first Put cell + * (one raw single-row scan per row at most) via {@link RowRepairState#tombstones}. If + * the cell would be shadowed, the mirror is suppressed, {@code rowState.anyCellUnrepairable} + * is set, and the caller is expected to record {@link SyncCounters#ROWS_CANNOT_REPAIR}. + * + * Shadow detection only applies to Put cells. Tombstone source cells (under + * {@code --raw-scan}) bypass the check and always mirror via {@link #mirrorSourceCell}: + * {@link TargetRowTombstones#wouldShadow} is defined for live cells (does an existing target + * tombstone hide a new Put on read), and is not meaningful for delete-marker cells — + * mirroring a tombstone over an existing tombstone is a benign duplicate, not a suppression. + * + * @return {@code true} if the cell was mirrored, {@code false} if suppressed by shadowing. + */ + private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, + RowRepairState rowState) throws IOException { + if (!CellUtil.isDelete(cell) && rowState.targetRowTombstones(targetHTable).wouldShadow(cell)) { + rowState.anyCellUnrepairable = true; + return false; + } + mirrorSourceCell(cell, rowState); + return true; + } + + /** + * Tombstones a target-only cell to make target's read view at this column match source's. + * Skips cells that are themselves already tombstones: HBase has no API to remove a + * tombstone cell — tombstones can only be reaped by major compaction once they age past + * the keep-deleted-cells window. Issuing another Delete at the same coordinates writes a + * duplicate marker, does not change the row's effective state, and only adds compaction + * load. Combined with the absence of a source-side counterpart to mirror, the right + * action is to leave the existing tombstone untouched. + * + * Tombstone subtype depends on what source has at this {@code (cf, q)}: + *
    + *
  • Source has no cell at this column — target's read at this column should + * be empty. Use {@link Delete#addColumns(byte[], byte[], long)} (DeleteColumn, + * scope {@code ts <= T}) so even max-versions-hidden older target versions are + * shadowed. A point Delete would only shadow this exact ts, surfacing the next + * hidden version on read.
  • + *
  • Source's max ts at this column is >= target's ts — point-Delete only + * target's exact ts. Source's equal-or-higher ts mirror will surface; no hidden + * version can surface above it.
  • + *
  • Source's max ts at this column is < target's ts — point-Delete target's + * ts, AND point-Delete every max-versions-hidden Put on target with ts in + * {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is + * shadowed, the next hidden version surfaces above source's mirror — silent + * divergence. Hidden versions are discovered via the row's tombstone+Put index + * loaded from a single raw all-versions scan.
  • + *
+ * + * Pre-build {@code sourceMaxTsByColumn} once per row from the source cell array. * * @return true if the cell was a live cell that contributed a tombstone marker, false if * the cell was already a tombstone and was skipped. */ - private boolean tombstoneTargetCell(Cell cell, RowRepairMutations rowMutations) { + private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairState rowState, + Map sourceMaxTsByColumn) throws IOException { if (CellUtil.isDelete(cell)) { return false; } - rowMutations.delete().addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), - cell.getTimestamp()); + byte[] family = CellUtil.cloneFamily(cell); + byte[] qualifier = CellUtil.cloneQualifier(cell); + long ts = cell.getTimestamp(); + Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier)); + if (sourceMaxTs == null) { + // Source has no cell at this column; shadow ts <= T so hidden older versions + // don't surface on read. + rowState.delete().addColumns(family, qualifier, ts); + } else if (sourceMaxTs >= ts) { + // Source's mirror is at >= target's ts; point-Delete only target's exact ts. + rowState.delete().addColumn(family, qualifier, ts); + } else { + // Source's max ts at this column < target's ts. Point-Delete target's visible cell, + // AND point-Delete every hidden target Put in (sourceMaxTs, ts) so they don't + // surface above source's mirror after the visible cell is shadowed. + rowState.delete().addColumn(family, qualifier, ts); + Set hiddenTs = rowState.targetRowTombstones(targetHTable) + .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts); + for (Long hidden : hiddenTs) { + rowState.delete().addColumn(family, qualifier, hidden); + } + } return true; } + /** + * Outcome of {@link #mirrorWholeRow}. {@code FULLY_MIRRORED} = every source cell mirrored + * onto target with no shadowing. {@code PARTIALLY_MIRRORED} = at least one cell mirrored, + * at least one suppressed by an existing target tombstone (caller bumps both + * ROWS_MISSING_ON_TARGET and ROWS_CANNOT_REPAIR). {@code FULLY_SHADOWED} = every source + * cell suppressed; the row is unrepairable until target's tombstones are reaped (caller + * bumps only ROWS_CANNOT_REPAIR). + */ + private enum WholeRowMirrorOutcome { + FULLY_MIRRORED, PARTIALLY_MIRRORED, FULLY_SHADOWED + } + /** * Mirrors every source cell of a row that is missing on target. Source cells route by * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via - * {@link Delete#add(Cell)}. + * {@link Delete#add(Cell)}. Each cell is shadow-checked against existing target + * tombstones (lazy raw single-row scan on first cell) — even though target's filtered scan + * returned no cells for this row, target may carry tombstones that would shadow our Put. */ - private void mirrorWholeRow(Result sourceResult, List pendingPuts, - List pendingDeletes) throws IOException { - RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow()); + private WholeRowMirrorOutcome mirrorWholeRow(Result sourceResult, Table targetHTable, + List pendingPuts, List pendingDeletes) throws IOException { + RowRepairState rowState = new RowRepairState(sourceResult.getRow()); + int mirrored = 0; for (Cell cell : sourceResult.rawCells()) { - mirrorSourceCell(cell, rowMutations); + if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowState)) { + mirrored++; + } } - rowMutations.flush(pendingPuts, pendingDeletes); + rowState.flush(pendingPuts, pendingDeletes); + if (mirrored == 0) { + return WholeRowMirrorOutcome.FULLY_SHADOWED; + } + return rowState.anyCellUnrepairable ? WholeRowMirrorOutcome.PARTIALLY_MIRRORED + : WholeRowMirrorOutcome.FULLY_MIRRORED; } /** @@ -938,40 +1265,75 @@ private void mirrorWholeRow(Result sourceResult, List pendingPuts, * the row was already entirely tombstones — repair could not act on it, and the * caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}. */ - private int tombstoneWholeRow(Result targetResult, List pendingPuts, - List pendingDeletes) { - RowRepairMutations rowMutations = new RowRepairMutations(targetResult.getRow()); + private int tombstoneWholeRow(Result targetResult, Table targetHTable, + List pendingPuts, List pendingDeletes) throws IOException { + RowRepairState rowState = new RowRepairState(targetResult.getRow()); + // Source has no row at all here, so source has no cell at any column — every target + // cell hits the "shadow ts <= T" path inside tombstoneTargetCell. + Map sourceMaxTsByColumn = Collections.emptyMap(); int liveCellsTombstoned = 0; for (Cell cell : targetResult.rawCells()) { - if (tombstoneTargetCell(cell, rowMutations)) { + if (tombstoneTargetCell(cell, targetHTable, rowState, sourceMaxTsByColumn)) { liveCellsTombstoned++; } } - rowMutations.flush(pendingPuts, pendingDeletes); + rowState.flush(pendingPuts, pendingDeletes); return liveCellsTombstoned; } /** * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator} * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the - * pending lists. Returns a {@link CellDriftCounts} classifying the cell-level drift: + * pending lists. Returns a {@link RowDiffOutcome} carrying both the cell-level drift + * counts and a flag indicating whether any source-side mirror was suppressed by target + * tombstone shadowing — letting the caller decide whether to bump + * {@link SyncCounters#ROWS_CANNOT_REPAIR}. * + * Branches: * same coords + matching value → no drift, no signal - * same coords + different value → different++; mirror source cell - * source-only cell at unique coords → missing++; mirror source cell + * same coords + different value → different++; mirror source cell (shadow-checked) + * source-only cell at unique coords → missing++; mirror source cell (shadow-checked) * target-only live cell at unique coords → extra++; tombstone target cell * target-only tombstone cell → skip (HBase cannot remove tombstones) + * + * Cells whose mirror is suppressed by shadowing do NOT bump the cell counter — the + * cell wasn't written to target, so it isn't repaired drift. The row-level shadow + * signal is surfaced via {@link RowDiffOutcome#rowCannotRepair}. */ - private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult, - List pendingPuts, List pendingDeletes) throws IOException { + private RowDiffOutcome diffCellsForRow(Result sourceResult, Result targetResult, + Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { Cell[] sourceCells = sourceResult.rawCells(); Cell[] targetCells = targetResult.rawCells(); CellComparator comparator = CellComparator.getInstance(); - RowRepairMutations rowMutations = new RowRepairMutations(sourceResult.getRow()); - int missing = 0; - int extra = 0; - int different = 0; + // Always use the lazy raw single-row fetch path for shadow detection. Even under + // --raw-scan, the merge-scan's targetCells came from a time-range-filtered scan and + // would not surface tombstones at ts > toTime — and those out-of-window tombstones + // can still shadow Puts we mirror inside the window during application reads. The + // lazy fetch in {@link RowRepairState#tombstones} loads tombstones at ts >= fromTime + // regardless of upper bound, catching that case correctly. + RowRepairState rowState = new RowRepairState(sourceResult.getRow()); + + // Pre-compute the set of (cf, q) coordinates source has any cell at. tombstoneTargetCell + // uses this to choose between point-Delete (when source has the column at some ts — + // mirrored cell will surface on read) and DeleteColumn (when source has nothing at the + // column — must shadow ts <= T to keep max-versions-hidden older versions invisible. + // The map records source's max ts at each column. When tombstoning a target cell at a + // column source has at lower ts, hidden target Puts in (sourceMax, targetTs) need + // their own point Deletes too — otherwise after the visible cell is shadowed, a hidden + // version surfaces above source's mirror. + Map sourceMaxTsByColumn = new HashMap<>(); + for (Cell sourceCell : sourceCells) { + if (!CellUtil.isDelete(sourceCell)) { + ColumnKey key = new ColumnKey(CellUtil.cloneFamily(sourceCell), + CellUtil.cloneQualifier(sourceCell)); + sourceMaxTsByColumn.merge(key, sourceCell.getTimestamp(), Math::max); + } + } + + int cellMissing = 0; + int cellExtra = 0; + int cellDifferent = 0; int sourceIdx = 0; int targetIdx = 0; @@ -980,48 +1342,119 @@ private CellDriftCounts diffCellsForRow(Result sourceResult, Result targetResult if (cmp == 0) { // Same coordinates; CellComparator does not compare values, check separately. if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) { - mirrorSourceCell(sourceCells[sourceIdx], rowMutations); - different++; + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { + cellDifferent++; + } } sourceIdx++; targetIdx++; } else if (cmp < 0) { - mirrorSourceCell(sourceCells[sourceIdx++], rowMutations); - missing++; - } else if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) { - extra++; + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { + cellMissing++; + } + sourceIdx++; + } else { + // Target-only cell. Live cell → tombstone it (cellExtra++). Tombstone cell → + // can't act on it (HBase has no API to remove tombstones), so the row carries + // unrepairable drift and re-verify will mismatch. + if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState, + sourceMaxTsByColumn)) { + cellExtra++; + } else { + rowState.anyCellUnrepairable = true; + } } } while (sourceIdx < sourceCells.length) { - mirrorSourceCell(sourceCells[sourceIdx++], rowMutations); - missing++; + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { + cellMissing++; + } + sourceIdx++; } while (targetIdx < targetCells.length) { - if (tombstoneTargetCell(targetCells[targetIdx++], rowMutations)) { - extra++; + if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState, + sourceMaxTsByColumn)) { + cellExtra++; + } else { + rowState.anyCellUnrepairable = true; } } - if (missing == 0 && extra == 0 && different == 0) { - return CellDriftCounts.NONE; + if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowState.anyCellUnrepairable) { + return RowDiffOutcome.NONE; } - rowMutations.flush(pendingPuts, pendingDeletes); - return new CellDriftCounts(missing, extra, different); + rowState.flush(pendingPuts, pendingDeletes); + return new RowDiffOutcome(new CellDriftCounts(cellMissing, cellExtra, cellDifferent), + rowState.anyCellUnrepairable); } /** - * Flushes the accumulated Put and Delete batches to the target HTable and clears both - * lists. Called every {@code repairBatchSize} rows and once more at the end of a chunk. + * Flushes the accumulated Put and Delete batches to the target HTable as a single mixed + * RPC and clears both lists. Called every {@code repairBatchSize} rows and once more at + * the end of a chunk. + * + * Issuing both Puts and Deletes via {@link Table#batch} (one network round-trip) instead + * of separate {@code put()} + {@code delete()} calls (two round-trips) eliminates the + * inter-RPC failure window where a JVM/regionserver crash between the two would leave + * target with Puts applied but matching Deletes not yet tombstoned. Server-side, batch + * mutations are still applied per-row sequentially, so a regionserver crash mid-batch + * can still leave partial application — but the mid-flush gap on the client side is + * gone. + * + * {@link InterruptedException} from {@code batch} is converted to {@link IOException} + * so the existing per-chunk catch path treats interruption like any other transient + * write failure (chunk → REPAIR_FAILED, retry on next invocation). The interrupt flag is + * restored so an outer cancellation still observes the interrupted state. */ private void flushRepairMutations(Table targetHTable, List puts, List deletes) throws IOException { - if (!puts.isEmpty()) { - targetHTable.put(puts); - puts.clear(); + if (puts.isEmpty() && deletes.isEmpty()) { + return; } - if (!deletes.isEmpty()) { - targetHTable.delete(deletes); - deletes.clear(); + List mutations = new ArrayList<>(puts.size() + deletes.size()); + mutations.addAll(puts); + mutations.addAll(deletes); + Object[] results = new Object[mutations.size()]; + try { + targetHTable.batch(mutations, results); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while flushing repair mutations", e); + } + puts.clear(); + deletes.clear(); + } + + /** + * Writes a chunk-level checkpoint row and bumps the matching outcome counter, as a single + * "this attempt is recorded" unit. The outcome counter is bumped only after a successful + * checkpoint write, so on-disk audit and in-memory counters stay in sync. + * + * If the checkpoint write throws {@link SQLException}, the failure is logged and the + * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is + * NOT propagated. Reasons: + *
    + *
  • Target's data was already mutated during the merge — failing the mapper task + * wouldn't roll that back, and would trigger a MapReduce retry that re-verifies + * against already-mutated target state (audit trail loss).
  • + *
  • Other chunks in this mapper still deserve a chance to be processed.
  • + *
  • The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to + * operators and drives a non-zero exit at job end.
  • + *
+ */ + private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row, + SyncCounters outcomeCounter, Context context) { + try { + syncTableOutputRepository.checkpointSyncTableResult(row); + context.getCounter(outcomeCounter).increment(1); + } catch (SQLException e) { + LOGGER.error( + "Failed to write {} checkpoint for chunk source=[{}, {}] on table {}: target data " + + "was mutated during the merge, but no checkpoint row will exist for this chunk. " + + "CHECKPOINT_WRITE_FAILED counter is incremented; mapper continues.", + row.getStatus(), Bytes.toStringBinary(row.getStartRowKey()), + Bytes.toStringBinary(row.getEndRowKey()), tableName, e); + context.getCounter(SyncCounters.CHECKPOINT_WRITE_FAILED).increment(1); } } @@ -1093,16 +1526,30 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar // Drift signals are bumped at the branch that semantically caused them: row-level // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch. + // ROWS_CANNOT_REPAIR is bumped whenever any source mirror is suppressed by an + // existing target tombstone (or the cmp > 0 row was already entirely tombstones). if (cmp == 0) { // Same row key on both clusters — diff at cell level and repair only if cells differ. - driftCounters.addCellDrift( - diffCellsForRow(sourceResult, targetResult, pendingPuts, pendingDeletes)); + RowDiffOutcome outcome = diffCellsForRow(sourceResult, targetResult, targetHTable, + pendingPuts, pendingDeletes); + driftCounters.addCellDrift(outcome.cells); + if (outcome.rowCannotRepair) { + driftCounters.rowsCannotRepair++; + } sourceResult = sourceScanner.next(); targetResult = targetScanner.next(); } else if (cmp < 0) { - // Source-only row — mirror it onto target. - mirrorWholeRow(sourceResult, pendingPuts, pendingDeletes); - driftCounters.rowsMissingOnTarget++; + // Source-only row — mirror it onto target. Even though target's filtered scan + // returned no row at this key, target may carry tombstones that would shadow + // some or all of the Puts. + WholeRowMirrorOutcome outcome = + mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes); + if (outcome != WholeRowMirrorOutcome.FULLY_SHADOWED) { + driftCounters.rowsMissingOnTarget++; + } + if (outcome != WholeRowMirrorOutcome.FULLY_MIRRORED) { + driftCounters.rowsCannotRepair++; + } sourceResult = sourceScanner.next(); } else { // Target-only row — tombstone its live cells. If the row is already entirely @@ -1110,7 +1557,7 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can // see the unrepairable drift volume. int liveCellsTombstoned = - tombstoneWholeRow(targetResult, pendingPuts, pendingDeletes); + tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes); if (liveCellsTombstoned == 0) { driftCounters.rowsCannotRepair++; } else { @@ -1133,42 +1580,48 @@ private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStar LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}", Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName, e.getMessage(), e); - context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).increment(1); Timestamp failedAt = new Timestamp(System.currentTimeMillis()); // Capture partial progress in the COUNTERS column for triage. String failedCounters = driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); - syncTableOutputRepository - .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetZkQuorum) - .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) - .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun) - .setStartRowKey(sourceStart).setEndRowKey(sourceEnd) - .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) - .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt) - .setCounters(failedCounters).build()); + writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetZkQuorum) + .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) + .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun) + .setStartRowKey(sourceStart).setEndRowKey(sourceEnd) + .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) + .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt) + .setCounters(failedCounters).build(), SyncCounters.CHUNKS_REPAIR_FAILED, context); return; } driftCounters.commitTo(context); - context.getCounter(SyncCounters.CHUNKS_REPAIRED).increment(1); + // Chunk transitions to UNREPAIRABLE if any row landed in ROWS_CANNOT_REPAIR — operator + // intervention (typically major compaction on target to reap shadowing tombstones) is + // needed before re-running the tool. Otherwise REPAIRED. + boolean unrepairable = driftCounters.rowsCannotRepair > 0; + PhoenixSyncTableCheckpointOutputRow.Status status = unrepairable + ? PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE + : PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; Timestamp repairEndTime = new Timestamp(System.currentTimeMillis()); String repairCounters = driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); - syncTableOutputRepository - .checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetZkQuorum) - .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) - .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart) - .setEndRowKey(sourceEnd).setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) - .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime) - .setCounters(repairCounters).build()); - - LOGGER.info("Completed repair for chunk source=[{}, {}]: {}", - Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), + // Write checkpoint first; outcome counter is bumped inside writeChunkCheckpoint only + // on a successful write so the audit row and the counter stay consistent. + writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetZkQuorum) + .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) + .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart) + .setEndRowKey(sourceEnd).setStatus(status) + .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime) + .setCounters(repairCounters).build(), + unrepairable ? SyncCounters.CHUNKS_UNREPAIRABLE : SyncCounters.CHUNKS_REPAIRED, context); + + LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}", + Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), status, driftCounters.toLogString()); } diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java index 8ec66abf7ca..b939a064472 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java @@ -415,6 +415,8 @@ private boolean submitPhoenixSyncTableJob() throws Exception { counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue(); long repairedMappers = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue(); + long unrepairableMappers = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); long repairFailedMappers = counters .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); long chunksVerified = @@ -423,6 +425,8 @@ private boolean submitPhoenixSyncTableJob() throws Exception { counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue(); long chunksRepaired = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue(); + long chunksUnrepairable = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); long chunksRepairFailed = counters .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsProcessed = @@ -441,19 +445,35 @@ private boolean submitPhoenixSyncTableJob() throws Exception { .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); long cellsDifferentOnTarget = counters .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); + long checkpointWriteFailed = counters + .findCounter(PhoenixSyncTableMapper.SyncCounters.CHECKPOINT_WRITE_FAILED).getValue(); LOGGER.info( "PhoenixSyncTable job completed, gathered counters are \n Task Created: {}, \n Verified Mappers: {}, \n" - + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Repair Failed Mappers: {}, \n" + + "Mismatched Mappers: {}, \n Repaired Mappers: {}, \n Unrepairable Mappers: {}, \n" + + "Repair Failed Mappers: {}, \n" + "Chunks Verified: {}, \n Chunks Mismatched: {}, \n Chunks Repaired: {}, \n" - + "Chunks Repair Failed: {}, \n Source Rows Processed: {}, \n Target Rows Processed: {}, \n" + + "Chunks Unrepairable: {}, \n Chunks Repair Failed: {}, \n" + + "Source Rows Processed: {}, \n Target Rows Processed: {}, \n" + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n" + "Rows Cannot Repair: {}, \n" + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n" - + "Cells Different On Target: {}", - taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, repairFailedMappers, - chunksVerified, chunksMismatched, chunksRepaired, chunksRepairFailed, sourceRowsProcessed, - targetRowsProcessed, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, - cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); + + "Cells Different On Target: {}, \n" + + "Checkpoint Write Failed: {}", + taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, unrepairableMappers, + repairFailedMappers, chunksVerified, chunksMismatched, chunksRepaired, chunksUnrepairable, + chunksRepairFailed, sourceRowsProcessed, targetRowsProcessed, rowsMissingOnTarget, + rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, cellsExtraOnTarget, + cellsDifferentOnTarget, checkpointWriteFailed); + if (checkpointWriteFailed > 0) { + LOGGER.error( + "{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row " + + "for table {}. Target data was mutated but the audit trail is incomplete. " + + "Investigate the checkpoint table state before relying on it; affected chunks " + + "will be re-attempted on the next invocation since they have no terminal " + + "checkpoint status.", + checkpointWriteFailed, qTable); + return false; + } } else { LOGGER.warn("Unable to retrieve job counters for table {} - job may have failed " + "during initialization", qTable); From b204914773f976e01d507b7566adc9b3bfaf7c9c Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Wed, 3 Jun 2026 20:19:35 +0530 Subject: [PATCH 09/18] checkpointing test implementation --- .../PhoenixSyncTableCheckpointOutputRow.java | 95 +- .../PhoenixSyncTableChunkRepairer.java | 1015 +++++++++++++++++ .../mapreduce/PhoenixSyncTableMapper.java | 977 ++-------------- 3 files changed, 1163 insertions(+), 924 deletions(-) create mode 100644 phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java index 86574326ad2..ef3abbbeb55 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java @@ -146,12 +146,14 @@ public String getCounters() { @VisibleForTesting public long getSourceRowsProcessed() { - return CounterFormatter.parseSourceRows(counters); + return CounterFormatter.parseCounterValue(counters, + PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name()); } @VisibleForTesting public long getTargetRowsProcessed() { - return CounterFormatter.parseTargetRows(counters); + return CounterFormatter.parseCounterValue(counters, + PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name()); } /** @@ -160,49 +162,25 @@ public long getTargetRowsProcessed() { */ public static class CounterFormatter { private static final String FORMAT_CHUNK = - "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d"; - private static final String FORMAT_MAPPER = "%s=%d,%s=%d,%s=%d,%s=%d"; + "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d"; + private static final String FORMAT_MAPPER = + "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d"; /** - * Formats chunk counters as comma-separated key=value pairs. Always emits all eight - * counters; for verify-only chunks (no repair) the six drift counters are 0 so - * operators querying the checkpoint table see a uniform format. - * - * Drift signals partition into two layers: - * row-level — whole row missing or extra on target, or unrepairable (target row is - * entirely tombstones — HBase cannot remove tombstones, only major - * compaction does) - * cell-level — for rows present on both clusters, individual cells missing, extra, - * or differing in value at matching coordinates - * The two layers are disjoint per row: a row drift case contributes only to row - * counters; a cell drift case contributes only to cell counters. - * - * @param sourceRows Source rows processed - * @param targetRows Target rows processed - * @param rowsMissingOnTarget Rows present on source but missing on target (0 if not - * repaired) - * @param rowsExtraOnTarget Rows present on target but missing on source whose live - * cells repair tombstoned (0 if not repaired) - * @param rowsCannotRepair Rows present on target but missing on source whose contents - * are entirely tombstones — repair cannot act on them and an - * operator-driven major compaction is required to make the - * verifier converge under {@code --raw-scan} - * @param cellsMissingOnTarget Cells (across rows present on both sides) that source had - * at coordinates target lacked (0 if not repaired) - * @param cellsExtraOnTarget Cells (across rows present on both sides) that target had - * at coordinates source lacked (0 if not repaired) - * @param cellsDifferentOnTarget Cells (across rows present on both sides) at matching - * coordinates whose values differed (0 if not repaired) - * @return Formatted string with all eight counters + * Formats chunk counters as comma-separated key=value pairs. Always emits all nine + * counters; unpopulated counters are 0 so operators querying the checkpoint table see + * a uniform format. {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run; + * cell-level counters and {@code ROWS_CANNOT_REPAIR} are populated only in repair mode. */ public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget, - long rowsExtraOnTarget, long rowsCannotRepair, long cellsMissingOnTarget, - long cellsExtraOnTarget, long cellsDifferentOnTarget) { + long rowsExtraOnTarget, long rowsDifferentOnTarget, long rowsCannotRepair, + long cellsMissingOnTarget, long cellsExtraOnTarget, long cellsDifferentOnTarget) { return String.format(FORMAT_CHUNK, PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows, PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows, PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget, PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_DIFFERENT_ON_TARGET.name(), rowsDifferentOnTarget, PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair, PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget, PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget, @@ -211,40 +189,27 @@ public static String formatChunk(long sourceRows, long targetRows, long rowsMiss } /** - * Formats mapper counters as comma-separated key=value pairs. - * @param chunksVerified Chunks verified count - * @param chunksMismatched Chunks mismatched count - * @param sourceRows Source rows processed - * @param targetRows Target rows processed - * @return Formatted string with all mapper counters + * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven + * drift counters are the per-region sum of the same fields emitted by + * {@link #formatChunk}. */ public static String formatMapper(long chunksVerified, long chunksMismatched, long sourceRows, - long targetRows) { + long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget, + long rowsDifferentOnTarget, long rowsCannotRepair, long cellsMissingOnTarget, + long cellsExtraOnTarget, long cellsDifferentOnTarget) { return String.format(FORMAT_MAPPER, PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED.name(), chunksVerified, PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED.name(), chunksMismatched, PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name(), sourceRows, - PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows); - } - - /** - * Parses SOURCE_ROWS_PROCESSED value from counter string. - * @param counters Counter string in format "KEY1=val1,KEY2=val2,..." - * @return Source rows processed, or 0 if not found - */ - public static long parseSourceRows(String counters) { - return parseCounterValue(counters, - PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED.name()); - } - - /** - * Parses TARGET_ROWS_PROCESSED value from counter string. - * @param counters Counter string in format "KEY1=val1,KEY2=val2,..." - * @return Target rows processed, or 0 if not found - */ - public static long parseTargetRows(String counters) { - return parseCounterValue(counters, - PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name()); + PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED.name(), targetRows, + PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET.name(), rowsMissingOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET.name(), rowsExtraOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_DIFFERENT_ON_TARGET.name(), rowsDifferentOnTarget, + PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR.name(), rowsCannotRepair, + PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET.name(), cellsMissingOnTarget, + PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET.name(), cellsExtraOnTarget, + PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET.name(), + cellsDifferentOnTarget); } /** @@ -253,7 +218,7 @@ public static long parseTargetRows(String counters) { * @param counterName Name of the counter to extract * @return Counter value, or 0 if not found */ - private static long parseCounterValue(String counters, String counterName) { + public static long parseCounterValue(String counters, String counterName) { if (counters == null || counters.isEmpty()) { return 0; } diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java new file mode 100644 index 00000000000..6a1da24bc45 --- /dev/null +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java @@ -0,0 +1,1015 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Row; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.util.Progressable; +import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants; +import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.util.ScanUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Performs row-level repair for a mismatched chunk by merge-scanning source and target + * cluster data and applying targeted mutations to target. + * + *

The two scan ranges may differ: the verifier reads target over a wider range than + * source (covers extra-on-target rows that fall between consecutive source chunks); + * repair must mirror the same boundaries so those extras are visible here as + * {@code cmp > 0} rows and get deleted. + * + *

Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee). + *

    + *
  • {@code cmp == 0} (same row): compare cells; repair only differing cells.
  • + *
  • {@code cmp < 0} (source-only): mirror all source cells onto target.
  • + *
  • {@code cmp > 0} (target-only): tombstone target cells within {@code [fromTime, toTime]}.
  • + *
+ * Cells outside {@code [fromTime, toTime]} are never read (scan time range), so never mutated. + * + *

Tombstone semantics: HBase has four tombstone subtypes ({@code Delete}, + * {@code DeleteColumn}, {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts + * we mirror onto target may be silently shadowed by an existing target tombstone; in that + * case the mirror is suppressed and the row carries unrepairable drift (operator must + * major-compact target to reap shadowing tombstones before a re-run can converge). See + * {@link TargetRowRecord}. + */ +public final class PhoenixSyncTableChunkRepairer { + + private static final Logger LOGGER = + LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class); + + private final Connection sourceConnection; + private final Connection targetConnection; + private final PTable pTable; + private final byte[] physicalTableName; + private final long fromTime; + private final long toTime; + private final boolean isRawScan; + private final boolean isReadAllVersions; + private final int repairBatchSize; + private final String tableName; + + public PhoenixSyncTableChunkRepairer(Connection sourceConnection, Connection targetConnection, + PTable pTable, byte[] physicalTableName, String tableName, long fromTime, long toTime, + boolean isRawScan, boolean isReadAllVersions, int repairBatchSize) { + this.sourceConnection = sourceConnection; + this.targetConnection = targetConnection; + this.pTable = pTable; + this.physicalTableName = physicalTableName; + this.tableName = tableName; + this.fromTime = fromTime; + this.toTime = toTime; + this.isRawScan = isRawScan; + this.isReadAllVersions = isReadAllVersions; + this.repairBatchSize = repairBatchSize; + } + + /** + * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the + * terminal status and accumulated {@link DriftCounters}; never throws on per-chunk + * scan/flush failure (returns {@link ChunkRepairResult.Status#REPAIR_FAILED}). The + * only declared {@link SQLException} surfaces from {@link Connection#unwrap}, which + * indicates a misconfigured connection rather than a per-chunk fault. + */ + public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress) + throws SQLException { + DriftCounters drift = new DriftCounters(); + + LOGGER.info("Starting repair for chunk source=[{}, {}] target={}{}, {}{} on table {}", + Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), + req.targetStartInclusive ? "[" : "(", Bytes.toStringBinary(req.targetStart), + Bytes.toStringBinary(req.targetEnd), req.targetEndInclusive ? "]" : ")", tableName); + + PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class); + PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class); + + Scan sourceScan; + Scan targetScan; + try { + sourceScan = createRepairScan(req.sourceStart, req.sourceEnd, true, true, sourcePhoenixConn); + targetScan = createRepairScan(req.targetStart, req.targetEnd, req.targetStartInclusive, + req.targetEndInclusive, targetPhoenixConn); + } catch (IOException e) { + LOGGER.error("Repair failed to build scans for chunk source=[{}, {}] on table {}: {}", + Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), tableName, + e.getMessage(), e); + return ChunkRepairResult.failed(drift, e); + } + + try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName); + Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName); + ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan); + ResultScanner targetScanner = targetHTable.getScanner(targetScan)) { + if (req.dryRun) { + walkAndCountDrift(sourceScanner, targetScanner, drift, progress); + } else { + repairDiffRows(sourceScanner, targetScanner, targetHTable, drift, progress); + } + } catch (IOException e) { + // Per-chunk fault isolation. The mapper marks this chunk REPAIR_FAILED and continues + // with the next chunk + LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}", + Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), tableName, + e.getMessage(), e); + return ChunkRepairResult.failed(drift, e); + } + + ChunkRepairResult result = ChunkRepairResult.completed(drift); + LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}", + Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), + result.status, drift.toLogString()); + return result; + } + + /** + * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row; + * never touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose + * contents differ — verifier-only signal, not produced in repair mode (which reports cell + * granularity instead). + */ + private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner targetScanner, + DriftCounters drift, Progressable progress) throws IOException { + Result sourceResult = sourceScanner.next(); + Result targetResult = targetScanner.next(); + + while (sourceResult != null || targetResult != null) { + int cmp = compareRowKeys(sourceResult, targetResult); + if (cmp == 0) { + if (!rowsEqual(sourceResult, targetResult)) { + drift.rowsDifferentOnTarget++; + LOGGER.warn("Row different on target for table {} row={}", tableName, + Bytes.toStringBinary(sourceResult.getRow())); + } + sourceResult = sourceScanner.next(); + targetResult = targetScanner.next(); + } else if (cmp < 0) { + drift.rowsMissingOnTarget++; + LOGGER.warn("Row missing on target for table {} row={}", tableName, + Bytes.toStringBinary(sourceResult.getRow())); + sourceResult = sourceScanner.next(); + } else { + drift.rowsExtraOnTarget++; + LOGGER.warn("Row extra on target for table {} row={}", tableName, + Bytes.toStringBinary(targetResult.getRow())); + targetResult = targetScanner.next(); + } + if (progress != null) { + progress.progress(); + } + } + } + + /** + * Repair-mode merge-walk: resolves drift by emitting mutations into pending batches, flushing + * each time the batch reaches {@link #repairBatchSize}, and finally draining the tail. Per + * branch: + *

    + *
  • {@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable + * flag.
  • + *
  • {@code cmp < 0} — mirror the source row onto target; bump {@code rowsMissing} unless + * the whole row was shadowed, and {@code rowsCannotRepair} unless every cell was + * mirrored.
  • + *
  • {@code cmp > 0} — tombstone the extra row on target; bump {@code rowsExtra} when at + * least one live cell was tombstoned, else {@code rowsCannotRepair} (row was already + * all tombstones).
  • + *
+ */ + private void repairDiffRows(ResultScanner sourceScanner, ResultScanner targetScanner, + Table targetHTable, DriftCounters drift, Progressable progress) throws IOException { + List pendingPuts = new ArrayList<>(); + List pendingDeletes = new ArrayList<>(); + Result sourceResult = sourceScanner.next(); + Result targetResult = targetScanner.next(); + + while (sourceResult != null || targetResult != null) { + int cmp = compareRowKeys(sourceResult, targetResult); + if (cmp == 0) { + RowDriftInfo rowDriftInfo = generateMutationForDiffCells(sourceResult, targetResult, + targetHTable, pendingPuts, pendingDeletes); + drift.addCellDrift(rowDriftInfo.cells); + if (rowDriftInfo.rowCannotRepair) { + drift.rowsCannotRepair++; + } + if (rowDriftInfo != RowDriftInfo.NONE) { + LOGGER.warn( + "Row mismatch on table {} row={}: cell drift missing={}, extra={}, different={}, " + + "rowCannotRepair={}", + tableName, Bytes.toStringBinary(sourceResult.getRow()), rowDriftInfo.cells.missing, + rowDriftInfo.cells.extra, rowDriftInfo.cells.different, rowDriftInfo.rowCannotRepair); + } + sourceResult = sourceScanner.next(); + targetResult = targetScanner.next(); + } else if (cmp < 0) { + byte[] missingRowKey = sourceResult.getRow(); + RowMirrorStatus outcome = + mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes); + if (outcome != RowMirrorStatus.FULLY_SHADOWED) { + drift.rowsMissingOnTarget++; + } + if (outcome != RowMirrorStatus.FULLY_MIRRORED) { + drift.rowsCannotRepair++; + } + LOGGER.warn("Row missing on target for table {} row={}: mirrorOutcome={}", tableName, + Bytes.toStringBinary(missingRowKey), outcome); + sourceResult = sourceScanner.next(); + } else { + byte[] extraRowKey = targetResult.getRow(); + int liveCellsTombstoned = + tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes); + if (liveCellsTombstoned == 0) { + drift.rowsCannotRepair++; + } else { + drift.rowsExtraOnTarget++; + } + LOGGER.warn("Row extra on target for table {} row={}: liveCellsTombstoned={}", tableName, + Bytes.toStringBinary(extraRowKey), liveCellsTombstoned); + targetResult = targetScanner.next(); + } + + if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) { + flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); + } + if (progress != null) { + progress.progress(); + } + } + flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); + } + + /** + * Compares the row keys of two scanner results; treats a null result as past-end so a + * {@code null/non-null} pair sorts the non-null side first. + */ + private static int compareRowKeys(Result sourceResult, Result targetResult) { + if (sourceResult == null) { + return 1; + } + if (targetResult == null) { + return -1; + } + return Bytes.compareTo(sourceResult.getRow(), targetResult.getRow()); + } + + /** + * Whole-row content equality check used by dry-run row-level diffing. Delegates to + * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level + * mismatch (family, qualifier, timestamp, type, value); we map the throw to {@code false} + * so the cmp==0 path can flag the row without producing repair mutations. + */ + private boolean rowsEqual(Result src, Result tgt) { + try { + Result.compareResults(src, tgt, false); + return true; + } catch (Exception e) { + return false; + } + } + + /** + * Mirrors every source cell of a row that is missing on target. Each cell is + * shadow-checked against target's per-row record (see {@link TargetRowRecord}). + */ + private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable, + List pendingPuts, List pendingDeletes) throws IOException { + RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow()); + int mirrored = 0; + for (Cell cell : sourceResult.rawCells()) { + if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowRepairBuffer)) { + mirrored++; + } + } + rowRepairBuffer.flush(pendingPuts, pendingDeletes); + if (mirrored == 0) { + return RowMirrorStatus.FULLY_SHADOWED; + } + return rowRepairBuffer.anyCellUnrepairable ? RowMirrorStatus.PARTIALLY_MIRRORED + : RowMirrorStatus.FULLY_MIRRORED; + } + + /** + * Tombstones every live cell of a row that is extra on target. Skips cells that are + * themselves already tombstones (see {@link #tombstoneTargetCell}). + * + * @return the number of live cells that contributed a tombstone marker. {@code 0} means + * the row was already entirely tombstones; the caller records this as + * {@code ROWS_CANNOT_REPAIR}. + */ + private int tombstoneWholeRow(Result targetResult, Table targetHTable, List pendingPuts, + List pendingDeletes) throws IOException { + RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(targetResult.getRow()); + // Empty source map drives every target cell into tombstoneTargetCell's "no source column" + // branch (DeleteColumn at ts <= T). + Map sourceMaxTsByColumn = Collections.emptyMap(); + int liveCellsTombstoned = 0; + for (Cell cell : targetResult.rawCells()) { + if (tombstoneTargetCell(cell, targetHTable, rowRepairBuffer, sourceMaxTsByColumn)) { + liveCellsTombstoned++; + } + } + rowRepairBuffer.flush(pendingPuts, pendingDeletes); + return liveCellsTombstoned; + } + + /** + * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator} + * order and emits {@link Put}/{@link Delete} mutations. + * + *

Branches: + *

    + *
  • same coords + matching value → no drift
  • + *
  • same coords + different value → different++; mirror source cell (shadow-checked)
  • + *
  • source-only cell → missing++; mirror source cell (shadow-checked)
  • + *
  • target-only live cell → extra++; tombstone target cell
  • + *
  • target-only tombstone cell → skip; row carries unrepairable drift
  • + *
+ * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written); + * the row-level signal flows through {@link RowDriftInfo#rowCannotRepair}. + */ + private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result targetResult, + Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { + Cell[] sourceCells = sourceResult.rawCells(); + Cell[] targetCells = targetResult.rawCells(); + CellComparator comparator = CellComparator.getInstance(); + RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow()); + + // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the + // tombstone shape for a target-extra cell. Three cases: + // key absent → DeleteColumn at target's ts (wipe hidden older versions too) + // sourceMaxTs >= targetTs → point Delete at target's ts + // sourceMaxTs < targetTs → point Delete at target's ts AND every hidden target Put in + // (sourceMaxTs, targetTs); else hidden version surfaces. + // Math::max collapses multi-version source so the third case only fires when target sits above + // ALL of source's versions. + // Check tombstoneTargetCell() for its usage. + Map sourceMaxTsByColumn = new HashMap<>(); + for (Cell sourceCell : sourceCells) { + if (!CellUtil.isDelete(sourceCell)) { + sourceMaxTsByColumn.merge(ColumnKey.of(sourceCell), sourceCell.getTimestamp(), Math::max); + } + } + + int cellMissing = 0; + int cellExtra = 0; + int cellDifferent = 0; + + int sourceIdx = 0; + int targetIdx = 0; + while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) { + int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]); + if (cmp == 0) { + // Same coordinates, compare values. + if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) { + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) { + cellDifferent++; + } + } + sourceIdx++; + targetIdx++; + } else if (cmp < 0) { + // Missing on target + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) { + cellMissing++; + } + sourceIdx++; + } else { + // extra on target + if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, + sourceMaxTsByColumn)) { + cellExtra++; + } else { + rowRepairBuffer.anyCellUnrepairable = true; + } + } + } + while (sourceIdx < sourceCells.length) { + if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) { + cellMissing++; + } + sourceIdx++; + } + while (targetIdx < targetCells.length) { + if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, + sourceMaxTsByColumn)) { + cellExtra++; + } else { + rowRepairBuffer.anyCellUnrepairable = true; + } + } + + if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowRepairBuffer.anyCellUnrepairable) { + return RowDriftInfo.NONE; + } + rowRepairBuffer.flush(pendingPuts, pendingDeletes); + return new RowDriftInfo(new CellDriftCounts(cellMissing, cellExtra, cellDifferent), + rowRepairBuffer.anyCellUnrepairable); + } + + /** + * Routes a source cell to the right mutation kind. Tombstone cells go through + * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan} + * this matters because {@link Put#add(Cell)} rejects non-Put cells. + */ + private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws IOException { + if (CellUtil.isDelete(cell)) { + rowRepairBuffer.delete().add(cell); + } else { + rowRepairBuffer.put().add(cell); + } + } + + /** + * Mirrors a source cell onto target unless an existing target tombstone would shadow it. + * Shadow detection runs only if source has Put cells; tombstoned source cells always mirror. + * + * @return {@code true} if mirrored, {@code false} if suppressed (caller marks the row + * unrepairable). + */ + private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, + RowRepairBuffer rowRepairBuffer) throws IOException { + // Source Puts can be shadowed by an existing target tombstone, the Put lands on + // disk but stays invisible to reads, so writing it is wasted work and the row stays + // diverged. e.g. src Put(name, T=200) vs tgt DeleteColumn(name, T=300) covering + // ts<=300. Skip the write and flag the row unrepairable; operator must major-compact + // target to reap the shadow. Source tombstones can't be shadowed, hence skip the check. + if (!CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell)) { + rowRepairBuffer.anyCellUnrepairable = true; + return false; + } + mirrorSourceCell(cell, rowRepairBuffer); + return true; + } + + /** + * Tombstones a target-only cell to make target's read view at this column match source's. + * Skips cells that are themselves already tombstones. + * + *

Tombstone subtype depends on what source has at this {@code (cf, q)}: + *

    + *
  • Source has no cell at this column — use {@link Delete#addColumns} (DeleteColumn, + * scope {@code ts <= T}) so even max-versions-hidden older target versions are + * shadowed.
  • + *
  • Source's max ts at this column is >= target's ts — point-Delete only + * target's exact ts.
  • + *
  • Source's max ts at this column is < target's ts — point-Delete target's + * ts, AND point-Delete every max-versions-hidden Put on target with ts in + * {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is + * shadowed, the next hidden version surfaces above source's mirror.
  • + *
+ * + * @return true if the cell was a live cell that contributed a tombstone marker, false if + * the cell was already a tombstone and was skipped. + */ + private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuffer rowRepairBuffer, + Map sourceMaxTsByColumn) throws IOException { + if (CellUtil.isDelete(cell)) { + return false; + } + byte[] family = CellUtil.cloneFamily(cell); + byte[] qualifier = CellUtil.cloneQualifier(cell); + long ts = cell.getTimestamp(); + Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier)); + if (sourceMaxTs == null) { + rowRepairBuffer.delete().addColumns(family, qualifier, ts); + } else if (sourceMaxTs >= ts) { + rowRepairBuffer.delete().addColumn(family, qualifier, ts); + } else { + rowRepairBuffer.delete().addColumn(family, qualifier, ts); + Set hiddenTs = rowRepairBuffer.targetRowRecord(targetHTable) + .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts); + for (Long hidden : hiddenTs) { + rowRepairBuffer.delete().addColumn(family, qualifier, hidden); + } + } + return true; + } + + /** + * Loads target's per-row record with a single raw, all-versions scan. Captures both + * tombstones and Puts: + *
    + *
  • Tombstones drive shadow detection — would a source Put we mirror be + * suppressed by an existing target tombstone on read? e.g. tgt has + * DeleteColumn(NAME)@900, src wants Put(NAME, "alice")@500. Mirror would land + * on disk but stay invisible because DeleteColumn@900 covers ts <= 900. + * Detect upfront, skip the doomed write, flag the row unrepairable.
  • + *
  • Puts drive hidden-version discovery — when target's visible Put is at a + * higher ts than source's mirror, max-versions-filtered older Puts may sit between + * the two and surface above source's mirror after we shadow the visible one. We + * need their timestamps so we can point-Delete each. e.g. tgt has Put(NAME, + * "carol")@T2 visible plus Put(NAME, "bob")@T1 hidden under MAX_VERSIONS=2; src + * has Put(NAME, "alice")@T0. Point-Deleting only T2 exposes T1 above source's T0 + * mirror — wrong; we must point-Delete T1 too so T0 surfaces.
  • + *
+ * raw=true and all-versions are forced regardless of user flags so tombstones and + * hidden Put versions that would otherwise be filtered out are surfaced. + * + *

Time range: lower bound stays at {@code fromTime} since cells below the window + * can't affect repair inside the window; upper bound is {@code MAX_VALUE} because a + * tombstone at {@code ts > toTime} can still shadow a Put we mirror at + * {@code ts in window} during application reads. + */ + private TargetRowRecord loadTargetRowRecord(byte[] rowKey, Table targetHTable) + throws IOException { + Scan scan = new Scan(); + scan.withStartRow(rowKey, true); + scan.withStopRow(rowKey, true); + scan.setRaw(true); + scan.readAllVersions(); + scan.setCacheBlocks(false); + scan.setTimeRange(fromTime, Long.MAX_VALUE); + scan.setCaching(1); + scan.setLimit(1); + TargetRowRecord rowRecord = new TargetRowRecord(); + try (ResultScanner scanner = targetHTable.getScanner(scan)) { + Result raw = scanner.next(); + if (raw != null) { + for (Cell cell : raw.rawCells()) { + rowRecord.record(cell); + } + } + } + return rowRecord; + } + + /** + * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and + * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL / + * {@code IS_STRICT_TTL} attributes so the cells visited here are the same cells the + * verifier hashed. + */ + private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, + boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException { + Scan scan = new Scan(); + scan.withStartRow(startKey, isStartKeyInclusive); + scan.withStopRow(endKey, isEndKeyInclusive); + scan.setRaw(isRawScan); + if (isReadAllVersions) { + scan.readAllVersions(); + } + scan.setCacheBlocks(false); + scan.setTimeRange(fromTime, toTime); + scan.setCaching(1000); + ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn); + scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES); + return scan; + } + + /** + * Flushes the accumulated Put and Delete batches to target as a single mixed RPC via + * {@link Table#batch}. The mixed batch (rather than separate {@code put()} + + * {@code delete()} calls) closes the inter-RPC window where a JVM/regionserver crash + * between the two could leave target with Puts applied but matching Deletes missing. + * + *

{@link Table#batch} does NOT throw for partial failures — per-mutation failures + * (e.g. {@code NotServingRegionException} from a region split mid-batch, + * {@code WrongRegionException} from a merge) land in the {@code results} array as + * {@link Throwable} entries. We surface the first such failure as {@link IOException} + * so the caller treats this chunk as {@code REPAIR_FAILED} rather than silently + * marking it {@code REPAIRED}; on re-run the resume filter excludes + * {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap. + */ + private void flushRepairMutations(Table targetHTable, List puts, List deletes) + throws IOException { + if (puts.isEmpty() && deletes.isEmpty()) { + return; + } + List mutations = new ArrayList<>(puts.size() + deletes.size()); + mutations.addAll(puts); + mutations.addAll(deletes); + Object[] results = new Object[mutations.size()]; + try { + targetHTable.batch(mutations, results); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while flushing repair mutations", e); + } + int failureCount = 0; + int firstFailureIdx = -1; + for (int i = 0; i < results.length; i++) { + if (results[i] instanceof Throwable) { + failureCount++; + if (firstFailureIdx < 0) { + firstFailureIdx = i; + } + } + } + if (failureCount > 0) { + Throwable firstFailure = (Throwable) results[firstFailureIdx]; + Row failedRow = mutations.get(firstFailureIdx); + throw new IOException(String.format( + "Repair batch had %d/%d mutation failure(s); first failure on row %s: %s", + failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()), + firstFailure.getMessage()), firstFailure); + } + puts.clear(); + deletes.clear(); + } + + // =========================================================================== + // Public input/output value types + // =========================================================================== + + /** + * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may + * be wider so the repair scan sees the same cells (including extra-on-target rows between + * consecutive source chunks) that the verifier hashed. + * + *

{@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier + * recorded; threaded into the COUNTERS column on the resulting checkpoint row. + * {@link #verifyStartTime} is the timestamp captured when verification began for this + * chunk; reused as EXECUTION_START_TIME on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED + * checkpoint row so the row spans the full verify+repair lifecycle that overwrites the + * MISMATCHED row. + */ + public static final class ChunkRepairRequest { + public final byte[] sourceStart; + public final byte[] sourceEnd; + public final byte[] targetStart; + public final byte[] targetEnd; + public final boolean targetStartInclusive; + public final boolean targetEndInclusive; + public final long verifySourceRows; + public final long verifyTargetRows; + public final Timestamp verifyStartTime; + public final boolean dryRun; + + public ChunkRepairRequest(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart, + byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive, + long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, boolean dryRun) { + this.sourceStart = sourceStart; + this.sourceEnd = sourceEnd; + this.targetStart = targetStart; + this.targetEnd = targetEnd; + this.targetStartInclusive = targetStartInclusive; + this.targetEndInclusive = targetEndInclusive; + this.verifySourceRows = verifySourceRows; + this.verifyTargetRows = verifyTargetRows; + this.verifyStartTime = verifyStartTime; + this.dryRun = dryRun; + } + } + + /** + * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift + * counters, end-of-attempt timestamp, and the failure exception when status is + * {@link Status#REPAIR_FAILED}. Status precedence (most-severe wins): + * {@link Status#REPAIR_FAILED} > {@link Status#UNREPAIRABLE} > {@link Status#REPAIRED}. + */ + public static final class ChunkRepairResult { + + public enum Status { + /** Every cell-level drift handled and no row landed in ROWS_CANNOT_REPAIR. */ + REPAIRED, + /** + * Repair completed but at least one row carries drift the tool cannot remove — + * a source-side Put was shadow-suppressed by an existing target tombstone, or + * target's row is entirely tombstones. Operator must major-compact target to + * reap shadowing tombstones, then re-run. + */ + UNREPAIRABLE, + /** + * Repair scan or batch flush threw {@link IOException}. Per-chunk fault isolation: + * mapper continues with the next chunk; the chunk re-enters as an unprocessed gap + * on the next invocation. + */ + REPAIR_FAILED + } + + public final Status status; + public final DriftCounters drift; + public final Timestamp endTime; + public final IOException failure; + + private ChunkRepairResult(Status status, DriftCounters drift, Timestamp endTime, + IOException failure) { + this.status = status; + this.drift = drift; + this.endTime = endTime; + this.failure = failure; + } + + static ChunkRepairResult completed(DriftCounters drift) { + Status status = drift.rowsCannotRepair > 0 ? Status.UNREPAIRABLE : Status.REPAIRED; + return new ChunkRepairResult(status, drift, new Timestamp(System.currentTimeMillis()), null); + } + + static ChunkRepairResult failed(DriftCounters drift, IOException failure) { + return new ChunkRepairResult(Status.REPAIR_FAILED, drift, + new Timestamp(System.currentTimeMillis()), failure); + } + } + + /** + * Per-chunk aggregate of six drift counters — three row-level + * ({@code rowsMissingOnTarget}, {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and + * three cell-level ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the + * caller maps fields onto MapReduce job counters and the checkpoint COUNTERS string. + */ + public static final class DriftCounters { + public long rowsMissingOnTarget; + public long rowsExtraOnTarget; + public long rowsDifferentOnTarget; + public long rowsCannotRepair; + public long cellsMissingOnTarget; + public long cellsExtraOnTarget; + public long cellsDifferentOnTarget; + + void addCellDrift(CellDriftCounts cellDrift) { + cellsMissingOnTarget += cellDrift.missing; + cellsExtraOnTarget += cellDrift.extra; + cellsDifferentOnTarget += cellDrift.different; + } + + /** Compact end-of-chunk log line summarizing all drift signals. */ + public String toLogString() { + return String.format( + "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsDifferentOnTarget=%d, " + + "rowsCannotRepair=%d, cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, " + + "cellsDifferentOnTarget=%d", + rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, + cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); + } + } + + /** + * Per-row capture of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]}, + * built lazily from a single raw single-row scan with all-versions enabled. Used in two + * roles: + * + *

    + *
  1. Shadow detection ({@link #wouldShadow}): would a source Put we're about + * to mirror be suppressed by an existing target tombstone?
  2. + *
  3. Hidden-version discovery ({@link #targetPutTimestampsBetween}): when + * max-versions on target hides older Puts behind a newer visible one, those hidden + * Puts surface on read the moment we shadow the visible one. We need their + * timestamps so we can point-Delete each. e.g. src has NAME@T0="alice", tgt has + * NAME@T2="carol" visible plus NAME@T1 hidden under max-versions. If we only + * point-Delete T2, target reads return T1 — wrong. Discovering T1 lets us + * point-Delete it too, so source's T0 mirror surfaces.
  4. + *
+ * + * HBase has four tombstone subtypes, each with distinct shadow semantics: + * Delete — shadows a Put at {@code (cf, q, ts == T)} exactly + * DeleteColumn — shadows Puts at {@code (cf, q, ts <= T)} + * DeleteFamily — shadows Puts at {@code (cf, *, ts <= T)} + * DeleteFamilyVersion — shadows Puts at {@code (cf, *, ts == T)} + * {@link #wouldShadow(Cell)} consults all four tombstone maps and returns true on any + * match. + */ + static final class TargetRowRecord { + private final Map> deletePointTs = new HashMap<>(); + private final Map deleteColumnUpperBound = new HashMap<>(); + private final Map deleteFamilyUpperBound = new HashMap<>(); + private final Map> deleteFamilyVersionTs = new HashMap<>(); + /** Per-column ts-ordered set of target's Put timestamps. */ + private final Map> targetPutTs = new HashMap<>(); + + void record(Cell cell) { + if (CellUtil.isDelete(cell)) { + recordTombstone(cell); + } else { + targetPutTs.computeIfAbsent(ColumnKey.of(cell), k -> new TreeMap<>()) + .put(cell.getTimestamp(), Boolean.TRUE); + } + } + + private void recordTombstone(Cell tombstone) { + long ts = tombstone.getTimestamp(); + ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone)); + switch (tombstone.getType()) { + case Delete: + deletePointTs.computeIfAbsent(ColumnKey.of(tombstone), k -> new HashSet<>()).add(ts); + break; + case DeleteColumn: + deleteColumnUpperBound.merge(ColumnKey.of(tombstone), ts, Math::max); + break; + case DeleteFamily: + deleteFamilyUpperBound.merge(family, ts, Math::max); + break; + case DeleteFamilyVersion: + deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts); + break; + default: + // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here. + } + } + + /** Returns true if any tombstone recorded here would shadow a Put at the cell's coords. */ + boolean wouldShadow(Cell sourcePut) { + long ts = sourcePut.getTimestamp(); + ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut)); + ColumnKey column = ColumnKey.of(sourcePut); + + // Delete: shadows Put at exactly (cf, q, ts == T). + Set pointTs = deletePointTs.get(column); + if (pointTs != null && pointTs.contains(ts)) { + return true; + } + // DeleteColumn: shadows every Put at (cf, q) with ts <= T. + Long deleteColTs = deleteColumnUpperBound.get(column); + if (deleteColTs != null && ts <= deleteColTs) { + return true; + } + // DeleteFamily: shadows every Put across all qualifiers in cf with ts <= T. + Long deleteFamTs = deleteFamilyUpperBound.get(family); + if (deleteFamTs != null && ts <= deleteFamTs) { + return true; + } + // DeleteFamilyVersion: shadows Puts across all qualifiers in cf at exactly ts == T. + Set dfvTs = deleteFamilyVersionTs.get(family); + return dfvTs != null && dfvTs.contains(ts); + } + + /** + * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than + * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find + * hidden (max-versions-filtered) target versions sitting between source's max ts and + * target's visible ts so they can be point-Deleted. + */ + Set targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive, + long upperExclusive) { + NavigableMap tss = targetPutTs.get(new ColumnKey(family, qualifier)); + if (tss == null) { + return Collections.emptySet(); + } + return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet(); + } + } + + /** Composite (family, qualifier) key with byte-array equality semantics. */ + static final class ColumnKey { + private final byte[] family; + private final byte[] qualifier; + + ColumnKey(byte[] family, byte[] qualifier) { + this.family = family; + this.qualifier = qualifier; + } + + static ColumnKey of(Cell cell) { + return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell)); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ColumnKey)) { + return false; + } + ColumnKey other = (ColumnKey) o; + return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier); + } + + @Override + public int hashCode() { + return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier); + } + } + + /** + * Per-row scratch buffer: lazily-built {@link Put}/{@link Delete} mutations, lazily-loaded + * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the + * merge. + */ + final class RowRepairBuffer { + private final byte[] rowKey; + Put put; + Delete delete; + TargetRowRecord targetRowRecord; + boolean anyCellUnrepairable; + + RowRepairBuffer(byte[] rowKey) { + this.rowKey = rowKey; + } + + Put put() { + if (put == null) { + put = new Put(rowKey); + } + return put; + } + + Delete delete() { + if (delete == null) { + delete = new Delete(rowKey); + } + return delete; + } + + /** + * Loads target's per-row record (tombstones + Puts) once per row, cached for the + * lifetime of this buffer. Two consumers: + * + *

Shadow detection (used by {@link #mirrorSourceCell} via + * {@link TargetRowRecord#wouldShadow}). Time range is {@code [fromTime, MAX_VALUE]} — + * a target tombstone outside the user's verify window can still suppress a Put we + * mirror inside it. e.g. tgt has DeleteColumn@900, src wants Put@500, scan timeRange + * [0, 600). Put@500 is masked because DeleteColumn@900 covers ts <= 900. Detecting + * this upfront lets repair skip the doomed write and flag the row unrepairable. + * + *

Hidden-version discovery (used by {@link #tombstoneTargetCell} via + * {@link TargetRowRecord#targetPutTimestampsBetween}). When target's MAX_VERSIONS + * hides older Puts behind a newer visible one, point-Deleting only the visible ts + * exposes the hidden version above source's mirror. The record's all-versions raw + * scan surfaces those hidden timestamps so we can tombstone each. + */ + TargetRowRecord targetRowRecord(Table targetHTable) throws IOException { + if (targetRowRecord == null) { + targetRowRecord = loadTargetRowRecord(rowKey, targetHTable); + } + return targetRowRecord; + } + + void flush(List pendingPuts, List pendingDeletes) { + if (put != null) { + pendingPuts.add(put); + } + if (delete != null) { + pendingDeletes.add(delete); + } + } + } + + /** + * Cell-level drift counts produced by per-row diff. Three counters partition the cell + * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value. + */ + static final class CellDriftCounts { + static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0); + + final int missing; + final int extra; + final int different; + + CellDriftCounts(int missing, int extra, int different) { + this.missing = missing; + this.extra = extra; + this.different = different; + } + } + + /** Per-row drift summary: cell-level drift counts plus a row-unrepairable flag. */ + static final class RowDriftInfo { + static final RowDriftInfo NONE = new RowDriftInfo(CellDriftCounts.NONE, false); + + final CellDriftCounts cells; + final boolean rowCannotRepair; + + RowDriftInfo(CellDriftCounts cells, boolean rowCannotRepair) { + this.cells = cells; + this.rowCannotRepair = rowCannotRepair; + } + } + + /** Terminal classification of a per-row mirror attempt onto target. */ + enum RowMirrorStatus { + /** All source cells mirrored in row. */ + FULLY_MIRRORED, + /** Some mirrored, some suppressed by target tombstones. */ + PARTIALLY_MIRRORED, + /** Every source cell suppressed by target tombstones. */ + FULLY_SHADOWED + } +} diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index d09151b1e27..d782cceadfa 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -20,31 +20,19 @@ import static org.apache.phoenix.schema.types.PDataType.TRUE_BYTES; import java.io.IOException; -import java.nio.ByteBuffer; import java.security.MessageDigest; import java.sql.Connection; import java.sql.SQLException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.client.Delete; -import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; @@ -53,6 +41,9 @@ import org.apache.hadoop.mapreduce.lib.db.DBInputFormat; import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants; import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairRequest; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairResult; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.DriftCounters; import org.apache.phoenix.mapreduce.util.ConnectionUtil; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; import org.apache.phoenix.query.KeyRange; @@ -89,6 +80,7 @@ public enum SyncCounters { TARGET_ROWS_PROCESSED, ROWS_MISSING_ON_TARGET, ROWS_EXTRA_ON_TARGET, + ROWS_DIFFERENT_ON_TARGET, ROWS_CANNOT_REPAIR, CELLS_MISSING_ON_TARGET, CELLS_EXTRA_ON_TARGET, @@ -104,7 +96,6 @@ public enum SyncCounters { private long chunkSizeBytes; private boolean isRawScan; private boolean isReadAllVersions; - private int repairBatchSize; private Configuration conf; private Connection sourceConnection; private Connection targetConnection; @@ -113,6 +104,7 @@ public enum SyncCounters { private byte[] physicalTableName; private List regionKeyRanges; private PhoenixSyncTableOutputRepository syncTableOutputRepository; + private PhoenixSyncTableChunkRepairer chunkRepairer; @Override protected void setup(Context context) throws InterruptedException { @@ -128,7 +120,7 @@ protected void setup(Context context) throws InterruptedException { chunkSizeBytes = PhoenixSyncTableTool.getPhoenixSyncTableChunkSizeBytes(conf); isRawScan = PhoenixSyncTableTool.getPhoenixSyncTableRawScan(conf); isReadAllVersions = PhoenixSyncTableTool.getPhoenixSyncTableReadAllVersions(conf); - repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf); + int repairBatchSize = PhoenixSyncTableTool.getPhoenixSyncTableRepairBatchSize(conf); extractRegionBoundariesFromSplit(context); sourceConnection = ConnectionUtil.getInputConnection(conf); pTable = sourceConnection.unwrap(PhoenixConnection.class).getTable(tableName); @@ -136,6 +128,9 @@ protected void setup(Context context) throws InterruptedException { connectToTargetCluster(); globalConnection = createGlobalConnection(conf); syncTableOutputRepository = new PhoenixSyncTableOutputRepository(globalConnection); + chunkRepairer = new PhoenixSyncTableChunkRepairer(sourceConnection, targetConnection, pTable, + physicalTableName, tableName, fromTime, toTime, isRawScan, isReadAllVersions, + repairBatchSize); } catch (Exception e) { tryClosingResources(); throw new RuntimeException( @@ -234,6 +229,15 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context long repairFailedBefore = context.getCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsBefore = context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue(); + long rowsMissingBefore = context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); + long rowsExtraBefore = context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + long rowsDifferentBefore = + context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue(); + long rowsCannotRepairBefore = context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue(); + long cellsMissingBefore = context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); + long cellsExtraBefore = context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + long cellsDifferentBefore = + context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); // Process all unprocessed ranges in this region boolean isStartKeyInclusive = shouldStartKeyBeInclusive(regionStart, processedChunks); @@ -256,10 +260,26 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context context.getCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue() - sourceRowsBefore; long targetRowsProcessed = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue() - targetRowsBefore; + long rowsMissingOnTarget = + context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() - rowsMissingBefore; + long rowsExtraOnTarget = + context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() - rowsExtraBefore; + long rowsDifferentOnTarget = + context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue() - rowsDifferentBefore; + long rowsCannotRepair = + context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue() - rowsCannotRepairBefore; + long cellsMissingOnTarget = + context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() - cellsMissingBefore; + long cellsExtraOnTarget = + context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() - cellsExtraBefore; + long cellsDifferentOnTarget = + context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue() - cellsDifferentBefore; Timestamp regionEndTime = new Timestamp(System.currentTimeMillis()); - String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatMapper(verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed); + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatMapper( + verifiedChunks, mismatchedChunks, sourceRowsProcessed, targetRowsProcessed, + rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, + cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); if (sourceRowsProcessed > 0) { recordRegionCompletion(regionStart, regionEnd, regionStartTime, regionEndTime, verifiedChunks, mismatchedChunks, unrepairableChunks, repairFailedChunks, counters, context); @@ -448,16 +468,33 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd, sourceChunk.rowCount, targetChunk.rowCount, matched); } sourceChunk.executionEndTime = new Timestamp(System.currentTimeMillis()); - String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter - .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L); if (matched) { + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter + .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L, 0L); handleVerifiedChunk(sourceChunk, context, counters); } else { - handleMismatchedChunk(sourceChunk, context, counters); - if (!isDryRun) { - repairChunk(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd, - isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount, - targetChunk.rowCount, sourceChunk.executionStartTime, context); + ChunkRepairRequest request = new ChunkRepairRequest(sourceChunk.startKey, + sourceChunk.endKey, targetStart, targetEnd, isTargetStartKeyInclusive, + targetEndInclusive, sourceChunk.rowCount, targetChunk.rowCount, + sourceChunk.executionStartTime, isDryRun); + ChunkRepairResult result = chunkRepairer.repair(request, context::progress); + if (isDryRun) { + // Dry-run: write CHUNK/MISMATCHED with real row-level drift in COUNTERS so the + // checkpoint audit row matches the job counters. No CHUNK/REPAIRED row and no + // target mutations. + DriftCounters drift = result.drift; + context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET) + .increment(drift.rowsMissingOnTarget); + context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET) + .increment(drift.rowsExtraOnTarget); + context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET) + .increment(drift.rowsDifferentOnTarget); + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk( + sourceChunk.rowCount, targetChunk.rowCount, drift.rowsMissingOnTarget, + drift.rowsExtraOnTarget, drift.rowsDifferentOnTarget, 0L, 0L, 0L, 0L); + handleMismatchedChunk(sourceChunk, context, counters); + } else { + recordRepairOutcome(sourceChunk, request, result, context); } } previousSourceChunk = sourceChunk; @@ -632,43 +669,22 @@ private void recordChunkCheckpoint(ChunkInfo sourceChunk, } /** - * Builds the common Scan shape used by verification, repair, and tombstone loading: key - * range, inclusivity, time window, cache-blocks, plus raw-scan and all-versions semantics - * controlled by the user's {@code --raw-scan} / {@code --read-all-versions} flags. - * - * Callers that need to force raw-scan or all-versions on a specific call (e.g., the - * tombstone loader, which must surface tombstones regardless of user flags) pass - * {@code forceRaw=true} or {@code forceAllVersions=true} to override. - * - * Callers layer on their own caching, limits, and coprocessor attributes. Keeping the - * base shared guarantees that the cells visited by repair are exactly the cells the - * verifier hashed (when both pass the same force flags). + * Creates an HBase scan for a chunk range. Honors the user's {@code --raw-scan} and + * {@code --read-all-versions} flags. For target-side scans, sets caching/limit to 1 to enable + * sequential partial-digest retrieval — each target chunk's digest feeds into the next until + * scanning completes. */ - private Scan createBaseScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, - boolean isEndKeyInclusive, boolean forceRaw, boolean forceAllVersions) throws IOException { + private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, + boolean isEndKeyInclusive, boolean isTargetScan) throws IOException { Scan scan = new Scan(); scan.withStartRow(startKey, isStartKeyInclusive); scan.withStopRow(endKey, isEndKeyInclusive); - scan.setRaw(forceRaw || isRawScan); - if (forceAllVersions || isReadAllVersions) { + scan.setRaw(isRawScan); + if (isReadAllVersions) { scan.readAllVersions(); } scan.setCacheBlocks(false); scan.setTimeRange(fromTime, toTime); - return scan; - } - - /** - * Creates an HBase scan for a chunk range. Can be configured to use raw scan mode and read all - * cell versions based on command-line options. - */ - private Scan createChunkScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, - boolean isEndKeyInclusive, boolean isTargetScan) throws IOException { - Scan scan = - createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false); - // Set limit and caching to 1 for sequential partial digest retrieval from target. - // Enables digest continuation: each target chunk's digest feeds into the next until scanning - // completes if (isTargetScan) { scan.setLimit(1); scan.setCaching(1); @@ -788,649 +804,60 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart, } /** - * Builds a row-level HBase scan for repair. Differs from {@link #createChunkScan} in that it - * does NOT set {@code SYNC_TABLE_CHUNK_FORMATION} or {@code SYNC_TABLE_CHUNK_SIZE_BYTES}, so - * the scanner returns actual {@link Result} rows rather than coprocessor chunk metadata. - * Shares the {@link #createBaseScan} core (time range, raw-scan, all-versions, inclusivity) - * with verification so the cells visited here are the same cells that produced the chunk - * hash. Adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL} attributes. - */ - private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, - boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException { - Scan scan = - createBaseScan(startKey, endKey, isStartKeyInclusive, isEndKeyInclusive, false, false); - scan.setCaching(1000); - ScanUtil.setScanAttributesForPhoenixTTL(scan, pTable, phoenixConn); - scan.setAttribute(BaseScannerRegionObserverConstants.IS_STRICT_TTL, TRUE_BYTES); - return scan; - } - - /** - * Loads the target row's tombstone index for shadow detection. Issues a single-row scan - * against target with raw=true and all-versions forced (regardless of user flags), so the - * tombstone subtypes that would otherwise be filtered out are surfaced. Live cells in the - * response are ignored — they were already visible to the repair scan and are handled by - * the merge logic. (HBase 2.x exposes {@code setRaw} only on Scan, not on Get — so a - * one-row scan stands in for what would otherwise be a raw Get.) - * - * Time range: the base scan applies {@code [fromTime, toTime]}, but shadow detection - * needs tombstones at ts >= fromTime regardless of the upper bound. A DeleteColumn / - * DeleteFamily at ts > toTime can still shadow a Put we mirror at ts in window during - * application reads (HBase tombstones don't respect the verifier's sync window). Lower - * bound stays at fromTime since tombstones below the window can't shadow anything we'd - * write inside the window. - */ - private TargetRowTombstones loadTargetRowTombstones(byte[] rowKey, Table targetHTable) - throws IOException { - Scan scan = createBaseScan(rowKey, rowKey, true, true, true, true); - scan.setTimeRange(fromTime, Long.MAX_VALUE); - scan.setCaching(1); - scan.setLimit(1); - TargetRowTombstones tombstones = new TargetRowTombstones(); - try (ResultScanner scanner = targetHTable.getScanner(scan)) { - Result raw = scanner.next(); - if (raw != null) { - for (Cell cell : raw.rawCells()) { - // Record both tombstones (for shadow detection) and Puts (for hidden-version - // discovery during cmp > 0 tombstoning of target-only cells). - tombstones.record(cell); - } - } - } - return tombstones; - } - - /** - * Lazily-built per-row Put and Delete mutations plus per-row unrepairable-drift state. - * Each mutation field is created on first use so a row that needs only Puts produces no - * Delete (and vice versa); a row that needs no mutation at all produces neither. - * - * Tombstone-index state is lazy: {@link #tombstones} is loaded only on the first - * shadow check via {@link #tombstones(Table)} (one raw scan per row at most). - * {@link #anyCellUnrepairable} accumulates whether the row carries any drift that - * repair could not act on — either a source-side Put was shadow-suppressed by an - * existing target tombstone, or a target-only tombstone exists that source lacks - * (HBase has no API to remove tombstones). The caller reads it after the merge to - * decide whether to bump {@link SyncCounters#ROWS_CANNOT_REPAIR}. - * - * After construction, callers append produced mutations to the pending batches via - * {@link #flush(List, List)}. - */ - private final class RowRepairState { - private final byte[] rowKey; - Put put; - Delete delete; - TargetRowTombstones tombstones; - boolean anyCellUnrepairable; - - RowRepairState(byte[] rowKey) { - this.rowKey = rowKey; - } - - Put put() { - if (put == null) { - put = new Put(rowKey); - } - return put; - } - - Delete delete() { - if (delete == null) { - delete = new Delete(rowKey); - } - return delete; - } - - /** - * Lazily loads the target row's tombstone index on first call (one raw single-row scan - * via {@link #loadTargetRowTombstones}); cached thereafter for reuse across the row's - * subsequent shadow checks. - */ - TargetRowTombstones targetRowTombstones(Table targetHTable) throws IOException { - if (tombstones == null) { - tombstones = loadTargetRowTombstones(rowKey, targetHTable); - } - return tombstones; - } - - void flush(List pendingPuts, List pendingDeletes) { - if (put != null) { - pendingPuts.add(put); - } - if (delete != null) { - pendingDeletes.add(delete); - } - } - } - - /** - * Cell-level drift counts produced by {@link #diffCellsForRow}. Populated only for rows - * present on both clusters; whole-row drift is signaled by the caller directly at the - * {@code cmp != 0} branches in {@link #repairChunk}. Three counters partition the cell - * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value. - */ - private static final class CellDriftCounts { - static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0); - - final int missing; - final int extra; - final int different; - - CellDriftCounts(int missing, int extra, int different) { - this.missing = missing; - this.extra = extra; - this.different = different; - } - } - - /** - * Per-row index of target's tombstones AND Puts in {@code [fromTime, MAX_VALUE]}, built - * lazily from a single raw single-row scan with all-versions enabled. Used in two roles: - * - *

    - *
  1. Shadow detection ({@link #wouldShadow}): would a source Put we're about - * to mirror be suppressed by an existing target tombstone?
  2. - *
  3. Hidden-version discovery ({@link #targetPutTimestampsBetween}): what - * max-versions-filtered target Puts sit between source's max ts at a column and - * target's visible Put? Used in the {@code cmp > 0} tombstoning path to issue - * point Deletes for hidden versions that would otherwise surface on read after - * the visible Put is shadowed.
  4. - *
- * - * HBase has four tombstone subtypes, each with distinct shadow semantics: - * Delete — shadows a Put at {@code (cf, q, ts == T)} exactly - * DeleteColumn — shadows Puts at {@code (cf, q, ts <= T)} - * DeleteFamily — shadows Puts at {@code (cf, *, ts <= T)} - * DeleteFamilyVersion — shadows Puts at {@code (cf, *, ts == T)} - * {@link #wouldShadow(Cell)} consults all four indices and returns true on any match. - */ - private static final class TargetRowTombstones { - private final Map> deletePointTs = new HashMap<>(); - private final Map deleteColumnUpperBound = new HashMap<>(); - private final Map deleteFamilyUpperBound = new HashMap<>(); - private final Map> deleteFamilyVersionTs = new HashMap<>(); - /** Per-column ts-ordered set of target's Put timestamps. */ - private final Map> targetPutTs = new HashMap<>(); - - void record(Cell cell) { - if (CellUtil.isDelete(cell)) { - recordTombstone(cell); - } else { - targetPutTs.computeIfAbsent(columnKey(cell), k -> new TreeMap<>()) - .put(cell.getTimestamp(), Boolean.TRUE); - } - } - - private void recordTombstone(Cell tombstone) { - long ts = tombstone.getTimestamp(); - ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone)); - switch (tombstone.getType()) { - case Delete: - deletePointTs.computeIfAbsent(columnKey(tombstone), k -> new HashSet<>()).add(ts); - break; - case DeleteColumn: - deleteColumnUpperBound.merge(columnKey(tombstone), ts, Math::max); - break; - case DeleteFamily: - deleteFamilyUpperBound.merge(family, ts, Math::max); - break; - case DeleteFamilyVersion: - deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts); - break; - default: - // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here. - } - } - - /** Returns true if any tombstone in this index would shadow a Put at the cell's coords. */ - boolean wouldShadow(Cell sourcePut) { - long ts = sourcePut.getTimestamp(); - ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(sourcePut)); - ColumnKey column = columnKey(sourcePut); - - Set pointTs = deletePointTs.get(column); - if (pointTs != null && pointTs.contains(ts)) { - return true; - } - Long deleteColTs = deleteColumnUpperBound.get(column); - if (deleteColTs != null && ts <= deleteColTs) { - return true; - } - Long deleteFamTs = deleteFamilyUpperBound.get(family); - if (deleteFamTs != null && ts <= deleteFamTs) { - return true; - } - Set dfvTs = deleteFamilyVersionTs.get(family); - return dfvTs != null && dfvTs.contains(ts); - } - - /** - * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than - * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find - * hidden (max-versions-filtered) target versions sitting between source's max ts and - * target's visible ts so they can be point-Deleted. - */ - Set targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive, - long upperExclusive) { - NavigableMap tss = targetPutTs.get(new ColumnKey(family, qualifier)); - if (tss == null) { - return Collections.emptySet(); - } - return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet(); - } - - private static ColumnKey columnKey(Cell cell) { - return new ColumnKey(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell)); - } - } - - /** Composite (family, qualifier) key with byte-array equality semantics. */ - private static final class ColumnKey { - private final byte[] family; - private final byte[] qualifier; - - ColumnKey(byte[] family, byte[] qualifier) { - this.family = family; - this.qualifier = qualifier; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof ColumnKey)) { - return false; - } - ColumnKey other = (ColumnKey) o; - return Bytes.equals(family, other.family) && Bytes.equals(qualifier, other.qualifier); - } - - @Override - public int hashCode() { - return Bytes.hashCode(family) * 31 + Bytes.hashCode(qualifier); - } - } - - /** - * Result of {@link #diffCellsForRow}. Carries the cell-level drift counts plus a flag - * indicating whether any source-side mirror was suppressed because target tombstones - * would shadow it — making the row partially or fully unrepairable. + * Translates a {@link ChunkRepairResult} into MapReduce side effects: bumps the cell/row drift + * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED), + * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a + * successful checkpoint write (audit row and counter stay consistent). */ - private static final class RowDiffOutcome { - static final RowDiffOutcome NONE = new RowDiffOutcome(CellDriftCounts.NONE, false); + private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request, + ChunkRepairResult result, Context context) { + DriftCounters drift = result.drift; + context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(drift.rowsMissingOnTarget); + context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(drift.rowsExtraOnTarget); + context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(drift.rowsCannotRepair); + context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(drift.cellsMissingOnTarget); + context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(drift.cellsExtraOnTarget); + context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET) + .increment(drift.cellsDifferentOnTarget); - final CellDriftCounts cells; - final boolean rowCannotRepair; - - RowDiffOutcome(CellDriftCounts cells, boolean rowCannotRepair) { - this.cells = cells; - this.rowCannotRepair = rowCannotRepair; - } - } - - /** - * Per-chunk aggregate of all six drift counters: three row-level (whole rows missing / - * extra on target, plus rows that cannot be repaired because target's row is entirely - * tombstones — HBase has no API to remove tombstones, only major compaction does) plus - * three cell-level (cells missing / extra / different on rows present on both clusters). - * Owns the bookkeeping that was previously scattered across {@link #repairChunk} — local - * accumulators, MapReduce job-counter increments, the - * {@link PhoenixSyncTableCheckpointOutputRow.CounterFormatter#formatChunk} call, and the - * end-of-chunk log line. Adding a new drift signal means touching this class and the one - * place in the merge loop that produces it; everything else (commit to job context, - * checkpoint COUNTERS string, log) flows through these methods. - */ - private static final class DriftCounters { - long rowsMissingOnTarget; - long rowsExtraOnTarget; - long rowsCannotRepair; - long cellsMissingOnTarget; - long cellsExtraOnTarget; - long cellsDifferentOnTarget; - - void addCellDrift(CellDriftCounts cellDrift) { - cellsMissingOnTarget += cellDrift.missing; - cellsExtraOnTarget += cellDrift.extra; - cellsDifferentOnTarget += cellDrift.different; - } - - /** Increments the job's MapReduce counters with this chunk's drift totals. */ - void commitTo(Context context) { - context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(rowsMissingOnTarget); - context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(rowsExtraOnTarget); - context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).increment(rowsCannotRepair); - context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).increment(cellsMissingOnTarget); - context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).increment(cellsExtraOnTarget); - context.getCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).increment(cellsDifferentOnTarget); - } - - /** Formats the chunk's COUNTERS string for persistence in the checkpoint table. */ - String formatChunkCounters(long verifySourceRows, long verifyTargetRows) { - return PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(verifySourceRows, - verifyTargetRows, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, - cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); - } + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk( + request.verifySourceRows, request.verifyTargetRows, drift.rowsMissingOnTarget, + drift.rowsExtraOnTarget, drift.rowsDifferentOnTarget, drift.rowsCannotRepair, + drift.cellsMissingOnTarget, drift.cellsExtraOnTarget, drift.cellsDifferentOnTarget); - /** Compact end-of-chunk log line summarizing all six drift signals. */ - String toLogString() { - return String.format( - "rowsMissingOnTarget=%d, rowsExtraOnTarget=%d, rowsCannotRepair=%d, " - + "cellsMissingOnTarget=%d, cellsExtraOnTarget=%d, cellsDifferentOnTarget=%d", - rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, - cellsExtraOnTarget, cellsDifferentOnTarget); - } - } - - /** - * Routes a source cell to the right mutation kind. Put cells go to a {@link Put}; tombstone - * cells go to a {@link Delete} via {@link Delete#add(Cell)} which preserves the tombstone's - * exact subtype (Delete / DeleteColumn / DeleteFamily / DeleteFamilyVersion). Required under - * {@code --raw-scan}: {@link Put#add(Cell)} rejects non-Put cells. - */ - private void mirrorSourceCell(Cell cell, RowRepairState rowState) throws IOException { - if (CellUtil.isDelete(cell)) { - rowState.delete().add(cell); - } else { - rowState.put().add(cell); - } - } - - /** - * Mirrors a source cell onto target only if no existing target tombstone would shadow - * the resulting Put. Lazily loads the target row's tombstone index on first Put cell - * (one raw single-row scan per row at most) via {@link RowRepairState#tombstones}. If - * the cell would be shadowed, the mirror is suppressed, {@code rowState.anyCellUnrepairable} - * is set, and the caller is expected to record {@link SyncCounters#ROWS_CANNOT_REPAIR}. - * - * Shadow detection only applies to Put cells. Tombstone source cells (under - * {@code --raw-scan}) bypass the check and always mirror via {@link #mirrorSourceCell}: - * {@link TargetRowTombstones#wouldShadow} is defined for live cells (does an existing target - * tombstone hide a new Put on read), and is not meaningful for delete-marker cells — - * mirroring a tombstone over an existing tombstone is a benign duplicate, not a suppression. - * - * @return {@code true} if the cell was mirrored, {@code false} if suppressed by shadowing. - */ - private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, - RowRepairState rowState) throws IOException { - if (!CellUtil.isDelete(cell) && rowState.targetRowTombstones(targetHTable).wouldShadow(cell)) { - rowState.anyCellUnrepairable = true; - return false; - } - mirrorSourceCell(cell, rowState); - return true; - } - - /** - * Tombstones a target-only cell to make target's read view at this column match source's. - * Skips cells that are themselves already tombstones: HBase has no API to remove a - * tombstone cell — tombstones can only be reaped by major compaction once they age past - * the keep-deleted-cells window. Issuing another Delete at the same coordinates writes a - * duplicate marker, does not change the row's effective state, and only adds compaction - * load. Combined with the absence of a source-side counterpart to mirror, the right - * action is to leave the existing tombstone untouched. - * - * Tombstone subtype depends on what source has at this {@code (cf, q)}: - *
    - *
  • Source has no cell at this column — target's read at this column should - * be empty. Use {@link Delete#addColumns(byte[], byte[], long)} (DeleteColumn, - * scope {@code ts <= T}) so even max-versions-hidden older target versions are - * shadowed. A point Delete would only shadow this exact ts, surfacing the next - * hidden version on read.
  • - *
  • Source's max ts at this column is >= target's ts — point-Delete only - * target's exact ts. Source's equal-or-higher ts mirror will surface; no hidden - * version can surface above it.
  • - *
  • Source's max ts at this column is < target's ts — point-Delete target's - * ts, AND point-Delete every max-versions-hidden Put on target with ts in - * {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is - * shadowed, the next hidden version surfaces above source's mirror — silent - * divergence. Hidden versions are discovered via the row's tombstone+Put index - * loaded from a single raw all-versions scan.
  • - *
- * - * Pre-build {@code sourceMaxTsByColumn} once per row from the source cell array. - * - * @return true if the cell was a live cell that contributed a tombstone marker, false if - * the cell was already a tombstone and was skipped. - */ - private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairState rowState, - Map sourceMaxTsByColumn) throws IOException { - if (CellUtil.isDelete(cell)) { - return false; - } - byte[] family = CellUtil.cloneFamily(cell); - byte[] qualifier = CellUtil.cloneQualifier(cell); - long ts = cell.getTimestamp(); - Long sourceMaxTs = sourceMaxTsByColumn.get(new ColumnKey(family, qualifier)); - if (sourceMaxTs == null) { - // Source has no cell at this column; shadow ts <= T so hidden older versions - // don't surface on read. - rowState.delete().addColumns(family, qualifier, ts); - } else if (sourceMaxTs >= ts) { - // Source's mirror is at >= target's ts; point-Delete only target's exact ts. - rowState.delete().addColumn(family, qualifier, ts); - } else { - // Source's max ts at this column < target's ts. Point-Delete target's visible cell, - // AND point-Delete every hidden target Put in (sourceMaxTs, ts) so they don't - // surface above source's mirror after the visible cell is shadowed. - rowState.delete().addColumn(family, qualifier, ts); - Set hiddenTs = rowState.targetRowTombstones(targetHTable) - .targetPutTimestampsBetween(family, qualifier, sourceMaxTs, ts); - for (Long hidden : hiddenTs) { - rowState.delete().addColumn(family, qualifier, hidden); - } - } - return true; - } - - /** - * Outcome of {@link #mirrorWholeRow}. {@code FULLY_MIRRORED} = every source cell mirrored - * onto target with no shadowing. {@code PARTIALLY_MIRRORED} = at least one cell mirrored, - * at least one suppressed by an existing target tombstone (caller bumps both - * ROWS_MISSING_ON_TARGET and ROWS_CANNOT_REPAIR). {@code FULLY_SHADOWED} = every source - * cell suppressed; the row is unrepairable until target's tombstones are reaped (caller - * bumps only ROWS_CANNOT_REPAIR). - */ - private enum WholeRowMirrorOutcome { - FULLY_MIRRORED, PARTIALLY_MIRRORED, FULLY_SHADOWED - } - - /** - * Mirrors every source cell of a row that is missing on target. Source cells route by - * type: live cells to a Put, tombstone cells (under {@code --raw-scan}) to a Delete via - * {@link Delete#add(Cell)}. Each cell is shadow-checked against existing target - * tombstones (lazy raw single-row scan on first cell) — even though target's filtered scan - * returned no cells for this row, target may carry tombstones that would shadow our Put. - */ - private WholeRowMirrorOutcome mirrorWholeRow(Result sourceResult, Table targetHTable, - List pendingPuts, List pendingDeletes) throws IOException { - RowRepairState rowState = new RowRepairState(sourceResult.getRow()); - int mirrored = 0; - for (Cell cell : sourceResult.rawCells()) { - if (mirrorSourceCellUnlessShadowed(cell, targetHTable, rowState)) { - mirrored++; - } - } - rowState.flush(pendingPuts, pendingDeletes); - if (mirrored == 0) { - return WholeRowMirrorOutcome.FULLY_SHADOWED; - } - return rowState.anyCellUnrepairable ? WholeRowMirrorOutcome.PARTIALLY_MIRRORED - : WholeRowMirrorOutcome.FULLY_MIRRORED; - } - - /** - * Tombstones every live cell of a row that is extra on target. Existing tombstones on the - * target row are skipped — HBase cannot remove tombstone cells; only major compaction - * reaps them. - * - * @return the number of live cells that contributed a tombstone marker. {@code 0} means - * the row was already entirely tombstones — repair could not act on it, and the - * caller should record this as {@link SyncCounters#ROWS_CANNOT_REPAIR}. - */ - private int tombstoneWholeRow(Result targetResult, Table targetHTable, - List pendingPuts, List pendingDeletes) throws IOException { - RowRepairState rowState = new RowRepairState(targetResult.getRow()); - // Source has no row at all here, so source has no cell at any column — every target - // cell hits the "shadow ts <= T" path inside tombstoneTargetCell. - Map sourceMaxTsByColumn = Collections.emptyMap(); - int liveCellsTombstoned = 0; - for (Cell cell : targetResult.rawCells()) { - if (tombstoneTargetCell(cell, targetHTable, rowState, sourceMaxTsByColumn)) { - liveCellsTombstoned++; - } - } - rowState.flush(pendingPuts, pendingDeletes); - return liveCellsTombstoned; - } - - /** - * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator} - * order and appends the resulting {@link Put}/{@link Delete} mutations (if any) to the - * pending lists. Returns a {@link RowDiffOutcome} carrying both the cell-level drift - * counts and a flag indicating whether any source-side mirror was suppressed by target - * tombstone shadowing — letting the caller decide whether to bump - * {@link SyncCounters#ROWS_CANNOT_REPAIR}. - * - * Branches: - * same coords + matching value → no drift, no signal - * same coords + different value → different++; mirror source cell (shadow-checked) - * source-only cell at unique coords → missing++; mirror source cell (shadow-checked) - * target-only live cell at unique coords → extra++; tombstone target cell - * target-only tombstone cell → skip (HBase cannot remove tombstones) - * - * Cells whose mirror is suppressed by shadowing do NOT bump the cell counter — the - * cell wasn't written to target, so it isn't repaired drift. The row-level shadow - * signal is surfaced via {@link RowDiffOutcome#rowCannotRepair}. - */ - private RowDiffOutcome diffCellsForRow(Result sourceResult, Result targetResult, - Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { - Cell[] sourceCells = sourceResult.rawCells(); - Cell[] targetCells = targetResult.rawCells(); - CellComparator comparator = CellComparator.getInstance(); - - // Always use the lazy raw single-row fetch path for shadow detection. Even under - // --raw-scan, the merge-scan's targetCells came from a time-range-filtered scan and - // would not surface tombstones at ts > toTime — and those out-of-window tombstones - // can still shadow Puts we mirror inside the window during application reads. The - // lazy fetch in {@link RowRepairState#tombstones} loads tombstones at ts >= fromTime - // regardless of upper bound, catching that case correctly. - RowRepairState rowState = new RowRepairState(sourceResult.getRow()); - - // Pre-compute the set of (cf, q) coordinates source has any cell at. tombstoneTargetCell - // uses this to choose between point-Delete (when source has the column at some ts — - // mirrored cell will surface on read) and DeleteColumn (when source has nothing at the - // column — must shadow ts <= T to keep max-versions-hidden older versions invisible. - // The map records source's max ts at each column. When tombstoning a target cell at a - // column source has at lower ts, hidden target Puts in (sourceMax, targetTs) need - // their own point Deletes too — otherwise after the visible cell is shadowed, a hidden - // version surfaces above source's mirror. - Map sourceMaxTsByColumn = new HashMap<>(); - for (Cell sourceCell : sourceCells) { - if (!CellUtil.isDelete(sourceCell)) { - ColumnKey key = new ColumnKey(CellUtil.cloneFamily(sourceCell), - CellUtil.cloneQualifier(sourceCell)); - sourceMaxTsByColumn.merge(key, sourceCell.getTimestamp(), Math::max); - } - } - - int cellMissing = 0; - int cellExtra = 0; - int cellDifferent = 0; - - int sourceIdx = 0; - int targetIdx = 0; - while (sourceIdx < sourceCells.length && targetIdx < targetCells.length) { - int cmp = comparator.compare(sourceCells[sourceIdx], targetCells[targetIdx]); - if (cmp == 0) { - // Same coordinates; CellComparator does not compare values, check separately. - if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) { - if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { - cellDifferent++; - } - } - sourceIdx++; - targetIdx++; - } else if (cmp < 0) { - if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { - cellMissing++; - } - sourceIdx++; - } else { - // Target-only cell. Live cell → tombstone it (cellExtra++). Tombstone cell → - // can't act on it (HBase has no API to remove tombstones), so the row carries - // unrepairable drift and re-verify will mismatch. - if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState, - sourceMaxTsByColumn)) { - cellExtra++; - } else { - rowState.anyCellUnrepairable = true; - } - } - } - while (sourceIdx < sourceCells.length) { - if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowState)) { - cellMissing++; - } - sourceIdx++; - } - while (targetIdx < targetCells.length) { - if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowState, - sourceMaxTsByColumn)) { - cellExtra++; - } else { - rowState.anyCellUnrepairable = true; - } - } - - if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowState.anyCellUnrepairable) { - return RowDiffOutcome.NONE; - } - rowState.flush(pendingPuts, pendingDeletes); - return new RowDiffOutcome(new CellDriftCounts(cellMissing, cellExtra, cellDifferent), - rowState.anyCellUnrepairable); - } - - /** - * Flushes the accumulated Put and Delete batches to the target HTable as a single mixed - * RPC and clears both lists. Called every {@code repairBatchSize} rows and once more at - * the end of a chunk. - * - * Issuing both Puts and Deletes via {@link Table#batch} (one network round-trip) instead - * of separate {@code put()} + {@code delete()} calls (two round-trips) eliminates the - * inter-RPC failure window where a JVM/regionserver crash between the two would leave - * target with Puts applied but matching Deletes not yet tombstoned. Server-side, batch - * mutations are still applied per-row sequentially, so a regionserver crash mid-batch - * can still leave partial application — but the mid-flush gap on the client side is - * gone. - * - * {@link InterruptedException} from {@code batch} is converted to {@link IOException} - * so the existing per-chunk catch path treats interruption like any other transient - * write failure (chunk → REPAIR_FAILED, retry on next invocation). The interrupt flag is - * restored so an outer cancellation still observes the interrupted state. - */ - private void flushRepairMutations(Table targetHTable, List puts, List deletes) - throws IOException { - if (puts.isEmpty() && deletes.isEmpty()) { - return; - } - List mutations = new ArrayList<>(puts.size() + deletes.size()); - mutations.addAll(puts); - mutations.addAll(deletes); - Object[] results = new Object[mutations.size()]; - try { - targetHTable.batch(mutations, results); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("Interrupted while flushing repair mutations", e); - } - puts.clear(); - deletes.clear(); - } - - /** - * Writes a chunk-level checkpoint row and bumps the matching outcome counter, as a single - * "this attempt is recorded" unit. The outcome counter is bumped only after a successful - * checkpoint write, so on-disk audit and in-memory counters stay in sync. + PhoenixSyncTableCheckpointOutputRow.Status status; + SyncCounters outcomeCounter; + switch (result.status) { + case REPAIRED: + status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; + outcomeCounter = SyncCounters.CHUNKS_REPAIRED; + break; + case UNREPAIRABLE: + status = PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE; + outcomeCounter = SyncCounters.CHUNKS_UNREPAIRABLE; + break; + case REPAIR_FAILED: + status = PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED; + outcomeCounter = SyncCounters.CHUNKS_REPAIR_FAILED; + break; + default: + throw new IllegalStateException("Unexpected repair status: " + result.status); + } + + writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName) + .setTargetCluster(targetZkQuorum).setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK) + .setFromTime(fromTime).setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun) + .setStartRowKey(sourceChunk.startKey).setEndRowKey(sourceChunk.endKey).setStatus(status) + .setExecutionStartTime(request.verifyStartTime).setExecutionEndTime(result.endTime) + .setCounters(counters).build(), outcomeCounter, context); + } + + /** + * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome + * counter is bumped only after a successful checkpoint write, so on-disk audit and in-memory + * counters stay in sync. * - * If the checkpoint write throws {@link SQLException}, the failure is logged and the + *

If the checkpoint write throws {@link SQLException}, the failure is logged and the * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is * NOT propagated. Reasons: *

    @@ -1458,174 +885,6 @@ private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row, } } - /** - * Performs row-level repair for a mismatched chunk by merge-scanning source and target - * cluster data and applying targeted mutations to target. The two scan ranges may differ: - * the verifier reads target over a wider range than source (covers extra-on-target rows - * that fall between consecutive source chunks); repair must mirror the same boundaries so - * those extras are visible here as {@code cmp > 0} rows and get deleted. - * - * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee). - * cmp == 0 (same row): compare cells; repair if different. - * cmp < 0 (source-only): Put all source cells. - * cmp > 0 (target-only): Delete target cells within [fromTime, toTime]. - * - * Cells outside [fromTime, toTime] are never read (scan time range), so never mutated. - * - * Only called when isDryRun == false. - * - * @param sourceStart Source chunk start key (also the checkpoint PK) — inclusive - * @param sourceEnd Source chunk end key (also the checkpoint PK) — inclusive - * @param targetStart Target scan start (matches verifier-side boundary) - * @param targetEnd Target scan end (matches verifier-side boundary) - * @param targetStartInclusive Inclusivity of target scan start — matches verify side - * @param targetEndInclusive Inclusivity of target scan end — matches verify side - * @param verifyStartTime When the verify pass began for this chunk; reused as the - * REPAIRED row's START_TIME so the row spans the full - * verify+repair lifecycle that overwrites the MISMATCHED row. - */ - private void repairChunk(byte[] sourceStart, byte[] sourceEnd, byte[] targetStart, - byte[] targetEnd, boolean targetStartInclusive, boolean targetEndInclusive, - long verifySourceRows, long verifyTargetRows, Timestamp verifyStartTime, Context context) - throws IOException, SQLException { - DriftCounters driftCounters = new DriftCounters(); - - LOGGER.info( - "Starting repair for chunk source=[{}, {}] target=[{}{}, {}{} on table {}", - Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), - targetStartInclusive ? "[" : "(", Bytes.toStringBinary(targetStart), - Bytes.toStringBinary(targetEnd), targetEndInclusive ? "]" : ")", tableName); - - PhoenixConnection sourcePhoenixConn = sourceConnection.unwrap(PhoenixConnection.class); - PhoenixConnection targetPhoenixConn = targetConnection.unwrap(PhoenixConnection.class); - - Scan sourceScan = createRepairScan(sourceStart, sourceEnd, true, true, sourcePhoenixConn); - Scan targetScan = createRepairScan(targetStart, targetEnd, targetStartInclusive, - targetEndInclusive, targetPhoenixConn); - - List pendingPuts = new ArrayList<>(); - List pendingDeletes = new ArrayList<>(); - - try (Table sourceHTable = sourcePhoenixConn.getQueryServices().getTable(physicalTableName); - Table targetHTable = targetPhoenixConn.getQueryServices().getTable(physicalTableName); - ResultScanner sourceScanner = sourceHTable.getScanner(sourceScan); - ResultScanner targetScanner = targetHTable.getScanner(targetScan)) { - - Result sourceResult = sourceScanner.next(); - Result targetResult = targetScanner.next(); - - while (sourceResult != null || targetResult != null) { - int cmp; - if (sourceResult == null) { - cmp = 1; - } else if (targetResult == null) { - cmp = -1; - } else { - cmp = Bytes.compareTo(sourceResult.getRow(), targetResult.getRow()); - } - - // Drift signals are bumped at the branch that semantically caused them: row-level - // signals at the cmp != 0 branches, cell-level signals at the cmp == 0 branch. - // ROWS_CANNOT_REPAIR is bumped whenever any source mirror is suppressed by an - // existing target tombstone (or the cmp > 0 row was already entirely tombstones). - if (cmp == 0) { - // Same row key on both clusters — diff at cell level and repair only if cells differ. - RowDiffOutcome outcome = diffCellsForRow(sourceResult, targetResult, targetHTable, - pendingPuts, pendingDeletes); - driftCounters.addCellDrift(outcome.cells); - if (outcome.rowCannotRepair) { - driftCounters.rowsCannotRepair++; - } - sourceResult = sourceScanner.next(); - targetResult = targetScanner.next(); - } else if (cmp < 0) { - // Source-only row — mirror it onto target. Even though target's filtered scan - // returned no row at this key, target may carry tombstones that would shadow - // some or all of the Puts. - WholeRowMirrorOutcome outcome = - mirrorWholeRow(sourceResult, targetHTable, pendingPuts, pendingDeletes); - if (outcome != WholeRowMirrorOutcome.FULLY_SHADOWED) { - driftCounters.rowsMissingOnTarget++; - } - if (outcome != WholeRowMirrorOutcome.FULLY_MIRRORED) { - driftCounters.rowsCannotRepair++; - } - sourceResult = sourceScanner.next(); - } else { - // Target-only row — tombstone its live cells. If the row is already entirely - // tombstones, repair has nothing to do (HBase cannot remove tombstones; only - // major compaction reaps them) — record as ROWS_CANNOT_REPAIR so operators can - // see the unrepairable drift volume. - int liveCellsTombstoned = - tombstoneWholeRow(targetResult, targetHTable, pendingPuts, pendingDeletes); - if (liveCellsTombstoned == 0) { - driftCounters.rowsCannotRepair++; - } else { - driftCounters.rowsExtraOnTarget++; - } - targetResult = targetScanner.next(); - } - - if (pendingPuts.size() + pendingDeletes.size() >= repairBatchSize) { - flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); - } - context.progress(); - } - flushRepairMutations(targetHTable, pendingPuts, pendingDeletes); - } catch (IOException e) { - // Per-chunk fault isolation. Mark this chunk REPAIR_FAILED, increment the counter, - // and return so the mapper continues with the next chunk. Phase 2's STATUS filter - // (VERIFIED, REPAIRED) excludes REPAIR_FAILED, so a re-run will re-attempt this chunk - // as an unprocessed gap. - LOGGER.error("Repair failed for chunk source=[{}, {}] on table {}: {}", - Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), tableName, - e.getMessage(), e); - - Timestamp failedAt = new Timestamp(System.currentTimeMillis()); - // Capture partial progress in the COUNTERS column for triage. - String failedCounters = - driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); - writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetZkQuorum) - .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) - .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun) - .setStartRowKey(sourceStart).setEndRowKey(sourceEnd) - .setStatus(PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) - .setExecutionStartTime(verifyStartTime).setExecutionEndTime(failedAt) - .setCounters(failedCounters).build(), SyncCounters.CHUNKS_REPAIR_FAILED, context); - return; - } - - driftCounters.commitTo(context); - // Chunk transitions to UNREPAIRABLE if any row landed in ROWS_CANNOT_REPAIR — operator - // intervention (typically major compaction on target to reap shadowing tombstones) is - // needed before re-running the tool. Otherwise REPAIRED. - boolean unrepairable = driftCounters.rowsCannotRepair > 0; - PhoenixSyncTableCheckpointOutputRow.Status status = unrepairable - ? PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE - : PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED; - - Timestamp repairEndTime = new Timestamp(System.currentTimeMillis()); - String repairCounters = - driftCounters.formatChunkCounters(verifySourceRows, verifyTargetRows); - - // Write checkpoint first; outcome counter is bumped inside writeChunkCheckpoint only - // on a successful write so the audit row and the counter stay consistent. - writeChunkCheckpoint(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetZkQuorum) - .setType(PhoenixSyncTableCheckpointOutputRow.Type.CHUNK).setFromTime(fromTime) - .setToTime(toTime).setTenantId(tenantId).setIsDryRun(isDryRun).setStartRowKey(sourceStart) - .setEndRowKey(sourceEnd).setStatus(status) - .setExecutionStartTime(verifyStartTime).setExecutionEndTime(repairEndTime) - .setCounters(repairCounters).build(), - unrepairable ? SyncCounters.CHUNKS_UNREPAIRABLE : SyncCounters.CHUNKS_REPAIRED, context); - - LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}", - Bytes.toStringBinary(sourceStart), Bytes.toStringBinary(sourceEnd), status, - driftCounters.toLogString()); - } - - @Override protected void cleanup(Context context) throws IOException, InterruptedException { tryClosingResources(); super.cleanup(context); From a24c6e4dab91502b89bb3bc1df15e8742c540142 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Thu, 4 Jun 2026 18:01:17 +0530 Subject: [PATCH 10/18] checkpointing test implementation --- .../apache/phoenix/jdbc/PhoenixDriver.java | 4 +- .../phoenix/jdbc/PhoenixEmbeddedDriver.java | 85 +- .../apache/phoenix/monitoring/MetricType.java | 2 - .../ConnectionQueryServicesMetrics.java | 3 +- .../PhoenixSyncTableChunkRepairer.java | 17 +- .../mapreduce/PhoenixSyncTableMapper.java | 6 + .../end2end/PhoenixSyncTableToolIT.java | 2360 ++++++++++++----- .../ConnectionQueryServicesMetricsIT.java | 22 - .../PhoenixSyncTableOutputRepositoryTest.java | 227 ++ ...ectionQueryServicesMetricsManagerTest.java | 21 - 10 files changed, 2036 insertions(+), 711 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java index f3784183555..953bbc5bd57 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixDriver.java @@ -41,7 +41,6 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesImpl; import org.apache.phoenix.query.QueryServicesOptions; -import org.apache.phoenix.util.EnvironmentEdgeManager; import org.apache.phoenix.util.PropertiesUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,7 +205,6 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { - long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); GLOBAL_PHOENIX_CONNECTIONS_ATTEMPTED_COUNTER.increment(); if (!acceptsURL(url)) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); @@ -215,7 +213,7 @@ public Connection connect(String url, Properties info) throws SQLException { lockInterruptibly(LockMode.READ); try { checkClosed(); - return createConnection(url, info, connectionStartTime); + return createConnection(url, info); } catch (SQLException sqlException) { if (sqlException.getErrorCode() != SQLExceptionCode.NEW_CONNECTION_THROTTLED.getErrorCode()) { GLOBAL_FAILED_PHOENIX_CONNECTIONS.increment(); diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java index ea1d416fd8a..524789464b4 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java @@ -17,8 +17,6 @@ */ package org.apache.phoenix.jdbc; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; -import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.PHOENIX_TEST_DRIVER_URL_PARAM; import java.sql.Connection; @@ -26,18 +24,17 @@ import java.sql.DriverPropertyInfo; import java.sql.SQLException; import java.sql.SQLFeatureNotSupportedException; -import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Properties; import java.util.logging.Logger; import javax.annotation.concurrent.Immutable; import org.apache.phoenix.coprocessorclient.MetaDataProtocol; -import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; -import org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesMetricsManager; import org.apache.phoenix.query.ConnectionQueryServices; import org.apache.phoenix.query.QueryServices; -import org.apache.phoenix.util.*; +import org.apache.phoenix.util.PhoenixRuntime; +import org.apache.phoenix.util.PropertiesUtil; +import org.apache.phoenix.util.ReadOnlyProps; +import org.apache.phoenix.util.SQLCloseable; import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableMap; @@ -53,13 +50,13 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable { private final static String DNC_JDBC_PROTOCOL_SUFFIX = "//"; private final static String DRIVER_NAME = "PhoenixEmbeddedDriver"; private static final String TEST_URL_AT_END = - "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM; + "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM; private static final String TEST_URL_IN_MIDDLE = - TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR; + TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR; private static final String[] SUPPORTED_PROTOCOLS = - new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK, - PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC }; + new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK, + PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC }; private final static DriverPropertyInfo[] EMPTY_INFO = new DriverPropertyInfo[0]; public final static String MAJOR_VERSION_PROP = "DriverMajorVersion"; @@ -67,9 +64,9 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable { public final static String DRIVER_NAME_PROP = "DriverName"; public static final ReadOnlyProps DEFAULT_PROPS = - new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP, - Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP, - Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME)); + new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP, + Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP, + Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME)); PhoenixEmbeddedDriver() { } @@ -122,59 +119,31 @@ public boolean acceptsURL(String url) throws SQLException { @Override public Connection connect(String url, Properties info) throws SQLException { - long connectionStartTime = EnvironmentEdgeManager.currentTimeMillis(); if (!acceptsURL(url)) { return null; } - return createConnection(url, info, connectionStartTime); + return createConnection(url, info); } - protected final Connection createConnection(String url, Properties info, - long connectionCreationTime) throws SQLException { + protected final Connection createConnection(String url, Properties info) throws SQLException { Properties augmentedInfo = PropertiesUtil.deepCopy(info); augmentedInfo.putAll(getDefaultProps().asMap()); - Connection connection = null; - try { - if (url.contains("|")) { - // Get HAURLInfo to pass it to connection creation - HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); - // High availability connection using two clusters - Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); - if (haGroup.isPresent()) { - connection = haGroup.get().connect(augmentedInfo, haurlInfo); - setPhoenixConnectionTime(connectionCreationTime, connection); - return connection; - } else { - // If empty HA group is returned, fall back to single cluster. - url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( - () -> new SQLException( - "HA group can not be initialized, fallback to single cluster")); - } - } - ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); - connection = cqs.connect(url, augmentedInfo); - setPhoenixConnectionTime(connectionCreationTime, connection); - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - if (!metrics.isEmpty()) { - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - } - return connection; - } catch (SQLException e) { - if (connection != null) { - connection.close(); + if (url.contains("|")) { + // Get HAURLInfo to pass it to connection creation + HAURLInfo haurlInfo = HighAvailabilityGroup.getUrlInfo(url, augmentedInfo); + // High availability connection using two clusters + Optional haGroup = HighAvailabilityGroup.get(url, augmentedInfo); + if (haGroup.isPresent()) { + return haGroup.get().connect(augmentedInfo, haurlInfo); + } else { + // If empty HA group is returned, fall back to single cluster. + url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( + () -> new SQLException("HA group can not be initialized, fallback to single cluster")); } - throw e; } - } - - private void setPhoenixConnectionTime(long connectionCreationTime, Connection connection) { - String connectionQueryServiceName = - ((PhoenixConnection) connection).getQueryServices().getConfiguration() - .get(QUERY_SERVICES_NAME); - ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, - PHOENIX_CONNECTION_CREATION_TIME_MS, connectionCreationTime); + ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); + return cqs.connect(url, augmentedInfo); } /** @@ -186,7 +155,7 @@ private void setPhoenixConnectionTime(long connectionCreationTime, Connection co * @return new or cached QuerySerices used to establish a new Connection. */ protected abstract ConnectionQueryServices getConnectionQueryServices(String url, Properties info) - throws SQLException; + throws SQLException; @Override public int getMajorVersion() { diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java index de559e4aff4..ff80705c0d4 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/MetricType.java @@ -236,8 +236,6 @@ public enum MetricType { PHOENIX_CONNECTIONS_FAILED_COUNTER("cf", "Number of client Phoenix Connections Failed to open" + ", not including throttled connections", LogLevel.OFF, PLong.INSTANCE), - PHOENIX_CONNECTION_CREATION_TIME_MS("cct", - "Time spent in creating Phoenix connections in milliseconds", LogLevel.OFF, PLong.INSTANCE), CLIENT_METADATA_CACHE_MISS_COUNTER("cmcm", "Number of cache misses for the CQSI cache.", LogLevel.DEBUG, PLong.INSTANCE), CLIENT_METADATA_CACHE_HIT_COUNTER("cmch", "Number of cache hits for the CQSI cache.", diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index 8c3ac719d27..c87e9ad7984 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -40,8 +40,7 @@ public enum QueryServiceMetrics { CONNECTION_QUERY_SERVICE_OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER( OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER), CONNECTION_QUERY_SERVICE_PHOENIX_CONNECTIONS_THROTTLED_COUNTER( - PHOENIX_CONNECTIONS_THROTTLED_COUNTER), - CONNECTION_QUERY_SERVICE_CREATION_TIME(PHOENIX_CONNECTION_CREATION_TIME_MS); + PHOENIX_CONNECTIONS_THROTTLED_COUNTER); private MetricType metricType; private ConnectionQueryServicesMetric metric; diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java index 6a1da24bc45..4afdbcf9130 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java @@ -375,15 +375,16 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta CellComparator comparator = CellComparator.getInstance(); RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow()); - // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the - // tombstone shape for a target-extra cell. Three cases: - // key absent → DeleteColumn at target's ts (wipe hidden older versions too) + // Per-column max source PUT timestamp; consumed by tombstoneTargetCell (target-extra + // cell) to pick the delete shape. Three cases: + // key absent → DeleteColumn at target's ts (also wipes older hidden versions) // sourceMaxTs >= targetTs → point Delete at target's ts - // sourceMaxTs < targetTs → point Delete at target's ts AND every hidden target Put in - // (sourceMaxTs, targetTs); else hidden version surfaces. - // Math::max collapses multi-version source so the third case only fires when target sits above - // ALL of source's versions. - // Check tombstoneTargetCell() for its usage. + // sourceMaxTs < targetTs → point Delete at target's ts, PLUS a point Delete for every + // hidden target Put whose ts ∈ (sourceMaxTs, targetTs); without + // these, the next hidden Put surfaces above source's mirror + // once target's visible cell is shadowed. + // Math::max collapses multi-version source, so case 3 only fires when target sits above + // ALL source versions at this column. See tombstoneTargetCell() for the application. Map sourceMaxTsByColumn = new HashMap<>(); for (Cell sourceCell : sourceCells) { if (!CellUtil.isDelete(sourceCell)) { diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index d782cceadfa..aa19bcf1a89 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -808,9 +808,15 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart, * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED), * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a * successful checkpoint write (audit row and counter stay consistent). + * + *

    {@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and + * target hashes differed — the drift-detected signal — regardless of whether repair ran. + * Without this, repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks + * == 0} for fully-repaired regions and roll them up as VERIFIED instead of REPAIRED. */ private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request, ChunkRepairResult result, Context context) { + context.getCounter(SyncCounters.CHUNKS_MISMATCHED).increment(1); DriftCounters drift = result.drift; context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).increment(drift.rowsMissingOnTarget); context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).increment(drift.rowsExtraOnTarget); diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 599f18337d8..0dc84f5cf6c 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -39,13 +39,17 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; @@ -60,12 +64,15 @@ import org.apache.phoenix.jdbc.PhoenixConnection; import org.apache.phoenix.jdbc.PhoenixDriver; import org.apache.phoenix.mapreduce.PhoenixSyncTableCheckpointOutputRow; +import org.apache.phoenix.mapreduce.PhoenixSyncTableInputFormat; import org.apache.phoenix.mapreduce.PhoenixSyncTableMapper.SyncCounters; import org.apache.phoenix.mapreduce.PhoenixSyncTableOutputRepository; import org.apache.phoenix.mapreduce.PhoenixSyncTableTool; import org.apache.phoenix.query.BaseTest; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.schema.types.PInteger; +import org.apache.phoenix.util.PhoenixRuntime; +import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; import org.junit.After; import org.junit.AfterClass; @@ -156,104 +163,57 @@ public void testSyncTableValidateWithDataDifference() throws Exception { introduceAndVerifyTargetDifferences(uniqueTableName); - // Pin the time window so both runs share the same checkpoint PK - // (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY). - // Without this, runSyncToolWithLargeChunks would assign a fresh System.currentTimeMillis() - // to --to-time on each call and the repair pass would append new rows instead of - // overwriting the dry-run pass's MISMATCHED rows. - String fromTime = "0"; - String toTime = String.valueOf(System.currentTimeMillis()); - - // First run: --dry-run, only detect mismatches. - Job job = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", fromTime, - "--to-time", toTime); - SyncCountersResult counters = getSyncCounters(job); + // Pin the time window so the dry-run and repair share the same checkpoint PK. + long fromTime = 0L; + long toTime = System.currentTimeMillis(); - validateSyncCounters(counters, 10, 10, 1, 3); - validateMapperCounters(counters, 1, 3); - assertEquals("Expected 4 mapper task to be created", 4, counters.taskCreated); + // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows. + Job dryRunJob = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); + + validateSyncCounters(dryRunCounters, 10, 10, 1, 3); + validateMapperCounters(dryRunCounters, 1, 3); + assertEquals("Expected 4 mapper task to be created", 4, dryRunCounters.taskCreated); + // Dry-run row-level logging should flag the 3 same-key/different-value rows as + // ROWS_DIFFERENT_ON_TARGET; nothing missing or extra (replication seeded both sides + // with the same row keys before introduceAndVerifyTargetDifferences mutated three). + assertEquals("Dry-run should detect 3 rows different on target", 3, + dryRunCounters.rowsDifferentOnTarget); + assertEquals("Dry-run should report 0 rows missing on target", 0, + dryRunCounters.rowsMissingOnTarget); + assertEquals("Dry-run should report 0 rows extra on target", 0, + dryRunCounters.rowsExtraOnTarget); List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4, 3, null); - // Second run: no --dry-run, repair the mismatched chunks. Same time window so the - // PK matches and CHUNK/REPAIRED overwrites CHUNK/MISMATCHED in place. - Job repairJob = - runSyncToolWithLargeChunks(uniqueTableName, "--from-time", fromTime, "--to-time", toTime); + // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in + // place. + Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); Counters repairCounters = repairJob.getCounters(); - - // The repair re-verifies the previously MISMATCHED chunks (excluded by Phase 2 filter) - // and now repairs them, producing CHUNK/REPAIRED + REGION/REPAIRED checkpoint rows. - long chunksRepaired = repairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue(); - long chunksRepairFailed = - repairCounters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); - long mappersRepaired = repairCounters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); - long mappersRepairFailed = - repairCounters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); - long rowsMissingOnTarget = - repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); - long rowsExtraOnTarget = - repairCounters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); - long rowsCannotRepair = - repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue(); - long cellsMissingOnTarget = - repairCounters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); - long cellsExtraOnTarget = - repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); - long cellsDifferentOnTarget = - repairCounters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); - assertEquals("All 3 mismatched chunks should be repaired", 3, chunksRepaired); - assertEquals("No chunk repair should fail", 0, chunksRepairFailed); - assertEquals("All 3 mismatched mapper regions should be repaired", 3, mappersRepaired); - assertEquals("No mapper repair should fail", 0, mappersRepairFailed); - // The three drifted rows exist on both clusters (only NAME values were modified on - // target via separate upserts, producing cells at different timestamps), so all drift - // is cell-level — no whole-row missing or extra signals. - assertEquals("No whole rows should be missing on target", 0, rowsMissingOnTarget); - assertEquals("No whole rows should be extra on target", 0, rowsExtraOnTarget); - assertEquals("No rows should be unrepairable (target has no all-tombstone rows)", 0, - rowsCannotRepair); - // Each modified row contributes at least one missing cell (source's original NAME at - // its original timestamp) and one extra cell (target's modified NAME at the new - // timestamp). cellsDifferent stays 0 because the modifications wrote at new timestamps - // rather than overwriting at the same coordinates. - assertTrue("Source-only cells across the 3 drifted rows should be detected, got " - + cellsMissingOnTarget, cellsMissingOnTarget >= 3); - assertTrue("Target-only cells across the 3 drifted rows should be detected, got " - + cellsExtraOnTarget, cellsExtraOnTarget >= 3); - assertEquals("No same-coord value diffs (modifications wrote at new timestamps)", 0, - cellsDifferentOnTarget); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 3, 0, 0); + assertRepairRowCounters(repairCounters, 0, 0, 0); + // 3 rows × 2 mismatched cells (NAME + Phoenix's _0 empty-key cell) = 6 missing and 6 extra. + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); // Target rows should now match source. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - // Checkpoint table now has 3 CHUNK/REPAIRED + 3 REGION/REPAIRED in addition to the - // VERIFIED rows. Phase 2's STATUS-IN filter caused the MISMATCHED rows to be re-entered - // as gaps and repair overwrote them in place. List postRepairEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - int chunkRepairedRows = 0; - int regionRepairedRows = 0; - int mismatchedRows = 0; - for (PhoenixSyncTableCheckpointOutputRow entry : postRepairEntries) { - if (PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED.equals(entry.getStatus())) { - if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) { - chunkRepairedRows++; - } else if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) { - regionRepairedRows++; - } - } else if ( - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus()) - ) { - mismatchedRows++; - } - } - assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, chunkRepairedRows); - assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, regionRepairedRows); - assertEquals("MISMATCHED rows should be overwritten in place — none should remain", 0, - mismatchedRows); + assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, + countCheckpointsByTypeAndStatus(postRepairEntries, + PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); + assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, + countCheckpointsByTypeAndStatus(postRepairEntries, + PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -271,11 +231,17 @@ public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception }; for (String zkQuorum : zkQuorumFormats) { - Job job = runSyncToolWithZkQuorum(uniqueTableName, zkQuorum); + Job job = runSyncToolWithZkQuorum(uniqueTableName, zkQuorum, "--dry-run"); SyncCountersResult counters = getSyncCounters(job); validateSyncCounters(counters, 10, 10, 7, 3); cleanupCheckpointTable(sourceConnection, uniqueTableName, zkQuorum, null); } + + // After validating detection across ZK formats, run dry-run + repair against the default + // targetZkQuorum to confirm the tool converges source and target. + runSyncToolWithRepair(uniqueTableName); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -290,13 +256,19 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception { assertEquals("Source should have 10 rows", 10, sourceCount); assertEquals("Target should have 7 rows (3 deleted)", 7, targetCount); - Job job = runSyncTool(uniqueTableName); - SyncCountersResult counters = getSyncCounters(job); + // Dry-run: detects 3 mismatched chunks. + RepairRunResult result = runSyncToolWithRepair(uniqueTableName); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); validateSyncCounters(counters, 10, 7, 7, 3); validateMapperCounters(counters, 1, 3); assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated); + // Repair pass only re-runs the 3 mismatched chunks (verified chunks are excluded by the + // resume filter). Target's DELETEs left tombstones that shadow source's Puts at lower + // timestamps, so each re-run mapper rolls up to UNREPAIRABLE. + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + validateMapperCountersRepair(repairCounters, 0, 0, 3, 0); } @Test @@ -325,11 +297,14 @@ public void testSyncTableWithConditionalTTLExpiredRows() throws Exception { new String[] { "MODIFIED_5", "MODIFIED_8" }); // Run sync tool, TTL-expired rows (1-3) should be skipped on both source and target - Job job = runSyncTool(uniqueTableName); - SyncCountersResult counters = getSyncCounters(job); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); validateSyncCounters(counters, 7, 7, 5, 2); validateMapperCounters(counters, 2, 2); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -373,6 +348,15 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception validateSyncCounters(counters2, 7, 7, 7, 0); validateMapperCounters(counters2, 4, 0); + + // Source and target each show 7 live rows under the conditional TTL filter the sync tool + // applies on both sides, so the repair pass is a no-op and no MISMATCHED rows are written. + // Note: the standard Phoenix query (without the TTL filter the tool applies) sees 10 rows + // on source vs 7 on target because IS_STRICT_TTL=false returns expired rows on source + // (uncompacted) but compaction on target physically removed them — that asymmetry is by + // design, not drift the tool can converge. + runSyncToolWithRepair(uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -396,16 +380,22 @@ public void testSyncValidateIndexTable() throws Exception { deleteHBaseRows(CLUSTERS.getHBaseCluster2(), uniqueTableName, 3); deleteHBaseRows(CLUSTERS.getHBaseCluster2(), indexName, 3); - Job job = runSyncTool(indexName); - SyncCountersResult counters = getSyncCounters(job); + RepairRunResult result = runSyncToolWithRepair(indexName); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); validateSyncCounters(counters, 10, 7, 7, 3); - // Verify checkpoint entries show mismatches + // Verify checkpoint entries show mismatches (from dry-run pass) before repair runs. List checkpointEntries = queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null); assertFalse("Should have checkpointEntries", checkpointEntries.isEmpty()); + + // The repair pass syncs the index physical table on target with the source index. Since the + // data table was also corrupted on target (3 rows deleted via deleteHBaseRows) but we only + // ran sync on the index, the data table itself is still drifted — only assert on index + // checkpoint rows. + assertNoMismatchedCheckpoints(indexName, null); } @Test @@ -428,9 +418,11 @@ public void testSyncValidateLocalIndexTable() throws Exception { deleteHBaseRows(CLUSTERS.getHBaseCluster2(), uniqueTableName, 5); - // Run sync tool on the LOCAL INDEX table (not the data table) - Job job = runSyncTool(indexName); - SyncCountersResult counters = getSyncCounters(job); + // Run sync tool on the LOCAL INDEX table (not the data table). Local indexes share regions + // with the data table — the dry-run pass detects drift, the repair pass writes back the + // missing index rows on target. + RepairRunResult result = runSyncToolWithRepair(indexName); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); assertTrue(String.format("Should have at least %d verified chunks, actual: %d", 1, counters.chunksVerified), counters.chunksVerified >= 1); @@ -442,6 +434,9 @@ public void testSyncValidateLocalIndexTable() throws Exception { queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null); assertFalse("Should have checkpoint entries for local index", checkpointEntries.isEmpty()); + + // After repair, the local-index physical table on target should match source's index. + assertNoMismatchedCheckpoints(indexName, null); } @Test @@ -490,15 +485,17 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception tenantSourceConn.close(); } - // TENANT_001 has no differences, expect all rows verified - Job job1 = runSyncTool(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime); - SyncCountersResult counters1 = getSyncCounters(job1); + // TENANT_001 has no differences, expect all rows verified. Use dry-run + repair to confirm + // the no-drift case still leaves no MISMATCHED rows. + RepairRunResult t1 = + runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime); + SyncCountersResult counters1 = getSyncCounters(t1.dryRunJob); validateSyncCounters(counters1, 10, 10, 10, 0); validateMapperCounters(counters1, 4, 0); - // TENANT_002 has 3 modified rows - Job job2 = runSyncTool(uniqueTableName, "--tenant-id", tenantIds[1]); - SyncCountersResult counters2 = getSyncCounters(job2); + // TENANT_002 has 3 modified rows. Dry-run detects, repair writes back source values. + RepairRunResult t2 = runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[1]); + SyncCountersResult counters2 = getSyncCounters(t2.dryRunJob); validateSyncCounters(counters2, 10, 10, 7, 3); validateMapperCounters(counters2, 2, 2); @@ -511,6 +508,13 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_001"); assertFalse("Should have checkpoint entries for TENANT_001", checkpointEntries.isEmpty()); + // No MISMATCHED rows should remain after repair pass for either tenant. + assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_001"); + assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_002"); + + // After repair, TENANT_002's data should be identical between source and target. + withTenantConnections(tenantIds[1], + (sourceConn, targetConn) -> verifyDataIdentical(sourceConn, targetConn, uniqueTableName)); } @Test @@ -551,6 +555,12 @@ public void testSyncTableValidateWithTimeRangeFilter() throws Exception { validateSyncCounters(counters, 10, 10, 10, 0); validateMapperCounters(counters, 1, 0); + + // Within-window data (IDs 11-20) was identical, so the repair flow is a no-op there and + // no MISMATCHED rows are written. Out-of-window drift (IDs 3,5,8,23,25,28) is invisible + // to the time-range filter and remains on target by design — full convergence is NOT + // expected here, only checkpoint cleanliness for the window we scanned. + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -562,58 +572,14 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr // Introduce differences on target scattered across the dataset List mismatchIds = Arrays.asList(10, 25, 40, 55, 70, 85, 95); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); // Capture consistent time range for both runs long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Run sync tool for the FIRST time with explicit time range - Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - SyncCountersResult counters1 = getSyncCounters(job1); - - // Validate first run counters - should process all 100 rows - validateSyncCountersWithMinChunk(counters1, 100, 100, 1, 1); - - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - - assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty()); - - // Separate mapper and chunk entries using utility method - SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries); - List allMappers = separated.mappers; - List allChunks = separated.chunks; - - assertFalse("Should have mapper region entries", allMappers.isEmpty()); - assertFalse("Should have chunk entries", allChunks.isEmpty()); - - // Select 3/4th of chunks from each mapper to delete (simulating partial rerun) - // We repro the partial run via deleting some entries from checkpoint table and re-running the - // tool - List chunksToDelete = selectChunksToDeleteFromMappers( - sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75); - - // Delete all mappers and selected chunks - int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum, - null, allMappers, chunksToDelete); - - assertEquals("Should have deleted all mapper and selected chunk entries", - allMappers.size() + chunksToDelete.size(), deletedCount); - - List checkpointEntriesAfterDelete = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - assertEquals("Should have fewer checkpoint entries after deletion", - allMappers.size() + chunksToDelete.size(), - checkpointEntries.size() - checkpointEntriesAfterDelete.size()); - - // Calculate totals from REMAINING CHUNK entries in checkpoint table using utility method - CheckpointAggregateCounters remainingCounters = - calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete); + PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); + validateSyncCountersWithMinChunk(setup.firstRunCounters, 100, 100, 1, 1); List additionalSourceSplits = Arrays.asList(12, 22, 28, 32, 42, 52, 58, 62, 72, 78, 82, 92); @@ -634,34 +600,32 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr counters2.chunksMismatched); // (Remaining chunks from checkpoint) + (Second run) should equal (First run) - long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; - long totalVerifiedChunks = remainingCounters.chunksVerified + counters2.chunksVerified; - long totalMismatchedChunks = remainingCounters.chunksMismatched + counters2.chunksMismatched; + long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified; assertEquals( "Remaining + Second run source rows should equal first run source rows. " + "Remaining: " - + remainingCounters.sourceRowsProcessed + ", Second run: " + counters2.sourceRowsProcessed - + ", Total: " + totalSourceRows + ", Expected: " + counters1.sourceRowsProcessed, - counters1.sourceRowsProcessed, totalSourceRows); + + setup.remainingCounters.sourceRowsProcessed + ", Second run: " + + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: " + + setup.firstRunCounters.sourceRowsProcessed, + setup.firstRunCounters.sourceRowsProcessed, totalSourceRows); assertEquals( "Remaining + Second run target rows should equal first run target rows. " + "Remaining: " - + remainingCounters.targetRowsProcessed + ", Second run: " + counters2.targetRowsProcessed - + ", Total: " + totalTargetRows + ", Expected: " + counters1.targetRowsProcessed, - counters1.targetRowsProcessed, totalTargetRows); - - assertEquals("Remaining + Second run verified chunks should equal first run verified chunks. " - + "Remaining: " + remainingCounters.chunksVerified + ", Second run: " - + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected: " - + counters1.chunksVerified, counters1.chunksVerified, totalVerifiedChunks); - - assertEquals( - "Remaining + Second run mismatched chunks should equal first run mismatched chunks. " - + "Remaining: " + remainingCounters.chunksMismatched + ", Second run: " - + counters2.chunksMismatched + ", Total: " + totalMismatchedChunks + ", Expected: " - + counters1.chunksMismatched, - counters1.chunksMismatched, totalMismatchedChunks); + + setup.remainingCounters.targetRowsProcessed + ", Second run: " + + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: " + + setup.firstRunCounters.targetRowsProcessed, + setup.firstRunCounters.targetRowsProcessed, totalTargetRows); + + // Splits introduced between runs widen the second-run chunk count beyond the deleted + // 75% of the first run's chunks (extra region boundaries → extra chunks). So we relax + // the strict equality to >=. The row-count invariant above is unaffected by splits. + assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. " + + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: " + + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): " + + setup.firstRunCounters.chunksVerified, + totalVerifiedChunks >= setup.firstRunCounters.chunksVerified); // Verify checkpoint table has entries for the reprocessed regions List checkpointEntriesAfterRerun = @@ -669,7 +633,16 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr // After rerun, we should have at least more entries compared to delete table assertTrue("Should have checkpoint entries after rerun", - checkpointEntriesAfterRerun.size() > checkpointEntriesAfterDelete.size()); + checkpointEntriesAfterRerun.size() > setup.entriesAfterDelete.size()); + + // The partial-rerun pattern (delete checkpoints + re-split + rerun) leaves chunks marked + // REPAIRED with stale boundaries (relative to the post-split layout); the resume filter + // skips those, so a final run can leave residual drift. Cleanup the checkpoint and run + // a dry-run + repair pass on the stable layout to converge. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -680,49 +653,19 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E splitTableAt(sourceConnection, uniqueTableName, sourceSplits); List mismatchIds = Arrays.asList(10, 30, 60, 90); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; long toTime = System.currentTimeMillis(); - // First run with large chunk size + // First run with large chunk size, then delete 75% of chunks for partial rerun. int largeChunkSize = 10240; - Job job1 = runSyncToolWithChunkSize(uniqueTableName, largeChunkSize, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); - SyncCountersResult counters1 = getSyncCounters(job1); - + PartialRerunSetup setup = + setupPartialRerun(uniqueTableName, fromTime, toTime, largeChunkSize, 0.75); + SyncCountersResult counters1 = setup.firstRunCounters; validateSyncCounters(counters1, 100, 100, counters1.chunksVerified, counters1.chunksMismatched); - - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty()); - - SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries); - List allMappers = separated.mappers; - List allChunks = separated.chunks; - int mapperCountAfterFirstRun = allMappers.size(); - int chunkCountAfterFirstRun = allChunks.size(); - - assertFalse("Should have mapper entries", allMappers.isEmpty()); - assertFalse("Should have chunk entries", allChunks.isEmpty()); - - // Delete all mappers and 3/4th of chunks from each mapper - List chunksToDelete = selectChunksToDeleteFromMappers( - sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75); - - int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum, - null, allMappers, chunksToDelete); - assertEquals("Should have deleted all mapper and selected chunk entries", - allMappers.size() + chunksToDelete.size(), deletedCount); - - // Calculate counters from remaining (1/4th) chunk entries - List checkpointEntriesAfterDelete = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - CheckpointAggregateCounters remainingCounters = - calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete); + int mapperCountAfterFirstRun = setup.mappers.size(); + int chunkCountAfterFirstRun = setup.chunks.size(); // Re-run with smaller chunk size (1 byte) - produces more, smaller chunks int smallChunkSize = 1; @@ -731,8 +674,8 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E SyncCountersResult counters2 = getSyncCounters(job2); // (Remaining chunks) + (Second run) should equal (First run) for row counts - long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; assertEquals("Remaining + rerun source rows should equal first run", counters1.sourceRowsProcessed, totalSourceRows); @@ -756,6 +699,16 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E "Chunk count after rerun (" + separatedAfterRerun.chunks.size() + ") should be greater than first run (" + chunkCountAfterFirstRun + ")", separatedAfterRerun.chunks.size() > chunkCountAfterFirstRun); + + // The partial-rerun pattern (delete chunks, rerun with smaller chunks) exercises the + // checkpoint resume path. Once that has been validated, run a clean dry-run + repair + // pass on the same window so the repair flow has a stable boundary set to converge. + // Use the dry-run+repair pattern so any chunk that landed in a non-resumable state + // (REPAIRED with stale boundaries) is re-validated rather than skipped. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -766,59 +719,17 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr splitTableAt(sourceConnection, uniqueTableName, sourceSplits); List mismatchIds = Arrays.asList(5, 15, 25, 35, 45, 55, 65, 75, 85, 95); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; long toTime = System.currentTimeMillis(); - Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - SyncCountersResult counters1 = getSyncCounters(job1); - + PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); + SyncCountersResult counters1 = setup.firstRunCounters; validateSyncCounters(counters1, 100, 100, 90, 10); - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - - assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty()); - - // Separate mapper and chunk entries using utility method - SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries); - List allMappers = separated.mappers; - List allChunks = separated.chunks; - - assertFalse("Should have mapper region entries", allMappers.isEmpty()); - assertFalse("Should have chunk entries", allChunks.isEmpty()); - - // Select 3/4th of chunks from each mapper to delete (simulating partial rerun) - // We repro the partial run via deleting some entries from checkpoint table and re-running the - List chunksToDelete = selectChunksToDeleteFromMappers( - sourceConnection, uniqueTableName, targetZkQuorum, fromTime, toTime, null, allMappers, 0.75); - - // Delete all mappers and selected chunks - int deletedCount = deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum, - null, allMappers, chunksToDelete); - - assertEquals("Should have deleted all mapper and selected chunk entries", - allMappers.size() + chunksToDelete.size(), deletedCount); - - List checkpointEntriesAfterDelete = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - assertEquals("Should have fewer checkpoint entries after deletion", - allMappers.size() + chunksToDelete.size(), - checkpointEntries.size() - checkpointEntriesAfterDelete.size()); - - // Calculate totals from REMAINING CHUNK entries in checkpoint table using utility method - CheckpointAggregateCounters remainingCounters = - calculateAggregateCountersFromCheckpoint(checkpointEntriesAfterDelete); - - // Merge adjacent regions on source (merge 6 pairs of regions) + // Merge adjacent regions on source and target (6 pairs each). mergeAdjacentRegions(sourceConnection, uniqueTableName, 6); - - // Merge adjacent regions on target (merge 6 pairs of regions) mergeAdjacentRegions(targetConnection, uniqueTableName, 6); // Run sync tool again with SAME time range - should reprocess only deleted regions @@ -827,41 +738,43 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr String.valueOf(toTime)); SyncCountersResult counters2 = getSyncCounters(job2); - long totalSourceRows = remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; - long totalVerifiedChunks = remainingCounters.chunksVerified + counters2.chunksVerified; - long totalMismatchedChunks = remainingCounters.chunksMismatched + counters2.chunksMismatched; + long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified; assertEquals( "Remaining + Second run source rows should equal first run source rows. " + "Remaining: " - + remainingCounters.sourceRowsProcessed + ", Second run: " + counters2.sourceRowsProcessed - + ", Total: " + totalSourceRows + ", Expected: " + counters1.sourceRowsProcessed, + + setup.remainingCounters.sourceRowsProcessed + ", Second run: " + + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: " + + counters1.sourceRowsProcessed, counters1.sourceRowsProcessed, totalSourceRows); assertEquals( "Remaining + Second run target rows should equal first run target rows. " + "Remaining: " - + remainingCounters.targetRowsProcessed + ", Second run: " + counters2.targetRowsProcessed - + ", Total: " + totalTargetRows + ", Expected: " + counters1.targetRowsProcessed, + + setup.remainingCounters.targetRowsProcessed + ", Second run: " + + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: " + + counters1.targetRowsProcessed, counters1.targetRowsProcessed, totalTargetRows); - assertEquals("Remaining + Second run verified chunks should equal first run verified chunks. " - + "Remaining: " + remainingCounters.chunksVerified + ", Second run: " - + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected: " - + counters1.chunksVerified, counters1.chunksVerified, totalVerifiedChunks); - - assertEquals( - "Remaining + Second run mismatched chunks should equal first run mismatched chunks. " - + "Remaining: " + remainingCounters.chunksMismatched + ", Second run: " - + counters2.chunksMismatched + ", Total: " + totalMismatchedChunks + ", Expected: " - + counters1.chunksMismatched, - counters1.chunksMismatched, totalMismatchedChunks); + // Region merges between the two runs change mapper region boundaries, so the resume + // filter sees stale chunks that don't align to the new mapper's range and reprocesses + // them. The "remaining + second run >= first run" invariant still holds; equality does + // not. Row-count invariant above is preserved. + assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. " + + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: " + + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): " + + counters1.chunksVerified, totalVerifiedChunks >= counters1.chunksVerified); List checkpointEntriesAfterRerun = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); // After rerun with merges, we should have more entries as after deletion assertTrue("Should have checkpoint entries after rerun", - checkpointEntriesAfterRerun.size() > checkpointEntriesAfterDelete.size()); + checkpointEntriesAfterRerun.size() > setup.entriesAfterDelete.size()); + + // Both runs were non-dry-run, so repair ran inline. Target should converge. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -899,6 +812,11 @@ public void testSyncTableValidateIdempotentOnReRun() throws Exception { assertEquals("Checkpoint entries should be identical after idempotent run", checkpointEntriesAfterFirstRun, checkpointEntriesAfterSecondRun); + + // Both passes were non-dry-run with no drift to begin with; the repair flow ran as a + // no-op, target should still match source and no MISMATCHED rows should exist. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -950,6 +868,10 @@ public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception // checkpointed assertFalse("Should have checkpoint entries after second run", checkpointEntriesAfterSecondRun.isEmpty()); + + // No drift was introduced; repair flow should be a no-op even after concurrent splits. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -960,12 +882,15 @@ public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exceptio introduceAndVerifyTargetDifferences(uniqueTableName); // Run sync tool with both --schema and --table-name options - Job job = runSyncTool(uniqueTableName, "--schema", ""); - SyncCountersResult counters = getSyncCounters(job); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--schema", ""); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); // Validate counters validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -974,10 +899,15 @@ public void testSyncTableValidateInBackgroundMode() throws Exception { introduceAndVerifyTargetDifferences(uniqueTableName); - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); - String[] args = - new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum, - "--chunk-size", "1", "--to-time", String.valueOf(System.currentTimeMillis()) }; + // Pin the time window so the background dry-run pass and the repair pass below share + // the same checkpoint PK and the repair pass overwrites MISMATCHED → REPAIRED in place. + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + + Configuration conf = sourceClusterConf(); + String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", + targetZkQuorum, "--chunk-size", "1", "--dry-run", "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime) }; PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); tool.setConf(conf); @@ -995,6 +925,14 @@ public void testSyncTableValidateInBackgroundMode() throws Exception { validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + + // Now run the repair pass (foreground for synchronous assertions). Same time window so + // the dry-run-pass MISMATCHED rows are overwritten with REPAIRED. + runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1004,7 +942,7 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception { introduceAndVerifyTargetDifferences(uniqueTableName); // Create configuration with custom timeout values - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + Configuration conf = sourceClusterConf(); // Set custom timeout values (higher than defaults to ensure job succeeds) long customQueryTimeout = 900000L; // 15 minutes @@ -1017,17 +955,10 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception { conf.setLong(QueryServices.SYNC_TABLE_CLIENT_SCANNER_TIMEOUT_ATTRIB, customScannerTimeout); conf.setInt(QueryServices.SYNC_TABLE_RPC_RETRIES_COUNTER, customRpcRetries); - String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", - targetZkQuorum, "--chunk-size", "1", "--run-foreground", "--to-time", - String.valueOf(System.currentTimeMillis()) }; - - PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); - tool.setConf(conf); - int exitCode = tool.run(args); - - Job job = tool.getJob(); - assertNotNull("Job should not be null", job); - assertEquals("Tool should complete successfully with custom timeouts", 0, exitCode); + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--dry-run", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); // Verify that custom timeout configurations were applied to the job Configuration jobConf = job.getConfiguration(); @@ -1045,6 +976,12 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception { counters.logCounters(testName.getMethodName()); validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + + // Repair pass over the same window: convergence + no MISMATCHED rows remaining. + runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1073,9 +1010,9 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception { assertEquals("Source should have 10 rows (odd numbers 1-19)", 10, sourceCount); assertEquals("Target should have 15 rows (odd 1-19 + even 2-10)", 15, targetCount); - // Run sync tool to detect the extra rows interspersed on target - Job job = runSyncTool(uniqueTableName); - SyncCountersResult counters = getSyncCounters(job); + // Run dry-run + repair sharing the same time window. + RepairRunResult result = runSyncToolWithRepair(uniqueTableName); + SyncCountersResult counters = getSyncCounters(result.dryRunJob); validateSyncCounters(counters, 10, 15, 5, 5); validateMapperCounters(counters, 0, 4); @@ -1083,35 +1020,22 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception { List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - // Count mismatched entries in checkpoint table - int mismatchedCount = 0; - for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) { - if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) { - mismatchedCount++; - } - } - assertTrue("Should have mismatched entries for chunks with extra rows", mismatchedCount > 0); - - // Verify source and target are still different - List sourceRows = queryAllRows(sourceConnection, - "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID"); - List targetRows = queryAllRows(targetConnection, - "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID"); - assertEquals("Source should still have 10 rows", 10, sourceRows.size()); - assertEquals("Target should still have 15 rows", 15, targetRows.size()); - assertNotEquals("Source and target should have different data", sourceRows, targetRows); - - // Verify that source has only odd numbers - for (TestRow row : sourceRows) { - assertEquals("Source should only have odd IDs", 1, row.id % 2); - } - - // Verify that target has all numbers 1-11 (with gaps filled) and 13,15,17,19 - assertEquals("Target should have ID=1", 1, targetRows.get(0).id); - assertEquals("Target should have ID=2", 2, targetRows.get(1).id); - assertEquals("Target should have ID=10", 10, targetRows.get(9).id); - assertEquals("Target should have ID=11", 11, targetRows.get(10).id); - assertEquals("Target should have ID=19", 19, targetRows.get(14).id); + // Count mismatched entries in checkpoint table — after the repair pass, all MISMATCHED + // rows from the dry-run pass should have been overwritten with REPAIRED. + long mismatchedCount = countCheckpointsByStatus(checkpointEntries, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); + long repairedCount = countCheckpointsByStatus(checkpointEntries, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); + assertEquals("After repair, no MISMATCHED rows should remain", 0, mismatchedCount); + assertTrue("Should have REPAIRED rows after repair pass", repairedCount > 0); + + // After repair: target should converge to source (10 odd-id rows). The 5 extra even-id + // rows on target had only live cells, so tombstoneWholeRow can remove them. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertEquals("Source should have 10 rows", 10, + TestUtil.getRowCount(sourceConnection, uniqueTableName)); + assertEquals("Target should now also have 10 rows after repair tombstones the extras", 10, + TestUtil.getRowCount(targetConnection, uniqueTableName)); } @Test @@ -1119,48 +1043,25 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); // Introduce some mismatches on target before sync List mismatchIds = Arrays.asList(15, 35, 55, 75, 95); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); // Capture time range for the sync long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Create a thread that will perform splits on source cluster during sync - Thread sourceSplitThread = new Thread(() -> { - try { - // Split source at multiple points (creating more regions during sync) - List sourceSplits = Arrays.asList(20, 25, 40, 45, 60, 65, 80, 85, 95); - splitTableAt(sourceConnection, uniqueTableName, sourceSplits); - } catch (Exception e) { - LOGGER.error("Error during source splits", e); - } - }); - - // Create a thread that will perform splits on target cluster during sync - Thread targetSplitThread = new Thread(() -> { - try { - // Split target at different points than source (asymmetric region boundaries) - List targetSplits = Arrays.asList(11, 21, 31, 41, 51, 75, 81, 91); - splitTableAt(targetConnection, uniqueTableName, targetSplits); - } catch (Exception e) { - LOGGER.error("Error during target splits", e); - } - }); - - // Start split threads - sourceSplitThread.start(); - targetSplitThread.start(); + // Run splits on source/target concurrently with the sync. + Runnable splitJoiner = startConcurrentRegionWork( + () -> splitTableAt(sourceConnection, uniqueTableName, + Arrays.asList(20, 25, 40, 45, 60, 65, 80, 85, 95)), + () -> splitTableAt(targetConnection, uniqueTableName, + Arrays.asList(11, 21, 31, 41, 51, 75, 81, 91)), + "splits"); // Run sync tool while splits are happening Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); - // Wait for split threads to complete - sourceSplitThread.join(30000); // 30 second timeout - targetSplitThread.join(30000); + splitJoiner.run(); // Verify the job completed successfully despite concurrent splits assertTrue("Sync job should complete successfully despite concurrent splits", @@ -1176,14 +1077,178 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception { queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty()); - // Count mismatched entries - int mismatchedCount = 0; - for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) { - if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) { - mismatchedCount++; - } - } - assertTrue("Should have mismatched entries for modified rows", mismatchedCount >= 1); + // Concurrent splits may race with the first sync pass — a chunk that straddled a region + // boundary mid-flight can land in REPAIRED with stale boundaries; once REPAIRED, the + // resume filter skips it. Cleanup the checkpoint and run a dry-run + repair pass on the + // stable region layout to converge. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(uniqueTableName, null); + } + + /** + * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during + * the dry-run pass; this exercises the repair pass against splitting target regions, which is + * the production reality that exercises {@code flushRepairMutations}'s + * {@code NotServingRegionException} path → {@code firstFailureIdx} → {@code REPAIR_FAILED}. + * + *

    Convergence strategy: + *

      + *
    1. Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.
    2. + *
    3. Start concurrent splits on the target cluster, then run the repair pass. Some chunks + * may land in {@code REPAIR_FAILED} if a flush hits a region in transition — the + * resume filter re-enters those chunks on the next pass.
    4. + *
    5. Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED. + * {@code verifyDataIdentical} must succeed.
    6. + *
    + */ + @Test + public void testRepairWithConcurrentTargetSplits() throws Exception { + setupStandardTestWithReplication(uniqueTableName, 1, 100); + List mismatchIds = Arrays.asList(12, 24, 36, 48, 60, 72, 84, 96); + introduceMismatchesByIds(uniqueTableName, mismatchIds); + + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + + // Stage 1: stable dry-run populates MISMATCHED checkpoint rows. + Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Stable dry-run should succeed", dryRunJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); + assertTrue("Dry-run should detect at least one mismatched chunk", + dryRunCounters.chunksMismatched >= 1); + + // Stage 2: kick off target-side splits while the repair pass runs. Source splits left out + // because a target-side split is what surfaces flushRepairMutations failures. + Runnable splitJoiner = startConcurrentRegionWork(() -> { + // No source-side work; pass a trivial Runnable so startConcurrentRegionWork still wires + // both threads and the joiner times out cleanly. + }, () -> splitTableAt(targetConnection, uniqueTableName, + Arrays.asList(15, 25, 35, 45, 55, 65, 75, 85, 95)), "repair-splits"); + + Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + splitJoiner.run(); + assertTrue("Repair pass should not throw despite concurrent splits", repairJob.isSuccessful()); + + // Stage 3: stable convergence pass. Cleanup checkpoint so the resume filter doesn't skip + // chunks that were marked REPAIRED with stale boundaries during the racing pass. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + + assertNoMismatchedCheckpoints(uniqueTableName, null); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + } + + /** + * P4 (idempotent repair): Guards against a regression where repair claims REPAIRED but does + * not actually converge. Run a full dry-run + repair on a divergent table, clean the + * checkpoint, then run the same dry-run + repair again on the now-converged tables and assert + * the second pass is a no-op (zero mismatches detected, zero chunks repaired). + */ + @Test + public void testRepairIsIdempotent() throws Exception { + setupStandardTestWithReplication(uniqueTableName, 1, 50); + List mismatchIds = Arrays.asList(7, 14, 21, 28, 35, 42, 49); + introduceMismatchesByIds(uniqueTableName, mismatchIds); + + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + + // Pass 1: detect + repair. + RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("First dry-run should succeed", firstRun.dryRunJob.isSuccessful()); + assertTrue("First repair should succeed", firstRun.repairJob.isSuccessful()); + + SyncCountersResult firstDryRunCounters = getSyncCounters(firstRun.dryRunJob); + assertTrue("First dry-run should detect mismatched chunks", + firstDryRunCounters.chunksMismatched >= 1); + Counters firstRepairCounters = firstRun.repairJob.getCounters(); + assertTrue("First repair should mark chunks REPAIRED", + firstRepairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue() >= 1); + + // Tables must be data-identical after the first repair. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + + // Pass 2 prep: clean the checkpoint so the resume filter doesn't skip already-VERIFIED + // chunks — the second dry-run must scan the full layout from scratch. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + + // Pass 2: same window, no further mutations. Both passes must be no-ops. + RepairRunResult secondRun = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Second dry-run should succeed", secondRun.dryRunJob.isSuccessful()); + assertTrue("Second repair should succeed", secondRun.repairJob.isSuccessful()); + + SyncCountersResult secondDryRunCounters = getSyncCounters(secondRun.dryRunJob); + assertEquals("Second dry-run should detect zero mismatches", 0, + secondDryRunCounters.chunksMismatched); + + // Second repair pass should be a no-op: nothing repaired, nothing failed. + assertRepairChunkAndMapperCounters(secondRun.repairJob.getCounters(), 0, 0, 0, 0, 0); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + } + + /** + * P5 (all-tombstoned target-extra row): Source has no row K; target has row K but every cell + * is already a tombstone. {@code tombstoneWholeRow} returns {@code liveCellsTombstoned == 0} + * (line ~217 of {@code PhoenixSyncTableChunkRepairer}) → {@code drift.rowsCannotRepair++}, + * {@code rowsExtraOnTarget} unchanged. Pins the rare "target row extra but already + * fully-tombstoned" path that currently has zero coverage. + */ + @Test + public void testRepairAllTombstonedTargetRowExtra() throws Exception { + final int rowId = 5; + final int otherRowId = 4; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + long fromTime = 0L; + final long ts = base + 1L; + final long tombstoneTs = base + 2L; + + // Plant a sentinel row on both sides so the verifier has *something* to compare and + // produces a non-empty chunk hash. The sentinel row stays clean — the test focuses on + // rowId=5 only. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')"); + scnSrc.commit(); + } + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')"); + scnTgt.commit(); + } + + // Target only: plant raw DeleteColumn tombstones for row K with NO underlying Put cells. + // Under --raw-scan the row surfaces (tombstones are themselves cells) but every cell is a + // Delete, so tombstoneWholeRow() returns liveCellsTombstoned == 0 → drift.rowsCannotRepair + // increments and rowsExtraOnTarget stays 0. This pins the rare "row exists in raw view but + // has no live cells to tombstone" branch. + byte[] rowKey = integerRowKey(rowId); + writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs); + writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE", + tombstoneTs); + writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs); + + // Spin until wall-clock advances past the highest cell timestamp so --to-time + // (which defaults to currentTimeMillis()) covers our planted cells. + while (System.currentTimeMillis() <= tombstoneTs) { + // spin + } + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + "--raw-scan"); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + // Row K's cells were already all tombstones — no live cells to tombstone again, and + // the row is flagged unrepairable. + assertRepairRowCounters(result.repairJob.getCounters(), 0, 0, 1); } @Test @@ -1213,24 +1278,23 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception "SELECT ID, NAME, NAME_VALUE FROM " + uniqueTableName + " ORDER BY ID"); assertEquals("Row values should be identical", sourceRows, targetRows); - // Run sync tool - should detect timestamp differences as mismatches - Job job = runSyncTool(uniqueTableName); + // Dry-run sync — should detect timestamp differences as mismatches because timestamps are + // included in the hash calculation. We use dry-run so the MISMATCHED rows persist for the + // assertions below; this is a residual-drift case where Phoenix-level queries already + // see identical values (timestamps differ but values match) so the repair phase is not + // exercised here. + Job job = runSyncTool(uniqueTableName, "--dry-run"); SyncCountersResult counters = getSyncCounters(job); // Validate counters - all rows should be processed and all chunks mismatched - // because timestamps are included in the hash calculation validateSyncCounters(counters, 10, 10, 0, 10); // Verify checkpoint entries show mismatches List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - int mismatchedCount = 0; - for (PhoenixSyncTableCheckpointOutputRow entry : checkpointEntries) { - if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) { - mismatchedCount++; - } - } + long mismatchedCount = countCheckpointsByStatus(checkpointEntries, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); assertTrue("Should have mismatched entries due to timestamp differences", mismatchedCount > 0); } @@ -1246,45 +1310,23 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception { // Introduce some mismatches on target before sync List mismatchIds = Arrays.asList(10, 30, 50, 70, 90); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); // Capture time range for the sync long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Create a thread that will perform merges on source cluster during sync - Thread sourceMergeThread = new Thread(() -> { - try { - // Merge adjacent regions on source - mergeAdjacentRegions(sourceConnection, uniqueTableName, 6); - } catch (Exception e) { - LOGGER.error("Error during source merges", e); - } - }); - - // Create a thread that will perform merges on target cluster during sync - Thread targetMergeThread = new Thread(() -> { - try { - mergeAdjacentRegions(targetConnection, uniqueTableName, 6); - } catch (Exception e) { - LOGGER.error("Error during target merges", e); - } - }); - - // Start merge threads - sourceMergeThread.start(); - targetMergeThread.start(); + // Run merges on source/target concurrently with the sync. + Runnable mergeJoiner = startConcurrentRegionWork( + () -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6), + () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6), + "merges"); // Run sync tool while merges are happening Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); - // Wait for merge threads to complete - sourceMergeThread.join(30000); // 30 second timeout - targetMergeThread.join(30000); + mergeJoiner.run(); // Verify the job completed successfully despite concurrent merges assertTrue("Sync job should complete successfully despite concurrent merges", @@ -1307,6 +1349,14 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception { // Second run should process ZERO rows (all checkpointed despite region merges) validateSyncCounters(counters2, 0, 0, 0, 0); + + // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter + // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass + // on the stable region layout to converge. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1316,36 +1366,23 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception { // Introduce mismatches scattered across the dataset List mismatchIds = Arrays.asList(15, 25, 35, 45, 55, 75); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } + introduceMismatchesByIds(uniqueTableName, mismatchIds); - // First, run without aggressive paging to establish baseline chunk count - Configuration baselineConf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); - String[] baselineArgs = new String[] { "--table-name", uniqueTableName, "--target-cluster", - targetZkQuorum, "--run-foreground", "--chunk-size", "10240", "--to-time", - String.valueOf(System.currentTimeMillis()) }; - - PhoenixSyncTableTool baselineTool = new PhoenixSyncTableTool(); - baselineTool.setConf(baselineConf); - baselineTool.run(baselineArgs); - Job baselineJob = baselineTool.getJob(); - long baselineChunkCount = - baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); + // First, run --dry-run without aggressive paging to establish baseline chunk count. + // Dry-run so the baseline doesn't repair the drift before the paging pass below sees it. + int chunkSize = 10240; + long baselineChunkCount = captureBaselineChunkCount(uniqueTableName, chunkSize); // Configure paging with aggressive timeouts to force mid-chunk timeouts - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + Configuration conf = sourceClusterConf(); conf.setBoolean(QueryServices.PHOENIX_SERVER_PAGING_ENABLED_ATTRIB, true); conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1); - int chunkSize = 10240; - long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Run sync tool while splits are happening - Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + // Dry-run with paging to assert chunk-count expansion under mid-chunk timeouts. + Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); // Verify the job completed successfully despite paging timeouts @@ -1357,7 +1394,7 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception { // Despite paging timeouts, no rows should be lost validateSyncCountersWithMinChunk(counters, 100, 100, 1, 1); - long pagingChunkCount = counters.chunksVerified; + long pagingChunkCount = counters.chunksVerified + counters.chunksMismatched; assertTrue( "Paging should create more chunks than baseline due to mid-chunk timeouts. " + "Baseline: " @@ -1368,6 +1405,17 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception { List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty()); + + // Now run the repair pass over the same window so target converges. Confirms paging + // does not block the repair flow. Clean up the dry-run pass's MISMATCHED checkpoint + // rows first so the repair pass starts fresh — paging-driven chunk boundaries differ + // between passes and stale MISMATCHED rows from the dry-run can land outside the new + // boundary set. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1377,65 +1425,35 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception // Introduce mismatches scattered across the dataset List mismatchIds = Arrays.asList(15, 25, 35, 45, 55, 75); - for (int id : mismatchIds) { - upsertRowsOnTarget(targetConnection, uniqueTableName, new int[] { id }, - new String[] { "MODIFIED_NAME_" + id }); - } - - // First, run without aggressive paging to establish baseline chunk count - Configuration baselineConf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); - String[] baselineArgs = new String[] { "--table-name", uniqueTableName, "--target-cluster", - targetZkQuorum, "--run-foreground", "--chunk-size", "10240", "--to-time", - String.valueOf(System.currentTimeMillis()) }; + introduceMismatchesByIds(uniqueTableName, mismatchIds); - PhoenixSyncTableTool baselineTool = new PhoenixSyncTableTool(); - baselineTool.setConf(baselineConf); - baselineTool.run(baselineArgs); - Job baselineJob = baselineTool.getJob(); - long baselineChunkCount = - baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); + // First, run --dry-run without aggressive paging to establish baseline chunk count. + // Dry-run so the baseline doesn't repair the drift before the paging pass below sees it. + int chunkSize = 10240; + long baselineChunkCount = captureBaselineChunkCount(uniqueTableName, chunkSize); // Configure paging with aggressive timeouts to force mid-chunk timeouts - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + Configuration conf = sourceClusterConf(); conf.setBoolean(QueryServices.PHOENIX_SERVER_PAGING_ENABLED_ATTRIB, true); conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1); - int chunkSize = 10240; - - // Create a thread that will perform splits on source cluster during sync - Thread sourceSplitThread = new Thread(() -> { - try { - List sourceSplits = Arrays.asList(12, 22, 32, 42, 52, 63, 72, 82, 92, 98); - splitTableAt(sourceConnection, uniqueTableName, sourceSplits); - } catch (Exception e) { - LOGGER.error("Error during source splits", e); - } - }); - - // Create a thread that will perform splits on target cluster during sync - Thread targetSplitThread = new Thread(() -> { - try { - List targetSplits = Arrays.asList(13, 23, 33, 43, 53, 64, 74, 84, 95, 99); - splitTableAt(targetConnection, uniqueTableName, targetSplits); - } catch (Exception e) { - LOGGER.error("Error during target splits", e); - } - }); - - // Start split threads - sourceSplitThread.start(); - targetSplitThread.start(); + // Run splits on source/target concurrently with the sync. + Runnable splitJoiner = startConcurrentRegionWork( + () -> splitTableAt(sourceConnection, uniqueTableName, + Arrays.asList(12, 22, 32, 42, 52, 63, 72, 82, 92, 98)), + () -> splitTableAt(targetConnection, uniqueTableName, + Arrays.asList(13, 23, 33, 43, 53, 64, 74, 84, 95, 99)), + "splits"); long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Run sync tool while splits are happening - Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + // Dry-run sync while splits are happening — drift must remain on target so the chunk-count + // assertion below has work to do (otherwise an inline repair would converge mid-pass). + Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); - // Wait for split threads to complete - sourceSplitThread.join(30000); // 30 second timeout - targetSplitThread.join(30000); + splitJoiner.run(); // Verify the job completed successfully despite concurrent splits and paging timeouts assertTrue("Sync job should complete successfully despite paging and concurrent splits", @@ -1449,7 +1467,7 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception // Paging should create MORE chunks than baseline // Concurrent region splits may also create additional chunks as mappers process new regions - long pagingChunkCount = counters.chunksVerified; + long pagingChunkCount = counters.chunksVerified + counters.chunksMismatched; assertTrue( "Paging should create more chunks than baseline due to mid-chunk timeouts. " + "Baseline: " @@ -1460,6 +1478,16 @@ public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty()); + + // Run the repair pass over the same window so target converges. Confirms paging plus + // concurrent splits do not block the repair flow. Clean up the dry-run pass's MISMATCHED + // checkpoint rows first so the resume filter doesn't leave stale MISMATCHED entries that + // sit outside the repair pass's chunk boundaries (paging + splits change boundary set). + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1468,22 +1496,11 @@ public void testSyncTableMapperFailsWithNonExistentTable() throws Exception { // Try to run sync tool on a NON-EXISTENT table String nonExistentTable = "NON_EXISTENT_TABLE_" + System.currentTimeMillis(); - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); String[] args = new String[] { "--table-name", nonExistentTable, "--target-cluster", targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; - PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); - tool.setConf(conf); - - try { - int exitCode = tool.run(args); - assertTrue( - String.format("Table %s does not exist, mapper setup should fail", nonExistentTable), - exitCode != 0); - } catch (Exception ex) { - fail("Tool should return non-zero exit code on failure instead of throwing exception: " - + ex.getMessage()); - } + assertSyncToolFails(args, + String.format("Table %s does not exist, mapper setup should fail", nonExistentTable)); } @Test @@ -1493,23 +1510,12 @@ public void testSyncTableMapperFailsWithInvalidTargetCluster() throws Exception // Try to run sync tool with INVALID target cluster ZK quorum. String invalidTargetZk = "invalid-zk-host:2181:/hbase"; - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", invalidTargetZk, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; - PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); - tool.setConf(conf); - - try { - int exitCode = tool.run(args); - assertTrue( - String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk), - exitCode != 0); - } catch (Exception ex) { - fail("Tool should return non-zero exit code on failure instead of throwing exception: " - + ex.getMessage()); - } + assertSyncToolFails(args, + String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk)); } @Test @@ -1523,22 +1529,12 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception { // Don't create table on target - this will cause mapper map() to fail // when trying to scan the non-existent target table - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; - PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); - tool.setConf(conf); - - try { - int exitCode = tool.run(args); - assertTrue(String.format( - "Table %s does not exist on target cluster, mapper map() should fail during target scan", - uniqueTableName), exitCode != 0); - } catch (Exception ex) { - fail("Tool should return non-zero exit code on failure instead of throwing exception: " - + ex.getMessage()); - } + assertSyncToolFails(args, String.format( + "Table %s does not exist on target cluster, mapper map() should fail during target scan", + uniqueTableName)); } @Test @@ -1551,47 +1547,21 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { long fromTime = 0L; long toTime = System.currentTimeMillis(); - // First run: Sync should succeed and create checkpoint entries for all mappers - Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - SyncCountersResult counters1 = getSyncCounters(job1); + // First run + 75% deletion preamble (shared with other partial-rerun tests) + PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); + SyncCountersResult counters1 = setup.firstRunCounters; // Validate first run succeeded - assertTrue("First run should succeed", job1.isSuccessful()); + assertTrue("First run should succeed", setup.firstRunJob.isSuccessful()); validateSyncCounters(counters1, 10, 10, 10, 0); - // Query checkpoint table to get all mapper entries - List allCheckpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - - // Separate mapper and chunk entries using utility method - SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(allCheckpointEntries); - List mapperEntries = separated.mappers; - List allChunks = separated.chunks; - - assertFalse("Should have at least 1 mapper entries after first run", mapperEntries.isEmpty()); - - // Select 3/4th of chunks from each mapper to delete (simulating partial rerun) - // We repro the partial run via deleting some entries from checkpoint table and re-running the - // tool. - List chunksToDelete = - selectChunksToDeleteFromMappers(sourceConnection, uniqueTableName, targetZkQuorum, fromTime, - toTime, null, mapperEntries, 0.75); - - // Delete all mappers and selected chunks using utility method - deleteCheckpointEntries(sourceConnection, uniqueTableName, targetZkQuorum, null, mapperEntries, - chunksToDelete); - - // Verify mapper entries were deleted - List checkpointEntriesAfterDelete = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); SeparatedCheckpointEntries separatedAfterDelete = - separateMapperAndChunkEntries(checkpointEntriesAfterDelete); + separateMapperAndChunkEntries(setup.entriesAfterDelete); assertEquals("Should have 0 mapper entries after deleting all mappers", 0, separatedAfterDelete.mappers.size()); assertEquals("Should have remaining chunk entries after deletion", - allChunks.size() - chunksToDelete.size(), separatedAfterDelete.chunks.size()); + setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterDelete.chunks.size()); // Drop target table to cause mapper failures during second run. // Use HBase Admin directly because Phoenix DROP TABLE IF EXISTS via targetConnection @@ -1602,37 +1572,637 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { targetAdmin.disableTable(hbaseTableName); targetAdmin.deleteTable(hbaseTableName); } - LOGGER.info("Dropped target table to cause mapper failures"); + LOGGER.info("Dropped target table to cause mapper failures"); + + // Second run: Job should fail (exit code != 0) because target table is missing + String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", + targetZkQuorum, "--run-foreground", "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime) }; + + assertSyncToolFails(args, + "Second run should fail with non-zero exit code due to missing target table"); + + // Remaining chunk entries that we dint delete should still persist despite job failure + List checkpointEntriesAfterFailedRun = + queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + SeparatedCheckpointEntries separatedAfterFailedRun = + separateMapperAndChunkEntries(checkpointEntriesAfterFailedRun); + + // After the failed run: + // - No mapper entries should exist (we deleted them all, and the job failed before creating new + // ones) + // - Only the remaining chunk entries (1/4th) should persist + assertEquals("Should have 0 mapper entries after failed run", 0, + separatedAfterFailedRun.mappers.size()); + assertEquals("Remaining chunk entries should persist after failed run", + setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterFailedRun.chunks.size()); + } + + /** + * P1 (hidden-version unwinding): Verifies the most subtle correctness path in the repairer — + * tombstoneTargetCell case 3 from {@code PhoenixSyncTableChunkRepairer.tombstoneTargetCell}. + * + *

    Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code + * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and + * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is + * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above + * source's mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1. + * + *

    Without this test, a regression that "fixes" only the visible cell (case 2) would leave + * target reading "bob" after a successful-looking repair pass. + */ + @Test + public void testRepairUnwindsHiddenTargetVersions() throws Exception { + final int rowId = 5; + // Two clusters, no replication — we plant cells deterministically on each side. + long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7"); + + long fromTime = 0L; + final long sourceTs = base + 1L; + final long targetT1 = base + 2L; + final long targetT2 = base + 3L; + + byte[] rowKey = integerRowKey(rowId); + String family = "0"; // COLUMN_ENCODED_BYTES=NONE → family is "0" + String qualifier = "NAME"; + + // Source: single Put(NAME, "alice", T=100). Empty-key cell is needed so the row is + // visible to Phoenix scans (and thus to the verifier). Use an SCN connection for that. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + + // Target: insert via SCN at T1 then again at T2 to leave two NAME versions; with VERSIONS=2 + // both versions are retained. Visible read is "carol" (T2); "bob" (T1) is one-version-hidden. + try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) { + scnTgtT1.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); + scnTgtT1.commit(); + } + try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) { + scnTgtT2.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); + scnTgtT2.commit(); + } + + // Sanity: target's visible NAME is "carol" before repair. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("Pre-repair target visible NAME should be carol", "carol", rs.getString(1)); + } + } + + // Run dry-run + repair sharing the same time window with --read-all-versions so the + // verifier and repairer both see the hidden version. + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + "--read-all-versions"); + + assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + Counters repairCounters = result.repairJob.getCounters(); + // Two NAME versions ("bob"@T1 + "carol"@T2) and two empty-key versions on target — all sit + // at timestamps that don't match source's single sourceTs, so each gets counted as + // either "different" or "extra" depending on the diff branch. We require at least 2 extras + // (the two extra NAME versions vs source's single mirror) — the exact split between + // CELLS_EXTRA and CELLS_DIFFERENT depends on per-qualifier matching. Post-repair raw scan + // assertions below pin the structural outcome. + assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions", + repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2); + + // Post-repair, Phoenix's standard read on target must see source's "alice", NOT "bob". + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("Post-repair target NAME must be alice (hidden version unwound)", "alice", + rs.getString(1)); + } + } + + // Raw scan: target should have two Delete markers at T2 and T1 plus source's mirror Put@T0. + try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { + Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true); + scan.readAllVersions(); + int nameDeletes = 0; + int namePutAtSourceTs = 0; + try (ResultScanner sc = targetHTable.getScanner(scan)) { + for (Result r; (r = sc.next()) != null;) { + for (Cell c : r.rawCells()) { + if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family)) + && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) { + if (CellUtil.isDelete(c)) { + nameDeletes++; + } else if (c.getTimestamp() == sourceTs) { + namePutAtSourceTs++; + } + } + } + } + } + assertEquals("Two delete markers (one for each target NAME version) expected", 2, + nameDeletes); + assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); + } + } + + /** + * P2 (partial-mirror shadow): Verifies the {@code RowMirrorStatus.PARTIALLY_MIRRORED} branch + * indirectly via {@code generateMutationForDiffCells} — both rows exist; one source cell is + * shadowed by a target tombstone, sibling cells mirror successfully. {@code anyCellUnrepairable} + * propagates up to {@code drift.rowsCannotRepair++} while no cell counter increments for the + * shadowed cell (mirror returned false, so nothing was written and nothing counted). + * + *

    Setup: row K exists on both sides via a matching {@code NAME_VALUE} cell. Target has a + * pre-existing {@code DeleteColumn(NAME, T=300)} shadowing any future {@code NAME} Put at + * {@code ts <= 300}. Source's {@code NAME, "alice", T=200} would land on disk but stay + * invisible — repair detects this upfront via {@code wouldShadow} and skips the doomed write. + */ + @Test + public void testRepairPartialShadowWithinRow() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + long fromTime = 0L; + final long sourceTs = base + 1L; + final long shadowTombstoneTs = base + 2L; + + // Source: row K with NAME="alice" and NAME_VALUE=99, all at sourceTs. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + scnSrc.commit(); + } + + // Target: row K with only NAME_VALUE=99 at sourceTs (matches source's NAME_VALUE). + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), sourceTs)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnTgt.commit(); + } + // Plant a DeleteColumn tombstone on target's NAME at shadowTombstoneTs, which shadows any + // source mirror at ts <= shadowTombstoneTs. + writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", + shadowTombstoneTs); + + // Spin until wall-clock advances past the highest cell timestamp so --to-time + // (which defaults to currentTimeMillis()) covers our planted cells. + while (System.currentTimeMillis() <= shadowTombstoneTs) { + // spin + } + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + "--raw-scan"); + + assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); + assertTrue("Repair pass should succeed (shadowing is correctness-only, not a job error)", + result.repairJob.isSuccessful()); + + Counters repairCounters = result.repairJob.getCounters(); + // Source's NAME mirror is suppressed by the shadow → no cell counter ticks; row is unrepairable. + assertRepairCellCounters(repairCounters, 0, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", + repairCounters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + + // Post-repair, target's read view of NAME for row K is still null (DeleteColumn at T=300 + // covers everything <= T=300 — including any source mirror we *might* have written). The + // assertion validates the repair refused to write the doomed Put. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertNull("NAME should still be null on target — shadow was respected", + rs.getString(1)); + } + } + } + + /** + * P2 (cell-missing branch): same row exists on both sides, source has an extra column the + * target lacks. {@code generateMutationForDiffCells} should mirror it through the + * {@code cellMissing++} branch (source-only cell, no shadow on target). + */ + @Test + public void testRepairCellMissingOnTarget() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long ts = base + 1L; + + // Source: row K with NAME and NAME_VALUE. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { + scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + scnSrc.commit(); + } + // Target: row K with only NAME_VALUE matching source. NAME is absent. + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnTgt.commit(); + } + + // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) + // covers the planted cells. + while (System.currentTimeMillis() <= ts) { + // spin + } + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(System.currentTimeMillis())); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + assertRepairCellCounters(result.repairJob.getCounters(), 1, 0, 0, 0); + + // Post-repair: target's NAME should equal source's "alice". + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("alice", rs.getString(1)); + } + } + } + + /** + * P2 (cell-extra branch): same row exists on both sides, target has an extra column source + * lacks. {@code generateMutationForDiffCells} should tombstone it via the + * {@code cellExtra++} branch ({@code tombstoneTargetCell} with {@code sourceMaxTs == null} + * → {@code Delete.addColumns}). + */ + @Test + public void testRepairCellExtraOnTarget() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long ts = base + 1L; + + // Source: row K with only NAME_VALUE. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnSrc.commit(); + } + // Target: row K with same NAME_VALUE plus an extra raw NAME cell at the same ts. + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnTgt.commit(); + } + writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts, + Bytes.toBytes("bob")); + + // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) + // covers the planted cells. + while (System.currentTimeMillis() <= ts) { + // spin + } + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(System.currentTimeMillis())); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + assertRepairCellCounters(result.repairJob.getCounters(), 0, 1, 0, 0); + + // Post-repair: target's NAME should be tombstoned and read as null. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertNull(rs.getString(1)); + } + } + } + + /** + * P2 (cell-different branch): same row, same {@code (cf, q, ts)} coords, different value. + * {@code generateMutationForDiffCells} should hit the {@code cellDifferent++} branch via the + * {@code !matchingValue} check at the head of the loop. + */ + @Test + public void testRepairCellDifferentValue() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long ts = base + 1L; + + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); + scnTgt.commit(); + } + + // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) + // covers the planted cells. + while (System.currentTimeMillis() <= ts) { + // spin + } + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(System.currentTimeMillis())); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + assertRepairCellCounters(result.repairJob.getCounters(), 0, 0, 1, 0); + + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("alice", rs.getString(1)); + } + } + } + + /** + * P6 (asymmetric load-target time-range): Source has Put at T=200 inside the user's window; + * target has a {@code DeleteColumn} planted at T=600 — strictly above {@code --to-time T=500}. + * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so + * the diff routes to {@code mirrorWholeRow} in + * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside, + * {@code loadTargetRowRecord} deliberately uses {@code (fromTime, MAX_VALUE)} (line 487) — so it + * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200 + * (DeleteColumn covers ts <= T=600). Result: source's mirror is suppressed, the row is + * flagged unrepairable. + */ + @Test + public void testRepairShadowFromTombstoneAboveToTime() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long fromTime = 0L; + final long sourceTs = base + 1L; + final long toTime = base + 2L; + final long tombstoneTs = base + 3L; + + // Source has the row inside the diff window. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + // Target has a tombstone strictly above --to-time. Diff scan won't see it; loadTargetRowRecord + // still will. + writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", + tombstoneTs); + + // Phoenix requires --to-time <= currentTimeMillis() at tool-run. Spin until wall-clock + // moves past tombstoneTs (the highest cell ts in this test) so toTime is unambiguously in + // the past. + while (System.currentTimeMillis() <= tombstoneTs) { + // spin + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); + assertTrue("Repair should succeed (shadowing is correctness-only)", + result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // Phoenix UPSERT plants NAME *and* the empty-key cell ("_0"). DeleteColumn shadows only NAME — + // "_0" mirrors through (rowsMissing++), and the row is unrepairable because NAME was suppressed. + assertRepairRowCounters(c, 1, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", + c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + + // Post-repair: target's NAME should still read as null. The shadow was respected, so no Put + // for NAME landed (only the empty-key cell, which gives the row visible existence with NAME + // covered by the DeleteColumn tombstone above it). + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue("Row exists on target via mirrored empty-key cell", rs.next()); + assertNull("NAME should still be null — DeleteColumn shadow respected", rs.getString(1)); + } + } + } + + /** + * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny + * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple + * times. Validates that every row converges despite the mid-flush boundary — i.e., no Put + * gets dropped because pendingPuts/pendingDeletes were drained mid-iteration. + */ + @Test + public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { + // No replication — seed source manually so target legitimately lacks the rows. + createRepairTestTableOnBothClusters(uniqueTableName, 1, null); + + // Introduce extra rows on source that target lacks. Each row → at least 2 cells (NAME and the + // empty-key cell), so a batch size of 2 forces a flush every row, exercising the mid-stream + // flush in generateMutationForDiffRows. + int[] sourceOnlyIds = new int[] { 100, 101, 102, 103, 104, 105, 106, 107 }; + String[] sourceOnlyNames = new String[sourceOnlyIds.length]; + for (int i = 0; i < sourceOnlyIds.length; i++) { + sourceOnlyNames[i] = "extra_" + sourceOnlyIds[i]; + } + upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnlyIds, sourceOnlyNames); + sourceConnection.commit(); + + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + + Configuration conf = sourceClusterConfWithRepairBatchSize(2); + + // Stage 1: dry-run. + Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run", + "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Dry-run should succeed", dryRunJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); + assertTrue("Dry-run should detect mismatched chunks", dryRunCounters.chunksMismatched >= 1); + + // Stage 2: repair with the same small batch size. + Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Repair should succeed despite small batch size", repairJob.isSuccessful()); + + Counters repairCounters = repairJob.getCounters(); + assertTrue("All source-only rows should be marked missing on target", + repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() + >= sourceOnlyIds.length); + assertEquals("No row should be flagged unrepairable", 0, + repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + + // Verify each source-only row landed on target with the right NAME. + for (int i = 0; i < sourceOnlyIds.length; i++) { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, sourceOnlyIds[i]); + try (ResultSet rs = ps.executeQuery()) { + assertTrue("Row " + sourceOnlyIds[i] + " should exist on target after repair", + rs.next()); + assertEquals("Row " + sourceOnlyIds[i] + " NAME should match source", + sourceOnlyNames[i], rs.getString(1)); + } + } + } + } + + /** + * P8 ({@code --raw-scan} + {@code --read-all-versions} interplay): a multi-version row on + * source that includes an in-window {@code DeleteColumn} between two Puts. Target lags with + * only the older Put. Repair must mirror the missing tombstone (preserving its subtype via + * {@code mirrorSourceCell} in + * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} routing Delete cells + * through {@code Delete#add}) and the missing newer Put. + */ + @Test + public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 3, "3, 7"); + + final long fromTime = 0L; + final long t1 = base + 1L; + final long t2 = base + 2L; + final long t3 = base + 3L; + + // Source: Put@T1 → DeleteColumn@T2 → Put@T3. + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), t1)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); + scnSrc.commit(); + } + writeRawDeleteColumn(sourceConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", t2); + try (Connection scnSrc2 = openConnectionAtScn(CLUSTERS.getZkUrl1(), t3)) { + scnSrc2.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')"); + scnSrc2.commit(); + } + + // Target: only the oldest Put@T1. + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), t1)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); + scnTgt.commit(); + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + "--raw-scan", "--read-all-versions"); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // Source has NAME@T1 Put, NAME@T2 DeleteColumn, NAME@T3 Put + empty-key@T1 and @T3. Target has + // only NAME@T1 + empty-key@T1, so 3 cells missing: NAME-tombstone@T2, NAME-Put@T3, empty-key@T3. + assertRepairCellCounters(c, 3, 0, 0, 0); + + // Post-repair raw scan on target should show: Put@T3, DeleteColumn@T2, Put@T1 for the NAME + // qualifier (rawCells reverse-ts ordered). + int observedPuts = 0; + int observedDeleteColumns = 0; + long observedNewestPutTs = -1L; + try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { + Scan scan = new Scan(); + scan.withStartRow(integerRowKey(rowId), true); + scan.withStopRow(integerRowKey(rowId), true); + scan.setRaw(true); + scan.readAllVersions(); + try (ResultScanner scanner = targetHTable.getScanner(scan)) { + Result r = scanner.next(); + assertNotNull("Target row should exist", r); + for (Cell cell : r.rawCells()) { + if (Bytes.equals(CellUtil.cloneQualifier(cell), Bytes.toBytes("NAME"))) { + if (CellUtil.isDelete(cell)) { + observedDeleteColumns++; + } else { + observedPuts++; + observedNewestPutTs = Math.max(observedNewestPutTs, cell.getTimestamp()); + } + } + } + } + } + assertEquals("Target should have both NAME Puts after repair", 2, observedPuts); + assertEquals("Target should have the mirrored DeleteColumn after repair", 1, + observedDeleteColumns); + assertEquals("Newest mirrored Put should sit at T3", t3, observedNewestPutTs); + + // Read-side: NAME under default visibility should now be null (T3 Put → T2 DeleteColumn covers + // T1; visible state is "deleted but tombstone caps NAME"). Phoenix sees no value. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + // The newest Put is T3 — reads see "v3" since T3 > tombstone T2. + assertTrue(rs.next()); + assertEquals("v3", rs.getString(1)); + } + } + } - // Second run: Job should fail (exit code != 0) because target table is missing - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); - String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", - targetZkQuorum, "--run-foreground", "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime) }; + /** + * P9 (mixed Put+Delete batch under small {@code repairBatchSize}): many missing source rows AND + * many extra target rows in the same chunk. With {@code repairBatchSize=4}, most flushes + * straddle a Put/Delete boundary and exercise {@code flushRepairMutations} in + * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} on its mixed + * {@code Table#batch} path. Validates that no mutation gets dropped at the batch boundary. + */ + @Test + public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { + // No replication — seed each side independently so source-only and target-only rows truly + // diverge. + createRepairTestTableOnBothClusters(uniqueTableName, 1, null); + + // Add 5 source-only rows (will need Puts) and 5 target-only rows (will need Deletes), all + // inside the same chunk so they queue together in a single mapper's pendingPuts/pendingDeletes + // and get flushed as mixed batches. + int[] sourceOnly = new int[] { 200, 201, 202, 203, 204 }; + String[] sourceOnlyNames = new String[] { "s200", "s201", "s202", "s203", "s204" }; + upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnly, sourceOnlyNames); + sourceConnection.commit(); - PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); - tool.setConf(conf); - int exitCode = tool.run(args); + int[] targetOnly = new int[] { 300, 301, 302, 303, 304 }; + String[] targetOnlyNames = new String[] { "t300", "t301", "t302", "t303", "t304" }; + upsertRowsOnTarget(targetConnection, uniqueTableName, targetOnly, targetOnlyNames); + targetConnection.commit(); - // Job should fail - assertTrue("Second run should fail with non-zero exit code due to missing target table", - exitCode != 0); - LOGGER.info("Second run failed as expected with exit code: {}", exitCode); + long fromTime = 0L; + long toTime = System.currentTimeMillis(); - // Remaining chunk entries that we dint delete should still persist despite job failure - List checkpointEntriesAfterFailedRun = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - SeparatedCheckpointEntries separatedAfterFailedRun = - separateMapperAndChunkEntries(checkpointEntriesAfterFailedRun); + Configuration conf = sourceClusterConfWithRepairBatchSize(4); - // After the failed run: - // - No mapper entries should exist (we deleted them all, and the job failed before creating new - // ones) - // - Only the remaining chunk entries (1/4th) should persist - assertEquals("Should have 0 mapper entries after failed run", 0, - separatedAfterFailedRun.mappers.size()); - assertEquals("Remaining chunk entries should persist after failed run", - allChunks.size() - chunksToDelete.size(), separatedAfterFailedRun.chunks.size()); + Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run", + "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Dry-run should succeed", dryRunJob.isSuccessful()); + + SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); + assertTrue("Dry-run should detect all source-only rows as missing", + dryRunCounters.rowsMissingOnTarget >= sourceOnly.length); + assertTrue("Dry-run should detect all target-only rows as extra", + dryRunCounters.rowsExtraOnTarget >= targetOnly.length); + + Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Repair should succeed", repairJob.isSuccessful()); + + Counters c = repairJob.getCounters(); + assertTrue("All source-only rows should be marked missing", + c.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() >= sourceOnly.length); + assertTrue("All target-only rows should be marked extra", + c.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() >= targetOnly.length); + assertEquals("No chunk should fail repair", 0, + c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue()); + + // Convergence pass: cleanup checkpoint and re-run a stable repair to assert no chunks are + // mismatched after the mixed-batch flushes. + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(uniqueTableName, null); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @Test @@ -1694,7 +2264,13 @@ public void testSyncTableWithDeleteAndCompactionOnSource() throws Exception { assertTrue("Third run with raw scan after compaction should succeed", job3.isSuccessful()); validateSyncCounters(counters3, 10, 11, 9, 1); - validateMapperCounters(counters3, 3, 1); + // Repair runs inline (non-dry-run). Under --raw-scan target has residual cells for row 100 + // that source no longer has after compaction; repair tombstones the residual target cells, + // so the chunk's mapper rolls up to REPAIRED. + validateMapperCountersRepair(counters3, 3, 1, 0, 0); + + // The standard Phoenix view (no raw-scan) on both clusters remains identical. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @Test @@ -1734,7 +2310,9 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception { assertTrue("Second run with raw scan should succeed", job2.isSuccessful()); validateSyncCounters(counters2, 10, 11, 9, 1); - validateMapperCounters(counters2, 3, 1); + // Non-dry-run: target has a delete marker that source doesn't; repair tombstones at source + // ts cover the residual under raw scan, so the mapper rolls up to REPAIRED. + validateMapperCountersRepair(counters2, 3, 1, 0, 0); flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName); @@ -1745,6 +2323,11 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception { assertTrue("Third run with raw scan after compaction should succeed", job3.isSuccessful()); validateSyncCounters(counters3, 10, 10, 10, 0); validateMapperCounters(counters3, 4, 0); + + // After major compaction tombstones are gone; the third raw-scan pass is clean and the + // standard Phoenix view matches. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1785,7 +2368,12 @@ public void testSyncTableWithMultipleVersionAndCompactionOnSource() throws Excep assertTrue("Second run with all versions should succeed", job3.isSuccessful()); // Target retains old version of row 5 (VERSIONS=2), source does not after compaction. validateSyncCounters(counters3, 10, 10, 9, 1); - validateMapperCounters(counters3, 3, 1); + // Non-dry-run: target has an extra historical version that source no longer has; repair + // tombstones the residual cells at the appropriate ts, so the mapper rolls up to REPAIRED. + validateMapperCountersRepair(counters3, 3, 1, 0, 0); + + // The standard Phoenix view (latest version only) on both clusters is identical. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @Test @@ -1807,33 +2395,36 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep new String[] { "EXTRA_ROW" }); targetConnection.commit(); - // Run sync with --read-all-versions: target has extra old version, should mismatch + // Run sync with --read-all-versions: target diverged with EXTRA_ROW; source is unchanged. + // Non-dry-run: repair pushes source's row 5 onto target. Status rolls up to REPAIRED (the + // diverging cell is a value mismatch with both sides live, not a tombstone shadowing). Job job = runSyncTool(uniqueTableName, "--read-all-versions"); SyncCountersResult counters1 = getSyncCounters(job); assertTrue("First run with all versions should succeed", job.isSuccessful()); validateSyncCounters(counters1, 10, 10, 9, 1); - validateMapperCounters(counters1, 3, 1); + validateMapperCountersRepair(counters1, 3, 1, 0, 0); flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName); - // Run sync without reading all versions (default behavior): only latest version compared, - // should still mismatch + // Subsequent runs see target already converged to source from the first repair pass. Job job1 = runSyncTool(uniqueTableName); SyncCountersResult counters = getSyncCounters(job1); assertTrue("Second run without reading all versions should succeed", job1.isSuccessful()); - validateSyncCounters(counters, 10, 10, 9, 1); - validateMapperCounters(counters, 3, 1); + validateSyncCounters(counters, 10, 10, 10, 0); + validateMapperCounters(counters, 4, 0); - // Run sync with --read-all-versions, target has extra old version even after compaction, should - // mismatch Job job3 = runSyncTool(uniqueTableName, "--read-all-versions"); SyncCountersResult counters3 = getSyncCounters(job3); - assertTrue("Second run with all versions should succeed", job3.isSuccessful()); - validateSyncCounters(counters3, 10, 10, 9, 1); - validateMapperCounters(counters3, 3, 1); + assertTrue("Third run with all versions should succeed", job3.isSuccessful()); + validateSyncCounters(counters3, 10, 10, 10, 0); + validateMapperCounters(counters3, 4, 0); + + // After repair the standard Phoenix view matches. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test @@ -1843,20 +2434,30 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception { introduceAndVerifyTargetDifferences(uniqueTableName); // Enable split coalescing via command-line parameter, all regions will be coalesced into one - // mapper - Job job = runSyncTool(uniqueTableName, "--coalesce-split"); - SyncCountersResult counters = getSyncCounters(job); + // mapper. Use a pinned window so the dry-run and repair share the same checkpoint PK. + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + + Job dryRunJob = runSyncTool(uniqueTableName, "--coalesce-split", "--dry-run", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + SyncCountersResult counters = getSyncCounters(dryRunJob); assertEquals("Should have only 1 Mapper task created with coalescing", 1, counters.taskCreated); validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); - // Verify checkpoint entries are created correctly + // Verify checkpoint entries from the dry-run pass are created correctly. List checkpointEntries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 7, 3, 4, 3, null); + + // Repair pass over the same window: MISMATCHED rows transition to REPAIRED in place. + runSyncTool(uniqueTableName, "--coalesce-split", "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } /** @@ -1869,7 +2470,7 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception { * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is * older than {@code phoenix.max.lookback.age.seconds}. * - *

    Fix: {@link PhoenixSyncTableInputFormat#getQueryPlan} overrides the parent to strip {@code + *

    Fix: {@link PhoenixSyncTableInputFormat} overrides the parent to strip {@code * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code * verifySCN()} returns early (SCN == null), so no exception is thrown. * @@ -1892,7 +2493,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from // conn.getQueryServices().getConfiguration(), which is the client-side MR conf. long maxLookbackAgeSeconds = 5; - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + Configuration conf = sourceClusterConf(); conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY, maxLookbackAgeSeconds); @@ -1920,6 +2521,11 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc SyncCountersResult counters = getSyncCounters(job); validateSyncCounters(counters, 10, 10, 10, 0); validateMapperCounters(counters, 4, 0); + + // Run was non-dry-run with no drift; repair flow is a no-op and target should match source + // even though toTime is older than max lookback age. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } /** @@ -2003,6 +2609,85 @@ private static class CheckpointAggregateCounters { chunksMismatched); } + /** + * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of + * the checkpoint table before and after deletion, and the aggregate counters re-derived from + * the chunks that survived the deletion. Tests use these to assert the + * {@code remaining + rerun == first-run} invariant after re-running the sync tool. + */ + private static class PartialRerunSetup { + final Job firstRunJob; + final SyncCountersResult firstRunCounters; + final List mappers; + final List chunks; + final List chunksToDelete; + final int deletedCount; + final List entriesAfterDelete; + final CheckpointAggregateCounters remainingCounters; + + PartialRerunSetup(Job firstRunJob, SyncCountersResult firstRunCounters, + List mappers, + List chunks, + List chunksToDelete, int deletedCount, + List entriesAfterDelete, + CheckpointAggregateCounters remainingCounters) { + this.firstRunJob = firstRunJob; + this.firstRunCounters = firstRunCounters; + this.mappers = mappers; + this.chunks = chunks; + this.chunksToDelete = chunksToDelete; + this.deletedCount = deletedCount; + this.entriesAfterDelete = entriesAfterDelete; + this.remainingCounters = remainingCounters; + } + } + + /** + * Runs the partial-rerun preamble shared by all checkpoint-resume tests: + *

      + *
    1. Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime}, + * {@code toTime}] window.
    2. + *
    3. Query the checkpoint table and assert non-empty mapper/chunk results.
    4. + *
    5. Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all + * current tests) and delete them along with every mapper row.
    6. + *
    7. Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can + * assert the {@code remaining + rerun == first-run} row-count invariant.
    8. + *
    + * Each test then performs its own divergent action (extra splits, merges, smaller chunk size, + * dropping the target table) on the returned state. + */ + private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, long toTime, + int chunkSize, double deletionFraction) throws Exception { + Job firstRunJob = runSyncToolWithChunkSize(tableName, chunkSize, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + SyncCountersResult firstRunCounters = getSyncCounters(firstRunJob); + + List checkpointEntries = + queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, null); + assertFalse("Should have checkpoint entries after first run", checkpointEntries.isEmpty()); + + SeparatedCheckpointEntries separated = separateMapperAndChunkEntries(checkpointEntries); + assertFalse("Should have mapper region entries", separated.mappers.isEmpty()); + assertFalse("Should have chunk entries", separated.chunks.isEmpty()); + + List chunksToDelete = selectChunksToDeleteFromMappers( + sourceConnection, tableName, targetZkQuorum, fromTime, toTime, null, separated.mappers, + deletionFraction); + + int deletedCount = deleteCheckpointEntries(sourceConnection, tableName, targetZkQuorum, null, + separated.mappers, chunksToDelete); + assertEquals("Should have deleted all mapper and selected chunk entries", + separated.mappers.size() + chunksToDelete.size(), deletedCount); + + List entriesAfterDelete = + queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, null); + CheckpointAggregateCounters remainingCounters = + calculateAggregateCountersFromCheckpoint(entriesAfterDelete); + + return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers, + separated.chunks, chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters); + } + private List findChunksBelongingToMapper(Connection conn, String tableName, String targetCluster, long fromTime, long toTime, String tenantId, PhoenixSyncTableCheckpointOutputRow mapper) throws SQLException { @@ -2672,6 +3357,111 @@ private Job runSyncToolWithZkQuorum(String tableName, String zkQuorum, String... } } + /** + * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration}. Tests that + * need to override individual settings (paging, timeouts, etc.) should use this helper rather + * than constructing the Configuration inline so that any future change to the base config flows + * through one place. + */ + private static Configuration sourceClusterConf() { + return new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); + } + + /** + * Builds a {@link PhoenixSyncTableTool}, runs it with the supplied args, and asserts the run + * surfaces failure as a non-zero exit code rather than a thrown exception. Used by the + * failure-mode tests that previously hand-rolled this same try/run/assertTrue/catch-fail block. + */ + private void assertSyncToolFails(String[] args, String failureContext) { + PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); + tool.setConf(sourceClusterConf()); + try { + int exitCode = tool.run(args); + assertTrue(failureContext, exitCode != 0); + } catch (Exception ex) { + fail("Tool should return non-zero exit code on failure instead of throwing exception: " + + ex.getMessage()); + } + } + + /** + * Upserts a "MODIFIED_NAME_" row on target for each id in {@code mismatchIds}. Replaces the + * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new + * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of {@link + * #upsertRowsOnTarget}. + */ + private void introduceMismatchesByIds(String tableName, List mismatchIds) + throws SQLException { + int[] ids = new int[mismatchIds.size()]; + String[] names = new String[mismatchIds.size()]; + for (int i = 0; i < mismatchIds.size(); i++) { + ids[i] = mismatchIds.get(i); + names[i] = "MODIFIED_NAME_" + mismatchIds.get(i); + } + upsertRowsOnTarget(targetConnection, tableName, ids, names); + } + + /** + * Starts two daemon-style threads that perform region mutations (splits or merges) on the + * source and target clusters and returns a {@link Runnable} the caller invokes to join them + * with a 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an + * unexpected exception is logged rather than killing the JVM thread silently. + * + *

    Usage: + *

    +   *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
    +   *   ... run main sync work ...
    +   *   joiner.run();
    +   * 
    + * + *

    Caller is responsible for invoking the returned joiner; tests should always join before + * asserting on cluster state, otherwise late-arriving region mutations can race the assertions. + */ + private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork, + String label) { + Thread sourceThread = new Thread(() -> { + try { + sourceWork.run(); + } catch (Exception e) { + LOGGER.error("Error during source {}", label, e); + } + }); + Thread targetThread = new Thread(() -> { + try { + targetWork.run(); + } catch (Exception e) { + LOGGER.error("Error during target {}", label, e); + } + }); + sourceThread.start(); + targetThread.start(); + return () -> { + try { + sourceThread.join(30000); + targetThread.join(30000); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted while joining " + label + " threads", ie); + } + }; + } + + /** + * Runs a dry-run sync with the given chunk size to establish a baseline chunk count + * (CHUNKS_VERIFIED + CHUNKS_MISMATCHED), then deletes the baseline checkpoint rows so a + * subsequent run starts fresh. Used by the paging-timeout tests, which assert that aggressive + * paging produces strictly more chunks than the no-paging baseline. + */ + private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception { + Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time", + "0", "--to-time", String.valueOf(System.currentTimeMillis())); + long chunkCount = + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue() + + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); + cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null); + return chunkCount; + } + /** * Runs the PhoenixSyncTableTool with 1KB chunk size for testing multiple rows per chunk. Returns * the completed Job for counter verification. @@ -2687,8 +3477,164 @@ private Job runSyncToolWithLargeChunks(String tableName, String... additionalArg */ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, String... additionalArgs) throws Exception { - Configuration conf = new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); - return runSyncToolWithChunkSize(tableName, chunkSize, conf, additionalArgs); + return runSyncToolWithChunkSize(tableName, chunkSize, sourceClusterConf(), additionalArgs); + } + + /** + * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation, + * along with the pinned time window so callers can re-query the checkpoint table or run + * additional assertions against the same range. + */ + private static class RepairRunResult { + final Job dryRunJob; + final Job repairJob; + final long fromTime; + final long toTime; + + RepairRunResult(Job dryRunJob, Job repairJob, long fromTime, long toTime) { + this.dryRunJob = dryRunJob; + this.repairJob = repairJob; + this.fromTime = fromTime; + this.toTime = toTime; + } + } + + /** + * Runs the sync tool twice with the SAME pinned time window: first as a --dry-run to detect + * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED + * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is + * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without + * pinning, each invocation would fall through to System.currentTimeMillis() and the repair + * pass would create fresh rows instead of overwriting the dry-run pass's output. + * + *

    If the caller does not provide --from-time / --to-time, defaults of 0 / now are pinned. + * + *

    Default chunk size is 1 byte (one row per chunk) to mirror {@link #runSyncTool}. + */ + private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs) + throws Exception { + return runSyncToolWithRepairAndChunkSize(tableName, 1, additionalArgs); + } + + /** + * Same as {@link #runSyncToolWithRepair} but uses 1KB chunks (multiple rows per chunk). + */ + private RepairRunResult runSyncToolWithRepairLargeChunks(String tableName, + String... additionalArgs) throws Exception { + return runSyncToolWithRepairAndChunkSize(tableName, 1024, additionalArgs); + } + + /** + * Same as {@link #runSyncToolWithRepair} but with an explicit chunk size. + */ + private RepairRunResult runSyncToolWithRepairAndChunkSize(String tableName, int chunkSize, + String... additionalArgs) throws Exception { + long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L); + long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis()); + String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime); + + // First run: --dry-run, only detect mismatches. + String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run"); + Job dryRunJob = runSyncToolWithChunkSize(tableName, chunkSize, dryRunArgs); + + // Second run: no --dry-run. Same time window so the checkpoint PK matches and any + // CHUNK/MISMATCHED rows from the dry-run pass are overwritten by CHUNK/REPAIRED. + Job repairJob = runSyncToolWithChunkSize(tableName, chunkSize, pinnedArgs); + + return new RepairRunResult(dryRunJob, repairJob, fromTime, toTime); + } + + /** + * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array. + * Returns the default value if the flag is absent. + */ + private static long parseLongFlag(String[] args, String flag, long defaultValue) { + for (int i = 0; i < args.length - 1; i++) { + if (flag.equals(args[i])) { + return Long.parseLong(args[i + 1]); + } + } + return defaultValue; + } + + /** + * Returns args with --from-time/--to-time appended only if they are not already present. + */ + private static String[] ensureTimeArgs(String[] args, long fromTime, long toTime) { + boolean hasFrom = false; + boolean hasTo = false; + for (String a : args) { + if ("--from-time".equals(a)) { + hasFrom = true; + } else if ("--to-time".equals(a)) { + hasTo = true; + } + } + List result = new ArrayList<>(Arrays.asList(args)); + if (!hasFrom) { + result.add("--from-time"); + result.add(String.valueOf(fromTime)); + } + if (!hasTo) { + result.add("--to-time"); + result.add(String.valueOf(toTime)); + } + return result.toArray(new String[0]); + } + + private static String[] appendArg(String[] args, String newArg) { + String[] result = new String[args.length + 1]; + System.arraycopy(args, 0, result, 0, args.length); + result[args.length] = newArg; + return result; + } + + /** + * After a repair pass, asserts that no CHUNK or REGION rows in the checkpoint table are + * still in MISMATCHED status. They should all have transitioned to REPAIRED, VERIFIED, or + * (when target rows are entirely tombstoned) UNREPAIRABLE. + */ + private void assertNoMismatchedCheckpoints(String tableName, String tenantId) + throws SQLException { + List entries = + queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); + long mismatched = countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); + assertEquals("After repair, no MISMATCHED checkpoint rows should remain for table " + + tableName + " tenant=" + tenantId, 0, mismatched); + } + + /** + * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the + * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops + * that recurred across several tests. + */ + private static long countCheckpointsByStatus(List entries, + PhoenixSyncTableCheckpointOutputRow.Status status) { + long count = 0; + for (PhoenixSyncTableCheckpointOutputRow entry : entries) { + if (status.equals(entry.getStatus())) { + count++; + } + } + return count; + } + + /** + * Counts checkpoint entries that match BOTH the given type (REGION or CHUNK) and status. Used + * when a test needs to discriminate, e.g., REPAIRED CHUNK rows from REPAIRED REGION rows. + */ + private static long countCheckpointsByTypeAndStatus( + List entries, + PhoenixSyncTableCheckpointOutputRow.Type type, + PhoenixSyncTableCheckpointOutputRow.Status status) { + long count = 0; + for (PhoenixSyncTableCheckpointOutputRow entry : entries) { + if (type.equals(entry.getType()) && status.equals(entry.getStatus())) { + count++; + } + } + return count; } /** @@ -2748,6 +3694,12 @@ private static class SyncCountersResult { public final long chunksVerified; public final long mappersVerified; public final long mappersMismatched; + public final long mappersRepaired; + public final long mappersUnrepairable; + public final long mappersRepairFailed; + public final long rowsMissingOnTarget; + public final long rowsExtraOnTarget; + public final long rowsDifferentOnTarget; public final long taskCreated; SyncCountersResult(Counters counters) { @@ -2759,15 +3711,29 @@ private static class SyncCountersResult { this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue(); this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue(); + this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); + this.mappersUnrepairable = + counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); + this.mappersRepairFailed = + counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); + this.rowsMissingOnTarget = + counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); + this.rowsExtraOnTarget = + counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + this.rowsDifferentOnTarget = + counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue(); this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue(); } public void logCounters(String testName) { LOGGER.info( "{}: source rows={}, target rows={}, chunks mismatched={}, chunks verified={}, " - + "mappers verified={}, mappers mismatched={}", + + "mappers verified={}, mappers mismatched={}, mappers repaired={}, " + + "mappers unrepairable={}, mappers repair_failed={}, rows missing={}, " + + "rows extra={}, rows different={}", testName, sourceRowsProcessed, targetRowsProcessed, chunksMismatched, chunksVerified, - mappersVerified, mappersMismatched); + mappersVerified, mappersMismatched, mappersRepaired, mappersUnrepairable, + mappersRepairFailed, rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget); } } @@ -2801,6 +3767,210 @@ private void validateMapperCounters(SyncCountersResult counters, long expectedMa counters.mappersMismatched); } + /** + * Validates mapper counters when the chunks roll up into different repair outcomes (run was + * non-dry-run so mismatches were repaired rather than left as MISMATCHED). + */ + private void validateMapperCountersRepair(SyncCountersResult counters, + long expectedMappersVerified, long expectedMappersRepaired, long expectedMappersUnrepairable, + long expectedMappersRepairFailed) { + assertEquals("Should have expected verified mappers", expectedMappersVerified, + counters.mappersVerified); + assertEquals("Should have expected repaired mappers", expectedMappersRepaired, + counters.mappersRepaired); + assertEquals("Should have expected unrepairable mappers", expectedMappersUnrepairable, + counters.mappersUnrepairable); + assertEquals("Should have expected repair_failed mappers", expectedMappersRepairFailed, + counters.mappersRepairFailed); + } + + /** + * Pins the cell-level repair drift counters to exact expected values. Use in repair tests + * where the drift is constructed deterministically and any miscount (off-by-one, + * double-counting, missed branch) should fail the test loudly. + */ + private void assertRepairCellCounters(Counters counters, long expectedCellsMissing, + long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) { + assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing, + counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue()); + assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra, + counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue()); + assertEquals("CELLS_DIFFERENT_ON_TARGET", expectedCellsDifferent, + counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue()); + assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, + counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + } + + /** + * Pins the chunk- and mapper-level repair-status counters. Complements + * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that + * need to assert both layers. + */ + private void assertRepairChunkAndMapperCounters(Counters counters, long expectedChunksRepaired, + long expectedChunksRepairFailed, long expectedMappersRepaired, long expectedMappersUnrepairable, + long expectedMappersRepairFailed) { + assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired, + counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue()); + assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, + counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue()); + assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, + counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue()); + assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable, + counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue()); + assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed, + counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue()); + } + + /** + * Pins the row-level repair drift counters. Mirror of {@link #assertRepairCellCounters} for + * tests that need to assert whole-row outcomes (missing, extra, unrepairable). + */ + private void assertRepairRowCounters(Counters counters, long expectedRowsMissing, + long expectedRowsExtra, long expectedRowsCannotRepair) { + assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, + counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue()); + assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, + counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue()); + assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, + counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + } + + /** + * Builds DDL for a "repair test" table that uses {@code COLUMN_ENCODED_BYTES=NONE} so column + * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers + * inject raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)} + * without computing encoded qualifier bytes. + * + *

    Set {@code maxVersions > 1} when the test exercises hidden-version unwinding. + */ + private String buildRepairTestTableDdl(String tableName, boolean withReplication, int maxVersions, + String splitPoints) { + StringBuilder sb = new StringBuilder(); + sb.append("CREATE TABLE IF NOT EXISTS ").append(tableName).append(" (\n") + .append(" ID INTEGER NOT NULL PRIMARY KEY,\n").append(" NAME VARCHAR(50),\n") + .append(" NAME_VALUE BIGINT,\n").append(" UPDATED_DATE TIMESTAMP\n") + .append(") COLUMN_ENCODED_BYTES=NONE, UPDATE_CACHE_FREQUENCY=0"); + if (withReplication) { + sb.append(", REPLICATION_SCOPE=1"); + } else { + sb.append(", REPLICATION_SCOPE=0"); + } + if (maxVersions > 1) { + sb.append(", VERSIONS=").append(maxVersions); + } + if (splitPoints != null && !splitPoints.isEmpty()) { + sb.append(" SPLIT ON (").append(splitPoints).append(")"); + } + return sb.toString(); + } + + /** + * Creates the same {@link #buildRepairTestTableDdl} schema on both source and target clusters. + * Used by repair tests that bypass replication and seed the two clusters separately. + * + *

    Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps + * ≥ the anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as + * {@code TableNotFoundException}. + */ + private long createRepairTestTableOnBothClusters(String tableName, int maxVersions, + String splitPoints) throws SQLException { + executeTableCreation(sourceConnection, + buildRepairTestTableDdl(tableName, false, maxVersions, splitPoints)); + executeTableCreation(targetConnection, + buildRepairTestTableDdl(tableName, false, maxVersions, splitPoints)); + // Wait until the wall clock advances at least one millisecond past the CREATE TABLE + // timestamp so any caller-chosen SCN >= the returned anchor is guaranteed to be above the + // table's metadata row. + long anchor = System.currentTimeMillis() + 1; + while (System.currentTimeMillis() < anchor) { + // spin + } + return anchor; + } + + /** + * Returns a Phoenix connection bound to {@code scnTimestamp} via + * {@link PhoenixRuntime#CURRENT_SCN_ATTRIB}. UPSERTs through this connection write cells with + * {@code timestamp == scnTimestamp}, giving tests precise control over cell timestamps without + * needing to construct raw HBase Puts. + */ + private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws SQLException { + Properties props = new Properties(); + props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(scnTimestamp)); + return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props); + } + + /** + * Resolves the HBase {@link Table} backing a Phoenix table for a given Phoenix + * {@link Connection}. Used by raw-cell helpers that need to bypass Phoenix and write cells at + * exact (cf, q, ts) coordinates. + */ + private Table getHBaseTable(Connection phoenixConn, String phoenixTableName) throws Exception { + PhoenixConnection pConn = phoenixConn.unwrap(PhoenixConnection.class); + byte[] physicalName = pConn.getTable(phoenixTableName).getPhysicalName().getBytes(); + return pConn.getQueryServices().getTable(physicalName); + } + + /** + * Writes a single raw {@link Put} cell at the given {@code (rowKey, family, qualifier, ts, + * value)} coordinates, bypassing Phoenix's UPSERT path. Tests use this to plant historical + * versions or specific timestamps that Phoenix wouldn't naturally produce. + */ + private void writeRawCell(Connection phoenixConn, String tableName, byte[] rowKey, String family, + String qualifier, long ts, byte[] value) throws Exception { + try (Table hTable = getHBaseTable(phoenixConn, tableName)) { + Put put = new Put(rowKey); + put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), ts, value); + hTable.put(put); + } + } + + /** + * Plants a raw point-{@link Delete} (single-version tombstone) at {@code (rowKey, family, + * qualifier, ts)}. Equivalent to HBase's {@code Delete.addColumn(family, qualifier, ts)}. + */ + private void writeRawPointDelete(Connection phoenixConn, String tableName, byte[] rowKey, + String family, String qualifier, long ts) throws Exception { + try (Table hTable = getHBaseTable(phoenixConn, tableName)) { + Delete del = new Delete(rowKey); + del.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), ts); + hTable.delete(del); + } + } + + /** + * Plants a raw {@link Delete#addColumns} (DeleteColumn — covers all versions at {@code ts <= + * markerTs}) at {@code (rowKey, family, qualifier)}. Used by shadow-detection tests where a + * future source Put at {@code ts <= markerTs} must be suppressed. + */ + private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte[] rowKey, + String family, String qualifier, long markerTs) throws Exception { + try (Table hTable = getHBaseTable(phoenixConn, tableName)) { + Delete del = new Delete(rowKey); + del.addColumns(Bytes.toBytes(family), Bytes.toBytes(qualifier), markerTs); + hTable.delete(del); + } + } + + /** + * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the + * encoding used by {@code splitTableAt}. + */ + private static byte[] integerRowKey(int id) { + return PInteger.INSTANCE.toBytes(id); + } + + /** + * Returns a fresh {@link Configuration} clone of the source cluster with a custom + * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by + * the mid-row-flush boundary test. + */ + private static Configuration sourceClusterConfWithRepairBatchSize(int repairBatchSize) { + Configuration conf = sourceClusterConf(); + conf.setInt(PhoenixSyncTableTool.PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE, repairBatchSize); + return conf; + } + /** * Validates sync counters with exact source/target rows and minimum chunk thresholds. Use this * when chunk counts may vary but should be at least certain values. diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index 673b3f0cbc0..b9f7e893a58 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -20,7 +20,6 @@ import static org.apache.phoenix.hbase.index.IndexRegionObserver.PHOENIX_INDEX_CDC_CONSUMER_ENABLED; import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; -import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTION_CREATION_TIME_MS; import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; import static org.apache.phoenix.query.QueryServices.CLIENT_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.CONNECTION_QUERY_SERVICE_METRICS_ENABLED; @@ -228,27 +227,6 @@ public void testMultipleCQSIMetricsInParallel() throws Exception { assertEquals("Number of passing CSQI Metrics check should be : ", 4, counter.get()); } - @Test - public void testConnectionTime() { - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); - - // Find connection creation time metric - boolean foundMetric = false; - for (ConnectionQueryServicesMetric metric : serviceMetrics) { - System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); - if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { - assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); - foundMetric = true; - break; - } - } - assertTrue("Connection creation time metric not found", foundMetric); - - } - private void checkConnectionQueryServiceMetricsValues(String queryServiceName) throws Exception { String CREATE_TABLE_DDL = "CREATE TABLE IF NOT EXISTS %s (K VARCHAR(10) NOT NULL" + " PRIMARY KEY, V VARCHAR)"; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java index 2c2b05c2c6b..91ad3941746 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java @@ -752,6 +752,233 @@ public void testParseCounterValueNull() { assertEquals(0L, row.getTargetRowsProcessed()); } + @Test + public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exception { + String tableName = generateUniqueName(); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // One region per Status, distinguished by start key so result ordering is deterministic. + Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED, + Status.UNREPAIRABLE, Status.REPAIR_FAILED }; + for (int i = 0; i < statuses.length; i++) { + byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i)); + byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i)); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) + .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey) + .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + } + + // Repair mode should skip only fully-done regions (VERIFIED + REPAIRED) so the mapper + // re-processes MISMATCHED/UNREPAIRABLE/REPAIR_FAILED on the next run. + List repairResults = + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, false); + assertEquals("Repair mode should return only VERIFIED + REPAIRED regions", 2, + repairResults.size()); + assertArrayEquals("First should be region00 (VERIFIED)", + Bytes.toBytes("region00_start"), repairResults.get(0).getStartRowKey()); + assertArrayEquals("Second should be region02 (REPAIRED)", + Bytes.toBytes("region02_start"), repairResults.get(1).getStartRowKey()); + } + + @Test + public void testGetProcessedMapperRegionsDryRunReturnsAllStatuses() throws Exception { + String tableName = generateUniqueName(); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED, + Status.UNREPAIRABLE, Status.REPAIR_FAILED }; + for (int i = 0; i < statuses.length; i++) { + byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i)); + byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i)); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) + .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey) + .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + } + + // Dry-run mode does not filter by status; resume should skip every region the previous + // dry-run pass touched, regardless of its outcome. + List dryRunResults = + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); + assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size()); + } + + @Test + public void testGetProcessedChunksRepairModeFiltersByStatus() throws Exception { + String tableName = generateUniqueName(); + // Mapper region bracket — chunkStart must be <= mapperEnd AND chunkEnd must be >= + // mapperStart for a chunk to overlap. Use 'a' < chunk* < 'z' so the boundary check is + // satisfied for every chunk and only the STATUS filter decides what comes back. + byte[] mapperStart = Bytes.toBytes("aaaa"); + byte[] mapperEnd = Bytes.toBytes("zzzz"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED, + Status.UNREPAIRABLE, Status.REPAIR_FAILED }; + for (int i = 0; i < statuses.length; i++) { + byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i)); + byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i)); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK) + .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey) + .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + } + + List repairResults = repository.getProcessedChunks( + tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false); + assertEquals("Repair mode should return only VERIFIED + REPAIRED chunks", 2, + repairResults.size()); + assertArrayEquals("First should be chunk00 (VERIFIED)", + Bytes.toBytes("chunk00_start"), repairResults.get(0).getStartRowKey()); + assertArrayEquals("Second should be chunk02 (REPAIRED)", + Bytes.toBytes("chunk02_start"), repairResults.get(1).getStartRowKey()); + } + + @Test + public void testGetProcessedChunksDryRunReturnsAllStatuses() throws Exception { + String tableName = generateUniqueName(); + byte[] mapperStart = Bytes.toBytes("aaaa"); + byte[] mapperEnd = Bytes.toBytes("zzzz"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + Status[] statuses = new Status[] { Status.VERIFIED, Status.MISMATCHED, Status.REPAIRED, + Status.UNREPAIRABLE, Status.REPAIR_FAILED }; + for (int i = 0; i < statuses.length; i++) { + byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i)); + byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i)); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK) + .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey) + .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + } + + List dryRunResults = repository.getProcessedChunks( + tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); + assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size()); + } + + @Test + public void testCounterFormatterFormatChunkRoundTrip() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey = Bytes.toBytes("row1"); + byte[] endKey = Bytes.toBytes("row100"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // Args: sourceRows, targetRows, rowsMissing, rowsExtra, rowsDifferent, rowsCannotRepair, + // cellsMissing, cellsExtra, cellsDifferent. + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatChunk(100L, 95L, + 3L, 2L, 6L, 1L, 7L, 5L, 4L); + + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey) + .setStatus(Status.REPAIRED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .setCounters(counters).build()); + + String query = "SELECT COUNTERS FROM " + + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?"; + String stored; + try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) { + ps.setString(1, tableName); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + stored = rs.getString("COUNTERS"); + } + } + + // Stored COUNTERS string should pin every counter to the exact value from formatChunk. + assertEquals(counters, stored); + assertTrue("COUNTERS should contain SOURCE_ROWS_PROCESSED=100", + stored.contains("SOURCE_ROWS_PROCESSED=100")); + assertTrue("COUNTERS should contain TARGET_ROWS_PROCESSED=95", + stored.contains("TARGET_ROWS_PROCESSED=95")); + assertTrue("COUNTERS should contain ROWS_MISSING_ON_TARGET=3", + stored.contains("ROWS_MISSING_ON_TARGET=3")); + assertTrue("COUNTERS should contain ROWS_EXTRA_ON_TARGET=2", + stored.contains("ROWS_EXTRA_ON_TARGET=2")); + assertTrue("COUNTERS should contain ROWS_DIFFERENT_ON_TARGET=6", + stored.contains("ROWS_DIFFERENT_ON_TARGET=6")); + assertTrue("COUNTERS should contain ROWS_CANNOT_REPAIR=1", + stored.contains("ROWS_CANNOT_REPAIR=1")); + assertTrue("COUNTERS should contain CELLS_MISSING_ON_TARGET=7", + stored.contains("CELLS_MISSING_ON_TARGET=7")); + assertTrue("COUNTERS should contain CELLS_EXTRA_ON_TARGET=5", + stored.contains("CELLS_EXTRA_ON_TARGET=5")); + assertTrue("COUNTERS should contain CELLS_DIFFERENT_ON_TARGET=4", + stored.contains("CELLS_DIFFERENT_ON_TARGET=4")); + + // Public parse helpers should round-trip the source/target row counts. + PhoenixSyncTableCheckpointOutputRow parsed = new PhoenixSyncTableCheckpointOutputRow.Builder() + .setStartRowKey(startKey).setCounters(counters).build(); + assertEquals(100L, parsed.getSourceRowsProcessed()); + assertEquals(95L, parsed.getTargetRowsProcessed()); + } + + @Test + public void testCounterFormatterFormatMapperRoundTrip() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey = Bytes.toBytes("region_start"); + byte[] endKey = Bytes.toBytes("region_end"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // Args: chunksVerified, chunksMismatched, sourceRows, targetRows, rowsMissing, rowsExtra, + // rowsDifferent, rowsCannotRepair, cellsMissing, cellsExtra, cellsDifferent. + String counters = PhoenixSyncTableCheckpointOutputRow.CounterFormatter.formatMapper(8L, 2L, + 500L, 480L, 12L, 9L, 4L, 3L, 25L, 18L, 7L); + + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey) + .setStatus(Status.REPAIRED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .setCounters(counters).build()); + + String query = "SELECT COUNTERS FROM " + + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?"; + String stored; + try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) { + ps.setString(1, tableName); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + stored = rs.getString("COUNTERS"); + } + } + + // Stored COUNTERS string should pin every counter to the exact value from formatMapper. + assertEquals(counters, stored); + assertTrue("COUNTERS should contain CHUNKS_VERIFIED=8", stored.contains("CHUNKS_VERIFIED=8")); + assertTrue("COUNTERS should contain CHUNKS_MISMATCHED=2", + stored.contains("CHUNKS_MISMATCHED=2")); + assertTrue("COUNTERS should contain SOURCE_ROWS_PROCESSED=500", + stored.contains("SOURCE_ROWS_PROCESSED=500")); + assertTrue("COUNTERS should contain TARGET_ROWS_PROCESSED=480", + stored.contains("TARGET_ROWS_PROCESSED=480")); + assertTrue("COUNTERS should contain ROWS_MISSING_ON_TARGET=12", + stored.contains("ROWS_MISSING_ON_TARGET=12")); + assertTrue("COUNTERS should contain ROWS_EXTRA_ON_TARGET=9", + stored.contains("ROWS_EXTRA_ON_TARGET=9")); + assertTrue("COUNTERS should contain ROWS_DIFFERENT_ON_TARGET=4", + stored.contains("ROWS_DIFFERENT_ON_TARGET=4")); + assertTrue("COUNTERS should contain ROWS_CANNOT_REPAIR=3", + stored.contains("ROWS_CANNOT_REPAIR=3")); + assertTrue("COUNTERS should contain CELLS_MISSING_ON_TARGET=25", + stored.contains("CELLS_MISSING_ON_TARGET=25")); + assertTrue("COUNTERS should contain CELLS_EXTRA_ON_TARGET=18", + stored.contains("CELLS_EXTRA_ON_TARGET=18")); + assertTrue("COUNTERS should contain CELLS_DIFFERENT_ON_TARGET=7", + stored.contains("CELLS_DIFFERENT_ON_TARGET=7")); + + // Public parse helpers should round-trip the source/target row counts. + PhoenixSyncTableCheckpointOutputRow parsed = new PhoenixSyncTableCheckpointOutputRow.Builder() + .setStartRowKey(startKey).setCounters(counters).build(); + assertEquals(500L, parsed.getSourceRowsProcessed()); + assertEquals(480L, parsed.getTargetRowsProcessed()); + } + @Test public void testCheckpointMapperRegionWithTenantId() throws Exception { String tableName = generateUniqueName(); diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java index 039b7be051f..72c38816d84 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java @@ -101,27 +101,6 @@ public void testHistogramMetricsForOpenPhoenixConnectionCounter() { } } - @Test - public void testConnectionTime() { - Map> metrics = - ConnectionQueryServicesMetricsManager.getAllConnectionQueryServicesMetrics(); - List serviceMetrics = metrics.get("DEFAULT_CQSN"); - assertNotNull("No metrics found for service: DEFAULT_CQSN", serviceMetrics); - - // Find connection creation time metric - boolean foundMetric = false; - for (ConnectionQueryServicesMetric metric : serviceMetrics) { - System.out.println("Found metric: " + metric.getMetricType() + " = " + metric.getValue()); - if (metric.getMetricType() == PHOENIX_CONNECTION_CREATION_TIME_MS) { - assertTrue("Connection creation time should be >= 0", metric.getValue() >= 0); - foundMetric = true; - break; - } - } - assertTrue("Connection creation time metric not found", foundMetric); - - } - private void updateMetricsAndHistogram(long counter, String connectionQueryServiceName) { ConnectionQueryServicesMetricsManager.updateMetrics(connectionQueryServiceName, OPEN_PHOENIX_CONNECTIONS_COUNTER, counter); From 9955587e0d665890155e2171f489aa103976e2bc Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Sat, 6 Jun 2026 00:48:41 +0530 Subject: [PATCH 11/18] checkpointing code comment refactoring --- .../ConnectionQueryServicesMetrics.java | 5 +- .../PhoenixSyncTableChunkRepairer.java | 261 +++++++------- .../end2end/PhoenixSyncTableToolIT.java | 321 +++++++++++++++++- 3 files changed, 457 insertions(+), 130 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index c87e9ad7984..49110d9d7ed 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -17,6 +17,10 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; +import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -26,7 +30,6 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl; import org.apache.phoenix.monitoring.MetricType; -import static org.apache.phoenix.monitoring.MetricType.*; /** * Class for Connection Query Service Metrics. diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java index 4afdbcf9130..3bda5364696 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java @@ -375,16 +375,12 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta CellComparator comparator = CellComparator.getInstance(); RowRepairBuffer rowRepairBuffer = new RowRepairBuffer(sourceResult.getRow()); - // Per-column max source PUT timestamp; consumed by tombstoneTargetCell (target-extra - // cell) to pick the delete shape. Three cases: - // key absent → DeleteColumn at target's ts (also wipes older hidden versions) - // sourceMaxTs >= targetTs → point Delete at target's ts - // sourceMaxTs < targetTs → point Delete at target's ts, PLUS a point Delete for every - // hidden target Put whose ts ∈ (sourceMaxTs, targetTs); without - // these, the next hidden Put surfaces above source's mirror - // once target's visible cell is shadowed. - // Math::max collapses multi-version source, so case 3 only fires when target sits above - // ALL source versions at this column. See tombstoneTargetCell() for the application. + // Per-column max source PUT timestamp; consumed by tombstoneTargetCell to pick the + // delete shape for a target-extra cell — see its javadoc for the three cases. + // Math::max collapses source's multi-version cells into a single Long per column so + // the comparison against target's ts is a scalar check. + // + // Example: source has Put(NAME)@300 and Put(NAME)@200 → sourceMaxTsByColumn[NAME]=300. Map sourceMaxTsByColumn = new HashMap<>(); for (Cell sourceCell : sourceCells) { if (!CellUtil.isDelete(sourceCell)) { @@ -487,18 +483,41 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, * Tombstones a target-only cell to make target's read view at this column match source's. * Skips cells that are themselves already tombstones. * - *

    Tombstone subtype depends on what source has at this {@code (cf, q)}: - *

      - *
    • Source has no cell at this column — use {@link Delete#addColumns} (DeleteColumn, - * scope {@code ts <= T}) so even max-versions-hidden older target versions are - * shadowed.
    • - *
    • Source's max ts at this column is >= target's ts — point-Delete only - * target's exact ts.
    • - *
    • Source's max ts at this column is < target's ts — point-Delete target's - * ts, AND point-Delete every max-versions-hidden Put on target with ts in - * {@code (sourceMaxTs, targetTs)}. Otherwise after target's visible cell is - * shadowed, the next hidden version surfaces above source's mirror.
    • - *
    + *

    Called only when source has no cell at this target cell's exact + * {@code (cf, q, ts)}. If source does have a cell at the same {@code (cf, q, ts)}, + * the caller takes the mirroring path instead + * + *

    Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples + * assume {@code MAX_VERSIONS=3} and show only the relevant column. + * + *

    Case 1 — Source has no cell at this column: + *

    +   *   source row: (no NAME)
    +   *   target row: Put(NAME, "carol")@900 visible
    +   *               Put(NAME, "bob")  @600 hidden
    +   *   action    : DeleteColumn(NAME)@900   (covers ts <= 900, wipes "bob" too)
    +   *   result    : target reads no NAME — matches source.
    +   * 
    + * + *

    Case 2 — {@code sourceMaxTs >= targetTs}: + *

    +   *   source row: Put(NAME, "alice")@500       (sourceMaxTs = 500)
    +   *   target row: Put(NAME, "old",  )@200      (input cell; source has nothing at @200)
    +   *   action    : point-Delete(NAME)@200
    +   *   result    : "old"@200 is shadowed;
    +   *              Put(NAME, "alice")@500 would already have been mirrored
    +   * 
    + * + *

    Case 3 — {@code sourceMaxTs < targetTs}: + *

    +   *   source row: Put(NAME, "alice")@300       (sourceMaxTs = 300)
    +   *   target row: Put(NAME, "carol")@900 visible
    +   *               Put(NAME, "bob")  @600 hidden
    +   *               Put(NAME, "alice")@300 hidden
    +   *   action    : point-Delete(NAME)@900 + point-Delete(NAME)@600
    +   *               (without the second, "bob"@600 surfaces above source's mirror)
    +   *   result    : target's "alice"@300 is the highest live version — matches source.
    +   * 
    * * @return true if the cell was a live cell that contributed a tombstone marker, false if * the cell was already a tombstone and was skipped. @@ -527,54 +546,6 @@ private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuff return true; } - /** - * Loads target's per-row record with a single raw, all-versions scan. Captures both - * tombstones and Puts: - *
      - *
    • Tombstones drive shadow detection — would a source Put we mirror be - * suppressed by an existing target tombstone on read? e.g. tgt has - * DeleteColumn(NAME)@900, src wants Put(NAME, "alice")@500. Mirror would land - * on disk but stay invisible because DeleteColumn@900 covers ts <= 900. - * Detect upfront, skip the doomed write, flag the row unrepairable.
    • - *
    • Puts drive hidden-version discovery — when target's visible Put is at a - * higher ts than source's mirror, max-versions-filtered older Puts may sit between - * the two and surface above source's mirror after we shadow the visible one. We - * need their timestamps so we can point-Delete each. e.g. tgt has Put(NAME, - * "carol")@T2 visible plus Put(NAME, "bob")@T1 hidden under MAX_VERSIONS=2; src - * has Put(NAME, "alice")@T0. Point-Deleting only T2 exposes T1 above source's T0 - * mirror — wrong; we must point-Delete T1 too so T0 surfaces.
    • - *
    - * raw=true and all-versions are forced regardless of user flags so tombstones and - * hidden Put versions that would otherwise be filtered out are surfaced. - * - *

    Time range: lower bound stays at {@code fromTime} since cells below the window - * can't affect repair inside the window; upper bound is {@code MAX_VALUE} because a - * tombstone at {@code ts > toTime} can still shadow a Put we mirror at - * {@code ts in window} during application reads. - */ - private TargetRowRecord loadTargetRowRecord(byte[] rowKey, Table targetHTable) - throws IOException { - Scan scan = new Scan(); - scan.withStartRow(rowKey, true); - scan.withStopRow(rowKey, true); - scan.setRaw(true); - scan.readAllVersions(); - scan.setCacheBlocks(false); - scan.setTimeRange(fromTime, Long.MAX_VALUE); - scan.setCaching(1); - scan.setLimit(1); - TargetRowRecord rowRecord = new TargetRowRecord(); - try (ResultScanner scanner = targetHTable.getScanner(scan)) { - Result raw = scanner.next(); - if (raw != null) { - for (Cell cell : raw.rawCells()) { - rowRecord.record(cell); - } - } - } - return rowRecord; - } - /** * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL / @@ -649,10 +620,6 @@ private void flushRepairMutations(Table targetHTable, List puts, List - *

  • Shadow detection ({@link #wouldShadow}): would a source Put we're about - * to mirror be suppressed by an existing target tombstone?
  • - *
  • Hidden-version discovery ({@link #targetPutTimestampsBetween}): when - * max-versions on target hides older Puts behind a newer visible one, those hidden - * Puts surface on read the moment we shadow the visible one. We need their - * timestamps so we can point-Delete each. e.g. src has NAME@T0="alice", tgt has - * NAME@T2="carol" visible plus NAME@T1 hidden under max-versions. If we only - * point-Delete T2, target reads return T1 — wrong. Discovering T1 lets us - * point-Delete it too, so source's T0 mirror surfaces.
  • - * - * - * HBase has four tombstone subtypes, each with distinct shadow semantics: - * Delete — shadows a Put at {@code (cf, q, ts == T)} exactly - * DeleteColumn — shadows Puts at {@code (cf, q, ts <= T)} - * DeleteFamily — shadows Puts at {@code (cf, *, ts <= T)} - * DeleteFamilyVersion — shadows Puts at {@code (cf, *, ts == T)} - * {@link #wouldShadow(Cell)} consults all four tombstone maps and returns true on any - * match. + *

    HBase has four tombstone subtypes; each is recorded into its own map because + * shadow scope differs: + *

    +   *   Delete               shadows Put at (cf, q, ts == T) exactly
    +   *   DeleteColumn         shadows Puts at (cf, q, ts <= T)
    +   *   DeleteFamily         shadows Puts at (cf, *, ts <= T)
    +   *   DeleteFamilyVersion  shadows Puts at (cf, *, ts == T)
    +   * 
    */ static final class TargetRowRecord { private final Map> deletePointTs = new HashMap<>(); @@ -808,6 +754,47 @@ static final class TargetRowRecord { /** Per-column ts-ordered set of target's Put timestamps. */ private final Map> targetPutTs = new HashMap<>(); + /** + * Builds a {@link TargetRowRecord} from a single-row HBase scan. + * + *

    raw=true + all-versions are forced regardless of user flags so tombstones + * and max-versions-filtered older Puts (the two things this record exists to capture) + * are surfaced. + * + *

    Time range {@code [fromTime, MAX_VALUE]}: + *

      + *
    • Lower bound = {@code fromTime}: cells below the verify window can't affect + * repair inside the window.
    • + *
    • Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at + * {@code ts >= toTime} can still shadow a Put we mirror at {@code ts} in window + * during application reads, so we must see it. e.g. window + * {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without the + * wide upper bound we'd miss the 900 tombstone and write a doomed mirror.
    • + *
    + */ + static TargetRowRecord load(byte[] rowKey, Table targetHTable, long fromTime) + throws IOException { + Scan scan = new Scan(); + scan.withStartRow(rowKey, true); + scan.withStopRow(rowKey, true); + scan.setRaw(true); + scan.readAllVersions(); + scan.setCacheBlocks(false); + scan.setTimeRange(fromTime, Long.MAX_VALUE); + scan.setCaching(1); + scan.setLimit(1); + TargetRowRecord rowRecord = new TargetRowRecord(); + try (ResultScanner scanner = targetHTable.getScanner(scan)) { + Result raw = scanner.next(); + if (raw != null) { + for (Cell cell : raw.rawCells()) { + rowRecord.record(cell); + } + } + } + return rowRecord; + } + void record(Cell cell) { if (CellUtil.isDelete(cell)) { recordTombstone(cell); @@ -817,6 +804,12 @@ void record(Cell cell) { } } + /** + * Records one tombstone into its per-subtype map for {@link #wouldShadow} to query. + * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to + * the max ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion}) + * accumulate into a set. + */ private void recordTombstone(Cell tombstone) { long ts = tombstone.getTimestamp(); ByteBuffer family = ByteBuffer.wrap(CellUtil.cloneFamily(tombstone)); @@ -834,7 +827,7 @@ private void recordTombstone(Cell tombstone) { deleteFamilyVersionTs.computeIfAbsent(family, k -> new HashSet<>()).add(ts); break; default: - // Caller filters via CellUtil.isDelete; non-tombstone cells should never reach here. + // Unreachable: caller filters via CellUtil.isDelete. } } @@ -872,11 +865,11 @@ boolean wouldShadow(Cell sourcePut) { */ Set targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive, long upperExclusive) { - NavigableMap tss = targetPutTs.get(new ColumnKey(family, qualifier)); - if (tss == null) { + NavigableMap putTimestamps = targetPutTs.get(new ColumnKey(family, qualifier)); + if (putTimestamps == null) { return Collections.emptySet(); } - return tss.subMap(lowerExclusive, false, upperExclusive, false).keySet(); + return putTimestamps.subMap(lowerExclusive, false, upperExclusive, false).keySet(); } } @@ -940,25 +933,41 @@ Delete delete() { } /** - * Loads target's per-row record (tombstones + Puts) once per row, cached for the - * lifetime of this buffer. Two consumers: + * Returns the cached {@link TargetRowRecord} for this row, loading on first call via + * {@link TargetRowRecord#load} (one raw all-versions scan, time range + * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the + * current row — so repeated cell-level lookups within the row pay one round-trip total. + * + *

    Two consumers: * - *

    Shadow detection (used by {@link #mirrorSourceCell} via - * {@link TargetRowRecord#wouldShadow}). Time range is {@code [fromTime, MAX_VALUE]} — - * a target tombstone outside the user's verify window can still suppress a Put we - * mirror inside it. e.g. tgt has DeleteColumn@900, src wants Put@500, scan timeRange - * [0, 600). Put@500 is masked because DeleteColumn@900 covers ts <= 900. Detecting - * this upfront lets repair skip the doomed write and flag the row unrepairable. + *

    Shadow detection — {@link #mirrorSourceCellUnlessShadowed} asks + * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes + * that target's existing tombstones would render invisible. + *

    +     *   target row state: DeleteColumn(NAME)@T=900   (covers ts <= 900)
    +     *   source row state: Put(NAME, "alice")@T=500
    +     *   wouldShadow(srcPut@500) → true
    +     *   ⇒ skip mirror, mark row unrepairable; operator must major-compact target
    +     * 
    * - *

    Hidden-version discovery (used by {@link #tombstoneTargetCell} via - * {@link TargetRowRecord#targetPutTimestampsBetween}). When target's MAX_VERSIONS - * hides older Puts behind a newer visible one, point-Deleting only the visible ts - * exposes the hidden version above source's mirror. The record's all-versions raw - * scan surfaces those hidden timestamps so we can tombstone each. + *

    Hidden-version discovery — {@link #tombstoneTargetCell} asks + * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts + * sitting between source's max ts and target's visible ts, so each can be point-Deleted + * before they surface above source's mirror. + *

    +     *   target row state (MAX_VERSIONS=3):
    +     *     Put(NAME, "carol")@T=900   visible
    +     *     Put(NAME, "bob")  @T=600   hidden
    +     *     Put(NAME, "alice")@T=300   hidden
    +     *   source row state:
    +     *     Put(NAME, "alice")@T=300   (sourceMaxTs=300)
    +     *   targetPutTimestampsBetween(NAME, 300, 900) → {600}
    +     *     point-Delete T=900 (visible) and T=600 (hidden) so T=300 surfaces
    +     * 
    */ TargetRowRecord targetRowRecord(Table targetHTable) throws IOException { if (targetRowRecord == null) { - targetRowRecord = loadTargetRowRecord(rowKey, targetHTable); + targetRowRecord = TargetRowRecord.load(rowKey, targetHTable, fromTime); } return targetRowRecord; } diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 0dc84f5cf6c..5a9b0b79742 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -1932,7 +1932,7 @@ public void testRepairCellDifferentValue() throws Exception { * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so * the diff routes to {@code mirrorWholeRow} in * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside, - * {@code loadTargetRowRecord} deliberately uses {@code (fromTime, MAX_VALUE)} (line 487) — so it + * {@code TargetRowRecord.load} deliberately uses {@code (fromTime, MAX_VALUE)} — so it * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200 * (DeleteColumn covers ts <= T=600). Result: source's mirror is suppressed, the row is * flagged unrepairable. @@ -1953,8 +1953,8 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); scnSrc.commit(); } - // Target has a tombstone strictly above --to-time. Diff scan won't see it; loadTargetRowRecord - // still will. + // Target has a tombstone strictly above --to-time. Diff scan won't see it; + // TargetRowRecord.load still will. writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", tombstoneTs); @@ -1990,6 +1990,292 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { } } + /** + * Shadow detection via {@code DeleteFamily}: target has a {@code DeleteFamily} tombstone on + * cf {@code "0"} covering every qualifier in the family at {@code ts <= tombstoneTs}. Source + * has every cell of the row inside the diff window at {@code ts < tombstoneTs}, so each + * source cell would be shadowed if mirrored. Drives the + * {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow} — uncovered + * by other shadow ITs which only exercise {@code DeleteColumn}. + * + *

    To force the {@code cmp < 0} (whole-row mirror) path, the tombstone is planted strictly + * above {@code --to-time} so the diff scan does not see target's row at all, but + * {@code TargetRowRecord.load} (range {@code [fromTime, MAX_VALUE]}) still surfaces it. + */ + @Test + public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long fromTime = 0L; + final long sourceTs = base + 1L; + final long toTime = base + 2L; + final long familyTombstoneTs = base + 3L; + + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + // DeleteFamily on cf "0" — covers every qualifier (NAME, NAME_VALUE, _0, ...) at + // ts <= familyTombstoneTs. Planted strictly above --to-time so the diff scan can't see it. + writeRawDeleteFamily(targetConnection, uniqueTableName, integerRowKey(rowId), "0", + familyTombstoneTs); + + while (System.currentTimeMillis() <= familyTombstoneTs) { + // spin + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); + assertTrue("Repair should succeed (shadowing is correctness-only)", + result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // Both source cells (NAME and _0) live at sourceTs in cf "0"; DeleteFamily covers the whole + // family at ts <= familyTombstoneTs (sourceTs < familyTombstoneTs), so every mirror is + // suppressed → mirrorWholeRow returns FULLY_SHADOWED → rowsMissing stays 0, + // rowsCannotRepair++. + assertRepairRowCounters(c, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", + c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + + // Post-repair: target should still have no visible row — every source cell was suppressed. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertFalse("Row should not be visible on target — DeleteFamily covered every source cell", + rs.next()); + } + } + } + + /** + * Shadow detection via {@code DeleteFamilyVersion}: target has a {@code DeleteFamilyVersion} + * tombstone on cf {@code "0"} at exactly {@code sourceTs}. Source's cells at the same ts get + * shadowed because {@code DeleteFamilyVersion} matches every qualifier in the family at the + * exact ts. Drives the {@code TargetRowRecord.deleteFamilyVersionTs} branch in + * {@code wouldShadow} — also uncovered prior to this test. + * + *

    {@code DeleteFamilyVersion} requires ts equality (not inequality) so the + * tombstone-above-{@code toTime} trick used in the {@code DeleteColumn}/{@code DeleteFamily} + * shadow tests doesn't apply here. Instead we omit {@code --raw-scan}: target has no live + * cells (just the tombstone), so without raw mode the diff scan sees target as empty, + * routing to the {@code cmp < 0} mirrorWholeRow path; {@code TargetRowRecord.load} runs + * raw internally and surfaces the tombstone for shadow detection. + */ + @Test + public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long fromTime = 0L; + final long sourceTs = base + 1L; + + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + // DeleteFamilyVersion on cf "0" at exactly sourceTs — covers every qualifier in the family + // at ts == sourceTs. Source's NAME and _0 cells, both Put at sourceTs, are shadow targets. + writeRawDeleteFamilyVersion(targetConnection, uniqueTableName, integerRowKey(rowId), "0", + sourceTs); + + while (System.currentTimeMillis() <= sourceTs) { + // spin + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis())); + assertTrue("Repair should succeed (shadowing is correctness-only)", + result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + assertRepairRowCounters(c, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", + c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertFalse("Row should not be visible on target — DeleteFamilyVersion shadowed every " + + "source cell at sourceTs", rs.next()); + } + } + } + + /** + * Multi-hidden-version unwinding: extends {@link #testRepairUnwindsHiddenTargetVersions} + * with TWO max-versions-hidden Puts beneath target's visible Put. Pins + * {@code targetPutTimestampsBetween} (in {@link + * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord}) + * end-to-end: when {@code sourceMaxTs < target visible ts}, the repairer must point-Delete + * the visible ts AND every hidden Put in {@code (sourceMaxTs, target visible ts)} — + * otherwise after we shadow the visible cell, a hidden Put surfaces above source's mirror. + */ + @Test + public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 3, "3, 7"); + + final long fromTime = 0L; + final long sourceTs = base + 1L; + final long targetT1 = base + 2L; + final long targetT2 = base + 3L; + final long targetT3 = base + 4L; + + byte[] rowKey = integerRowKey(rowId); + String family = "0"; + String qualifier = "NAME"; + + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + scnSrc.commit(); + } + + // Three target NAME versions, all retained under VERSIONS=3: + // T1 "bob" (hidden), T2 "carol" (hidden), T3 "dave" (visible). + try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) { + scnTgtT1.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); + scnTgtT1.commit(); + } + try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) { + scnTgtT2.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); + scnTgtT2.commit(); + } + try (Connection scnTgtT3 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT3)) { + scnTgtT3.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')"); + scnTgtT3.commit(); + } + + // Sanity: pre-repair target visible NAME is "dave" (newest of the three). + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("Pre-repair target visible NAME should be dave", "dave", rs.getString(1)); + } + } + + while (System.currentTimeMillis() <= targetT3) { + // spin + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + "--read-all-versions"); + assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + Counters repairCounters = result.repairJob.getCounters(); + // Source has one NAME Put@sourceTs; target has three NAME Puts at T1, T2, T3 (all > + // sourceTs). Each target NAME cell drives the cellExtra branch (sourceMaxTs < ts) and the + // hidden-version unwinding logic point-Deletes every Put in (sourceTs, ts). + assertTrue("At least 3 cells should be tombstoned across target's three NAME versions", + repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 3); + + // Post-repair: standard read on target must see source's "alice" — every hidden version + // ("bob"@T1, "carol"@T2) was unwound along with the visible "dave"@T3. + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertEquals("Post-repair target NAME must be alice (all hidden versions unwound)", + "alice", rs.getString(1)); + } + } + + // Raw scan: target should have a Delete marker covering each of T1/T2/T3 plus source's + // mirror. The exact tombstone cell count may include duplicates from the unwind logic + // (each iteration's hidden-set spans a shrinking interval, so T1 appears in T3's hidden + // set, then again in T2's), so assert a lower bound on distinct delete-bearing scan cells. + try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { + Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true); + scan.readAllVersions(); + int nameDeletes = 0; + int namePutAtSourceTs = 0; + try (ResultScanner sc = targetHTable.getScanner(scan)) { + for (Result r; (r = sc.next()) != null;) { + for (Cell c : r.rawCells()) { + if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family)) + && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) { + if (CellUtil.isDelete(c)) { + nameDeletes++; + } else if (c.getTimestamp() == sourceTs) { + namePutAtSourceTs++; + } + } + } + } + } + assertTrue("Expected at least 3 NAME delete markers on target, saw " + nameDeletes, + nameDeletes >= 3); + assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); + } + } + + /** + * cmp==0 row carrying a target-only tombstone cell at a coord source lacks: drives the + * {@code tombstoneTargetCell} return-{@code false} branch ({@link + * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}, around line 420) + * inside {@code generateMutationForDiffCells}'s {@code cellExtra} branch — the target cell + * is itself a tombstone, so no new tombstone is emitted, but {@code anyCellUnrepairable} is + * set. The row contributes to {@code ROWS_CANNOT_REPAIR} without bumping any cell counter. + * + *

    Setup: source and target share a matching {@code NAME_VALUE} cell so the row exists on + * both. Target also has a raw point-{@link Delete} on {@code NAME} at a coord source lacks; + * with {@code --raw-scan} the diff scan surfaces that tombstone cell, taking the + * {@code cmp > 0} branch on it. + */ + @Test + public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + final long ts = base + 1L; + final long tombstoneTs = base + 2L; + + try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { + scnSrc.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnSrc.commit(); + } + try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { + scnTgt.createStatement().execute( + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + scnTgt.commit(); + } + // Plant a raw point-Delete on NAME at tombstoneTs — a coord source has no cell at, but the + // target cell is itself a tombstone, so tombstoneTargetCell returns false in repair-mode. + writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", + tombstoneTs); + + while (System.currentTimeMillis() <= tombstoneTs) { + // spin + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(System.currentTimeMillis()), "--raw-scan"); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // No mirror, no tombstone emitted — but anyCellUnrepairable was set, so the row rolls up + // as unrepairable. All cell counters stay 0. + assertRepairCellCounters(c, 0, 0, 0, 1); + assertRepairRowCounters(c, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", + c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + } + /** * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple @@ -3952,6 +4238,35 @@ private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte } } + /** + * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family + * at {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests + * exercising the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}. + */ + private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte[] rowKey, + String family, long markerTs) throws Exception { + try (Table hTable = getHBaseTable(phoenixConn, tableName)) { + Delete del = new Delete(rowKey); + del.addFamily(Bytes.toBytes(family), markerTs); + hTable.delete(del); + } + } + + /** + * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier + * in the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by + * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch + * in {@code wouldShadow}. + */ + private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName, + byte[] rowKey, String family, long markerTs) throws Exception { + try (Table hTable = getHBaseTable(phoenixConn, tableName)) { + Delete del = new Delete(rowKey); + del.addFamilyVersion(Bytes.toBytes(family), markerTs); + hTable.delete(del); + } + } + /** * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the * encoding used by {@code splitTableAt}. From ffeb1015ce57fa695bcdb5f074fd7f9dee013cbb Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Mon, 8 Jun 2026 17:03:34 +0530 Subject: [PATCH 12/18] checkpointing test fixes --- .../ConnectionQueryServicesMetrics.java | 1 - .../end2end/PhoenixSyncTableToolIT.java | 2071 ++++++++++------- .../PhoenixSyncTableOutputRepositoryTest.java | 218 ++ 3 files changed, 1464 insertions(+), 826 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java index 49110d9d7ed..575d38530eb 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetrics.java @@ -30,7 +30,6 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetricImpl; import org.apache.phoenix.monitoring.MetricType; - /** * Class for Connection Query Service Metrics. */ diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 5a9b0b79742..1d9131d8e2d 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -43,17 +43,23 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.SimpleRegionObserver; +import org.apache.hadoop.hbase.regionserver.MiniBatchOperationInProgress; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; @@ -72,7 +78,6 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.schema.types.PInteger; import org.apache.phoenix.util.PhoenixRuntime; -import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; import org.junit.After; import org.junit.AfterClass; @@ -86,6 +91,7 @@ import org.slf4j.LoggerFactory; @Category(NeedsOwnMiniClusterTest.class) +@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve"}) public class PhoenixSyncTableToolIT { private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableToolIT.class); @@ -158,7 +164,7 @@ public void tearDown() throws Exception { } @Test - public void testSyncTableValidateWithDataDifference() throws Exception { + public void testSyncTableWithDataDifference() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); introduceAndVerifyTargetDifferences(uniqueTableName); @@ -178,46 +184,28 @@ public void testSyncTableValidateWithDataDifference() throws Exception { // Dry-run row-level logging should flag the 3 same-key/different-value rows as // ROWS_DIFFERENT_ON_TARGET; nothing missing or extra (replication seeded both sides // with the same row keys before introduceAndVerifyTargetDifferences mutated three). - assertEquals("Dry-run should detect 3 rows different on target", 3, - dryRunCounters.rowsDifferentOnTarget); - assertEquals("Dry-run should report 0 rows missing on target", 0, - dryRunCounters.rowsMissingOnTarget); - assertEquals("Dry-run should report 0 rows extra on target", 0, - dryRunCounters.rowsExtraOnTarget); + assertRowDriftCounters(dryRunCounters, 0, 0, 3, 0); - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 1, 3, 4, - 3, null); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, null); // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in // place. Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); - Counters repairCounters = repairJob.getCounters(); - assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 3, 0, 0); - assertRepairRowCounters(repairCounters, 0, 0, 0); + SyncCountersResult repairCounters = getSyncCounters(repairJob); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRowDriftCounters(repairCounters, 0, 0, 0, 0); // 3 rows × 2 mismatched cells (NAME + Phoenix's _0 empty-key cell) = 6 missing and 6 extra. assertRepairCellCounters(repairCounters, 6, 6, 0, 0); // Target rows should now match source. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - List postRepairEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - assertEquals("Expected 3 CHUNK/REPAIRED rows after repair", 3, - countCheckpointsByTypeAndStatus(postRepairEntries, - PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); - assertEquals("Expected 3 REGION/REPAIRED rows after repair", 3, - countCheckpointsByTypeAndStatus(postRepairEntries, - PhoenixSyncTableCheckpointOutputRow.Type.REGION, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } @Test - public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception { + public void testSyncTableWithDifferentZkQuorumFormats() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); introduceAndVerifyTargetDifferences(uniqueTableName); @@ -239,9 +227,17 @@ public void testSyncTableValidateWithDifferentZkQuorumFormats() throws Exception // After validating detection across ZK formats, run dry-run + repair against the default // targetZkQuorum to confirm the tool converges source and target. - runSyncToolWithRepair(uniqueTableName); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + validateSyncCounters(dryRunCounters, 10, 10, 7, 3); + validateMapperCounters(dryRunCounters, 1, 3); + assertRowDriftCounters(dryRunCounters, 0, 0, 3, 0); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRowDriftCounters(repairCounters, 0, 0, 0, 0); + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } @Test @@ -263,12 +259,22 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception { validateSyncCounters(counters, 10, 7, 7, 3); validateMapperCounters(counters, 1, 3); assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated); + // Three target rows were Phoenix-deleted, so dry-run sees them as missing on target. + assertRowDriftCounters(counters, 3, 0, 0, 0); // Repair pass only re-runs the 3 mismatched chunks (verified chunks are excluded by the // resume filter). Target's DELETEs left tombstones that shadow source's Puts at lower // timestamps, so each re-run mapper rolls up to UNREPAIRABLE. SyncCountersResult repairCounters = getSyncCounters(result.repairJob); validateMapperCountersRepair(repairCounters, 0, 0, 3, 0); + assertRepairChunkAndMapperCounters(repairCounters, 0, 3, 0, 0, 3, 0); + + // Tombstones still shadow source — Phoenix SELECT must still diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + + // 7 verified chunks/mappers from the dry-run, 3 unrepairable chunks from repair, mappers + // roll up to UNREPAIRABLE because every chunk in their region went UNREPAIRABLE. + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test @@ -299,12 +305,15 @@ public void testSyncTableWithConditionalTTLExpiredRows() throws Exception { // Run sync tool, TTL-expired rows (1-3) should be skipped on both source and target RepairRunResult result = runSyncToolWithRepair(uniqueTableName); SyncCountersResult counters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); validateSyncCounters(counters, 7, 7, 5, 2); validateMapperCounters(counters, 2, 2); + assertRowDriftCounters(counters, 0, 0, 2, 0); + assertRepairChunkAndMapperCounters(repairCounters, 2, 0, 0, 2, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test @@ -355,12 +364,20 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception // on source vs 7 on target because IS_STRICT_TTL=false returns expired rows on source // (uncompacted) but compaction on target physically removed them — that asymmetry is by // design, not drift the tool can converge. - runSyncToolWithRepair(uniqueTableName); + // + // Note: the two runSyncTool calls above each write CHUNK/VERIFIED rows under their own + // (from-time, to-time) PK, so the checkpoint table accumulates entries from prior validate + // passes — validateCheckpointEntries can't be applied here against a single counter + // snapshot. Stick with the MISMATCHED-count invariant. + RepairRunResult result = runSyncToolWithRepair(uniqueTableName); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + assertRowDriftCounters(repairCounters, 0, 0, 0, 0); + assertRepairChunkAndMapperCounters(repairCounters, 0, 0, 0, 0, 0, 0); assertNoMismatchedCheckpoints(uniqueTableName, null); } @Test - public void testSyncValidateIndexTable() throws Exception { + public void testSyncIndexTable() throws Exception { // Create data table on both clusters with replication createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); @@ -382,24 +399,24 @@ public void testSyncValidateIndexTable() throws Exception { RepairRunResult result = runSyncToolWithRepair(indexName); SyncCountersResult counters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); validateSyncCounters(counters, 10, 7, 7, 3); + assertRowDriftCounters(counters, 3, 0, 0, 0); // Verify checkpoint entries show mismatches (from dry-run pass) before repair runs. List checkpointEntries = queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null); - assertFalse("Should have checkpointEntries", checkpointEntries.isEmpty()); - // The repair pass syncs the index physical table on target with the source index. Since the - // data table was also corrupted on target (3 rows deleted via deleteHBaseRows) but we only - // ran sync on the index, the data table itself is still drifted — only assert on index - // checkpoint rows. - assertNoMismatchedCheckpoints(indexName, null); + // Repair-pass outcome (REPAIRED vs UNREPAIRABLE) depends on tombstone-vs-source timestamps, + // which we don't pin here, so fall back to an asserCheckpoint counters check that simply + // mirrors whatever the repair pass produced. + validateCheckpointEntries(indexName, null, counters, repairCounters); } @Test - public void testSyncValidateLocalIndexTable() throws Exception { + public void testSyncLocalIndexTable() throws Exception { // Create data table on both clusters with replication createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); @@ -423,6 +440,7 @@ public void testSyncValidateLocalIndexTable() throws Exception { // missing index rows on target. RepairRunResult result = runSyncToolWithRepair(indexName); SyncCountersResult counters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertTrue(String.format("Should have at least %d verified chunks, actual: %d", 1, counters.chunksVerified), counters.chunksVerified >= 1); @@ -434,13 +452,11 @@ public void testSyncValidateLocalIndexTable() throws Exception { queryCheckpointTable(sourceConnection, indexName, targetZkQuorum, null); assertFalse("Should have checkpoint entries for local index", checkpointEntries.isEmpty()); - - // After repair, the local-index physical table on target should match source's index. - assertNoMismatchedCheckpoints(indexName, null); + validateCheckpointEntries(indexName, null, counters, repairCounters); } @Test - public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception { + public void testSyncMultiTenantSaltedTableDifferences() throws Exception { String[] tenantIds = new String[] { "TENANT_001", "TENANT_002", "TENANT_003" }; int rowsPerTenant = 10; createMultiTenantSaltedTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); @@ -490,27 +506,26 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception RepairRunResult t1 = runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[0], "--to-time", toTime); SyncCountersResult counters1 = getSyncCounters(t1.dryRunJob); + SyncCountersResult repairCounters1 = getSyncCounters(t1.repairJob); validateSyncCounters(counters1, 10, 10, 10, 0); validateMapperCounters(counters1, 4, 0); + assertRowDriftCounters(counters1, 0, 0, 0, 0); + assertRepairChunkAndMapperCounters(repairCounters1, 0, 0, 0, 0, 0, 0); // TENANT_002 has 3 modified rows. Dry-run detects, repair writes back source values. RepairRunResult t2 = runSyncToolWithRepair(uniqueTableName, "--tenant-id", tenantIds[1]); SyncCountersResult counters2 = getSyncCounters(t2.dryRunJob); + SyncCountersResult repairCounters2 = getSyncCounters(t2.repairJob); validateSyncCounters(counters2, 10, 10, 7, 3); validateMapperCounters(counters2, 2, 2); + assertRowDriftCounters(counters2, 0, 0, 3, 0); + assertRepairChunkAndMapperCounters(repairCounters2, 3, 0, 0, 2, 0, 0); - // Verify checkpoint table has entries for the reprocessed regions - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_002"); - assertFalse("Should have checkpoint entries for TENANT_002", checkpointEntries.isEmpty()); - - checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, "TENANT_001"); - assertFalse("Should have checkpoint entries for TENANT_001", checkpointEntries.isEmpty()); - - // No MISMATCHED rows should remain after repair pass for either tenant. - assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_001"); - assertNoMismatchedCheckpoints(uniqueTableName, "TENANT_002"); + // Pin checkpoint state per tenant. + validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1, + repairCounters1); + validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2, + repairCounters2); // After repair, TENANT_002's data should be identical between source and target. withTenantConnections(tenantIds[1], @@ -518,7 +533,7 @@ public void testSyncValidateMultiTenantSaltedTableDifferences() throws Exception } @Test - public void testSyncTableValidateWithTimeRangeFilter() throws Exception { + public void testSyncTableWithTimeRangeFilter() throws Exception { createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); // Insert data BEFORE the time range window @@ -564,7 +579,7 @@ public void testSyncTableValidateWithTimeRangeFilter() throws Exception { } @Test - public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() throws Exception { + public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); List sourceSplits = Arrays.asList(15, 45, 51, 75, 95); @@ -639,14 +654,11 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionSplits() thr // REPAIRED with stale boundaries (relative to the post-split layout); the resume filter // skips those, so a final run can leave residual drift. Cleanup the checkpoint and run // a dry-run + repair pass on the stable layout to converge. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } @Test - public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws Exception { + public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); List sourceSplits = Arrays.asList(25, 50, 75); @@ -705,14 +717,11 @@ public void testSyncTableValidateCheckpointWithChunkSizeChangeOnReRun() throws E // pass on the same window so the repair flow has a stable boundary set to converge. // Use the dry-run+repair pattern so any chunk that landed in a non-resumable state // (REPAIRED with stale boundaries) is re-validated rather than skipped. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } @Test - public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() throws Exception { + public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); List sourceSplits = Arrays.asList(10, 20, 30, 40, 50, 60, 70, 80, 90); @@ -778,7 +787,7 @@ public void testSyncTableValidateCheckpointWithPartialReRunAndRegionMerges() thr } @Test - public void testSyncTableValidateIdempotentOnReRun() throws Exception { + public void testSyncTableIdempotentOnReRun() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); // Capture consistent time range for both runs (ensures checkpoint lookup will match) @@ -820,7 +829,7 @@ public void testSyncTableValidateIdempotentOnReRun() throws Exception { } @Test - public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception { + public void testSyncTableIdempotentAfterRegionSplits() throws Exception { // Setup table with initial splits and data setupStandardTestWithReplication(uniqueTableName, 1, 10); @@ -875,7 +884,7 @@ public void testSyncTableValidateIdempotentAfterRegionSplits() throws Exception } @Test - public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exception { + public void testSyncTableWithSchemaAndTableNameOptions() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); // Introduce differences on target @@ -884,17 +893,21 @@ public void testSyncTableValidateWithSchemaAndTableNameOptions() throws Exceptio // Run sync tool with both --schema and --table-name options RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--schema", ""); SyncCountersResult counters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); // Validate counters validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + assertRowDriftCounters(counters, 0, 0, 3, 0); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test - public void testSyncTableValidateInBackgroundMode() throws Exception { + public void testSyncTableInBackgroundMode() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); introduceAndVerifyTargetDifferences(uniqueTableName); @@ -925,18 +938,22 @@ public void testSyncTableValidateInBackgroundMode() throws Exception { validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + assertRowDriftCounters(counters, 0, 0, 3, 0); // Now run the repair pass (foreground for synchronous assertions). Same time window so // the dry-run-pass MISMATCHED rows are overwritten with REPAIRED. - runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); + Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + SyncCountersResult repairCounters = getSyncCounters(repairJob); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test - public void testSyncTableValidateWithCustomTimeouts() throws Exception { + public void testSyncTableWithCustomTimeouts() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); introduceAndVerifyTargetDifferences(uniqueTableName); @@ -976,16 +993,20 @@ public void testSyncTableValidateWithCustomTimeouts() throws Exception { counters.logCounters(testName.getMethodName()); validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + assertRowDriftCounters(counters, 0, 0, 3, 0); // Repair pass over the same window: convergence + no MISMATCHED rows remaining. - runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); + Job repairJob = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + SyncCountersResult repairCounters = getSyncCounters(repairJob); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test - public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception { + public void testSyncTableWithExtraRowsOnTarget() throws Exception { // Create tables on both clusters createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); @@ -1013,21 +1034,12 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception { // Run dry-run + repair sharing the same time window. RepairRunResult result = runSyncToolWithRepair(uniqueTableName); SyncCountersResult counters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); validateSyncCounters(counters, 10, 15, 5, 5); validateMapperCounters(counters, 0, 4); - - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - - // Count mismatched entries in checkpoint table — after the repair pass, all MISMATCHED - // rows from the dry-run pass should have been overwritten with REPAIRED. - long mismatchedCount = countCheckpointsByStatus(checkpointEntries, - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); - long repairedCount = countCheckpointsByStatus(checkpointEntries, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); - assertEquals("After repair, no MISMATCHED rows should remain", 0, mismatchedCount); - assertTrue("Should have REPAIRED rows after repair pass", repairedCount > 0); + assertRowDriftCounters(counters, 0, 5, 0, 0); + assertRepairChunkAndMapperCounters(repairCounters, 5, 0, 0, 4, 0, 0); // After repair: target should converge to source (10 odd-id rows). The 5 extra even-id // rows on target had only live cells, so tombstoneWholeRow can remove them. @@ -1036,10 +1048,12 @@ public void testSyncTableValidateWithExtraRowsOnTarget() throws Exception { TestUtil.getRowCount(sourceConnection, uniqueTableName)); assertEquals("Target should now also have 10 rows after repair tombstones the extras", 10, TestUtil.getRowCount(targetConnection, uniqueTableName)); + + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } @Test - public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception { + public void testSyncTableWithConcurrentRegionSplits() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); // Introduce some mismatches on target before sync List mismatchIds = Arrays.asList(15, 35, 55, 75, 95); @@ -1081,10 +1095,7 @@ public void testSyncTableValidateWithConcurrentRegionSplits() throws Exception { // boundary mid-flight can land in REPAIRED with stale boundaries; once REPAIRED, the // resume filter skips it. Cleanup the checkpoint and run a dry-run + repair pass on the // stable region layout to converge. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } /** @@ -1112,7 +1123,6 @@ public void testRepairWithConcurrentTargetSplits() throws Exception { long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Stage 1: stable dry-run populates MISMATCHED checkpoint rows. Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("Stable dry-run should succeed", dryRunJob.isSuccessful()); @@ -1120,11 +1130,9 @@ public void testRepairWithConcurrentTargetSplits() throws Exception { assertTrue("Dry-run should detect at least one mismatched chunk", dryRunCounters.chunksMismatched >= 1); - // Stage 2: kick off target-side splits while the repair pass runs. Source splits left out - // because a target-side split is what surfaces flushRepairMutations failures. + // Target-side splits race with the repair pass — that's where flushRepairMutations failures + // would surface. Runnable splitJoiner = startConcurrentRegionWork(() -> { - // No source-side work; pass a trivial Runnable so startConcurrentRegionWork still wires - // both threads and the joiner times out cleanly. }, () -> splitTableAt(targetConnection, uniqueTableName, Arrays.asList(15, 25, 35, 45, 55, 65, 75, 85, 95)), "repair-splits"); @@ -1133,21 +1141,15 @@ public void testRepairWithConcurrentTargetSplits() throws Exception { splitJoiner.run(); assertTrue("Repair pass should not throw despite concurrent splits", repairJob.isSuccessful()); - // Stage 3: stable convergence pass. Cleanup checkpoint so the resume filter doesn't skip - // chunks that were marked REPAIRED with stale boundaries during the racing pass. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - - assertNoMismatchedCheckpoints(uniqueTableName, null); - verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + // Cleanup checkpoint so the resume filter doesn't skip chunks marked REPAIRED with stale + // boundaries during the racing pass. + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } /** - * P4 (idempotent repair): Guards against a regression where repair claims REPAIRED but does - * not actually converge. Run a full dry-run + repair on a divergent table, clean the - * checkpoint, then run the same dry-run + repair again on the now-converged tables and assert - * the second pass is a no-op (zero mismatches detected, zero chunks repaired). + * Guards against a regression where repair claims REPAIRED but doesn't actually converge. + * Repair the divergent table, clean the checkpoint, then re-run dry-run + repair on the now + * converged tables — both passes must be no-ops. */ @Test public void testRepairIsIdempotent() throws Exception { @@ -1158,7 +1160,6 @@ public void testRepairIsIdempotent() throws Exception { long fromTime = 0L; long toTime = System.currentTimeMillis(); - // Pass 1: detect + repair. RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("First dry-run should succeed", firstRun.dryRunJob.isSuccessful()); @@ -1167,39 +1168,39 @@ public void testRepairIsIdempotent() throws Exception { SyncCountersResult firstDryRunCounters = getSyncCounters(firstRun.dryRunJob); assertTrue("First dry-run should detect mismatched chunks", firstDryRunCounters.chunksMismatched >= 1); - Counters firstRepairCounters = firstRun.repairJob.getCounters(); + SyncCountersResult firstRepairCounters = getSyncCounters(firstRun.repairJob); assertTrue("First repair should mark chunks REPAIRED", - firstRepairCounters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue() >= 1); + firstRepairCounters.chunksRepaired >= 1); - // Tables must be data-identical after the first repair. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters, + firstRepairCounters); - // Pass 2 prep: clean the checkpoint so the resume filter doesn't skip already-VERIFIED - // chunks — the second dry-run must scan the full layout from scratch. + // Clean the checkpoint so the second dry-run scans the full layout instead of resuming + // from VERIFIED chunks. cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - // Pass 2: same window, no further mutations. Both passes must be no-ops. RepairRunResult secondRun = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("Second dry-run should succeed", secondRun.dryRunJob.isSuccessful()); assertTrue("Second repair should succeed", secondRun.repairJob.isSuccessful()); SyncCountersResult secondDryRunCounters = getSyncCounters(secondRun.dryRunJob); + SyncCountersResult secondRepairCounters = getSyncCounters(secondRun.repairJob); assertEquals("Second dry-run should detect zero mismatches", 0, secondDryRunCounters.chunksMismatched); - - // Second repair pass should be a no-op: nothing repaired, nothing failed. - assertRepairChunkAndMapperCounters(secondRun.repairJob.getCounters(), 0, 0, 0, 0, 0); + assertRepairChunkAndMapperCounters(secondRepairCounters, 0, 0, 0, 0, 0, 0); + assertRowDriftCounters(secondRepairCounters, 0, 0, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters, + secondRepairCounters); } /** - * P5 (all-tombstoned target-extra row): Source has no row K; target has row K but every cell - * is already a tombstone. {@code tombstoneWholeRow} returns {@code liveCellsTombstoned == 0} - * (line ~217 of {@code PhoenixSyncTableChunkRepairer}) → {@code drift.rowsCannotRepair++}, - * {@code rowsExtraOnTarget} unchanged. Pins the rare "target row extra but already - * fully-tombstoned" path that currently has zero coverage. + * Target row K has only tombstones (no live cells) under {@code --raw-scan}, source lacks the + * row. {@code tombstoneWholeRow} finds zero live cells to tombstone → row rolls up as + * unrepairable, {@code rowsExtraOnTarget} stays 0. */ @Test public void testRepairAllTombstonedTargetRowExtra() throws Exception { @@ -1211,44 +1212,25 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception { final long ts = base + 1L; final long tombstoneTs = base + 2L; - // Plant a sentinel row on both sides so the verifier has *something* to compare and - // produces a non-empty chunk hash. The sentinel row stays clean — the test focuses on - // rowId=5 only. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')"); - scnSrc.commit(); - } - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')"); - scnTgt.commit(); - } + // Sentinel row on both sides so the chunk hash is non-empty. + upsertAtScnBoth(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + otherRowId + ", 'sentinel')"); - // Target only: plant raw DeleteColumn tombstones for row K with NO underlying Put cells. - // Under --raw-scan the row surfaces (tombstones are themselves cells) but every cell is a - // Delete, so tombstoneWholeRow() returns liveCellsTombstoned == 0 → drift.rowsCannotRepair - // increments and rowsExtraOnTarget stays 0. This pins the rare "row exists in raw view but - // has no live cells to tombstone" branch. + // Tombstones with no underlying Puts — row surfaces under raw scan but every cell is a Delete. byte[] rowKey = integerRowKey(rowId); writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs); writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE", tombstoneTs); writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs); - // Spin until wall-clock advances past the highest cell timestamp so --to-time - // (which defaults to currentTimeMillis()) covers our planted cells. - while (System.currentTimeMillis() <= tombstoneTs) { - // spin - } RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - // Row K's cells were already all tombstones — no live cells to tombstone again, and - // the row is flagged unrepairable. - assertRepairRowCounters(result.repairJob.getCounters(), 0, 0, 1); + assertRowDriftCounters(getSyncCounters(result.repairJob), 0, 0, 0, 1); + // Phoenix SELECT already sees both sides as identical (target's row 5 is tombstone-only and + // invisible). Divergence is at the raw-cell level only — not asserted via SELECT here. } @Test @@ -1286,8 +1268,12 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception Job job = runSyncTool(uniqueTableName, "--dry-run"); SyncCountersResult counters = getSyncCounters(job); - // Validate counters - all rows should be processed and all chunks mismatched + // Validate counters - all rows should be processed and all chunks mismatched. The 10 rows + // have identical Phoenix-visible values but distinct cell timestamps, so the checksum + // diverges per row (10 mismatched chunks) and the row-level diff classifies them as + // ROWS_DIFFERENT_ON_TARGET (same key on both sides, different cell payload). validateSyncCounters(counters, 10, 10, 0, 10); + assertRowDriftCounters(counters, 0, 0, 10, 0); // Verify checkpoint entries show mismatches List checkpointEntries = @@ -1299,7 +1285,7 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception } @Test - public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception { + public void testSyncTableWithConcurrentRegionMerges() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 100); // Explicitly split tables to create many regions for merging List sourceSplits = Arrays.asList(10, 15, 20, 25, 40, 45, 60, 65, 80, 85); @@ -1353,14 +1339,11 @@ public void testSyncTableValidateWithConcurrentRegionMerges() throws Exception { // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass // on the stable region layout to converge. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } @Test - public void testSyncTableValidateWithPagingTimeout() throws Exception { + public void testSyncTableWithPagingTimeout() throws Exception { // Create tables on both clusters setupStandardTestWithReplication(uniqueTableName, 1, 100); @@ -1419,7 +1402,7 @@ public void testSyncTableValidateWithPagingTimeout() throws Exception { } @Test - public void testSyncTableValidateWithPagingTimeoutWithSplits() throws Exception { + public void testSyncTableWithPagingTimeoutWithSplits() throws Exception { // Create tables on both clusters setupStandardTestWithReplication(uniqueTableName, 1, 100); @@ -1537,6 +1520,105 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception { uniqueTableName)); } + /** + * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the + * tool must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator + * can investigate the audit gap. Failure is injected via a RegionObserver on the + * checkpoint table that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}. + */ + @Test + public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { + setupStandardTestWithReplication(uniqueTableName, 1, 5); + introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4)); + + // Run once first so the checkpoint table exists; we can only attach a coprocessor to a + // table that's already been created. + Job initial = runSyncTool(uniqueTableName, "--dry-run"); + assertTrue(initial.isSuccessful()); + + String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME; + TestUtil.addCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class); + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + try { + // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail. + PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); + tool.setConf(sourceClusterConf()); + int exitCode = tool.run( + new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum, + "--run-foreground", "--chunk-size", "1", "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime) }); + + assertNotEquals("Tool must surface non-zero exit when checkpoint writes fail", 0, exitCode); + // Note: CHECKPOINT_WRITE_FAILED only increments on the chunk-outcome write path + // (writeChunkCheckpoint). Region-level and verified/mismatched-chunk writes propagate + // SQLException up and fail the mapper task instead — that still produces a non-zero + // exit, which is the operator-visible signal we pin here. We also can't assert on + // target convergence inside this block: when the very first checkpoint write throws, + // the mapper aborts before reaching all mismatched chunks, so target stays drifted. + } finally { + TestUtil.removeCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class); + } + + // Recovery: re-run repair (no --dry-run) with the SAME time window. The resume filter + // skips only VERIFIED/REPAIRED, so the mismatched/non-terminal rows from the failed run + // are re-processed and target converges — no checkpoint cleanup needed. + Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + assertTrue(recovery.isSuccessful()); + assertNoMismatchedCheckpoints(uniqueTableName, null); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + } + + /** + * Repair-batch flush failure: a RegionObserver on the target data table fails every + * {@code preBatchMutate}, so {@code flushRepairMutations} throws and the chunk rolls up + * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a + * fresh run converges. + */ + @Test + public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception { + setupStandardTestWithReplication(uniqueTableName, 1, 5); + introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4)); + + TestUtil.addCoprocessor(targetConnection, uniqueTableName, RepairBatchFailingObserver.class); + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + try { + // dryRun + repair: dry-run only reads from target, so it succeeds; repair flushes + // mutations and trips the observer. + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + assertTrue("Dry-run should still succeed", result.dryRunJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + assertTrue("CHUNKS_REPAIR_FAILED must increment when target writes fail", + c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue() >= 1); + assertTrue("MAPPERS_REPAIR_FAILED must roll up when any chunk fails", + c.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue() >= 1); + + List entries = + queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + assertTrue("At least one REPAIR_FAILED checkpoint row must persist", + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) >= 1); + } finally { + TestUtil.removeCoprocessor(targetConnection, uniqueTableName, + RepairBatchFailingObserver.class); + } + + // Repair was blocked end-to-end — source/target must still diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + + // Recovery: re-run repair with the SAME time window — the resume filter skips only + // VERIFIED/REPAIRED, so REPAIR_FAILED chunks are re-processed and target converges. + Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); + assertTrue(recovery.isSuccessful()); + assertNoMismatchedCheckpoints(uniqueTableName, null); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + } + @Test public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { // Setup table with replication and insert data @@ -1599,9 +1681,6 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { } /** - * P1 (hidden-version unwinding): Verifies the most subtle correctness path in the repairer — - * tombstoneTargetCell case 3 from {@code PhoenixSyncTableChunkRepairer.tombstoneTargetCell}. - * *

    Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is @@ -1614,7 +1693,6 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { @Test public void testRepairUnwindsHiddenTargetVersions() throws Exception { final int rowId = 5; - // Two clusters, no replication — we plant cells deterministically on each side. long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7"); long fromTime = 0L; @@ -1622,43 +1700,20 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { final long targetT1 = base + 2L; final long targetT2 = base + 3L; - byte[] rowKey = integerRowKey(rowId); - String family = "0"; // COLUMN_ENCODED_BYTES=NONE → family is "0" - String qualifier = "NAME"; - - // Source: single Put(NAME, "alice", T=100). Empty-key cell is needed so the row is - // visible to Phoenix scans (and thus to the verifier). Use an SCN connection for that. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } + // Source: single NAME Put. + upsertAtScnSource(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - // Target: insert via SCN at T1 then again at T2 to leave two NAME versions; with VERSIONS=2 - // both versions are retained. Visible read is "carol" (T2); "bob" (T1) is one-version-hidden. - try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) { - scnTgtT1.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); - scnTgtT1.commit(); - } - try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) { - scnTgtT2.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); - scnTgtT2.commit(); - } + // Target: two NAME versions retained under VERSIONS=2 — "carol"@T2 visible, "bob"@T1 hidden. + upsertAtScnTarget(targetT1, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); + upsertAtScnTarget(targetT2, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); // Sanity: target's visible NAME is "carol" before repair. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertEquals("Pre-repair target visible NAME should be carol", "carol", rs.getString(1)); - } - } + assertTargetName(uniqueTableName, rowId, "carol"); - // Run dry-run + repair sharing the same time window with --read-all-versions so the - // verifier and repairer both see the hidden version. + // --read-all-versions so verifier and repairer both see the hidden version. RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), "--read-all-versions"); @@ -1667,63 +1722,33 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { assertTrue("Repair should succeed", result.repairJob.isSuccessful()); Counters repairCounters = result.repairJob.getCounters(); - // Two NAME versions ("bob"@T1 + "carol"@T2) and two empty-key versions on target — all sit - // at timestamps that don't match source's single sourceTs, so each gets counted as - // either "different" or "extra" depending on the diff branch. We require at least 2 extras - // (the two extra NAME versions vs source's single mirror) — the exact split between - // CELLS_EXTRA and CELLS_DIFFERENT depends on per-qualifier matching. Post-repair raw scan - // assertions below pin the structural outcome. assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions", repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2); - // Post-repair, Phoenix's standard read on target must see source's "alice", NOT "bob". - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertEquals("Post-repair target NAME must be alice (hidden version unwound)", "alice", - rs.getString(1)); - } + assertTargetName(uniqueTableName, rowId, "alice"); + + // Raw scan: two Delete markers (one per target NAME version) plus source's mirror Put. + // Target's two original Puts (at targetT1 and targetT2) remain visible to a raw scan even + // after repair's point-Deletes shadow them, so summary.newestPutTs == targetT2 — assert + // instead that source's mirror Put landed at exactly sourceTs. + RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); + assertEquals("Two delete markers (one for each target NAME version) expected", 2, + summary.totalDeletes()); + int namePutAtSourceTs = 0; + for (long ts : summary.putTimestamps) { + if (ts == sourceTs) namePutAtSourceTs++; } + assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); - // Raw scan: target should have two Delete markers at T2 and T1 plus source's mirror Put@T0. - try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { - Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true); - scan.readAllVersions(); - int nameDeletes = 0; - int namePutAtSourceTs = 0; - try (ResultScanner sc = targetHTable.getScanner(scan)) { - for (Result r; (r = sc.next()) != null;) { - for (Cell c : r.rawCells()) { - if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family)) - && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) { - if (CellUtil.isDelete(c)) { - nameDeletes++; - } else if (c.getTimestamp() == sourceTs) { - namePutAtSourceTs++; - } - } - } - } - } - assertEquals("Two delete markers (one for each target NAME version) expected", 2, - nameDeletes); - assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); - } + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + assertNoMismatchedCheckpoints(uniqueTableName, null); } /** - * P2 (partial-mirror shadow): Verifies the {@code RowMirrorStatus.PARTIALLY_MIRRORED} branch - * indirectly via {@code generateMutationForDiffCells} — both rows exist; one source cell is - * shadowed by a target tombstone, sibling cells mirror successfully. {@code anyCellUnrepairable} - * propagates up to {@code drift.rowsCannotRepair++} while no cell counter increments for the - * shadowed cell (mirror returned false, so nothing was written and nothing counted). - * - *

    Setup: row K exists on both sides via a matching {@code NAME_VALUE} cell. Target has a - * pre-existing {@code DeleteColumn(NAME, T=300)} shadowing any future {@code NAME} Put at - * {@code ts <= 300}. Source's {@code NAME, "alice", T=200} would land on disk but stay - * invisible — repair detects this upfront via {@code wouldShadow} and skips the doomed write. + * Partial-row shadow: row exists on both sides via a matching NAME_VALUE; target has a + * DeleteColumn on NAME above source's NAME Put. Repair must detect the shadow up front via + * {@code wouldShadow} and skip the doomed mirror — row rolls up unrepairable, no cell counter + * ticks for the suppressed mirror. */ @Test public void testRepairPartialShadowWithinRow() throws Exception { @@ -1734,61 +1759,115 @@ public void testRepairPartialShadowWithinRow() throws Exception { final long sourceTs = base + 1L; final long shadowTombstoneTs = base + 2L; - // Source: row K with NAME="alice" and NAME_VALUE=99, all at sourceTs. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName - + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); - scnSrc.commit(); - } + // Source: row K with NAME and NAME_VALUE at sourceTs. + upsertAtScnSource(sourceTs, "UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); - // Target: row K with only NAME_VALUE=99 at sourceTs (matches source's NAME_VALUE). - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), sourceTs)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnTgt.commit(); - } - // Plant a DeleteColumn tombstone on target's NAME at shadowTombstoneTs, which shadows any - // source mirror at ts <= shadowTombstoneTs. + // Target: only NAME_VALUE matching source; NAME is shadowed by the DeleteColumn below. + upsertAtScnTarget(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", shadowTombstoneTs); - // Spin until wall-clock advances past the highest cell timestamp so --to-time - // (which defaults to currentTimeMillis()) covers our planted cells. - while (System.currentTimeMillis() <= shadowTombstoneTs) { - // spin - } RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), - "--raw-scan"); + String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); - assertTrue("Repair pass should succeed (shadowing is correctness-only, not a job error)", - result.repairJob.isSuccessful()); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters repairCounters = result.repairJob.getCounters(); - // Source's NAME mirror is suppressed by the shadow → no cell counter ticks; row is unrepairable. + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertRepairCellCounters(repairCounters, 0, 0, 0, 1); assertTrue("At least one mapper should roll up to UNREPAIRABLE", - repairCounters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + repairCounters.mappersUnrepairable >= 1); - // Post-repair, target's read view of NAME for row K is still null (DeleteColumn at T=300 - // covers everything <= T=300 — including any source mirror we *might* have written). The - // assertion validates the repair refused to write the doomed Put. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + assertTargetNameNull(uniqueTableName, rowId); + + // Source NAME='alice' but target NAME=null — Phoenix SELECT must still diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + } + + /** + * A chunk that lands in UNREPAIRABLE due to a shadowing target + * tombstone must recover after the operator runs a major compaction on target and re-runs + * sync. Pass 1 reproduces the partial-shadow setup and asserts UNREPAIRABLE; major compact + * on target reaps the standalone DeleteColumn; pass 2 mirrors the source Put cleanly and + * converges. + */ + @Test + public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + + long fromTime = 0L; + final long sourceTs = base + 1L; + final long shadowTombstoneTs = base + 2L; + + // Source: row K with NAME and NAME_VALUE at sourceTs. + upsertAtScnSource(sourceTs, "UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + // Target: only NAME_VALUE matching source; NAME is shadowed by the DeleteColumn below. + upsertAtScnTarget(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", + shadowTombstoneTs); + + // Pass 1: shadow makes the chunk UNREPAIRABLE. + long pass1ToTime = waitUntilWallClockPasses(shadowTombstoneTs); + RepairRunResult pass1 = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(pass1ToTime), "--raw-scan"); + + assertTrue(pass1.dryRunJob.isSuccessful()); + assertTrue(pass1.repairJob.isSuccessful()); + + SyncCountersResult pass1Counters = getSyncCounters(pass1.repairJob); + assertRepairCellCounters(pass1Counters, 0, 0, 0, 1); + assertTrue(pass1Counters.mappersUnrepairable >= 1); + assertTrue(countCheckpointsByStatus( + queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null), + PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE) >= 1); + + // Target must not be corrupted: NAME stays null while the shadow exists. + assertTargetNameNull(uniqueTableName, rowId); + // Pass 1 left the row in an unrepairable state — Phoenix SELECT must still diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + + // Operator recovery: reap the tombstone on target, wipe checkpoints so the resume filter + // re-enters the chunk, then re-run. + flushAndMajorCompact(CLUSTERS.getHBaseCluster2(), uniqueTableName); + cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); + + // Pass 2: tombstone is gone; mirror succeeds. + long pass2ToTime = waitUntilWallClockPasses(pass1ToTime); + RepairRunResult pass2 = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(pass2ToTime), "--raw-scan"); + + assertTrue(pass2.dryRunJob.isSuccessful()); + assertTrue(pass2.repairJob.isSuccessful()); + + SyncCountersResult pass2Counters = getSyncCounters(pass2.repairJob); + assertRepairCellCounters(pass2Counters, 1, 0, 0, 0); + assertTrue(pass2Counters.mappersRepaired >= 1); + assertNoMismatchedCheckpoints(uniqueTableName, null); + assertTrue(countCheckpointsByStatus( + queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null), + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1); + + try (PreparedStatement ps = targetConnection.prepareStatement( + "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { ps.setInt(1, rowId); try (ResultSet rs = ps.executeQuery()) { assertTrue(rs.next()); - assertNull("NAME should still be null on target — shadow was respected", - rs.getString(1)); + assertEquals("alice", rs.getString(1)); + assertEquals(99L, rs.getLong(2)); } } + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } /** - * P2 (cell-missing branch): same row exists on both sides, source has an extra column the - * target lacks. {@code generateMutationForDiffCells} should mirror it through the - * {@code cellMissing++} branch (source-only cell, no shadow on target). + * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the + * source cell through the {@code cellMissing++} branch. */ @Test public void testRepairCellMissingOnTarget() throws Exception { @@ -1797,46 +1876,30 @@ public void testRepairCellMissingOnTarget() throws Exception { final long ts = base + 1L; - // Source: row K with NAME and NAME_VALUE. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { - scnSrc.createStatement().execute("UPSERT INTO " + uniqueTableName - + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); - scnSrc.commit(); - } - // Target: row K with only NAME_VALUE matching source. NAME is absent. - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnTgt.commit(); - } + // Source: NAME and NAME_VALUE. + upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + // Target: only NAME_VALUE — NAME is missing. + upsertAtScnTarget(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) - // covers the planted cells. - while (System.currentTimeMillis() <= ts) { - // spin - } RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(System.currentTimeMillis())); + "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - assertRepairCellCounters(result.repairJob.getCounters(), 1, 0, 0, 0); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + assertRepairCellCounters(repairCounters, 1, 0, 0, 0); - // Post-repair: target's NAME should equal source's "alice". - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertEquals("alice", rs.getString(1)); - } - } + assertTargetName(uniqueTableName, rowId, "alice"); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** - * P2 (cell-extra branch): same row exists on both sides, target has an extra column source - * lacks. {@code generateMutationForDiffCells} should tombstone it via the - * {@code cellExtra++} branch ({@code tombstoneTargetCell} with {@code sourceMaxTs == null} - * → {@code Delete.addColumns}). + * Cell extra on target: target has a column source lacks. Repair tombstones it via the + * {@code cellExtra++} branch. */ @Test public void testRepairCellExtraOnTarget() throws Exception { @@ -1845,47 +1908,75 @@ public void testRepairCellExtraOnTarget() throws Exception { final long ts = base + 1L; - // Source: row K with only NAME_VALUE. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnSrc.commit(); - } - // Target: row K with same NAME_VALUE plus an extra raw NAME cell at the same ts. - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnTgt.commit(); - } + // Source: only NAME_VALUE. + upsertAtScnSource(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + // Target: matching NAME_VALUE plus an extra raw NAME cell that source doesn't have. + upsertAtScnTarget(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts, Bytes.toBytes("bob")); - // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) - // covers the planted cells. - while (System.currentTimeMillis() <= ts) { - // spin - } RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(System.currentTimeMillis())); + "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - assertRepairCellCounters(result.repairJob.getCounters(), 0, 1, 0, 0); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + assertRepairCellCounters(repairCounters, 0, 1, 0, 0); - // Post-repair: target's NAME should be tombstoned and read as null. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertNull(rs.getString(1)); - } - } + assertTargetNameNull(uniqueTableName, rowId); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); + } + + /** + * {@code tombstoneTargetCell} Case 2 ({@code sourceMaxTs >= ts}): target has a stray older + * version of a column that source also has at a higher ts. Repair must point-Delete the + * stray version only — no hidden-version sweep — so the visible NAME stays at source's value. + */ + @Test + public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7"); + + final long olderTs = base + 1L; + final long ts = base + 2L; + + // Source and target: NAME='alice' at ts. + upsertAtScnBoth(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + // Target also gets a stray older raw NAME@olderTs that source doesn't have. + writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", olderTs, + Bytes.toBytes("old")); + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan", + "--read-all-versions"); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + // Only the stray older NAME cell counts as extra; sourceMaxTs >= ts ⇒ single point-Delete. + assertRepairCellCounters(repairCounters, 0, 1, 0, 0); + + assertTargetName(uniqueTableName, rowId, "alice"); + + // Raw scan: exactly one Delete marker at olderTs, no Delete at ts. + RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); + assertEquals("Exactly one tombstone for the stray older NAME version", 1, + summary.totalDeletes()); + assertEquals("Tombstone must target stray version only", olderTs, + (long) summary.deleteTimestamps.get(0)); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** - * P2 (cell-different branch): same row, same {@code (cf, q, ts)} coords, different value. - * {@code generateMutationForDiffCells} should hit the {@code cellDifferent++} branch via the - * {@code !matchingValue} check at the head of the loop. + * Cell different on target: same {@code (cf, q, ts)}, different value. Repair hits the + * {@code cellDifferent++} branch. */ @Test public void testRepairCellDifferentValue() throws Exception { @@ -1894,48 +1985,94 @@ public void testRepairCellDifferentValue() throws Exception { final long ts = base + 1L; - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); - scnTgt.commit(); - } + // Source: NAME='alice' at ts. + upsertAtScnSource(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + // Target: NAME='bob' at the same ts — same (cf, q, ts), different value. + upsertAtScnTarget(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); - // Spin until wall-clock advances past ts so --to-time (defaulting to currentTimeMillis()) - // covers the planted cells. - while (System.currentTimeMillis() <= ts) { - // spin - } RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(System.currentTimeMillis())); + "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - assertRepairCellCounters(result.repairJob.getCounters(), 0, 0, 1, 0); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + assertRepairCellCounters(repairCounters, 0, 0, 1, 0); - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + assertTargetName(uniqueTableName, rowId, "alice"); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); + } + + /** + * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target) + * and {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to + * the correct family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep + * the two families' cells from clobbering each other. + */ + @Test + public void testRepairAcrossMultipleColumnFamilies() throws Exception { + final int rowId = 5; + long base = createMultiColumnFamilyTableOnBothClusters(uniqueTableName); + + final long ts = base + 1L; + + // Source: A in CF1 set, B in CF2 unset. + upsertAtScnSource(ts, + "UPSERT INTO " + uniqueTableName + " (ID, CF1.A) VALUES (" + rowId + ", 'a-src')"); + // Target: A missing in CF1, B set in CF2 (extra) — drift in both families at once. + upsertAtScnTarget(ts, + "UPSERT INTO " + uniqueTableName + " (ID, CF2.B) VALUES (" + rowId + ", 'b-tgt')"); + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // CF1.A missing → +1, CF2.B extra → +1; nothing different / unrepairable. + assertTrue("CF1.A must mirror as missing", + c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1); + assertTrue("CF2.B must tombstone as extra", + c.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 1); + assertEquals("No row should be unrepairable", 0, + c.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + + try (PreparedStatement ps = targetConnection.prepareStatement( + "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) { ps.setInt(1, rowId); try (ResultSet rs = ps.executeQuery()) { assertTrue(rs.next()); - assertEquals("alice", rs.getString(1)); + assertEquals("CF1.A mirrored from source", "a-src", rs.getString(1)); + assertNull("CF2.B tombstoned to match source", rs.getString(2)); } } + + // Multi-CF schema doesn't match verifyDataIdentical's standard query — compare inline. + assertEquals("Source/target rows must match across both column families", + collectMultiCfRows(sourceConnection, uniqueTableName), + collectMultiCfRows(targetConnection, uniqueTableName)); + assertNoMismatchedCheckpoints(uniqueTableName, null); + } + + private List collectMultiCfRows(Connection conn, String tableName) throws SQLException { + List rows = new ArrayList<>(); + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) { + while (rs.next()) { + rows.add(rs.getInt(1) + "|" + rs.getString(2) + "|" + rs.getString(3)); + } + } + return rows; } /** - * P6 (asymmetric load-target time-range): Source has Put at T=200 inside the user's window; - * target has a {@code DeleteColumn} planted at T=600 — strictly above {@code --to-time T=500}. - * The repair scan honors {@code --to-time} and never sees the tombstone in the diff window, so - * the diff routes to {@code mirrorWholeRow} in - * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}. Inside, - * {@code TargetRowRecord.load} deliberately uses {@code (fromTime, MAX_VALUE)} — so it - * still sees the T=600 tombstone and {@code wouldShadow} returns true on Source's Put@T=200 - * (DeleteColumn covers ts <= T=600). Result: source's mirror is suppressed, the row is - * flagged unrepairable. + * Tombstone planted strictly above {@code --to-time}: the diff scan can't see it, but + * {@code TargetRowRecord.load} (which uses {@code [fromTime, MAX_VALUE]}) does — so + * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors, + * giving the row visible existence; NAME stays null. */ @Test public void testRepairShadowFromTombstoneAboveToTime() throws Exception { @@ -1947,60 +2084,35 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { final long toTime = base + 2L; final long tombstoneTs = base + 3L; - // Source has the row inside the diff window. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } - // Target has a tombstone strictly above --to-time. Diff scan won't see it; - // TargetRowRecord.load still will. + // Source: NAME='alice' at sourceTs (within --to-time window). + upsertAtScnSource(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + // Target: DeleteColumn on NAME at tombstoneTs > --to-time — invisible to the diff scan. writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", tombstoneTs); - // Phoenix requires --to-time <= currentTimeMillis() at tool-run. Spin until wall-clock - // moves past tombstoneTs (the highest cell ts in this test) so toTime is unambiguously in - // the past. - while (System.currentTimeMillis() <= tombstoneTs) { - // spin - } + waitUntilWallClockPasses(tombstoneTs); RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); - assertTrue("Repair should succeed (shadowing is correctness-only)", - result.repairJob.isSuccessful()); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); - // Phoenix UPSERT plants NAME *and* the empty-key cell ("_0"). DeleteColumn shadows only NAME — - // "_0" mirrors through (rowsMissing++), and the row is unrepairable because NAME was suppressed. - assertRepairRowCounters(c, 1, 0, 1); - assertTrue("At least one mapper should roll up to UNREPAIRABLE", - c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + SyncCountersResult c = getSyncCounters(result.repairJob); + // The empty-key cell mirrors (rowsMissing++); NAME is shadow-suppressed (rowsCannotRepair++). + assertRowDriftCounters(c, 1, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); - // Post-repair: target's NAME should still read as null. The shadow was respected, so no Put - // for NAME landed (only the empty-key cell, which gives the row visible existence with NAME - // covered by the DeleteColumn tombstone above it). - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue("Row exists on target via mirrored empty-key cell", rs.next()); - assertNull("NAME should still be null — DeleteColumn shadow respected", rs.getString(1)); - } - } + assertTargetNameNull(uniqueTableName, rowId); + + // NAME shadow leaves target NAME=null while source has 'alice' — Phoenix SELECT diverges. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); } /** - * Shadow detection via {@code DeleteFamily}: target has a {@code DeleteFamily} tombstone on - * cf {@code "0"} covering every qualifier in the family at {@code ts <= tombstoneTs}. Source - * has every cell of the row inside the diff window at {@code ts < tombstoneTs}, so each - * source cell would be shadowed if mirrored. Drives the - * {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow} — uncovered - * by other shadow ITs which only exercise {@code DeleteColumn}. - * - *

    To force the {@code cmp < 0} (whole-row mirror) path, the tombstone is planted strictly - * above {@code --to-time} so the diff scan does not see target's row at all, but - * {@code TargetRowRecord.load} (range {@code [fromTime, MAX_VALUE]}) still surfaces it. + * Shadow via {@code DeleteFamily}: tombstone covers every qualifier in cf {@code "0"} at + * {@code ts <= tombstoneTs}, planted strictly above {@code --to-time} so only + * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls + * up unrepairable. */ @Test public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { @@ -2012,58 +2124,34 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { final long toTime = base + 2L; final long familyTombstoneTs = base + 3L; - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } - // DeleteFamily on cf "0" — covers every qualifier (NAME, NAME_VALUE, _0, ...) at - // ts <= familyTombstoneTs. Planted strictly above --to-time so the diff scan can't see it. + // Source: NAME='alice' at sourceTs. + upsertAtScnSource(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + // Target: DeleteFamily on cf "0" above --to-time — covers every source qualifier on load. writeRawDeleteFamily(targetConnection, uniqueTableName, integerRowKey(rowId), "0", familyTombstoneTs); - while (System.currentTimeMillis() <= familyTombstoneTs) { - // spin - } + waitUntilWallClockPasses(familyTombstoneTs); RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); - assertTrue("Repair should succeed (shadowing is correctness-only)", - result.repairJob.isSuccessful()); - - Counters c = result.repairJob.getCounters(); - // Both source cells (NAME and _0) live at sourceTs in cf "0"; DeleteFamily covers the whole - // family at ts <= familyTombstoneTs (sourceTs < familyTombstoneTs), so every mirror is - // suppressed → mirrorWholeRow returns FULLY_SHADOWED → rowsMissing stays 0, - // rowsCannotRepair++. - assertRepairRowCounters(c, 0, 0, 1); - assertTrue("At least one mapper should roll up to UNREPAIRABLE", - c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); - - // Post-repair: target should still have no visible row — every source cell was suppressed. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertFalse("Row should not be visible on target — DeleteFamily covered every source cell", - rs.next()); - } - } + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + SyncCountersResult c = getSyncCounters(result.repairJob); + assertRowDriftCounters(c, 0, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); + + assertTargetRowAbsent(uniqueTableName, rowId); + + // Source has the row, target's family is shadowed away — Phoenix SELECT must diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); } /** - * Shadow detection via {@code DeleteFamilyVersion}: target has a {@code DeleteFamilyVersion} - * tombstone on cf {@code "0"} at exactly {@code sourceTs}. Source's cells at the same ts get - * shadowed because {@code DeleteFamilyVersion} matches every qualifier in the family at the - * exact ts. Drives the {@code TargetRowRecord.deleteFamilyVersionTs} branch in - * {@code wouldShadow} — also uncovered prior to this test. - * - *

    {@code DeleteFamilyVersion} requires ts equality (not inequality) so the - * tombstone-above-{@code toTime} trick used in the {@code DeleteColumn}/{@code DeleteFamily} - * shadow tests doesn't apply here. Instead we omit {@code --raw-scan}: target has no live - * cells (just the tombstone), so without raw mode the diff scan sees target as empty, - * routing to the {@code cmp < 0} mirrorWholeRow path; {@code TargetRowRecord.load} runs - * raw internally and surfaces the tombstone for shadow detection. + * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"} + * at exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as + * empty (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces + * the tombstone for the {@code wouldShadow} check. */ @Test public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { @@ -2073,48 +2161,33 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { final long fromTime = 0L; final long sourceTs = base + 1L; - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } - // DeleteFamilyVersion on cf "0" at exactly sourceTs — covers every qualifier in the family - // at ts == sourceTs. Source's NAME and _0 cells, both Put at sourceTs, are shadow targets. + // Source: NAME='alice' at sourceTs. + upsertAtScnSource(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); + // Target: DeleteFamilyVersion at exactly sourceTs — shadows every source cell at that ts. writeRawDeleteFamilyVersion(targetConnection, uniqueTableName, integerRowKey(rowId), "0", sourceTs); - while (System.currentTimeMillis() <= sourceTs) { - // spin - } - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis())); - assertTrue("Repair should succeed (shadowing is correctness-only)", - result.repairJob.isSuccessful()); + String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(sourceTs))); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); - assertRepairRowCounters(c, 0, 0, 1); - assertTrue("At least one mapper should roll up to UNREPAIRABLE", - c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + SyncCountersResult c = getSyncCounters(result.repairJob); + assertRowDriftCounters(c, 0, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertFalse("Row should not be visible on target — DeleteFamilyVersion shadowed every " - + "source cell at sourceTs", rs.next()); - } - } + assertTargetRowAbsent(uniqueTableName, rowId); + + // Source has 'alice', target's row is fully shadowed — Phoenix SELECT must diverge. + verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); } /** - * Multi-hidden-version unwinding: extends {@link #testRepairUnwindsHiddenTargetVersions} - * with TWO max-versions-hidden Puts beneath target's visible Put. Pins - * {@code targetPutTimestampsBetween} (in {@link - * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord}) - * end-to-end: when {@code sourceMaxTs < target visible ts}, the repairer must point-Delete - * the visible ts AND every hidden Put in {@code (sourceMaxTs, target visible ts)} — - * otherwise after we shadow the visible cell, a hidden Put surfaces above source's mirror. + * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible) + * above source's single Put. The repairer must point-Delete the visible Put AND every hidden + * Put in {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a + * hidden Put above source's mirror. */ @Test public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { @@ -2127,114 +2200,54 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { final long targetT2 = base + 3L; final long targetT3 = base + 4L; - byte[] rowKey = integerRowKey(rowId); - String family = "0"; - String qualifier = "NAME"; - - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), sourceTs)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - scnSrc.commit(); - } - - // Three target NAME versions, all retained under VERSIONS=3: - // T1 "bob" (hidden), T2 "carol" (hidden), T3 "dave" (visible). - try (Connection scnTgtT1 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT1)) { - scnTgtT1.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); - scnTgtT1.commit(); - } - try (Connection scnTgtT2 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT2)) { - scnTgtT2.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); - scnTgtT2.commit(); - } - try (Connection scnTgtT3 = openConnectionAtScn(CLUSTERS.getZkUrl2(), targetT3)) { - scnTgtT3.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')"); - scnTgtT3.commit(); - } + // Source: single NAME Put. + upsertAtScnSource(sourceTs, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'alice')"); - // Sanity: pre-repair target visible NAME is "dave" (newest of the three). - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertEquals("Pre-repair target visible NAME should be dave", "dave", rs.getString(1)); - } - } + // Target: three NAME versions retained under VERSIONS=3 — "bob"@T1 (hidden), "carol"@T2 + // (hidden), "dave"@T3 (visible). + upsertAtScnTarget(targetT1, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); + upsertAtScnTarget(targetT2, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'carol')"); + upsertAtScnTarget(targetT3, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'dave')"); - while (System.currentTimeMillis() <= targetT3) { - // spin - } + assertTargetName(uniqueTableName, rowId, "dave"); RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), + String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(targetT3)), "--read-all-versions"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters repairCounters = result.repairJob.getCounters(); - // Source has one NAME Put@sourceTs; target has three NAME Puts at T1, T2, T3 (all > - // sourceTs). Each target NAME cell drives the cellExtra branch (sourceMaxTs < ts) and the - // hidden-version unwinding logic point-Deletes every Put in (sourceTs, ts). + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertTrue("At least 3 cells should be tombstoned across target's three NAME versions", - repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 3); - - // Post-repair: standard read on target must see source's "alice" — every hidden version - // ("bob"@T1, "carol"@T2) was unwound along with the visible "dave"@T3. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - assertTrue(rs.next()); - assertEquals("Post-repair target NAME must be alice (all hidden versions unwound)", - "alice", rs.getString(1)); - } + repairCounters.cellsExtraOnTarget >= 3); + + assertTargetName(uniqueTableName, rowId, "alice"); + + // Lower bound on delete markers: the unwind iterates over shrinking intervals so T1 may + // appear in both T3's and T2's hidden sets — distinct-marker count can exceed 3. + RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); + assertTrue("Expected at least 3 NAME delete markers on target, saw " + + summary.totalDeletes(), summary.totalDeletes() >= 3); + int namePutAtSourceTs = 0; + for (Long ts : summary.putTimestamps) { + if (ts == sourceTs) namePutAtSourceTs++; } + assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); - // Raw scan: target should have a Delete marker covering each of T1/T2/T3 plus source's - // mirror. The exact tombstone cell count may include duplicates from the unwind logic - // (each iteration's hidden-set spans a shrinking interval, so T1 appears in T3's hidden - // set, then again in T2's), so assert a lower bound on distinct delete-bearing scan cells. - try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { - Scan scan = new Scan().withStartRow(rowKey, true).withStopRow(rowKey, true).setRaw(true); - scan.readAllVersions(); - int nameDeletes = 0; - int namePutAtSourceTs = 0; - try (ResultScanner sc = targetHTable.getScanner(scan)) { - for (Result r; (r = sc.next()) != null;) { - for (Cell c : r.rawCells()) { - if (Bytes.equals(CellUtil.cloneFamily(c), Bytes.toBytes(family)) - && Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes(qualifier))) { - if (CellUtil.isDelete(c)) { - nameDeletes++; - } else if (c.getTimestamp() == sourceTs) { - namePutAtSourceTs++; - } - } - } - } - } - assertTrue("Expected at least 3 NAME delete markers on target, saw " + nameDeletes, - nameDeletes >= 3); - assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); - } + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** - * cmp==0 row carrying a target-only tombstone cell at a coord source lacks: drives the - * {@code tombstoneTargetCell} return-{@code false} branch ({@link - * org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer}, around line 420) - * inside {@code generateMutationForDiffCells}'s {@code cellExtra} branch — the target cell - * is itself a tombstone, so no new tombstone is emitted, but {@code anyCellUnrepairable} is - * set. The row contributes to {@code ROWS_CANNOT_REPAIR} without bumping any cell counter. - * - *

    Setup: source and target share a matching {@code NAME_VALUE} cell so the row exists on - * both. Target also has a raw point-{@link Delete} on {@code NAME} at a coord source lacks; - * with {@code --raw-scan} the diff scan surfaces that tombstone cell, taking the - * {@code cmp > 0} branch on it. + * Same row on both sides via a matching NAME_VALUE; target also carries a raw point-Delete on + * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra + * cell, but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up + * unrepairable with no cell counter ticks. */ @Test public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { @@ -2244,52 +2257,34 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { final long ts = base + 1L; final long tombstoneTs = base + 2L; - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), ts)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnSrc.commit(); - } - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), ts)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - scnTgt.commit(); - } - // Plant a raw point-Delete on NAME at tombstoneTs — a coord source has no cell at, but the - // target cell is itself a tombstone, so tombstoneTargetCell returns false in repair-mode. + // Source: NAME_VALUE only. + upsertAtScnSource(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); + // Target: matching NAME_VALUE plus a raw point-Delete on NAME — surfaces under --raw-scan. + upsertAtScnTarget(ts, + "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", tombstoneTs); - while (System.currentTimeMillis() <= tombstoneTs) { - // spin - } - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(System.currentTimeMillis()), "--raw-scan"); + "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); - // No mirror, no tombstone emitted — but anyCellUnrepairable was set, so the row rolls up - // as unrepairable. All cell counters stay 0. + SyncCountersResult c = getSyncCounters(result.repairJob); assertRepairCellCounters(c, 0, 0, 0, 1); - assertRepairRowCounters(c, 0, 0, 1); - assertTrue("At least one mapper should roll up to UNREPAIRABLE", - c.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue() >= 1); + assertRowDriftCounters(c, 0, 0, 0, 1); + assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); } /** - * P7 (mid-row repair-batch flush boundary): drives many missing-row mirrors through a tiny - * {@code repairBatchSize=2} so {@code generateMutationForDiffRows} flushes mid-stream multiple - * times. Validates that every row converges despite the mid-flush boundary — i.e., no Put - * gets dropped because pendingPuts/pendingDeletes were drained mid-iteration. + * Mid-row repair flush: 8 missing source-only rows through {@code repairBatchSize=2} so + * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a + * batch boundary. */ @Test public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { - // No replication — seed source manually so target legitimately lacks the rows. createRepairTestTableOnBothClusters(uniqueTableName, 1, null); - // Introduce extra rows on source that target lacks. Each row → at least 2 cells (NAME and the - // empty-key cell), so a batch size of 2 forces a flush every row, exercising the mid-stream - // flush in generateMutationForDiffRows. int[] sourceOnlyIds = new int[] { 100, 101, 102, 103, 104, 105, 106, 107 }; String[] sourceOnlyNames = new String[sourceOnlyIds.length]; for (int i = 0; i < sourceOnlyIds.length; i++) { @@ -2303,47 +2298,34 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { Configuration conf = sourceClusterConfWithRepairBatchSize(2); - // Stage 1: dry-run. Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("Dry-run should succeed", dryRunJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); assertTrue("Dry-run should detect mismatched chunks", dryRunCounters.chunksMismatched >= 1); - // Stage 2: repair with the same small batch size. Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, conf, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("Repair should succeed despite small batch size", repairJob.isSuccessful()); - Counters repairCounters = repairJob.getCounters(); + SyncCountersResult repairCounters = getSyncCounters(repairJob); assertTrue("All source-only rows should be marked missing on target", - repairCounters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() - >= sourceOnlyIds.length); - assertEquals("No row should be flagged unrepairable", 0, - repairCounters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + repairCounters.rowsMissingOnTarget >= sourceOnlyIds.length); + assertEquals("No row should be flagged unrepairable", 0, repairCounters.rowsCannotRepair); - // Verify each source-only row landed on target with the right NAME. for (int i = 0; i < sourceOnlyIds.length; i++) { - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, sourceOnlyIds[i]); - try (ResultSet rs = ps.executeQuery()) { - assertTrue("Row " + sourceOnlyIds[i] + " should exist on target after repair", - rs.next()); - assertEquals("Row " + sourceOnlyIds[i] + " NAME should match source", - sourceOnlyNames[i], rs.getString(1)); - } - } + assertTargetName(uniqueTableName, sourceOnlyIds[i], sourceOnlyNames[i]); } + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** - * P8 ({@code --raw-scan} + {@code --read-all-versions} interplay): a multi-version row on - * source that includes an in-window {@code DeleteColumn} between two Puts. Target lags with - * only the older Put. Repair must mirror the missing tombstone (preserving its subtype via - * {@code mirrorSourceCell} in - * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} routing Delete cells - * through {@code Delete#add}) and the missing newer Put. + * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2, + * Put@T3; target has only Put@T1. Repair must mirror the missing tombstone (via + * {@code mirrorSourceCell} routing Delete cells through {@code Delete#add}) and the missing + * newer Put. */ @Test public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception { @@ -2356,95 +2338,191 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio final long t3 = base + 3L; // Source: Put@T1 → DeleteColumn@T2 → Put@T3. - try (Connection scnSrc = openConnectionAtScn(CLUSTERS.getZkUrl1(), t1)) { - scnSrc.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); - scnSrc.commit(); - } + upsertAtScnSource(t1, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); writeRawDeleteColumn(sourceConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", t2); - try (Connection scnSrc2 = openConnectionAtScn(CLUSTERS.getZkUrl1(), t3)) { - scnSrc2.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')"); - scnSrc2.commit(); - } + upsertAtScnSource(t3, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v3')"); // Target: only the oldest Put@T1. - try (Connection scnTgt = openConnectionAtScn(CLUSTERS.getZkUrl2(), t1)) { - scnTgt.createStatement().execute( - "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); - scnTgt.commit(); - } + upsertAtScnTarget(t1, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), "--raw-scan", "--read-all-versions"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); - // Source has NAME@T1 Put, NAME@T2 DeleteColumn, NAME@T3 Put + empty-key@T1 and @T3. Target has - // only NAME@T1 + empty-key@T1, so 3 cells missing: NAME-tombstone@T2, NAME-Put@T3, empty-key@T3. + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult c = getSyncCounters(result.repairJob); + // 3 missing: NAME tombstone@T2, NAME Put@T3, empty-key@T3. assertRepairCellCounters(c, 3, 0, 0, 0); - // Post-repair raw scan on target should show: Put@T3, DeleteColumn@T2, Put@T1 for the NAME - // qualifier (rawCells reverse-ts ordered). - int observedPuts = 0; - int observedDeleteColumns = 0; - long observedNewestPutTs = -1L; - try (Table targetHTable = getHBaseTable(targetConnection, uniqueTableName)) { - Scan scan = new Scan(); - scan.withStartRow(integerRowKey(rowId), true); - scan.withStopRow(integerRowKey(rowId), true); - scan.setRaw(true); - scan.readAllVersions(); - try (ResultScanner scanner = targetHTable.getScanner(scan)) { - Result r = scanner.next(); - assertNotNull("Target row should exist", r); - for (Cell cell : r.rawCells()) { - if (Bytes.equals(CellUtil.cloneQualifier(cell), Bytes.toBytes("NAME"))) { - if (CellUtil.isDelete(cell)) { - observedDeleteColumns++; - } else { - observedPuts++; - observedNewestPutTs = Math.max(observedNewestPutTs, cell.getTimestamp()); - } - } - } + RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); + assertEquals("Target should have both NAME Puts after repair", 2, summary.puts); + assertEquals("Target should have the mirrored DeleteColumn after repair", 1, + summary.deleteColumns); + assertEquals("Newest mirrored Put should sit at T3", t3, summary.newestPutTs); + + // Visible read sees "v3" — newest Put@T3 sits above the DeleteColumn@T2. + assertTargetName(uniqueTableName, rowId, "v3"); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); + } + + /** + * Hidden source-only Put under {@code --read-all-versions}: source has Put(NAME)@T1 and + * Put(NAME)@T2; target has only Put(NAME)@T2. Verifier sees the older T1 cell only because + * {@code readAllVersions} surfaces it; repair must mirror the missing source@T1 cell at its + * original timestamp so target's history matches source. + */ + @Test + public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 2, "3, 7"); + + final long t1 = base + 1L; + final long t2 = base + 2L; + + // Source: NAME='older'@T1 then NAME='newer'@T2 — both retained under VERSIONS=2. + upsertAtScnSource(t1, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'older')"); + upsertAtScnSource(t2, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')"); + // Target: only the newer Put@T2; the older T1 version is missing. + upsertAtScnTarget(t2, + "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')"); + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions"); + assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult c = getSyncCounters(result.repairJob); + // Older NAME version is the only drift; mirror via cmp<0 source-only path. + assertTrue("Hidden source NAME@T1 must mirror as missing", c.cellsMissingOnTarget >= 1); + assertEquals("No extra cells", 0, c.cellsExtraOnTarget); + assertEquals("No row should be unrepairable", 0, c.rowsCannotRepair); + + // Visible NAME stays "newer"; raw-scan must surface both versions at original timestamps. + assertTargetName(uniqueTableName, rowId, "newer"); + RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); + assertTrue("Mirrored older NAME Put@T1 must land at original timestamp", + summary.putTimestamps.contains(t1)); + assertTrue("Existing newer NAME Put@T2 must remain", + summary.putTimestamps.contains(t2)); + + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); + } + + /** + * Source-only point Delete (single-version tombstone) must mirror onto target via + * {@code mirrorSourceCell} → target's NAME at {@code ts} is shadowed; NAME_VALUE survives. + */ + @Test + public void testRepairMirrorsSourcePointDeleteUnderRawScan() throws Exception { + runMirrorSourceTombstoneTest(SourceTombstone.POINT_DELETE); + } + + /** + * Source-only DeleteFamily must mirror onto target → whole family at the target row is + * shadowed and the row drops out of Phoenix view. + */ + @Test + public void testRepairMirrorsSourceDeleteFamilyUnderRawScan() throws Exception { + runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY); + } + + /** + * Source-only DeleteFamilyVersion must mirror onto target → all family cells at exactly + * {@code ts} are shadowed; preserves DFV semantics (not flattened to DeleteColumn). + */ + @Test + public void testRepairMirrorsSourceDeleteFamilyVersionUnderRawScan() throws Exception { + runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY_VERSION); + } + + private enum SourceTombstone { + POINT_DELETE, + DELETE_FAMILY, + DELETE_FAMILY_VERSION + } + + private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + final long ts = base + 1L; + // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs. + final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts; + + for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) { + try (Connection scn = openConnectionAtScn(zkUrl, ts)) { + scn.createStatement().execute("UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + scn.commit(); } } - assertEquals("Target should have both NAME Puts after repair", 2, observedPuts); - assertEquals("Target should have the mirrored DeleteColumn after repair", 1, - observedDeleteColumns); - assertEquals("Newest mirrored Put should sit at T3", t3, observedNewestPutTs); - // Read-side: NAME under default visibility should now be null (T3 Put → T2 DeleteColumn covers - // T1; visible state is "deleted but tombstone caps NAME"). Phoenix sees no value. - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + uniqueTableName + " WHERE ID = ?")) { + byte[] rk = integerRowKey(rowId); + switch (subtype) { + case POINT_DELETE: + writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs); + break; + case DELETE_FAMILY: + writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); + break; + case DELETE_FAMILY_VERSION: + writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); + break; + default: + throw new IllegalStateException("unhandled subtype: " + subtype); + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", + "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); + assertTrue(result.dryRunJob.isSuccessful()); + assertTrue(result.repairJob.isSuccessful()); + + Counters c = result.repairJob.getCounters(); + // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring + // also covers the empty-key sentinel cell, hence the >= 1 assertion shape. + assertTrue("source tombstone must mirror as a missing cell on target", + c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1); + assertTrue(c.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue() >= 1); + assertNoMismatchedCheckpoints(uniqueTableName, null); + + try (PreparedStatement ps = targetConnection.prepareStatement( + "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { ps.setInt(1, rowId); try (ResultSet rs = ps.executeQuery()) { - // The newest Put is T3 — reads see "v3" since T3 > tombstone T2. - assertTrue(rs.next()); - assertEquals("v3", rs.getString(1)); + if (subtype == SourceTombstone.POINT_DELETE) { + assertTrue(rs.next()); + assertNull(rs.getString(1)); + assertEquals(99L, rs.getLong(2)); + } else { + // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row. + assertFalse("row should not be visible after family-wide tombstone mirror", + rs.next()); + } } } + + // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } /** - * P9 (mixed Put+Delete batch under small {@code repairBatchSize}): many missing source rows AND - * many extra target rows in the same chunk. With {@code repairBatchSize=4}, most flushes - * straddle a Put/Delete boundary and exercise {@code flushRepairMutations} in - * {@link org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer} on its mixed - * {@code Table#batch} path. Validates that no mutation gets dropped at the batch boundary. + * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only + * rows in the same chunk, so most flushes straddle a Put/Delete boundary on + * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops + * at the batch boundary. */ @Test public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { - // No replication — seed each side independently so source-only and target-only rows truly - // diverge. createRepairTestTableOnBothClusters(uniqueTableName, 1, null); - // Add 5 source-only rows (will need Puts) and 5 target-only rows (will need Deletes), all - // inside the same chunk so they queue together in a single mapper's pendingPuts/pendingDeletes - // and get flushed as mixed batches. int[] sourceOnly = new int[] { 200, 201, 202, 203, 204 }; String[] sourceOnlyNames = new String[] { "s200", "s201", "s202", "s203", "s204" }; upsertRowsOnTarget(sourceConnection, uniqueTableName, sourceOnly, sourceOnlyNames); @@ -2482,13 +2560,8 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { assertEquals("No chunk should fail repair", 0, c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue()); - // Convergence pass: cleanup checkpoint and re-run a stable repair to assert no chunks are - // mismatched after the mixed-batch flushes. - cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(uniqueTableName, null); - verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + // Convergence pass on a clean checkpoint to confirm no chunks remain mismatched. + convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); } @Test @@ -2714,7 +2787,7 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep } @Test - public void testSyncTableValidateWithSplitCoalescing() throws Exception { + public void testSyncTableWithSplitCoalescing() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 10); introduceAndVerifyTargetDifferences(uniqueTableName); @@ -2732,18 +2805,19 @@ public void testSyncTableValidateWithSplitCoalescing() throws Exception { validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); + assertRowDriftCounters(counters, 0, 0, 3, 0); // Verify checkpoint entries from the dry-run pass are created correctly. - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - validateCheckpointEntries(checkpointEntries, uniqueTableName, targetZkQuorum, 10, 10, 7, 3, 4, - 3, null); + validateCheckpointEntries(uniqueTableName, null, counters, null); // Repair pass over the same window: MISMATCHED rows transition to REPAIRED in place. - runSyncTool(uniqueTableName, "--coalesce-split", "--from-time", String.valueOf(fromTime), - "--to-time", String.valueOf(toTime)); + Job repairJob = runSyncTool(uniqueTableName, "--coalesce-split", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + SyncCountersResult repairCounters = getSyncCounters(repairJob); + assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); + assertRepairCellCounters(repairCounters, 6, 6, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, counters, repairCounters); } /** @@ -3180,6 +3254,21 @@ private void verifyDataIdentical(Connection sourceConn, Connection targetConn, S } } + /** + * Negative complement of {@link #verifyDataIdentical}: asserts that source and target return + * different row sets via Phoenix SELECT. Use after a repair pass that is expected to leave + * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool + * reported the right status and that the data actually didn't converge. + */ + private void verifyDataDiverges(Connection sourceConn, Connection targetConn, String tableName) + throws SQLException { + String query = "SELECT ID, NAME, NAME_VALUE FROM " + tableName + " ORDER BY ID"; + List sourceRows = queryAllRows(sourceConn, query); + List targetRows = queryAllRows(targetConn, query); + assertNotEquals("Source and target rows should still differ when repair could not converge", + sourceRows, targetRows); + } + private void introduceAndVerifyTargetDifferences(String tableName) throws SQLException { upsertRowsOnTarget(targetConnection, tableName, new int[] { 2, 5, 8 }, new String[] { "MODIFIED_NAME_2", "MODIFIED_NAME_5", "MODIFIED_NAME_8" }); @@ -3495,8 +3584,8 @@ private void splitTableAt(Connection conn, String tableName, List split private List queryCheckpointTable(Connection conn, String tableName, String targetCluster, String tenantId) throws SQLException { List entries = new ArrayList<>(); - String query = "SELECT TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, IS_DRY_RUN, " - + "START_ROW_KEY, END_ROW_KEY, EXECUTION_START_TIME, EXECUTION_END_TIME, " + String query = "SELECT TABLE_NAME, TENANT_ID, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, " + + "IS_DRY_RUN, START_ROW_KEY, END_ROW_KEY, EXECUTION_START_TIME, EXECUTION_END_TIME, " + "STATUS, COUNTERS FROM PHOENIX_SYNC_TABLE_CHECKPOINT " + "WHERE TABLE_NAME = ? AND TARGET_CLUSTER = ? " + (tenantId != null ? "AND TENANT_ID = ?" : "AND TENANT_ID IS NULL"); @@ -3514,7 +3603,8 @@ private List queryCheckpointTable(Connectio String statusStr = rs.getString("STATUS"); PhoenixSyncTableCheckpointOutputRow entry = new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(rs.getString("TABLE_NAME")).setTargetCluster(rs.getString("TARGET_CLUSTER")) + .setTableName(rs.getString("TABLE_NAME")).setTenantId(rs.getString("TENANT_ID")) + .setTargetCluster(rs.getString("TARGET_CLUSTER")) .setType(typeStr != null ? PhoenixSyncTableCheckpointOutputRow.Type.valueOf(typeStr) : null) .setFromTime(rs.getLong("FROM_TIME")).setToTime(rs.getLong("TO_TIME")) .setIsDryRun(rs.getBoolean("IS_DRY_RUN")).setStartRowKey(rs.getBytes("START_ROW_KEY")) @@ -3876,18 +3966,24 @@ private static String[] appendArg(String[] args, String newArg) { } /** - * After a repair pass, asserts that no CHUNK or REGION rows in the checkpoint table are - * still in MISMATCHED status. They should all have transitioned to REPAIRED, VERIFIED, or - * (when target rows are entirely tombstoned) UNREPAIRABLE. + * After a clean repair pass, asserts no checkpoint rows remain in any non-terminal or + * stuck state — MISMATCHED, UNREPAIRABLE, or REPAIR_FAILED. Tests that legitimately leave + * UNREPAIRABLE/REPAIR_FAILED rows should not call this helper. */ private void assertNoMismatchedCheckpoints(String tableName, String tenantId) throws SQLException { List entries = queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); - long mismatched = countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); - assertEquals("After repair, no MISMATCHED checkpoint rows should remain for table " - + tableName + " tenant=" + tenantId, 0, mismatched); + String ctx = " (table=" + tableName + " tenant=" + tenantId + ")"; + assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); + assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); + assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); } /** @@ -3976,8 +4072,11 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, Configurat private static class SyncCountersResult { public final long sourceRowsProcessed; public final long targetRowsProcessed; - public final long chunksMismatched; public final long chunksVerified; + public final long chunksMismatched; + public final long chunksRepaired; + public final long chunksUnrepairable; + public final long chunksRepairFailed; public final long mappersVerified; public final long mappersMismatched; public final long mappersRepaired; @@ -3986,15 +4085,26 @@ private static class SyncCountersResult { public final long rowsMissingOnTarget; public final long rowsExtraOnTarget; public final long rowsDifferentOnTarget; + public final long rowsCannotRepair; + public final long cellsMissingOnTarget; + public final long cellsExtraOnTarget; + public final long cellsDifferentOnTarget; public final long taskCreated; + private final Counters raw; SyncCountersResult(Counters counters) { + this.raw = counters; this.sourceRowsProcessed = counters.findCounter(SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); this.targetRowsProcessed = counters.findCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue(); - this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); + this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); + this.chunksRepaired = counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue(); + this.chunksUnrepairable = + counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); + this.chunksRepairFailed = + counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue(); this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue(); this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); @@ -4008,18 +4118,34 @@ private static class SyncCountersResult { counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); this.rowsDifferentOnTarget = counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue(); + this.rowsCannotRepair = counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue(); + this.cellsMissingOnTarget = + counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); + this.cellsExtraOnTarget = + counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + this.cellsDifferentOnTarget = + counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue(); } + /** Escape hatch for tests that need to read counters not exposed as fields here. */ + public Counters getRawCounters() { + return raw; + } + public void logCounters(String testName) { LOGGER.info( - "{}: source rows={}, target rows={}, chunks mismatched={}, chunks verified={}, " + "{}: source rows={}, target rows={}, chunks verified={}, chunks mismatched={}, " + + "chunks repaired={}, chunks unrepairable={}, chunks repair_failed={}, " + "mappers verified={}, mappers mismatched={}, mappers repaired={}, " + "mappers unrepairable={}, mappers repair_failed={}, rows missing={}, " - + "rows extra={}, rows different={}", - testName, sourceRowsProcessed, targetRowsProcessed, chunksMismatched, chunksVerified, - mappersVerified, mappersMismatched, mappersRepaired, mappersUnrepairable, - mappersRepairFailed, rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget); + + "rows extra={}, rows different={}, rows cannot repair={}, cells missing={}, " + + "cells extra={}, cells different={}", + testName, sourceRowsProcessed, targetRowsProcessed, chunksVerified, chunksMismatched, + chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified, + mappersMismatched, mappersRepaired, mappersUnrepairable, mappersRepairFailed, + rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, + cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); } } @@ -4075,16 +4201,13 @@ private void validateMapperCountersRepair(SyncCountersResult counters, * where the drift is constructed deterministically and any miscount (off-by-one, * double-counting, missed branch) should fail the test loudly. */ - private void assertRepairCellCounters(Counters counters, long expectedCellsMissing, + private void assertRepairCellCounters(SyncCountersResult counters, long expectedCellsMissing, long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) { - assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing, - counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue()); - assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra, - counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue()); + assertEquals("CELLS_MISSING_ON_TARGET", expectedCellsMissing, counters.cellsMissingOnTarget); + assertEquals("CELLS_EXTRA_ON_TARGET", expectedCellsExtra, counters.cellsExtraOnTarget); assertEquals("CELLS_DIFFERENT_ON_TARGET", expectedCellsDifferent, - counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue()); - assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, - counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + counters.cellsDifferentOnTarget); + assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair); } /** @@ -4092,33 +4215,33 @@ private void assertRepairCellCounters(Counters counters, long expectedCellsMissi * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that * need to assert both layers. */ - private void assertRepairChunkAndMapperCounters(Counters counters, long expectedChunksRepaired, - long expectedChunksRepairFailed, long expectedMappersRepaired, long expectedMappersUnrepairable, + private void assertRepairChunkAndMapperCounters(SyncCountersResult counters, + long expectedChunksRepaired, long expectedChunksUnrepairable, long expectedChunksRepairFailed, + long expectedMappersRepaired, long expectedMappersUnrepairable, long expectedMappersRepairFailed) { - assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired, - counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue()); - assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, - counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue()); - assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, - counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue()); + assertEquals("CHUNKS_REPAIRED", expectedChunksRepaired, counters.chunksRepaired); + assertEquals("CHUNKS_UNREPAIRABLE", expectedChunksUnrepairable, counters.chunksUnrepairable); + assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, counters.chunksRepairFailed); + assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, counters.mappersRepaired); assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable, - counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue()); + counters.mappersUnrepairable); assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed, - counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue()); + counters.mappersRepairFailed); } /** - * Pins the row-level repair drift counters. Mirror of {@link #assertRepairCellCounters} for - * tests that need to assert whole-row outcomes (missing, extra, unrepairable). + * Pins all four row-level drift counters: missing, extra, different, and unrepairable. + * Dry-run runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave + * {@code ROWS_DIFFERENT_ON_TARGET} at 0 (different rows roll up under missing/extra cell + * drift after repair). */ - private void assertRepairRowCounters(Counters counters, long expectedRowsMissing, - long expectedRowsExtra, long expectedRowsCannotRepair) { - assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, - counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue()); - assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, - counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue()); - assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, - counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + private void assertRowDriftCounters(SyncCountersResult counters, long expectedRowsMissing, + long expectedRowsExtra, long expectedRowsDifferent, long expectedRowsCannotRepair) { + assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, counters.rowsMissingOnTarget); + assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, counters.rowsExtraOnTarget); + assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent, + counters.rowsDifferentOnTarget); + assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair); } /** @@ -4167,11 +4290,37 @@ private long createRepairTestTableOnBothClusters(String tableName, int maxVersio // Wait until the wall clock advances at least one millisecond past the CREATE TABLE // timestamp so any caller-chosen SCN >= the returned anchor is guaranteed to be above the // table's metadata row. - long anchor = System.currentTimeMillis() + 1; - while (System.currentTimeMillis() < anchor) { + return waitUntilWallClockPasses(System.currentTimeMillis()); + } + + /** + * Multi-CF variant of {@link #createRepairTestTableOnBothClusters}: two column families + * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to + * span families. + */ + private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws SQLException { + String ddl = "CREATE TABLE IF NOT EXISTS " + tableName + + " (ID INTEGER NOT NULL PRIMARY KEY, CF1.A VARCHAR(50), CF2.B VARCHAR(50))" + + " COLUMN_ENCODED_BYTES=NONE, UPDATE_CACHE_FREQUENCY=0, REPLICATION_SCOPE=0"; + executeTableCreation(sourceConnection, ddl); + executeTableCreation(targetConnection, ddl); + return waitUntilWallClockPasses(System.currentTimeMillis()); + } + + /** + * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then + * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted + * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell + * and (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation + * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The + * spin terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait + * for an external side effect. + */ + private long waitUntilWallClockPasses(long minTs) { + while (System.currentTimeMillis() <= minTs) { // spin } - return anchor; + return System.currentTimeMillis(); } /** @@ -4186,6 +4335,176 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props); } + /** + * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits. + * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; }} + * boilerplate that recurred in nearly every repair test. + */ + private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException { + try (Connection scn = openConnectionAtScn(zkUrl, ts)) { + scn.createStatement().execute(upsertSql); + scn.commit(); + } + } + + /** Convenience wrapper: {@link #upsertAtScn} on the source cluster. */ + private void upsertAtScnSource(long ts, String upsertSql) throws SQLException { + upsertAtScn(CLUSTERS.getZkUrl1(), ts, upsertSql); + } + + /** Convenience wrapper: {@link #upsertAtScn} on the target cluster. */ + private void upsertAtScnTarget(long ts, String upsertSql) throws SQLException { + upsertAtScn(CLUSTERS.getZkUrl2(), ts, upsertSql); + } + + /** Runs the same UPSERT against both clusters at the same SCN timestamp. */ + private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException { + upsertAtScnSource(ts, upsertSql); + upsertAtScnTarget(ts, upsertSql); + } + + /** + * Asserts target's visible NAME for {@code id} equals {@code expected}. Replaces the + * {@code prepareStatement("SELECT NAME FROM ... WHERE ID = ?")} block repeated across repair + * tests. + */ + private void assertTargetName(String tableName, int id, String expected) throws SQLException { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + ps.setInt(1, id); + try (ResultSet rs = ps.executeQuery()) { + assertTrue("Row " + id + " should exist on target", rs.next()); + assertEquals("Target NAME for row " + id, expected, rs.getString(1)); + } + } + } + + /** Asserts the target row exists but its visible NAME is null. */ + private void assertTargetNameNull(String tableName, int id) throws SQLException { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + ps.setInt(1, id); + try (ResultSet rs = ps.executeQuery()) { + assertTrue("Row " + id + " should exist on target", rs.next()); + assertNull("Target NAME for row " + id + " should be null", rs.getString(1)); + } + } + } + + /** Asserts the target row is not visible to a Phoenix SELECT (e.g., shadowed by tombstones). */ + private void assertTargetRowAbsent(String tableName, int id) throws SQLException { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + ps.setInt(1, id); + try (ResultSet rs = ps.executeQuery()) { + assertFalse("Row " + id + " should not be visible on target", rs.next()); + } + } + } + + /** + * End-of-test convergence: run dry-run + repair over [fromTime, toTime] on top of whatever + * checkpoint state the test already produced. The repair-mode resume filter (status IN + * (VERIFIED, REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level + * range not yet covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run + + * repair on top of partial/stale prior state will still converge. Asserts no + * MISMATCHED/UNREPAIRABLE/REPAIR_FAILED rows remain and Phoenix-visible data matches between + * source and target. + */ + private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime) + throws Exception { + runSyncToolWithRepair(tableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(toTime)); + assertNoMismatchedCheckpoints(tableName, null); + verifyDataIdentical(sourceConnection, targetConnection, tableName); + } + + /** + * Summary of NAME-column raw cells observed under a single-row raw scan with all versions. + * Used by tests that pin tombstone/Put counts after repair. + */ + private static final class RawCellSummary { + final int puts; + final int deleteColumns; + final int deleteFamilies; + final int deleteFamilyVersions; + final int pointDeletes; + final long newestPutTs; + final List putTimestamps; + final List deleteTimestamps; + + RawCellSummary(int puts, int deleteColumns, int deleteFamilies, int deleteFamilyVersions, + int pointDeletes, long newestPutTs, List putTimestamps, List deleteTimestamps) { + this.puts = puts; + this.deleteColumns = deleteColumns; + this.deleteFamilies = deleteFamilies; + this.deleteFamilyVersions = deleteFamilyVersions; + this.pointDeletes = pointDeletes; + this.newestPutTs = newestPutTs; + this.putTimestamps = putTimestamps; + this.deleteTimestamps = deleteTimestamps; + } + + int totalDeletes() { + return deleteColumns + deleteFamilies + deleteFamilyVersions + pointDeletes; + } + } + + /** + * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put / + * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin + * post-repair NAME tombstone counts. + */ + private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception { + byte[] rk = integerRowKey(rowId); + int puts = 0; + int deleteColumns = 0; + int deleteFamilies = 0; + int deleteFamilyVersions = 0; + int pointDeletes = 0; + long newestPutTs = -1L; + List putTimestamps = new ArrayList<>(); + List deleteTimestamps = new ArrayList<>(); + try (Table targetHTable = getHBaseTable(targetConnection, tableName)) { + Scan scan = new Scan().withStartRow(rk, true).withStopRow(rk, true).setRaw(true); + scan.readAllVersions(); + try (ResultScanner sc = targetHTable.getScanner(scan)) { + for (Result r; (r = sc.next()) != null;) { + for (Cell c : r.rawCells()) { + if (!Bytes.equals(CellUtil.cloneQualifier(c), Bytes.toBytes("NAME"))) { + continue; + } + if (CellUtil.isDelete(c)) { + deleteTimestamps.add(c.getTimestamp()); + switch (c.getType()) { + case DeleteColumn: + deleteColumns++; + break; + case DeleteFamily: + deleteFamilies++; + break; + case DeleteFamilyVersion: + deleteFamilyVersions++; + break; + case Delete: + pointDeletes++; + break; + default: + // ignore + } + } else { + puts++; + putTimestamps.add(c.getTimestamp()); + newestPutTs = Math.max(newestPutTs, c.getTimestamp()); + } + } + } + } + } + return new RawCellSummary(puts, deleteColumns, deleteFamilies, deleteFamilyVersions, + pointDeletes, newestPutTs, putTimestamps, deleteTimestamps); + } + /** * Resolves the HBase {@link Table} backing a Phoenix table for a given Phoenix * {@link Connection}. Used by raw-cell helpers that need to bypass Phoenix and write cells at @@ -4305,77 +4624,158 @@ private void validateSyncCountersWithMinChunk(SyncCountersResult counters, } /** - * Validates that a checkpoint table has entries with proper structure. + * Aggressive end-to-end checkpoint-table validation. Queries the checkpoint table itself, then: + *

      + *
    1. Validates per-row structural integrity — PK columns non-null, TYPE ∈ {REGION, CHUNK}, + * TENANT_ID matches, time-range invariants (FROM_TIME ≥ 0, TO_TIME > FROM_TIME), execution + * timestamps non-null with END ≥ START, STATUS non-null, COUNTERS non-null on CHUNK rows. + *
    2. + *
    3. Pins per-(Type × Status) counts derived from the supplied counter objects: CHUNK and + * REGION crossed with VERIFIED / REPAIRED / UNREPAIRABLE / REPAIR_FAILED / MISMATCHED. + *
    4. + *
    5. Cross-checks REGION-row sourceRowsProcessed/targetRowsProcessed totals against the + * repair-pass counters (or dry-run counters when no repair ran).
    6. + *
    + *

    + * The two counter parameters select the validation mode: + *

      + *
    • {@code dryRunCounters != null && repairCounters == null} — dry-run-only test. Expects + * MISMATCHED rows to remain.
    • + *
    • {@code dryRunCounters == null && repairCounters != null} — combined verify+repair pass + * (no separate dry-run). VERIFIED count comes from {@code repairCounters}; MISMATCHED == 0. + *
    • + *
    • {@code dryRunCounters != null && repairCounters != null} — separate dry-run + repair. + * VERIFIED count comes from {@code dryRunCounters} (the repair pass's resume filter skips + * them); REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters}; + * MISMATCHED == 0.
    • + *
    + * Use after asserting counters and before declaring success — pins the persisted checkpoint + * state to the in-memory counter state, so a regression that reports the right counters but + * writes the wrong checkpoint rows (or vice versa) fails loudly. */ - private void validateCheckpointEntries(List entries, - String expectedTableName, String expectedTargetCluster, int expectedSourceRows, - int expectedTargetRows, int expectedChunkVerified, int expectedChunkMismatched, - int expectedMapperRegion, int expectedMapperMismatched, String expectedTenantId) { - int mapperRegionCount = 0; - int chunkCount = 0; - int mismatchedEntry = 0; - int sourceRowsProcessed = 0; - int targetRowsProcessed = 0; + private void validateCheckpointEntries(String tableName, String tenantId, + SyncCountersResult dryRunCounters, SyncCountersResult repairCounters) throws SQLException { + if (dryRunCounters == null && repairCounters == null) { + throw new IllegalArgumentException( + "At least one of dryRunCounters or repairCounters must be non-null"); + } + List entries = + queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); + String ctx = " (table=" + tableName + " tenant=" + tenantId + ")"; + + // Per-row structural validation + REGION row-processing totals. + long mapperRegionCount = 0; + long chunkCount = 0; + long sourceRowsProcessed = 0; + long targetRowsProcessed = 0; for (PhoenixSyncTableCheckpointOutputRow entry : entries) { - // Validate primary key columns - assertEquals("TABLE_NAME should match", expectedTableName, entry.getTableName()); - assertEquals("TARGET_CLUSTER should match", expectedTargetCluster, entry.getTargetCluster()); - assertNotNull("TYPE should not be null", entry.getType()); - assertTrue("TYPE should be REGION or CHUNK", + assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName()); + assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster()); + assertNotNull("TYPE should not be null" + ctx, entry.getType()); + assertTrue("TYPE should be REGION or CHUNK" + ctx, PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType()) || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())); - // Validate TENANT_ID - if (expectedTenantId == null) { - assertNull("TENANT_ID should be null for non-multi-tenant tables", entry.getTenantId()); + if (tenantId == null) { + assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, + entry.getTenantId()); } else { - assertEquals("TENANT_ID should match", expectedTenantId, entry.getTenantId()); + assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId()); } - // Validate time range - assertTrue("FROM_TIME should be >= 0", entry.getFromTime() >= 0); - assertTrue("TO_TIME should be > FROM_TIME", entry.getToTime() > entry.getFromTime()); + assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0); + assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime()); - // Validate execution timestamps - assertNotNull("EXECUTION_START_TIME should not be null", entry.getExecutionStartTime()); - assertNotNull("EXECUTION_END_TIME should not be null", entry.getExecutionEndTime()); - assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME", + assertNotNull("EXECUTION_START_TIME should not be null" + ctx, + entry.getExecutionStartTime()); + assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime()); + assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx, entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime()); - // Validate status - assertNotNull("STATUS should not be null", entry.getStatus()); - assertTrue("STATUS should be VERIFIED or MISMATCHED", - PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED.equals(entry.getStatus()) - || PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())); - - if (PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED.equals(entry.getStatus())) { - mismatchedEntry++; - } + assertNotNull("STATUS should not be null" + ctx, entry.getStatus()); - // Count entry types if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) { mapperRegionCount++; - sourceRowsProcessed += (int) entry.getSourceRowsProcessed(); - targetRowsProcessed += (int) entry.getTargetRowsProcessed(); - } else if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) { + sourceRowsProcessed += entry.getSourceRowsProcessed(); + targetRowsProcessed += entry.getTargetRowsProcessed(); + } else { chunkCount++; - assertNotNull("COUNTERS should not be null for CHUNK entries", entry.getCounters()); + assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters()); } } - assertEquals(String.format("Should have %d REGION entry", expectedMapperRegion), - expectedMapperMismatched, expectedMapperRegion, mapperRegionCount); - assertEquals( - String.format("Should have %d CHUNK entry", expectedChunkVerified + expectedChunkMismatched), - expectedChunkVerified + expectedChunkMismatched, chunkCount); - assertEquals( - String.format("Should have %d MISMATCHED entry", - expectedMapperMismatched + expectedChunkMismatched), - expectedMapperMismatched + expectedChunkMismatched, mismatchedEntry); - assertEquals(String.format("Should have %d Source rows processed", expectedSourceRows), - expectedSourceRows, sourceRowsProcessed); - assertEquals(String.format("Should have %d Target rows processed", expectedTargetRows), - expectedTargetRows, targetRowsProcessed); + // Per-(Type × Status) counts derived from counter objects. + boolean isDryRunOnly = (repairCounters == null); + SyncCountersResult verifiedSource = (dryRunCounters != null ? dryRunCounters : repairCounters); + long expectedChunkVerified = verifiedSource.chunksVerified; + long expectedRegionVerified = verifiedSource.mappersVerified; + long expectedChunkRepaired = isDryRunOnly ? 0L : repairCounters.chunksRepaired; + long expectedChunkUnrepairable = isDryRunOnly ? 0L : repairCounters.chunksUnrepairable; + long expectedChunkRepairFailed = isDryRunOnly ? 0L : repairCounters.chunksRepairFailed; + long expectedRegionRepaired = isDryRunOnly ? 0L : repairCounters.mappersRepaired; + long expectedRegionUnrepairable = isDryRunOnly ? 0L : repairCounters.mappersUnrepairable; + long expectedRegionRepairFailed = isDryRunOnly ? 0L : repairCounters.mappersRepairFailed; + long expectedChunkMismatched = isDryRunOnly ? dryRunCounters.chunksMismatched : 0L; + long expectedRegionMismatched = isDryRunOnly ? dryRunCounters.mappersMismatched : 0L; + + assertEquals("CHUNK/VERIFIED checkpoint rows" + ctx, expectedChunkVerified, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)); + assertEquals("CHUNK/REPAIRED checkpoint rows" + ctx, expectedChunkRepaired, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); + assertEquals("CHUNK/UNREPAIRABLE checkpoint rows" + ctx, expectedChunkUnrepairable, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); + assertEquals("CHUNK/REPAIR_FAILED checkpoint rows" + ctx, expectedChunkRepairFailed, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); + assertEquals("CHUNK/MISMATCHED checkpoint rows" + ctx, expectedChunkMismatched, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); + + assertEquals("REGION/VERIFIED checkpoint rows" + ctx, expectedRegionVerified, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED)); + assertEquals("REGION/REPAIRED checkpoint rows" + ctx, expectedRegionRepaired, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED)); + assertEquals("REGION/UNREPAIRABLE checkpoint rows" + ctx, expectedRegionUnrepairable, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); + assertEquals("REGION/REPAIR_FAILED checkpoint rows" + ctx, expectedRegionRepairFailed, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); + assertEquals("REGION/MISMATCHED checkpoint rows" + ctx, expectedRegionMismatched, + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); + + // Aggregate type counts (sanity — sum of per-status counts across all five statuses must + // equal what the loop above tallied). + long expectedChunkTotal = expectedChunkVerified + expectedChunkRepaired + + expectedChunkUnrepairable + expectedChunkRepairFailed + expectedChunkMismatched; + long expectedRegionTotal = expectedRegionVerified + expectedRegionRepaired + + expectedRegionUnrepairable + expectedRegionRepairFailed + expectedRegionMismatched; + assertEquals("Total CHUNK checkpoint rows" + ctx, expectedChunkTotal, chunkCount); + assertEquals("Total REGION checkpoint rows" + ctx, expectedRegionTotal, mapperRegionCount); + + // Row-processing totals: REGION rows persist per-mapper sourceRowsProcessed / + // targetRowsProcessed; their sum equals the job-level counter only when no REGION row was + // overwritten by a subsequent repair pass. The checkpoint PK excludes IS_DRY_RUN, so a repair + // pass UPSERTs the REGION row at the dry-run row's PK; for partially-mismatched regions + // repair re-processes only the gap (non-VERIFIED) chunks and writes a smaller delta, so the + // sum across REGION rows no longer matches the dry-run job-level total. Only assert the + // strict equality when (a) no repair pass was run, or (b) the repair pass touched no + // regions (everything was already VERIFIED so dry-run rows stay intact). + boolean repairTouchedRegions = repairCounters != null + && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable + + repairCounters.mappersRepairFailed) > 0; + if (!repairTouchedRegions) { + assertEquals("REGION sourceRowsProcessed total" + ctx, verifiedSource.sourceRowsProcessed, + sourceRowsProcessed); + assertEquals("REGION targetRowsProcessed total" + ctx, verifiedSource.targetRowsProcessed, + targetRowsProcessed); + } } /** @@ -4394,4 +4794,25 @@ public boolean equals(Object o) { return id == other.id && Objects.equals(name, other.name) && name_value == other.name_value; } } + + /** + * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT + * by {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the + * {@code CHECKPOINT_WRITE_FAILED} → non-zero exit path. + */ + public static class CheckpointWriteFailingObserver extends SimpleRegionObserver { + @Override + public void preBatchMutate(ObserverContext c, + MiniBatchOperationInProgress miniBatchOp) throws IOException { + throw new DoNotRetryIOException("INJECTED CHECKPOINT WRITE FAIL"); + } + } + + public static class RepairBatchFailingObserver extends SimpleRegionObserver { + @Override + public void preBatchMutate(ObserverContext c, + MiniBatchOperationInProgress miniBatchOp) throws IOException { + throw new DoNotRetryIOException("INJECTED TARGET REPAIR WRITE FAIL"); + } + } } diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java index 91ad3941746..a5977629a89 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java @@ -1072,4 +1072,222 @@ public void testChunkCheckpointChunkWithDifferentTenants() throws Exception { targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals("Null tenant query should return only null-tenant chunk", 1, results3.size()); } + + @Test + public void testCheckpointValidationEmptyTableName() throws Exception { + byte[] startKey = Bytes.toBytes("row1"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + try { + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName("").setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(startKey) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + fail("Should throw IllegalArgumentException for empty tableName"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("TableName cannot be null or empty")); + } + } + + @Test + public void testCheckpointValidationEmptyTargetCluster() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey = Bytes.toBytes("row1"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + try { + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster("").setType(Type.REGION).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(startKey) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + fail("Should throw IllegalArgumentException for empty targetCluster"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("TargetCluster cannot be null or empty")); + } + } + + @Test + public void testCheckpointValidationNullToTime() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey = Bytes.toBytes("row1"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + try { + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) + .setFromTime(0L).setToTime(null).setIsDryRun(false).setStartRowKey(startKey) + .setEndRowKey(startKey).setStatus(Status.VERIFIED).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + fail("Should throw NullPointerException for null toTime"); + } catch (NullPointerException e) { + assertTrue(e.getMessage().contains("ToTime cannot be null")); + } + } + + @Test + public void testCheckpointWithNullStatusPersistsAsNull() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey = Bytes.toBytes("row1"); + byte[] endKey = Bytes.toBytes("row100"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // Status is permitted to be null — production has an explicit null guard at the upsert site. + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey) + .setStatus(null).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); + + String query = "SELECT STATUS FROM " + + PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME + " WHERE TABLE_NAME = ?"; + try (java.sql.PreparedStatement ps = connection.prepareStatement(query)) { + ps.setString(1, tableName); + try (ResultSet rs = ps.executeQuery()) { + assertTrue(rs.next()); + assertNull("STATUS should be null when builder sets it to null", rs.getString("STATUS")); + } + } + } + + @Test + public void testBuilderSetEndRowKeyNullCoercedToEmpty() { + PhoenixSyncTableCheckpointOutputRow row = + new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(null).build(); + + byte[] retrieved = row.getEndRowKey(); + assertNotNull("setEndRowKey(null) should coerce to empty array, not stay null", retrieved); + assertEquals("Coerced array should have length 0", 0, retrieved.length); + } + + @Test + public void testBuilderSetEndRowKeyEmptyArrayCoercedToEmpty() { + PhoenixSyncTableCheckpointOutputRow row = + new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(new byte[0]).build(); + + byte[] retrieved = row.getEndRowKey(); + assertNotNull("setEndRowKey(empty array) should remain non-null", retrieved); + assertEquals("Coerced array should have length 0", 0, retrieved.length); + } + + @Test + public void testGetEndRowKeyDefensiveCopy() { + byte[] endKey = Bytes.toBytes("end"); + + PhoenixSyncTableCheckpointOutputRow row = + new PhoenixSyncTableCheckpointOutputRow.Builder().setEndRowKey(endKey).build(); + + byte[] retrieved = row.getEndRowKey(); + assertNotSame("Should return a copy, not the original", endKey, retrieved); + + retrieved[0] = (byte) 0xFF; + + byte[] retrievedAgain = row.getEndRowKey(); + assertNotEquals("Internal array should not be modified", (byte) 0xFF, retrievedAgain[0]); + } + + @Test + public void testParseCounterValueCorruptedFormatThrows() { + // "FOO,BAR=1" — first token "FOO" splits by '=' to length 1, which fails the length-2 check. + PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder() + .setStartRowKey(Bytes.toBytes("start")).setCounters("FOO,BAR=1").build(); + + try { + row.getSourceRowsProcessed(); + fail("Should throw IllegalArgumentException for corrupted counter format"); + } catch (IllegalArgumentException e) { + assertTrue("Error message should explain corruption: " + e.getMessage(), + e.getMessage().contains("Corrupted counter format")); + } + } + + @Test + public void testParseCounterValueCounterNameNotPresentReturnsZero() { + // Well-formed counters string that doesn't contain SOURCE_ROWS_PROCESSED — should default to 0. + PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder() + .setStartRowKey(Bytes.toBytes("start")).setCounters("CHUNKS_VERIFIED=5").build(); + + assertEquals(0L, row.getSourceRowsProcessed()); + assertEquals(0L, row.getTargetRowsProcessed()); + } + + @Test + public void testParseCounterValueEmptyStringReturnsZero() { + // Distinct from null — exercises the counters.isEmpty() branch in parseCounterValue. + PhoenixSyncTableCheckpointOutputRow row = new PhoenixSyncTableCheckpointOutputRow.Builder() + .setStartRowKey(Bytes.toBytes("start")).setCounters("").build(); + + assertEquals(0L, row.getSourceRowsProcessed()); + assertEquals(0L, row.getTargetRowsProcessed()); + } + + @Test + public void testGetProcessedChunksBothBoundariesFilterNonOverlappingChunks() throws Exception { + String tableName = generateUniqueName(); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // Three chunks; mapper region is row25..row50. Only the middle chunk overlaps both bounds. + byte[] chunk1Start = Bytes.toBytes("row10"); + byte[] chunk1End = Bytes.toBytes("row20"); + byte[] chunk2Start = Bytes.toBytes("row30"); + byte[] chunk2End = Bytes.toBytes("row40"); + byte[] chunk3Start = Bytes.toBytes("row60"); + byte[] chunk3End = Bytes.toBytes("row70"); + + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk1Start).setEndRowKey(chunk1End) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk2Start).setEndRowKey(chunk2End) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(chunk3Start).setEndRowKey(chunk3End) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + + // chunk1 ends at row20 (< mapperStart=row25), chunk3 starts at row60 (> mapperEnd=row50). + byte[] mapperStart = Bytes.toBytes("row25"); + byte[] mapperEnd = Bytes.toBytes("row50"); + + List results = repository.getProcessedChunks(tableName, + targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); + + assertEquals("Only the overlapping chunk (row30..row40) should match", 1, results.size()); + assertArrayEquals("Surviving chunk should be chunk2", chunk2Start, + results.get(0).getStartRowKey()); + } + + @Test + public void testGetProcessedMapperRegionsFiltersByExactTimeWindow() throws Exception { + String tableName = generateUniqueName(); + byte[] startKey1 = Bytes.toBytes("row1"); + byte[] endKey1 = Bytes.toBytes("row100"); + byte[] startKey2 = Bytes.toBytes("row200"); + byte[] endKey2 = Bytes.toBytes("row300"); + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + // Two regions for the same table at distinct (fromTime, toTime) windows. + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L) + .setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey1).setEndRowKey(endKey1) + .setStatus(Status.VERIFIED).setExecutionStartTime(timestamp).setExecutionEndTime(timestamp) + .build()); + repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() + .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) + .setFromTime(1000L).setToTime(2000L).setIsDryRun(false).setStartRowKey(startKey2) + .setEndRowKey(endKey2).setStatus(Status.VERIFIED).setExecutionStartTime(timestamp) + .setExecutionEndTime(timestamp).build()); + + List results = + repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, true); + + assertEquals("Only the [0, 1000) region should match", 1, results.size()); + assertArrayEquals("Surviving region should be the [0, 1000) one", startKey1, + results.get(0).getStartRowKey()); + } } From f48cd20efb8999db43a23a5fbb7e8ff3579f30b2 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Mon, 8 Jun 2026 20:50:48 +0530 Subject: [PATCH 13/18] checkpointing test modification --- .../end2end/PhoenixSyncTableToolIT.java | 393 +++++++++--------- 1 file changed, 202 insertions(+), 191 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 1d9131d8e2d..ce2adc02505 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -174,7 +174,7 @@ public void testSyncTableWithDataDifference() throws Exception { long toTime = System.currentTimeMillis(); // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows. - Job dryRunJob = runSyncToolWithLargeChunks(uniqueTableName, "--dry-run", "--from-time", + Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); SyncCountersResult dryRunCounters = getSyncCounters(dryRunJob); @@ -190,7 +190,7 @@ public void testSyncTableWithDataDifference() throws Exception { // Phase 2: repair pass over the same window — MISMATCHED rows transition to REPAIRED in // place. - Job repairJob = runSyncToolWithLargeChunks(uniqueTableName, "--from-time", + Job repairJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); SyncCountersResult repairCounters = getSyncCounters(repairJob); assertRepairChunkAndMapperCounters(repairCounters, 3, 0, 0, 3, 0, 0); @@ -367,13 +367,13 @@ public void testSyncTableWithConditionalTTLExpiredRowsCompact() throws Exception // // Note: the two runSyncTool calls above each write CHUNK/VERIFIED rows under their own // (from-time, to-time) PK, so the checkpoint table accumulates entries from prior validate - // passes — validateCheckpointEntries can't be applied here against a single counter - // snapshot. Stick with the MISMATCHED-count invariant. + // passes — strict validateCheckpointEntries can't be applied here against a single counter + // snapshot. Use the bounded variant with the repair pass's counters as the lower bound. RepairRunResult result = runSyncToolWithRepair(uniqueTableName); SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertRowDriftCounters(repairCounters, 0, 0, 0, 0); assertRepairChunkAndMapperCounters(repairCounters, 0, 0, 0, 0, 0, 0); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, repairCounters); } @Test @@ -575,7 +575,7 @@ public void testSyncTableWithTimeRangeFilter() throws Exception { // no MISMATCHED rows are written. Out-of-window drift (IDs 3,5,8,23,25,28) is invisible // to the time-range filter and remains on target by design — full convergence is NOT // expected here, only checkpoint cleanliness for the window we scanned. - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters); } @Test @@ -783,7 +783,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce // Both runs were non-dry-run, so repair ran inline. Target should converge. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters2); } @Test @@ -823,9 +823,11 @@ public void testSyncTableIdempotentOnReRun() throws Exception { checkpointEntriesAfterFirstRun, checkpointEntriesAfterSecondRun); // Both passes were non-dry-run with no drift to begin with; the repair flow ran as a - // no-op, target should still match source and no MISMATCHED rows should exist. + // no-op, target should still match source and no MISMATCHED rows should exist. The + // first run's counters are the source-of-truth bound — counters2 is all zeros because + // the resume filter skipped every chunk. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters1); } @Test @@ -841,8 +843,8 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception { long toTime = System.currentTimeMillis(); // Run sync tool for the FIRST time (no differences, all chunks verified) - Job job1 = runSyncToolWithLargeChunks(uniqueTableName, "--from-time", String.valueOf(fromTime), - "--to-time", String.valueOf(toTime)); + Job job1 = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); SyncCountersResult counters1 = getSyncCounters(job1); // Validate first run: all rows processed, no mismatches @@ -879,8 +881,10 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception { checkpointEntriesAfterSecondRun.isEmpty()); // No drift was introduced; repair flow should be a no-op even after concurrent splits. + // Use counters1 as the lower bound — counters2 is all zeros because every chunk was + // already VERIFIED by the first pass. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters1); } @Test @@ -1228,9 +1232,12 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception { "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - assertRowDriftCounters(getSyncCounters(result.repairJob), 0, 0, 0, 1); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + assertRowDriftCounters(repairCounters, 0, 0, 0, 1); // Phoenix SELECT already sees both sides as identical (target's row 5 is tombstone-only and // invisible). Divergence is at the raw-cell level only — not asserted via SELECT here. + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } @Test @@ -1275,13 +1282,9 @@ public void testSyncTableValidateWithOnlyTimestampDifferences() throws Exception validateSyncCounters(counters, 10, 10, 0, 10); assertRowDriftCounters(counters, 0, 0, 10, 0); - // Verify checkpoint entries show mismatches - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - - long mismatchedCount = countCheckpointsByStatus(checkpointEntries, - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED); - assertTrue("Should have mismatched entries due to timestamp differences", mismatchedCount > 0); + // Strict checkpoint validation: dry-run-only flow, so the MISMATCHED rows persist and counts + // are pinned to dryRunCounters per (Type × Status) bucket. + validateCheckpointEntries(uniqueTableName, null, counters, null); } @Test @@ -1323,11 +1326,6 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception { // Validate counters - should process all 100 rows and detect mismatched chunks validateSyncCountersWithMinChunk(counters, 100, 100, 1, 1); - // Verify checkpoint entries were created - List checkpointEntries = - queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - assertFalse("Should have checkpoint entries", checkpointEntries.isEmpty()); - // Run sync again to verify idempotent behavior after merges Job job2 = runSyncToolWithChunkSize(uniqueTableName, 512, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); @@ -1336,10 +1334,8 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception { // Second run should process ZERO rows (all checkpointed despite region merges) validateSyncCounters(counters2, 0, 0, 0, 0); - // Concurrent merges may leave chunks REPAIRED with stale boundaries; the resume filter - // skips those on a single rerun. Cleanup the checkpoint and run a dry-run + repair pass - // on the stable region layout to converge. - convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @Test @@ -1395,10 +1391,10 @@ public void testSyncTableWithPagingTimeout() throws Exception { // between passes and stale MISMATCHED rows from the dry-run can land outside the new // boundary set. cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + Job repairJob = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, null, getSyncCounters(repairJob)); } @Test @@ -1467,10 +1463,10 @@ public void testSyncTableWithPagingTimeoutWithSplits() throws Exception { // checkpoint rows first so the resume filter doesn't leave stale MISMATCHED entries that // sit outside the repair pass's chunk boundaries (paging + splits change boundary set). cleanupCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); - runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", + Job repairJob = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, null, getSyncCounters(repairJob)); } @Test @@ -1531,15 +1527,22 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { setupStandardTestWithReplication(uniqueTableName, 1, 5); introduceMismatchesByIds(uniqueTableName, Arrays.asList(2, 3, 4)); + // Pin a single time window up front and re-use it for all three runs. The checkpoint PK + // includes TO_TIME, so if the first dry-run defaulted to its own currentTimeMillis() the + // resulting MISMATCHED rows would sit at a different PK from the failing/recovery runs and + // never get overwritten — leaving stale MISMATCHED rows that the post-recovery validation + // would (correctly) flag. + long fromTime = 0L; + long toTime = System.currentTimeMillis(); + // Run once first so the checkpoint table exists; we can only attach a coprocessor to a // table that's already been created. - Job initial = runSyncTool(uniqueTableName, "--dry-run"); + Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue(initial.isSuccessful()); String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME; TestUtil.addCoprocessor(sourceConnection, ckpt, CheckpointWriteFailingObserver.class); - long fromTime = 0L; - long toTime = System.currentTimeMillis(); try { // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail. PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); @@ -1566,7 +1569,7 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue(recovery.isSuccessful()); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, getSyncCounters(recovery)); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @@ -1615,7 +1618,7 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception { Job recovery = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue(recovery.isSuccessful()); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, getSyncCounters(recovery)); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @@ -1678,6 +1681,12 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { separatedAfterFailedRun.mappers.size()); assertEquals("Remaining chunk entries should persist after failed run", setup.chunks.size() - setup.chunksToDelete.size(), separatedAfterFailedRun.chunks.size()); + + // Structural invariants only: the test deletes 75% of checkpoint rows mid-flight then runs a + // job that fails before it can write new ones, so no counter-parity check is meaningful here. + // Just assert the surviving rows are well-formed and no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED + // rows leaked through. + validateCheckpointEntriesAtLeast(uniqueTableName, null, null); } /** @@ -1721,9 +1730,10 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters repairCounters = result.repairJob.getCounters(); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertTrue("At least 2 cells should be tombstoned for target's hidden+visible NAME versions", - repairCounters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 2); + repairCounters.cellsExtraOnTarget >= 2); assertTargetName(uniqueTableName, rowId, "alice"); @@ -1741,7 +1751,7 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { assertEquals("Source's Put@" + sourceTs + " should be mirrored", 1, namePutAtSourceTs); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** @@ -1776,6 +1786,7 @@ public void testRepairPartialShadowWithinRow() throws Exception { assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); SyncCountersResult repairCounters = getSyncCounters(result.repairJob); assertRepairCellCounters(repairCounters, 0, 0, 0, 1); assertTrue("At least one mapper should roll up to UNREPAIRABLE", @@ -1785,6 +1796,7 @@ public void testRepairPartialShadowWithinRow() throws Exception { // Source NAME='alice' but target NAME=null — Phoenix SELECT must still diverge. verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } /** @@ -1845,10 +1857,11 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except assertTrue(pass2.dryRunJob.isSuccessful()); assertTrue(pass2.repairJob.isSuccessful()); + SyncCountersResult pass2DryRunCounters = getSyncCounters(pass2.dryRunJob); SyncCountersResult pass2Counters = getSyncCounters(pass2.repairJob); assertRepairCellCounters(pass2Counters, 1, 0, 0, 0); assertTrue(pass2Counters.mappersRepaired >= 1); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, pass2DryRunCounters, pass2Counters); assertTrue(countCheckpointsByStatus( queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null), PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1); @@ -2030,14 +2043,12 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception { "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); // CF1.A missing → +1, CF2.B extra → +1; nothing different / unrepairable. - assertTrue("CF1.A must mirror as missing", - c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1); - assertTrue("CF2.B must tombstone as extra", - c.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue() >= 1); - assertEquals("No row should be unrepairable", 0, - c.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue()); + assertTrue("CF1.A must mirror as missing", repairCounters.cellsMissingOnTarget >= 1); + assertTrue("CF2.B must tombstone as extra", repairCounters.cellsExtraOnTarget >= 1); + assertEquals("No row should be unrepairable", 0, repairCounters.rowsCannotRepair); try (PreparedStatement ps = targetConnection.prepareStatement( "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) { @@ -2053,7 +2064,7 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception { assertEquals("Source/target rows must match across both column families", collectMultiCfRows(sourceConnection, uniqueTableName), collectMultiCfRows(targetConnection, uniqueTableName)); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); } private List collectMultiCfRows(Connection conn, String tableName) throws SQLException { @@ -2097,6 +2108,7 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); SyncCountersResult c = getSyncCounters(result.repairJob); // The empty-key cell mirrors (rowsMissing++); NAME is shadow-suppressed (rowsCannotRepair++). assertRowDriftCounters(c, 1, 0, 0, 1); @@ -2106,6 +2118,7 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { // NAME shadow leaves target NAME=null while source has 'alice' — Phoenix SELECT diverges. verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); } /** @@ -2137,6 +2150,7 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { String.valueOf(fromTime), "--to-time", String.valueOf(toTime), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); SyncCountersResult c = getSyncCounters(result.repairJob); assertRowDriftCounters(c, 0, 0, 0, 1); assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); @@ -2145,6 +2159,7 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { // Source has the row, target's family is shadowed away — Phoenix SELECT must diverge. verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); } /** @@ -2173,6 +2188,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { String.valueOf(waitUntilWallClockPasses(sourceTs))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); SyncCountersResult c = getSyncCounters(result.repairJob); assertRowDriftCounters(c, 0, 0, 0, 1); assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); @@ -2181,6 +2197,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { // Source has 'alice', target's row is fully shadowed — Phoenix SELECT must diverge. verifyDataDiverges(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); } /** @@ -2270,10 +2287,12 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); SyncCountersResult c = getSyncCounters(result.repairJob); assertRepairCellCounters(c, 0, 0, 0, 1); assertRowDriftCounters(c, 0, 0, 0, 1); assertTrue("At least one mapper should roll up to UNREPAIRABLE", c.mappersUnrepairable >= 1); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); } /** @@ -2485,13 +2504,14 @@ private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Except assertTrue(result.dryRunJob.isSuccessful()); assertTrue(result.repairJob.isSuccessful()); - Counters c = result.repairJob.getCounters(); + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring // also covers the empty-key sentinel cell, hence the >= 1 assertion shape. assertTrue("source tombstone must mirror as a missing cell on target", - c.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue() >= 1); - assertTrue(c.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue() >= 1); - assertNoMismatchedCheckpoints(uniqueTableName, null); + repairCounters.cellsMissingOnTarget >= 1); + assertTrue(repairCounters.mappersRepaired >= 1); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); try (PreparedStatement ps = targetConnection.prepareStatement( "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { @@ -2552,16 +2572,15 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); assertTrue("Repair should succeed", repairJob.isSuccessful()); - Counters c = repairJob.getCounters(); + SyncCountersResult repairCounters = getSyncCounters(repairJob); assertTrue("All source-only rows should be marked missing", - c.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue() >= sourceOnly.length); + repairCounters.rowsMissingOnTarget >= sourceOnly.length); assertTrue("All target-only rows should be marked extra", - c.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue() >= targetOnly.length); - assertEquals("No chunk should fail repair", 0, - c.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue()); + repairCounters.rowsExtraOnTarget >= targetOnly.length); + assertEquals("No chunk should fail repair", 0, repairCounters.chunksRepairFailed); - // Convergence pass on a clean checkpoint to confirm no chunks remain mismatched. - convergeAndAssertIdentical(uniqueTableName, fromTime, toTime); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); } @Test @@ -2629,7 +2648,10 @@ public void testSyncTableWithDeleteAndCompactionOnSource() throws Exception { validateMapperCountersRepair(counters3, 3, 1, 0, 0); // The standard Phoenix view (no raw-scan) on both clusters remains identical. + // Three separate runs each landed under their own (from-time, to-time) PK, so checkpoint + // state accumulates — use the bounded validator (mirrors the OnTarget sibling). verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3); } @Test @@ -2684,9 +2706,10 @@ public void testSyncTableWithDeleteAndCompactionOnTarget() throws Exception { validateMapperCounters(counters3, 4, 0); // After major compaction tombstones are gone; the third raw-scan pass is clean and the - // standard Phoenix view matches. + // standard Phoenix view matches. Three separate runs each landed under their own + // (from-time, to-time) PK, so checkpoint state accumulates — use the bounded validator. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3); } @Test @@ -2732,7 +2755,10 @@ public void testSyncTableWithMultipleVersionAndCompactionOnSource() throws Excep validateMapperCountersRepair(counters3, 3, 1, 0, 0); // The standard Phoenix view (latest version only) on both clusters is identical. + // Two separate runs each landed under their own (from-time, to-time) PK, so checkpoint + // state accumulates — use the bounded validator (mirrors the OnTarget sibling). verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3); } @Test @@ -2781,9 +2807,11 @@ public void testSyncTableWithMultipleVersionAndCompactionOnTarget() throws Excep validateSyncCounters(counters3, 10, 10, 10, 0); validateMapperCounters(counters3, 4, 0); - // After repair the standard Phoenix view matches. + // After repair the standard Phoenix view matches. Three separate runs each landed under + // their own (from-time, to-time) PK, so checkpoint state accumulates across all three — + // use the bounded validator with the latest run's counters as the lower bound. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntriesAtLeast(uniqueTableName, null, counters3); } @Test @@ -2885,7 +2913,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc // Run was non-dry-run with no drift; repair flow is a no-op and target should match source // even though toTime is older than max lookback age. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - assertNoMismatchedCheckpoints(uniqueTableName, null); + validateCheckpointEntries(uniqueTableName, null, null, counters); } /** @@ -3838,15 +3866,6 @@ private long captureBaselineChunkCount(String tableName, int chunkSize) throws E return chunkCount; } - /** - * Runs the PhoenixSyncTableTool with 1KB chunk size for testing multiple rows per chunk. Returns - * the completed Job for counter verification. - */ - private Job runSyncToolWithLargeChunks(String tableName, String... additionalArgs) - throws Exception { - return runSyncToolWithChunkSize(tableName, 1024, additionalArgs); - } - /** * Runs the PhoenixSyncTableTool with specified chunk size. Returns the completed Job for counter * verification. @@ -3875,46 +3894,29 @@ private static class RepairRunResult { } } + /** Default chunk size (1 byte = one row per chunk) for {@link #runSyncToolWithRepair}. */ + private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs) + throws Exception { + return runSyncToolWithRepair(tableName, 1, additionalArgs); + } + /** * Runs the sync tool twice with the SAME pinned time window: first as a --dry-run to detect * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without * pinning, each invocation would fall through to System.currentTimeMillis() and the repair - * pass would create fresh rows instead of overwriting the dry-run pass's output. - * - *

    If the caller does not provide --from-time / --to-time, defaults of 0 / now are pinned. - * - *

    Default chunk size is 1 byte (one row per chunk) to mirror {@link #runSyncTool}. - */ - private RepairRunResult runSyncToolWithRepair(String tableName, String... additionalArgs) - throws Exception { - return runSyncToolWithRepairAndChunkSize(tableName, 1, additionalArgs); - } - - /** - * Same as {@link #runSyncToolWithRepair} but uses 1KB chunks (multiple rows per chunk). + * pass would create fresh rows instead of overwriting the dry-run pass's output. If the caller + * does not provide --from-time / --to-time, defaults of 0 / now are pinned. */ - private RepairRunResult runSyncToolWithRepairLargeChunks(String tableName, - String... additionalArgs) throws Exception { - return runSyncToolWithRepairAndChunkSize(tableName, 1024, additionalArgs); - } - - /** - * Same as {@link #runSyncToolWithRepair} but with an explicit chunk size. - */ - private RepairRunResult runSyncToolWithRepairAndChunkSize(String tableName, int chunkSize, + private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize, String... additionalArgs) throws Exception { long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L); long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis()); String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime); - // First run: --dry-run, only detect mismatches. String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run"); Job dryRunJob = runSyncToolWithChunkSize(tableName, chunkSize, dryRunArgs); - - // Second run: no --dry-run. Same time window so the checkpoint PK matches and any - // CHUNK/MISMATCHED rows from the dry-run pass are overwritten by CHUNK/REPAIRED. Job repairJob = runSyncToolWithChunkSize(tableName, chunkSize, pinnedArgs); return new RepairRunResult(dryRunJob, repairJob, fromTime, toTime); @@ -3965,27 +3967,6 @@ private static String[] appendArg(String[] args, String newArg) { return result; } - /** - * After a clean repair pass, asserts no checkpoint rows remain in any non-terminal or - * stuck state — MISMATCHED, UNREPAIRABLE, or REPAIR_FAILED. Tests that legitimately leave - * UNREPAIRABLE/REPAIR_FAILED rows should not call this helper. - */ - private void assertNoMismatchedCheckpoints(String tableName, String tenantId) - throws SQLException { - List entries = - queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); - String ctx = " (table=" + tableName + " tenant=" + tenantId + ")"; - assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); - assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); - assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); - } - /** * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops @@ -4413,9 +4394,12 @@ private void assertTargetRowAbsent(String tableName, int id) throws SQLException */ private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime) throws Exception { - runSyncToolWithRepair(tableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(toTime)); - assertNoMismatchedCheckpoints(tableName, null); + RepairRunResult result = runSyncToolWithRepair(tableName, "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + // Prior checkpoint state may already cover some chunks under earlier (fromTime, toTime) + // PKs; strict equality won't hold, so use the bounded variant with the repair pass's + // counters as the lower bound. + validateCheckpointEntriesAtLeast(tableName, null, getSyncCounters(result.repairJob)); verifyDataIdentical(sourceConnection, targetConnection, tableName); } @@ -4623,35 +4607,43 @@ private void validateSyncCountersWithMinChunk(SyncCountersResult counters, counters.chunksMismatched >= minChunksMismatched); } + /** Per-row structural invariants shared by both checkpoint validators. */ + private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow entry, + String tableName, String tenantId, String ctx) { + assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName()); + assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster()); + assertNotNull("TYPE should not be null" + ctx, entry.getType()); + assertTrue("TYPE should be REGION or CHUNK" + ctx, + PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType()) + || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())); + + if (tenantId == null) { + assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, + entry.getTenantId()); + } else { + assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId()); + } + + assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0); + assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime()); + + assertNotNull("EXECUTION_START_TIME should not be null" + ctx, + entry.getExecutionStartTime()); + assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime()); + assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx, + entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime()); + + assertNotNull("STATUS should not be null" + ctx, entry.getStatus()); + if (PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())) { + assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters()); + } + } + /** - * Aggressive end-to-end checkpoint-table validation. Queries the checkpoint table itself, then: - *

      - *
    1. Validates per-row structural integrity — PK columns non-null, TYPE ∈ {REGION, CHUNK}, - * TENANT_ID matches, time-range invariants (FROM_TIME ≥ 0, TO_TIME > FROM_TIME), execution - * timestamps non-null with END ≥ START, STATUS non-null, COUNTERS non-null on CHUNK rows. - *
    2. - *
    3. Pins per-(Type × Status) counts derived from the supplied counter objects: CHUNK and - * REGION crossed with VERIFIED / REPAIRED / UNREPAIRABLE / REPAIR_FAILED / MISMATCHED. - *
    4. - *
    5. Cross-checks REGION-row sourceRowsProcessed/targetRowsProcessed totals against the - * repair-pass counters (or dry-run counters when no repair ran).
    6. - *
    - *

    - * The two counter parameters select the validation mode: - *

      - *
    • {@code dryRunCounters != null && repairCounters == null} — dry-run-only test. Expects - * MISMATCHED rows to remain.
    • - *
    • {@code dryRunCounters == null && repairCounters != null} — combined verify+repair pass - * (no separate dry-run). VERIFIED count comes from {@code repairCounters}; MISMATCHED == 0. - *
    • - *
    • {@code dryRunCounters != null && repairCounters != null} — separate dry-run + repair. - * VERIFIED count comes from {@code dryRunCounters} (the repair pass's resume filter skips - * them); REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters}; - * MISMATCHED == 0.
    • - *
    - * Use after asserting counters and before declaring success — pins the persisted checkpoint - * state to the in-memory counter state, so a regression that reports the right counters but - * writes the wrong checkpoint rows (or vice versa) fails loudly. + * Strict checkpoint-table validation: pins per-(Type × Status) counts to the supplied counter + * objects. VERIFIED comes from {@code dryRunCounters} when present, otherwise from + * {@code repairCounters}; REPAIRED/UNREPAIRABLE/REPAIR_FAILED come from {@code repairCounters} + * when present; MISMATCHED comes from {@code dryRunCounters} only on dry-run-only flows. */ private void validateCheckpointEntries(String tableName, String tenantId, SyncCountersResult dryRunCounters, SyncCountersResult repairCounters) throws SQLException { @@ -4663,48 +4655,21 @@ private void validateCheckpointEntries(String tableName, String tenantId, queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); String ctx = " (table=" + tableName + " tenant=" + tenantId + ")"; - // Per-row structural validation + REGION row-processing totals. long mapperRegionCount = 0; long chunkCount = 0; long sourceRowsProcessed = 0; long targetRowsProcessed = 0; for (PhoenixSyncTableCheckpointOutputRow entry : entries) { - assertEquals("TABLE_NAME should match" + ctx, tableName, entry.getTableName()); - assertEquals("TARGET_CLUSTER should match" + ctx, targetZkQuorum, entry.getTargetCluster()); - assertNotNull("TYPE should not be null" + ctx, entry.getType()); - assertTrue("TYPE should be REGION or CHUNK" + ctx, - PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType()) - || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())); - - if (tenantId == null) { - assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, - entry.getTenantId()); - } else { - assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId()); - } - - assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0); - assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime()); - - assertNotNull("EXECUTION_START_TIME should not be null" + ctx, - entry.getExecutionStartTime()); - assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime()); - assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx, - entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime()); - - assertNotNull("STATUS should not be null" + ctx, entry.getStatus()); - + validateCheckpointRowStructure(entry, tableName, tenantId, ctx); if (PhoenixSyncTableCheckpointOutputRow.Type.REGION.equals(entry.getType())) { mapperRegionCount++; sourceRowsProcessed += entry.getSourceRowsProcessed(); targetRowsProcessed += entry.getTargetRowsProcessed(); } else { chunkCount++; - assertNotNull("COUNTERS should not be null for CHUNK entries" + ctx, entry.getCounters()); } } - // Per-(Type × Status) counts derived from counter objects. boolean isDryRunOnly = (repairCounters == null); SyncCountersResult verifiedSource = (dryRunCounters != null ? dryRunCounters : repairCounters); long expectedChunkVerified = verifiedSource.chunksVerified; @@ -4750,8 +4715,6 @@ private void validateCheckpointEntries(String tableName, String tenantId, countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); - // Aggregate type counts (sanity — sum of per-status counts across all five statuses must - // equal what the loop above tallied). long expectedChunkTotal = expectedChunkVerified + expectedChunkRepaired + expectedChunkUnrepairable + expectedChunkRepairFailed + expectedChunkMismatched; long expectedRegionTotal = expectedRegionVerified + expectedRegionRepaired @@ -4759,14 +4722,8 @@ private void validateCheckpointEntries(String tableName, String tenantId, assertEquals("Total CHUNK checkpoint rows" + ctx, expectedChunkTotal, chunkCount); assertEquals("Total REGION checkpoint rows" + ctx, expectedRegionTotal, mapperRegionCount); - // Row-processing totals: REGION rows persist per-mapper sourceRowsProcessed / - // targetRowsProcessed; their sum equals the job-level counter only when no REGION row was - // overwritten by a subsequent repair pass. The checkpoint PK excludes IS_DRY_RUN, so a repair - // pass UPSERTs the REGION row at the dry-run row's PK; for partially-mismatched regions - // repair re-processes only the gap (non-VERIFIED) chunks and writes a smaller delta, so the - // sum across REGION rows no longer matches the dry-run job-level total. Only assert the - // strict equality when (a) no repair pass was run, or (b) the repair pass touched no - // regions (everything was already VERIFIED so dry-run rows stay intact). + // Repair UPSERTs REGION rows at the dry-run PK with a smaller (gap-only) delta, so totals + // only match when no REGION was actually re-processed. boolean repairTouchedRegions = repairCounters != null && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable + repairCounters.mappersRepairFailed) > 0; @@ -4778,6 +4735,60 @@ private void validateCheckpointEntries(String tableName, String tenantId, } } + /** + * Bounded variant of {@link #validateCheckpointEntries} for tests where checkpoint rows + * accumulate across multiple PKs (multi-window, partial reruns under split/merge, recovery). + * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts + * are at least {@code latestCounters}'s verified+repaired totals (or just the status invariants + * when {@code latestCounters} is null). + */ + private void validateCheckpointEntriesAtLeast(String tableName, String tenantId, + SyncCountersResult latestCounters) throws SQLException { + List entries = + queryCheckpointTable(sourceConnection, tableName, targetZkQuorum, tenantId); + String ctx = " (table=" + tableName + " tenant=" + tenantId + ")"; + + for (PhoenixSyncTableCheckpointOutputRow entry : entries) { + validateCheckpointRowStructure(entry, tableName, tenantId, ctx); + } + + assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); + assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); + assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0, + countCheckpointsByStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); + + if (latestCounters != null) { + long chunkVerifiedPlusRepaired = + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED) + + countCheckpointsByTypeAndStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); + long regionVerifiedPlusRepaired = + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED) + + countCheckpointsByTypeAndStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); + long expectedChunkLowerBound = latestCounters.chunksVerified + latestCounters.chunksRepaired; + long expectedRegionLowerBound = + latestCounters.mappersVerified + latestCounters.mappersRepaired; + assertTrue( + "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" + + chunkVerifiedPlusRepaired + ", expected>=" + expectedChunkLowerBound + ")", + chunkVerifiedPlusRepaired >= expectedChunkLowerBound); + assertTrue( + "REGION VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" + + regionVerifiedPlusRepaired + ", expected>=" + expectedRegionLowerBound + ")", + regionVerifiedPlusRepaired >= expectedRegionLowerBound); + } + } + /** * Data class to hold test table data */ From cdaf521b24cba807ae92bbc1575116969c7e9e3c Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Mon, 8 Jun 2026 22:00:05 +0530 Subject: [PATCH 14/18] ready for review --- .../phoenix/jdbc/PhoenixEmbeddedDriver.java | 18 +- .../PhoenixSyncTableCheckpointOutputRow.java | 19 +- .../PhoenixSyncTableChunkRepairer.java | 331 ++++---- .../PhoenixSyncTableInputFormat.java | 13 +- .../mapreduce/PhoenixSyncTableMapper.java | 78 +- .../mapreduce/PhoenixSyncTableTool.java | 42 +- .../end2end/PhoenixSyncTableToolIT.java | 758 ++++++++---------- .../ConnectionQueryServicesMetricsIT.java | 1 - .../PhoenixSyncTableOutputRepositoryTest.java | 64 +- 9 files changed, 632 insertions(+), 692 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java index 524789464b4..b5bbe2ea552 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/jdbc/PhoenixEmbeddedDriver.java @@ -50,13 +50,13 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable { private final static String DNC_JDBC_PROTOCOL_SUFFIX = "//"; private final static String DRIVER_NAME = "PhoenixEmbeddedDriver"; private static final String TEST_URL_AT_END = - "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM; + "" + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR + PHOENIX_TEST_DRIVER_URL_PARAM; private static final String TEST_URL_IN_MIDDLE = - TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR; + TEST_URL_AT_END + PhoenixRuntime.JDBC_PROTOCOL_TERMINATOR; private static final String[] SUPPORTED_PROTOCOLS = - new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK, - PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC }; + new String[] { PhoenixRuntime.JDBC_PROTOCOL, PhoenixRuntime.JDBC_PROTOCOL_ZK, + PhoenixRuntime.JDBC_PROTOCOL_MASTER, PhoenixRuntime.JDBC_PROTOCOL_RPC }; private final static DriverPropertyInfo[] EMPTY_INFO = new DriverPropertyInfo[0]; public final static String MAJOR_VERSION_PROP = "DriverMajorVersion"; @@ -64,9 +64,9 @@ public abstract class PhoenixEmbeddedDriver implements Driver, SQLCloseable { public final static String DRIVER_NAME_PROP = "DriverName"; public static final ReadOnlyProps DEFAULT_PROPS = - new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP, - Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP, - Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME)); + new ReadOnlyProps(ImmutableMap.of(MAJOR_VERSION_PROP, + Integer.toString(MetaDataProtocol.PHOENIX_MAJOR_VERSION), MINOR_VERSION_PROP, + Integer.toString(MetaDataProtocol.PHOENIX_MINOR_VERSION), DRIVER_NAME_PROP, DRIVER_NAME)); PhoenixEmbeddedDriver() { } @@ -139,7 +139,7 @@ protected final Connection createConnection(String url, Properties info) throws } else { // If empty HA group is returned, fall back to single cluster. url = HighAvailabilityGroup.getFallbackCluster(url, info).orElseThrow( - () -> new SQLException("HA group can not be initialized, fallback to single cluster")); + () -> new SQLException("HA group can not be initialized, fallback to single cluster")); } } ConnectionQueryServices cqs = getConnectionQueryServices(url, augmentedInfo); @@ -155,7 +155,7 @@ protected final Connection createConnection(String url, Properties info) throws * @return new or cached QuerySerices used to establish a new Connection. */ protected abstract ConnectionQueryServices getConnectionQueryServices(String url, Properties info) - throws SQLException; + throws SQLException; @Override public int getMajorVersion() { diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java index ef3abbbeb55..3603294988d 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableCheckpointOutputRow.java @@ -167,10 +167,10 @@ public static class CounterFormatter { "%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d"; /** - * Formats chunk counters as comma-separated key=value pairs. Always emits all nine - * counters; unpopulated counters are 0 so operators querying the checkpoint table see - * a uniform format. {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run; - * cell-level counters and {@code ROWS_CANNOT_REPAIR} are populated only in repair mode. + * Formats chunk counters as comma-separated key=value pairs. Always emits all nine counters; + * unpopulated counters are 0 so operators querying the checkpoint table see a uniform format. + * {@code ROWS_DIFFERENT_ON_TARGET} is populated only in dry-run; cell-level counters and + * {@code ROWS_CANNOT_REPAIR} are populated only in repair mode. */ public static String formatChunk(long sourceRows, long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget, long rowsDifferentOnTarget, long rowsCannotRepair, @@ -189,14 +189,13 @@ public static String formatChunk(long sourceRows, long targetRows, long rowsMiss } /** - * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven - * drift counters are the per-region sum of the same fields emitted by - * {@link #formatChunk}. + * Formats mapper (region-level) counters as comma-separated key=value pairs. The seven drift + * counters are the per-region sum of the same fields emitted by {@link #formatChunk}. */ public static String formatMapper(long chunksVerified, long chunksMismatched, long sourceRows, - long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget, - long rowsDifferentOnTarget, long rowsCannotRepair, long cellsMissingOnTarget, - long cellsExtraOnTarget, long cellsDifferentOnTarget) { + long targetRows, long rowsMissingOnTarget, long rowsExtraOnTarget, long rowsDifferentOnTarget, + long rowsCannotRepair, long cellsMissingOnTarget, long cellsExtraOnTarget, + long cellsDifferentOnTarget) { return String.format(FORMAT_MAPPER, PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED.name(), chunksVerified, PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED.name(), chunksMismatched, diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java index 3bda5364696..24deb8e7e85 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairer.java @@ -53,33 +53,31 @@ import org.slf4j.LoggerFactory; /** - * Performs row-level repair for a mismatched chunk by merge-scanning source and target - * cluster data and applying targeted mutations to target. - * - *

    The two scan ranges may differ: the verifier reads target over a wider range than - * source (covers extra-on-target rows that fall between consecutive source chunks); - * repair must mirror the same boundaries so those extras are visible here as - * {@code cmp > 0} rows and get deleted. - * - *

    Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee). + * Performs row-level repair for a mismatched chunk by merge-scanning source and target cluster data + * and applying targeted mutations to target. + *

    + * The two scan ranges may differ: the verifier reads target over a wider range than source (covers + * extra-on-target rows that fall between consecutive source chunks); repair must mirror the same + * boundaries so those extras are visible here as {@code cmp > 0} rows and get deleted. + *

    + * Merge-scan contract: both scanners return rows in ascending key order (HBase guarantee). *

      - *
    • {@code cmp == 0} (same row): compare cells; repair only differing cells.
    • - *
    • {@code cmp < 0} (source-only): mirror all source cells onto target.
    • - *
    • {@code cmp > 0} (target-only): tombstone target cells within {@code [fromTime, toTime]}.
    • + *
    • {@code cmp == 0} (same row): compare cells; repair only differing cells.
    • + *
    • {@code cmp < 0} (source-only): mirror all source cells onto target.
    • + *
    • {@code cmp > 0} (target-only): tombstone target cells within + * {@code [fromTime, toTime]}.
    • *
    * Cells outside {@code [fromTime, toTime]} are never read (scan time range), so never mutated. - * - *

    Tombstone semantics: HBase has four tombstone subtypes ({@code Delete}, - * {@code DeleteColumn}, {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts - * we mirror onto target may be silently shadowed by an existing target tombstone; in that - * case the mirror is suppressed and the row carries unrepairable drift (operator must - * major-compact target to reap shadowing tombstones before a re-run can converge). See - * {@link TargetRowRecord}. + *

    + * Tombstone semantics: HBase has four tombstone subtypes ({@code Delete}, {@code DeleteColumn}, + * {@code DeleteFamily}, {@code DeleteFamilyVersion}). Source Puts we mirror onto target may be + * silently shadowed by an existing target tombstone; in that case the mirror is suppressed and the + * row carries unrepairable drift (operator must major-compact target to reap shadowing tombstones + * before a re-run can converge). See {@link TargetRowRecord}. */ public final class PhoenixSyncTableChunkRepairer { - private static final Logger LOGGER = - LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class); + private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableChunkRepairer.class); private final Connection sourceConnection; private final Connection targetConnection; @@ -108,11 +106,11 @@ public PhoenixSyncTableChunkRepairer(Connection sourceConnection, Connection tar } /** - * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the - * terminal status and accumulated {@link DriftCounters}; never throws on per-chunk - * scan/flush failure (returns {@link ChunkRepairResult.Status#REPAIR_FAILED}). The - * only declared {@link SQLException} surfaces from {@link Connection#unwrap}, which - * indicates a misconfigured connection rather than a per-chunk fault. + * Repairs one mismatched chunk. Returns a {@link ChunkRepairResult} carrying the terminal status + * and accumulated {@link DriftCounters}; never throws on per-chunk scan/flush failure (returns + * {@link ChunkRepairResult.Status#REPAIR_FAILED}). The only declared {@link SQLException} + * surfaces from {@link Connection#unwrap}, which indicates a misconfigured connection rather than + * a per-chunk fault. */ public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress) throws SQLException { @@ -159,16 +157,16 @@ public ChunkRepairResult repair(ChunkRepairRequest req, Progressable progress) ChunkRepairResult result = ChunkRepairResult.completed(drift); LOGGER.info("Completed repair for chunk source=[{}, {}] with status={}: {}", - Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), - result.status, drift.toLogString()); + Bytes.toStringBinary(req.sourceStart), Bytes.toStringBinary(req.sourceEnd), result.status, + drift.toLogString()); return result; } /** - * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row; - * never touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose - * contents differ — verifier-only signal, not produced in repair mode (which reports cell - * granularity instead). + * Dry-run merge-walk: bumps the three row-level drift counters and logs each diverged row; never + * touches target. {@code rowsDifferentOnTarget} flags rows present on both sides whose contents + * differ — verifier-only signal, not produced in repair mode (which reports cell granularity + * instead). */ private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner targetScanner, DriftCounters drift, Progressable progress) throws IOException { @@ -207,14 +205,12 @@ private void walkAndCountDrift(ResultScanner sourceScanner, ResultScanner target * each time the batch reaches {@link #repairBatchSize}, and finally draining the tail. Per * branch: *

      - *
    • {@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable - * flag.
    • - *
    • {@code cmp < 0} — mirror the source row onto target; bump {@code rowsMissing} unless - * the whole row was shadowed, and {@code rowsCannotRepair} unless every cell was - * mirrored.
    • - *
    • {@code cmp > 0} — tombstone the extra row on target; bump {@code rowsExtra} when at - * least one live cell was tombstoned, else {@code rowsCannotRepair} (row was already - * all tombstones).
    • + *
    • {@code cmp == 0} — diff cells; record cell-level drift and any row-unrepairable flag.
    • + *
    • {@code cmp < 0} — mirror the source row onto target; bump {@code rowsMissing} unless the + * whole row was shadowed, and {@code rowsCannotRepair} unless every cell was mirrored.
    • + *
    • {@code cmp > 0} — tombstone the extra row on target; bump {@code rowsExtra} when at least + * one live cell was tombstoned, else {@code rowsCannotRepair} (row was already all + * tombstones).
    • *
    */ private void repairDiffRows(ResultScanner sourceScanner, ResultScanner targetScanner, @@ -295,9 +291,9 @@ private static int compareRowKeys(Result sourceResult, Result targetResult) { /** * Whole-row content equality check used by dry-run row-level diffing. Delegates to - * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level - * mismatch (family, qualifier, timestamp, type, value); we map the throw to {@code false} - * so the cmp==0 path can flag the row without producing repair mutations. + * {@link Result#compareResults(Result, Result, boolean)} which throws on any cell-level mismatch + * (family, qualifier, timestamp, type, value); we map the throw to {@code false} so the cmp==0 + * path can flag the row without producing repair mutations. */ private boolean rowsEqual(Result src, Result tgt) { try { @@ -309,8 +305,8 @@ private boolean rowsEqual(Result src, Result tgt) { } /** - * Mirrors every source cell of a row that is missing on target. Each cell is - * shadow-checked against target's per-row record (see {@link TargetRowRecord}). + * Mirrors every source cell of a row that is missing on target. Each cell is shadow-checked + * against target's per-row record (see {@link TargetRowRecord}). */ private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { @@ -325,17 +321,16 @@ private RowMirrorStatus mirrorWholeRow(Result sourceResult, Table targetHTable, if (mirrored == 0) { return RowMirrorStatus.FULLY_SHADOWED; } - return rowRepairBuffer.anyCellUnrepairable ? RowMirrorStatus.PARTIALLY_MIRRORED + return rowRepairBuffer.anyCellUnrepairable + ? RowMirrorStatus.PARTIALLY_MIRRORED : RowMirrorStatus.FULLY_MIRRORED; } /** - * Tombstones every live cell of a row that is extra on target. Skips cells that are - * themselves already tombstones (see {@link #tombstoneTargetCell}). - * - * @return the number of live cells that contributed a tombstone marker. {@code 0} means - * the row was already entirely tombstones; the caller records this as - * {@code ROWS_CANNOT_REPAIR}. + * Tombstones every live cell of a row that is extra on target. Skips cells that are themselves + * already tombstones (see {@link #tombstoneTargetCell}). + * @return the number of live cells that contributed a tombstone marker. {@code 0} means the row + * was already entirely tombstones; the caller records this as {@code ROWS_CANNOT_REPAIR}. */ private int tombstoneWholeRow(Result targetResult, Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { @@ -356,17 +351,17 @@ private int tombstoneWholeRow(Result targetResult, Table targetHTable, List /** * Diffs cells of two rows present on both clusters in lock-step using {@link CellComparator} * order and emits {@link Put}/{@link Delete} mutations. - * - *

    Branches: + *

    + * Branches: *

      - *
    • same coords + matching value → no drift
    • - *
    • same coords + different value → different++; mirror source cell (shadow-checked)
    • - *
    • source-only cell → missing++; mirror source cell (shadow-checked)
    • - *
    • target-only live cell → extra++; tombstone target cell
    • - *
    • target-only tombstone cell → skip; row carries unrepairable drift
    • + *
    • same coords + matching value → no drift
    • + *
    • same coords + different value → different++; mirror source cell (shadow-checked)
    • + *
    • source-only cell → missing++; mirror source cell (shadow-checked)
    • + *
    • target-only live cell → extra++; tombstone target cell
    • + *
    • target-only tombstone cell → skip; row carries unrepairable drift
    • *
    - * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written); - * the row-level signal flows through {@link RowDriftInfo#rowCannotRepair}. + * Mirrors suppressed by shadowing do NOT bump the cell counter (nothing was written); the + * row-level signal flows through {@link RowDriftInfo#rowCannotRepair}. */ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result targetResult, Table targetHTable, List pendingPuts, List pendingDeletes) throws IOException { @@ -399,7 +394,9 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta if (cmp == 0) { // Same coordinates, compare values. if (!CellUtil.matchingValue(sourceCells[sourceIdx], targetCells[targetIdx])) { - if (mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer)) { + if ( + mirrorSourceCellUnlessShadowed(sourceCells[sourceIdx], targetHTable, rowRepairBuffer) + ) { cellDifferent++; } } @@ -413,8 +410,10 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta sourceIdx++; } else { // extra on target - if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, - sourceMaxTsByColumn)) { + if ( + tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, + sourceMaxTsByColumn) + ) { cellExtra++; } else { rowRepairBuffer.anyCellUnrepairable = true; @@ -428,15 +427,20 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta sourceIdx++; } while (targetIdx < targetCells.length) { - if (tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, - sourceMaxTsByColumn)) { + if ( + tombstoneTargetCell(targetCells[targetIdx++], targetHTable, rowRepairBuffer, + sourceMaxTsByColumn) + ) { cellExtra++; } else { rowRepairBuffer.anyCellUnrepairable = true; } } - if (cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 && !rowRepairBuffer.anyCellUnrepairable) { + if ( + cellMissing == 0 && cellExtra == 0 && cellDifferent == 0 + && !rowRepairBuffer.anyCellUnrepairable + ) { return RowDriftInfo.NONE; } rowRepairBuffer.flush(pendingPuts, pendingDeletes); @@ -446,8 +450,8 @@ private RowDriftInfo generateMutationForDiffCells(Result sourceResult, Result ta /** * Routes a source cell to the right mutation kind. Tombstone cells go through - * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan} - * this matters because {@link Put#add(Cell)} rejects non-Put cells. + * {@link Delete#add(Cell)} (preserves the exact tombstone subtype); under {@code --raw-scan} this + * matters because {@link Put#add(Cell)} rejects non-Put cells. */ private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws IOException { if (CellUtil.isDelete(cell)) { @@ -458,9 +462,8 @@ private void mirrorSourceCell(Cell cell, RowRepairBuffer rowRepairBuffer) throws } /** - * Mirrors a source cell onto target unless an existing target tombstone would shadow it. - * Shadow detection runs only if source has Put cells; tombstoned source cells always mirror. - * + * Mirrors a source cell onto target unless an existing target tombstone would shadow it. Shadow + * detection runs only if source has Put cells; tombstoned source cells always mirror. * @return {@code true} if mirrored, {@code false} if suppressed (caller marks the row * unrepairable). */ @@ -471,7 +474,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, // diverged. e.g. src Put(name, T=200) vs tgt DeleteColumn(name, T=300) covering // ts<=300. Skip the write and flag the row unrepairable; operator must major-compact // target to reap the shadow. Source tombstones can't be shadowed, hence skip the check. - if (!CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell)) { + if ( + !CellUtil.isDelete(cell) && rowRepairBuffer.targetRowRecord(targetHTable).wouldShadow(cell) + ) { rowRepairBuffer.anyCellUnrepairable = true; return false; } @@ -480,17 +485,17 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable, } /** - * Tombstones a target-only cell to make target's read view at this column match source's. - * Skips cells that are themselves already tombstones. + * Tombstones a target-only cell to make target's read view at this column match source's. Skips + * cells that are themselves already tombstones. + *

    + * Called only when source has no cell at this target cell's exact {@code (cf, q, ts)}. If source + * does have a cell at the same {@code (cf, q, ts)}, the caller takes the mirroring path instead + *

    + * Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples assume + * {@code MAX_VERSIONS=3} and show only the relevant column. + *

    + * Case 1 — Source has no cell at this column: * - *

    Called only when source has no cell at this target cell's exact - * {@code (cf, q, ts)}. If source does have a cell at the same {@code (cf, q, ts)}, - * the caller takes the mirroring path instead - * - *

    Tombstone subtype depends on what source has at this {@code (cf, q)}. Examples - * assume {@code MAX_VERSIONS=3} and show only the relevant column. - * - *

    Case 1 — Source has no cell at this column: *

        *   source row: (no NAME)
        *   target row: Put(NAME, "carol")@900 visible
    @@ -498,8 +503,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
        *   action    : DeleteColumn(NAME)@900   (covers ts <= 900, wipes "bob" too)
        *   result    : target reads no NAME — matches source.
        * 
    + *

    + * Case 2 — {@code sourceMaxTs >= targetTs}: * - *

    Case 2 — {@code sourceMaxTs >= targetTs}: *

        *   source row: Put(NAME, "alice")@500       (sourceMaxTs = 500)
        *   target row: Put(NAME, "old",  )@200      (input cell; source has nothing at @200)
    @@ -507,8 +513,9 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
        *   result    : "old"@200 is shadowed;
        *              Put(NAME, "alice")@500 would already have been mirrored
        * 
    + *

    + * Case 3 — {@code sourceMaxTs < targetTs}: * - *

    Case 3 — {@code sourceMaxTs < targetTs}: *

        *   source row: Put(NAME, "alice")@300       (sourceMaxTs = 300)
        *   target row: Put(NAME, "carol")@900 visible
    @@ -519,11 +526,11 @@ private boolean mirrorSourceCellUnlessShadowed(Cell cell, Table targetHTable,
        *   result    : target's "alice"@300 is the highest live version — matches source.
        * 
    * - * @return true if the cell was a live cell that contributed a tombstone marker, false if - * the cell was already a tombstone and was skipped. + * @return true if the cell was a live cell that contributed a tombstone marker, false if the cell + * was already a tombstone and was skipped. */ - private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuffer rowRepairBuffer, - Map sourceMaxTsByColumn) throws IOException { + private boolean tombstoneTargetCell(Cell cell, Table targetHTable, + RowRepairBuffer rowRepairBuffer, Map sourceMaxTsByColumn) throws IOException { if (CellUtil.isDelete(cell)) { return false; } @@ -548,9 +555,8 @@ private boolean tombstoneTargetCell(Cell cell, Table targetHTable, RowRepairBuff /** * Builds a row-level HBase scan for repair. Honors the user's {@code --raw-scan} and - * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL / - * {@code IS_STRICT_TTL} attributes so the cells visited here are the same cells the - * verifier hashed. + * {@code --read-all-versions} flags; adds bulk caching plus Phoenix TTL / {@code IS_STRICT_TTL} + * attributes so the cells visited here are the same cells the verifier hashed. */ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKeyInclusive, boolean isEndKeyInclusive, PhoenixConnection phoenixConn) throws IOException, SQLException { @@ -571,17 +577,16 @@ private Scan createRepairScan(byte[] startKey, byte[] endKey, boolean isStartKey /** * Flushes the accumulated Put and Delete batches to target as a single mixed RPC via - * {@link Table#batch}. The mixed batch (rather than separate {@code put()} + - * {@code delete()} calls) closes the inter-RPC window where a JVM/regionserver crash - * between the two could leave target with Puts applied but matching Deletes missing. - * - *

    {@link Table#batch} does NOT throw for partial failures — per-mutation failures - * (e.g. {@code NotServingRegionException} from a region split mid-batch, - * {@code WrongRegionException} from a merge) land in the {@code results} array as - * {@link Throwable} entries. We surface the first such failure as {@link IOException} - * so the caller treats this chunk as {@code REPAIR_FAILED} rather than silently - * marking it {@code REPAIRED}; on re-run the resume filter excludes - * {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap. + * {@link Table#batch}. The mixed batch (rather than separate {@code put()} + {@code delete()} + * calls) closes the inter-RPC window where a JVM/regionserver crash between the two could leave + * target with Puts applied but matching Deletes missing. + *

    + * {@link Table#batch} does NOT throw for partial failures — per-mutation failures (e.g. + * {@code NotServingRegionException} from a region split mid-batch, {@code WrongRegionException} + * from a merge) land in the {@code results} array as {@link Throwable} entries. We surface the + * first such failure as {@link IOException} so the caller treats this chunk as + * {@code REPAIR_FAILED} rather than silently marking it {@code REPAIRED}; on re-run the resume + * filter excludes {@code REPAIR_FAILED} and the chunk re-enters as an unprocessed gap. */ private void flushRepairMutations(Table targetHTable, List puts, List deletes) throws IOException { @@ -611,26 +616,26 @@ private void flushRepairMutations(Table targetHTable, List puts, List 0) { Throwable firstFailure = (Throwable) results[firstFailureIdx]; Row failedRow = mutations.get(firstFailureIdx); - throw new IOException(String.format( - "Repair batch had %d/%d mutation failure(s); first failure on row %s: %s", - failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()), - firstFailure.getMessage()), firstFailure); + throw new IOException( + String.format("Repair batch had %d/%d mutation failure(s); first failure on row %s: %s", + failureCount, results.length, Bytes.toStringBinary(failedRow.getRow()), + firstFailure.getMessage()), + firstFailure); } puts.clear(); deletes.clear(); } /** - * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may - * be wider so the repair scan sees the same cells (including extra-on-target rows between - * consecutive source chunks) that the verifier hashed. - * - *

    {@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier - * recorded; threaded into the COUNTERS column on the resulting checkpoint row. - * {@link #verifyStartTime} is the timestamp captured when verification began for this - * chunk; reused as EXECUTION_START_TIME on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED - * checkpoint row so the row spans the full verify+repair lifecycle that overwrites the - * MISMATCHED row. + * Inputs to a chunk repair attempt. Source range is the chunk boundary; target range may be wider + * so the repair scan sees the same cells (including extra-on-target rows between consecutive + * source chunks) that the verifier hashed. + *

    + * {@link #verifySourceRows} / {@link #verifyTargetRows} are the row counts the verifier recorded; + * threaded into the COUNTERS column on the resulting checkpoint row. {@link #verifyStartTime} is + * the timestamp captured when verification began for this chunk; reused as EXECUTION_START_TIME + * on the REPAIRED/UNREPAIRABLE/REPAIR_FAILED checkpoint row so the row spans the full + * verify+repair lifecycle that overwrites the MISMATCHED row. */ public static final class ChunkRepairRequest { public final byte[] sourceStart; @@ -661,8 +666,8 @@ public ChunkRepairRequest(byte[] sourceStart, byte[] sourceEnd, byte[] targetSta } /** - * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift - * counters, end-of-attempt timestamp, and the failure exception when status is + * Outcome of a chunk repair attempt. Carries the terminal status, accumulated drift counters, + * end-of-attempt timestamp, and the failure exception when status is * {@link Status#REPAIR_FAILED}. Status precedence (most-severe wins): * {@link Status#REPAIR_FAILED} > {@link Status#UNREPAIRABLE} > {@link Status#REPAIRED}. */ @@ -699,10 +704,10 @@ static ChunkRepairResult failed(DriftCounters drift, IOException failure) { } /** - * Per-chunk aggregate of six drift counters — three row-level - * ({@code rowsMissingOnTarget}, {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and - * three cell-level ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the - * caller maps fields onto MapReduce job counters and the checkpoint COUNTERS string. + * Per-chunk aggregate of six drift counters — three row-level ({@code rowsMissingOnTarget}, + * {@code rowsExtraOnTarget}, {@code rowsCannotRepair}) and three cell-level + * ({@code cellsMissing/Extra/DifferentOnTarget}). Pure accumulator; the caller maps fields onto + * MapReduce job counters and the checkpoint COUNTERS string. */ public static final class DriftCounters { public long rowsMissingOnTarget; @@ -731,14 +736,14 @@ public String toLogString() { } /** - * Per-row snapshot of target's tombstones and Puts. Two queries: - * {@link #wouldShadow} (shadow detection) and {@link #targetPutTimestampsBetween} - * (hidden-version discovery). For examples of how callers use these, see the - * doc on {@link RowRepairBuffer#targetRowRecord}; for scan shape and time-range - * rationale, see {@link #load}. + * Per-row snapshot of target's tombstones and Puts. Two queries: {@link #wouldShadow} (shadow + * detection) and {@link #targetPutTimestampsBetween} (hidden-version discovery). For examples of + * how callers use these, see the doc on {@link RowRepairBuffer#targetRowRecord}; for scan shape + * and time-range rationale, see {@link #load}. + *

    + * HBase has four tombstone subtypes; each is recorded into its own map because shadow scope + * differs: * - *

    HBase has four tombstone subtypes; each is recorded into its own map because - * shadow scope differs: *

        *   Delete               shadows Put at (cf, q, ts == T) exactly
        *   DeleteColumn         shadows Puts at (cf, q, ts <= T)
    @@ -756,20 +761,18 @@ static final class TargetRowRecord {
     
         /**
          * Builds a {@link TargetRowRecord} from a single-row HBase scan.
    -     *
    -     * 

    raw=true + all-versions are forced regardless of user flags so tombstones - * and max-versions-filtered older Puts (the two things this record exists to capture) - * are surfaced. - * - *

    Time range {@code [fromTime, MAX_VALUE]}: + *

    + * raw=true + all-versions are forced regardless of user flags so tombstones and + * max-versions-filtered older Puts (the two things this record exists to capture) are surfaced. + *

    + * Time range {@code [fromTime, MAX_VALUE]}: *

      - *
    • Lower bound = {@code fromTime}: cells below the verify window can't affect - * repair inside the window.
    • - *
    • Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at - * {@code ts >= toTime} can still shadow a Put we mirror at {@code ts} in window - * during application reads, so we must see it. e.g. window - * {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without the - * wide upper bound we'd miss the 900 tombstone and write a doomed mirror.
    • + *
    • Lower bound = {@code fromTime}: cells below the verify window can't affect repair inside + * the window.
    • + *
    • Upper bound = {@code MAX_VALUE} (NOT {@code toTime}): a tombstone at {@code ts >= toTime} + * can still shadow a Put we mirror at {@code ts} in window during application reads, so we must + * see it. e.g. window {@code [0, 600)}, tgt has DeleteColumn@900, src wants Put@500 — without + * the wide upper bound we'd miss the 900 tombstone and write a doomed mirror.
    • *
    */ static TargetRowRecord load(byte[] rowKey, Table targetHTable, long fromTime) @@ -806,9 +809,8 @@ void record(Cell cell) { /** * Records one tombstone into its per-subtype map for {@link #wouldShadow} to query. - * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to - * the max ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion}) - * accumulate into a set. + * {@code <=ts} delete subtypes ({@code DeleteColumn}, {@code DeleteFamily}) collapse to the max + * ts; exact-ts subtypes ({@code Delete}, {@code DeleteFamilyVersion}) accumulate into a set. */ private void recordTombstone(Cell tombstone) { long ts = tombstone.getTimestamp(); @@ -859,9 +861,9 @@ boolean wouldShadow(Cell sourcePut) { /** * Returns target's Put timestamps at {@code (cf, q)} that are strictly greater than - * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find - * hidden (max-versions-filtered) target versions sitting between source's max ts and - * target's visible ts so they can be point-Deleted. + * {@code lowerExclusive} and strictly less than {@code upperExclusive}. Used to find hidden + * (max-versions-filtered) target versions sitting between source's max ts and target's visible + * ts so they can be point-Deleted. */ Set targetPutTimestampsBetween(byte[] family, byte[] qualifier, long lowerExclusive, long upperExclusive) { @@ -904,8 +906,7 @@ public int hashCode() { /** * Per-row scratch buffer: lazily-built {@link Put}/{@link Delete} mutations, lazily-loaded - * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the - * merge. + * {@link TargetRowRecord}, and an unrepairable-drift flag the caller reads after the merge. */ final class RowRepairBuffer { private final byte[] rowKey; @@ -935,25 +936,27 @@ Delete delete() { /** * Returns the cached {@link TargetRowRecord} for this row, loading on first call via * {@link TargetRowRecord#load} (one raw all-versions scan, time range - * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the - * current row — so repeated cell-level lookups within the row pay one round-trip total. - * - *

    Two consumers: + * {@code [fromTime, MAX_VALUE]}). Cache scope is the buffer's lifetime — i.e. the current row — + * so repeated cell-level lookups within the row pay one round-trip total. + *

    + * Two consumers: + *

    + * Shadow detection — {@link #mirrorSourceCellUnlessShadowed} asks + * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes that + * target's existing tombstones would render invisible. * - *

    Shadow detection — {@link #mirrorSourceCellUnlessShadowed} asks - * {@link TargetRowRecord#wouldShadow} before mirroring a source Put, to skip writes - * that target's existing tombstones would render invisible. *

          *   target row state: DeleteColumn(NAME)@T=900   (covers ts <= 900)
          *   source row state: Put(NAME, "alice")@T=500
          *   wouldShadow(srcPut@500) → true
          *   ⇒ skip mirror, mark row unrepairable; operator must major-compact target
          * 
    + *

    + * Hidden-version discovery — {@link #tombstoneTargetCell} asks + * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts sitting + * between source's max ts and target's visible ts, so each can be point-Deleted before they + * surface above source's mirror. * - *

    Hidden-version discovery — {@link #tombstoneTargetCell} asks - * {@link TargetRowRecord#targetPutTimestampsBetween} for max-versions-filtered Puts - * sitting between source's max ts and target's visible ts, so each can be point-Deleted - * before they surface above source's mirror. *

          *   target row state (MAX_VERSIONS=3):
          *     Put(NAME, "carol")@T=900   visible
    @@ -983,8 +986,8 @@ void flush(List pendingPuts, List pendingDeletes) {
       }
     
       /**
    -   * Cell-level drift counts produced by per-row diff. Three counters partition the cell
    -   * differences into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
    +   * Cell-level drift counts produced by per-row diff. Three counters partition the cell differences
    +   * into disjoint buckets — source-only, target-only-live, same-coord-diff-value.
        */
       static final class CellDriftCounts {
         static final CellDriftCounts NONE = new CellDriftCounts(0, 0, 0);
    diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
    index fcd61f07582..b1bce9f8023 100644
    --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
    +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java
    @@ -138,11 +138,10 @@ public List getSplits(JobContext context) throws IOException, Interr
     
       /**
        * Queries Sync checkpoint table for completed mapper regions.
    -   *
    -   * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered
    -   *                 out as completed; MISMATCHED regions are re-entered as splits so their
    -   *                 chunks can be repaired. When true (dry-run), all REGION rows regardless
    -   *                 of status are treated as completed.
    +   * @param isDryRun When false (repair mode), only VERIFIED and REPAIRED regions are filtered out
    +   *                 as completed; MISMATCHED regions are re-entered as splits so their chunks can
    +   *                 be repaired. When true (dry-run), all REGION rows regardless of status are
    +   *                 treated as completed.
        */
       private List queryCompletedMapperRegions(Configuration conf, String tableName,
         String targetZkQuorum, Long fromTime, Long toTime, boolean isDryRun) throws SQLException {
    @@ -150,8 +149,8 @@ private List queryCompletedMapperRegions(Configuration conf, String ta
         List completedRegions = new ArrayList<>();
         try (Connection conn = ConnectionUtil.getInputConnection(conf)) {
           PhoenixSyncTableOutputRepository repository = new PhoenixSyncTableOutputRepository(conn);
    -      List completedRows = repository.getProcessedMapperRegions(
    -        tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun);
    +      List completedRows = repository
    +        .getProcessedMapperRegions(tableName, targetZkQuorum, fromTime, toTime, tenantId, isDryRun);
           for (PhoenixSyncTableCheckpointOutputRow row : completedRows) {
             KeyRange keyRange = KeyRange.getKeyRange(row.getStartRowKey(), row.getEndRowKey());
             completedRegions.add(keyRange);
    diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
    index aa19bcf1a89..7158d847778 100644
    --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
    +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java
    @@ -231,8 +231,7 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
         long targetRowsBefore = context.getCounter(SyncCounters.TARGET_ROWS_PROCESSED).getValue();
         long rowsMissingBefore = context.getCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue();
         long rowsExtraBefore = context.getCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue();
    -    long rowsDifferentBefore =
    -      context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
    +    long rowsDifferentBefore = context.getCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue();
         long rowsCannotRepairBefore = context.getCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue();
         long cellsMissingBefore = context.getCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue();
         long cellsExtraBefore = context.getCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue();
    @@ -299,11 +298,11 @@ private void processRegion(byte[] regionStart, byte[] regionEnd, Context context
        * @param regionEndTime      Region processing end time
        * @param verifiedChunks     Number of verified chunks
        * @param mismatchedChunks   Number of mismatched chunks
    -   * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR;
    -   *                           if > 0 (and no repair-failed chunks) the region rolls up to
    -   *                           UNREPAIRABLE, signalling operator intervention is needed
    -   * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the
    -   *                           region rolls up to REPAIR_FAILED (highest precedence)
    +   * @param unrepairableChunks Number of chunks where any row landed in ROWS_CANNOT_REPAIR; if > 0
    +   *                           (and no repair-failed chunks) the region rolls up to UNREPAIRABLE,
    +   *                           signalling operator intervention is needed
    +   * @param repairFailedChunks Number of chunks whose repair threw an IOException; if > 0 the region
    +   *                           rolls up to REPAIR_FAILED (highest precedence)
        * @param counters           Formatted counter string
        * @param context            Mapper context
        */
    @@ -314,14 +313,14 @@ private void recordRegionCompletion(byte[] regionStart, byte[] regionEnd,
     
         // Region rolls up its child chunks' outcomes into one of five statuses, in precedence
         // order (most-severe wins):
    -    //   REPAIR_FAILED — at least one chunk threw during merge-scan or flush.
    -    //   UNREPAIRABLE  — repair completed but at least one chunk has rows that cannot be
    -    //                   repaired (target tombstones shadow source Puts, or target row is
    -    //                   entirely tombstones). Operator action (typically major compaction
    -    //                   on target) needed before a re-run can converge.
    -    //   MISMATCHED    — drift was detected but repair was not attempted (dry-run mode).
    -    //   REPAIRED      — drift was detected and every chunk's repair fully succeeded.
    -    //   VERIFIED      — every chunk matched; no drift in this region.
    +    // REPAIR_FAILED — at least one chunk threw during merge-scan or flush.
    +    // UNREPAIRABLE — repair completed but at least one chunk has rows that cannot be
    +    // repaired (target tombstones shadow source Puts, or target row is
    +    // entirely tombstones). Operator action (typically major compaction
    +    // on target) needed before a re-run can converge.
    +    // MISMATCHED — drift was detected but repair was not attempted (dry-run mode).
    +    // REPAIRED — drift was detected and every chunk's repair fully succeeded.
    +    // VERIFIED — every chunk matched; no drift in this region.
         // The resume filter on re-invocation skips VERIFIED and REPAIRED — UNREPAIRABLE,
         // MISMATCHED, and REPAIR_FAILED chunks are re-entered as gaps and re-attempted.
         PhoenixSyncTableCheckpointOutputRow.Status status;
    @@ -445,8 +444,7 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
             // Target scan boundary: covers extra-on-target rows that fall before the first
             // source chunk, between consecutive source chunks, or after the last. Both verify
             // and repair use the same range so repair sees the same cells the verifier hashed.
    -        byte[] targetStart =
    -          previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey;
    +        byte[] targetStart = previousSourceChunk == null ? rangeStart : previousSourceChunk.endKey;
             byte[] targetEnd = isLastChunkOfRegion ? rangeEnd : sourceChunk.endKey;
             boolean targetEndInclusive = !isLastChunkOfRegion;
             ChunkInfo targetChunk = getTargetChunkWithSourceBoundary(targetConnection, targetStart,
    @@ -473,10 +471,10 @@ private void processMapperRanges(byte[] rangeStart, byte[] rangeEnd,
                 .formatChunk(sourceChunk.rowCount, targetChunk.rowCount, 0L, 0L, 0L, 0L, 0L, 0L, 0L);
               handleVerifiedChunk(sourceChunk, context, counters);
             } else {
    -          ChunkRepairRequest request = new ChunkRepairRequest(sourceChunk.startKey,
    -            sourceChunk.endKey, targetStart, targetEnd, isTargetStartKeyInclusive,
    -            targetEndInclusive, sourceChunk.rowCount, targetChunk.rowCount,
    -            sourceChunk.executionStartTime, isDryRun);
    +          ChunkRepairRequest request =
    +            new ChunkRepairRequest(sourceChunk.startKey, sourceChunk.endKey, targetStart, targetEnd,
    +              isTargetStartKeyInclusive, targetEndInclusive, sourceChunk.rowCount,
    +              targetChunk.rowCount, sourceChunk.executionStartTime, isDryRun);
               ChunkRepairResult result = chunkRepairer.repair(request, context::progress);
               if (isDryRun) {
                 // Dry-run: write CHUNK/MISMATCHED with real row-level drift in COUNTERS so the
    @@ -805,13 +803,13 @@ boolean shouldStartKeyBeInclusive(byte[] mapperRegionStart,
     
       /**
        * Translates a {@link ChunkRepairResult} into MapReduce side effects: bumps the cell/row drift
    -   * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED),
    -   * and writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
    +   * counters, builds the chunk-level checkpoint row (REPAIRED / UNREPAIRABLE / REPAIR_FAILED), and
    +   * writes it via {@link #writeChunkCheckpoint} so the outcome counter is bumped only on a
        * successful checkpoint write (audit row and counter stay consistent).
    -   *
    -   * 

    {@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and - * target hashes differed — the drift-detected signal — regardless of whether repair ran. - * Without this, repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks + *

    + * {@code CHUNKS_MISMATCHED} is bumped here too: it tracks every chunk where source and target + * hashes differed — the drift-detected signal — regardless of whether repair ran. Without this, + * repair-mode {@link #recordRegionCompletion} would see {@code mismatchedChunks * == 0} for fully-repaired regions and roll them up as VERIFIED instead of REPAIRED. */ private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest request, @@ -859,20 +857,20 @@ private void recordRepairOutcome(ChunkInfo sourceChunk, ChunkRepairRequest reque } /** - * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome - * counter is bumped only after a successful checkpoint write, so on-disk audit and in-memory - * counters stay in sync. - * - *

    If the checkpoint write throws {@link SQLException}, the failure is logged and the - * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is - * NOT propagated. Reasons: + * Writes a chunk-level checkpoint row and bumps the matching outcome counter. The outcome counter + * is bumped only after a successful checkpoint write, so on-disk audit and in-memory counters + * stay in sync. + *

    + * If the checkpoint write throws {@link SQLException}, the failure is logged and the + * {@link SyncCounters#CHECKPOINT_WRITE_FAILED} counter is bumped, but the exception is NOT + * propagated. Reasons: *

      - *
    • Target's data was already mutated during the merge — failing the mapper task - * wouldn't roll that back, and would trigger a MapReduce retry that re-verifies - * against already-mutated target state (audit trail loss).
    • - *
    • Other chunks in this mapper still deserve a chance to be processed.
    • - *
    • The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to - * operators and drives a non-zero exit at job end.
    • + *
    • Target's data was already mutated during the merge — failing the mapper task wouldn't roll + * that back, and would trigger a MapReduce retry that re-verifies against already-mutated target + * state (audit trail loss).
    • + *
    • Other chunks in this mapper still deserve a chance to be processed.
    • + *
    • The {@code CHECKPOINT_WRITE_FAILED} counter surfaces the audit-row gap to operators and + * drives a non-zero exit at job end.
    • *
    */ private void writeChunkCheckpoint(PhoenixSyncTableCheckpointOutputRow row, diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java index b939a064472..f46c1b34e36 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableTool.java @@ -415,34 +415,34 @@ private boolean submitPhoenixSyncTableJob() throws Exception { counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_MISMATCHED).getValue(); long repairedMappers = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIRED).getValue(); - long unrepairableMappers = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); - long repairFailedMappers = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); + long unrepairableMappers = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); + long repairFailedMappers = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); long chunksVerified = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_VERIFIED).getValue(); long chunksMismatched = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_MISMATCHED).getValue(); long chunksRepaired = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIRED).getValue(); - long chunksUnrepairable = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); - long chunksRepairFailed = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); + long chunksUnrepairable = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); + long chunksRepairFailed = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); long sourceRowsProcessed = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.SOURCE_ROWS_PROCESSED).getValue(); long targetRowsProcessed = counters.findCounter(PhoenixSyncTableMapper.SyncCounters.TARGET_ROWS_PROCESSED).getValue(); - long rowsMissingOnTarget = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); - long rowsExtraOnTarget = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); - long rowsCannotRepair = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue(); + long rowsMissingOnTarget = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); + long rowsExtraOnTarget = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + long rowsCannotRepair = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.ROWS_CANNOT_REPAIR).getValue(); long cellsMissingOnTarget = counters .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); - long cellsExtraOnTarget = counters - .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + long cellsExtraOnTarget = + counters.findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); long cellsDifferentOnTarget = counters .findCounter(PhoenixSyncTableMapper.SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); long checkpointWriteFailed = counters @@ -457,21 +457,19 @@ private boolean submitPhoenixSyncTableJob() throws Exception { + "Rows Missing On Target: {}, \n Rows Extra On Target: {}, \n" + "Rows Cannot Repair: {}, \n" + "Cells Missing On Target: {}, \n Cells Extra On Target: {}, \n" - + "Cells Different On Target: {}, \n" - + "Checkpoint Write Failed: {}", + + "Cells Different On Target: {}, \n" + "Checkpoint Write Failed: {}", taskCreated, verifiedMappers, mismatchedMappers, repairedMappers, unrepairableMappers, repairFailedMappers, chunksVerified, chunksMismatched, chunksRepaired, chunksUnrepairable, chunksRepairFailed, sourceRowsProcessed, targetRowsProcessed, rowsMissingOnTarget, rowsExtraOnTarget, rowsCannotRepair, cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget, checkpointWriteFailed); if (checkpointWriteFailed > 0) { - LOGGER.error( - "{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row " + LOGGER + .error("{} chunk(s) had a successful repair attempt but FAILED to write a checkpoint row " + "for table {}. Target data was mutated but the audit trail is incomplete. " + "Investigate the checkpoint table state before relying on it; affected chunks " + "will be re-attempted on the next invocation since they have no terminal " - + "checkpoint status.", - checkpointWriteFailed, qTable); + + "checkpoint status.", checkpointWriteFailed, qTable); return false; } } else { diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index ce2adc02505..19863d20820 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -91,7 +91,7 @@ import org.slf4j.LoggerFactory; @Category(NeedsOwnMiniClusterTest.class) -@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve"}) +@SuppressWarnings({ "SqlNoDataSourceInspection", "SqlResolve" }) public class PhoenixSyncTableToolIT { private static final Logger LOGGER = LoggerFactory.getLogger(PhoenixSyncTableToolIT.class); @@ -522,10 +522,8 @@ public void testSyncMultiTenantSaltedTableDifferences() throws Exception { assertRepairChunkAndMapperCounters(repairCounters2, 3, 0, 0, 2, 0, 0); // Pin checkpoint state per tenant. - validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1, - repairCounters1); - validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2, - repairCounters2); + validateCheckpointEntries(uniqueTableName, "TENANT_001", counters1, repairCounters1); + validateCheckpointEntries(uniqueTableName, "TENANT_002", counters2, repairCounters2); // After repair, TENANT_002's data should be identical between source and target. withTenantConnections(tenantIds[1], @@ -615,8 +613,10 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce counters2.chunksMismatched); // (Remaining chunks from checkpoint) + (Second run) should equal (First run) - long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalSourceRows = + setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = + setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified; assertEquals( @@ -636,10 +636,11 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce // Splits introduced between runs widen the second-run chunk count beyond the deleted // 75% of the first run's chunks (extra region boundaries → extra chunks). So we relax // the strict equality to >=. The row-count invariant above is unaffected by splits. - assertTrue("Remaining + Second run verified chunks should be >= first run verified chunks. " - + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: " - + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): " - + setup.firstRunCounters.chunksVerified, + assertTrue( + "Remaining + Second run verified chunks should be >= first run verified chunks. " + + "Remaining: " + setup.remainingCounters.chunksVerified + ", Second run: " + + counters2.chunksVerified + ", Total: " + totalVerifiedChunks + ", Expected (>=): " + + setup.firstRunCounters.chunksVerified, totalVerifiedChunks >= setup.firstRunCounters.chunksVerified); // Verify checkpoint table has entries for the reprocessed regions @@ -686,8 +687,10 @@ public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception SyncCountersResult counters2 = getSyncCounters(job2); // (Remaining chunks) + (Second run) should equal (First run) for row counts - long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalSourceRows = + setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = + setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; assertEquals("Remaining + rerun source rows should equal first run", counters1.sourceRowsProcessed, totalSourceRows); @@ -747,23 +750,21 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce String.valueOf(toTime)); SyncCountersResult counters2 = getSyncCounters(job2); - long totalSourceRows = setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; - long totalTargetRows = setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; + long totalSourceRows = + setup.remainingCounters.sourceRowsProcessed + counters2.sourceRowsProcessed; + long totalTargetRows = + setup.remainingCounters.targetRowsProcessed + counters2.targetRowsProcessed; long totalVerifiedChunks = setup.remainingCounters.chunksVerified + counters2.chunksVerified; - assertEquals( - "Remaining + Second run source rows should equal first run source rows. " + "Remaining: " - + setup.remainingCounters.sourceRowsProcessed + ", Second run: " - + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: " - + counters1.sourceRowsProcessed, - counters1.sourceRowsProcessed, totalSourceRows); + assertEquals("Remaining + Second run source rows should equal first run source rows. " + + "Remaining: " + setup.remainingCounters.sourceRowsProcessed + ", Second run: " + + counters2.sourceRowsProcessed + ", Total: " + totalSourceRows + ", Expected: " + + counters1.sourceRowsProcessed, counters1.sourceRowsProcessed, totalSourceRows); - assertEquals( - "Remaining + Second run target rows should equal first run target rows. " + "Remaining: " - + setup.remainingCounters.targetRowsProcessed + ", Second run: " - + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: " - + counters1.targetRowsProcessed, - counters1.targetRowsProcessed, totalTargetRows); + assertEquals("Remaining + Second run target rows should equal first run target rows. " + + "Remaining: " + setup.remainingCounters.targetRowsProcessed + ", Second run: " + + counters2.targetRowsProcessed + ", Total: " + totalTargetRows + ", Expected: " + + counters1.targetRowsProcessed, counters1.targetRowsProcessed, totalTargetRows); // Region merges between the two runs change mapper region boundaries, so the resume // filter sees stale chunks that don't align to the new mapper's range and reprocesses @@ -1103,19 +1104,19 @@ public void testSyncTableWithConcurrentRegionSplits() throws Exception { } /** - * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during - * the dry-run pass; this exercises the repair pass against splitting target regions, which is - * the production reality that exercises {@code flushRepairMutations}'s + * P3 (concurrent splits during repair pass): Today's concurrent-split tests run splits during the + * dry-run pass; this exercises the repair pass against splitting target regions, which is the + * production reality that exercises {@code flushRepairMutations}'s * {@code NotServingRegionException} path → {@code firstFailureIdx} → {@code REPAIR_FAILED}. - * - *

    Convergence strategy: + *

    + * Convergence strategy: *

      - *
    1. Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.
    2. - *
    3. Start concurrent splits on the target cluster, then run the repair pass. Some chunks - * may land in {@code REPAIR_FAILED} if a flush hits a region in transition — the - * resume filter re-enters those chunks on the next pass.
    4. - *
    5. Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED. - * {@code verifyDataIdentical} must succeed.
    6. + *
    7. Dry-run first on a stable layout to populate MISMATCHED checkpoint rows.
    8. + *
    9. Start concurrent splits on the target cluster, then run the repair pass. Some chunks may + * land in {@code REPAIR_FAILED} if a flush hits a region in transition — the resume filter + * re-enters those chunks on the next pass.
    10. + *
    11. Run a final dry-run + repair pass after splits have settled; expect zero MISMATCHED. + * {@code verifyDataIdentical} must succeed.
    12. *
    */ @Test @@ -1151,9 +1152,9 @@ public void testRepairWithConcurrentTargetSplits() throws Exception { } /** - * Guards against a regression where repair claims REPAIRED but doesn't actually converge. - * Repair the divergent table, clean the checkpoint, then re-run dry-run + repair on the now - * converged tables — both passes must be no-ops. + * Guards against a regression where repair claims REPAIRED but doesn't actually converge. Repair + * the divergent table, clean the checkpoint, then re-run dry-run + repair on the now converged + * tables — both passes must be no-ops. */ @Test public void testRepairIsIdempotent() throws Exception { @@ -1173,12 +1174,10 @@ public void testRepairIsIdempotent() throws Exception { assertTrue("First dry-run should detect mismatched chunks", firstDryRunCounters.chunksMismatched >= 1); SyncCountersResult firstRepairCounters = getSyncCounters(firstRun.repairJob); - assertTrue("First repair should mark chunks REPAIRED", - firstRepairCounters.chunksRepaired >= 1); + assertTrue("First repair should mark chunks REPAIRED", firstRepairCounters.chunksRepaired >= 1); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters, - firstRepairCounters); + validateCheckpointEntries(uniqueTableName, null, firstDryRunCounters, firstRepairCounters); // Clean the checkpoint so the second dry-run scans the full layout instead of resuming // from VERIFIED chunks. @@ -1197,8 +1196,7 @@ public void testRepairIsIdempotent() throws Exception { assertRowDriftCounters(secondRepairCounters, 0, 0, 0, 0); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters, - secondRepairCounters); + validateCheckpointEntries(uniqueTableName, null, secondDryRunCounters, secondRepairCounters); } /** @@ -1223,13 +1221,12 @@ public void testRepairAllTombstonedTargetRowExtra() throws Exception { // Tombstones with no underlying Puts — row surfaces under raw scan but every cell is a Delete. byte[] rowKey = integerRowKey(rowId); writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME", tombstoneTs); - writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE", - tombstoneTs); + writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "NAME_VALUE", tombstoneTs); writeRawDeleteColumn(targetConnection, uniqueTableName, rowKey, "0", "_0", tombstoneTs); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), - "--raw-scan"); + RepairRunResult result = + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -1306,10 +1303,9 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception { long toTime = System.currentTimeMillis(); // Run merges on source/target concurrently with the sync. - Runnable mergeJoiner = startConcurrentRegionWork( - () -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6), - () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6), - "merges"); + Runnable mergeJoiner = + startConcurrentRegionWork(() -> mergeAdjacentRegions(sourceConnection, uniqueTableName, 6), + () -> mergeAdjacentRegions(targetConnection, uniqueTableName, 6), "merges"); // Run sync tool while merges are happening Job job = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", @@ -1511,16 +1507,17 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception { String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; - assertSyncToolFails(args, String.format( - "Table %s does not exist on target cluster, mapper map() should fail during target scan", - uniqueTableName)); + assertSyncToolFails(args, + String.format( + "Table %s does not exist on target cluster, mapper map() should fail during target scan", + uniqueTableName)); } /** - * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the - * tool must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator - * can investigate the audit gap. Failure is injected via a RegionObserver on the - * checkpoint table that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}. + * When the mapper successfully mutates target but the audit checkpoint UPSERT fails, the tool + * must increment {@code CHECKPOINT_WRITE_FAILED} and exit non-zero so the operator can + * investigate the audit gap. Failure is injected via a RegionObserver on the checkpoint table + * that throws {@link DoNotRetryIOException} on every {@code preBatchMutate}. */ @Test public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { @@ -1537,8 +1534,8 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { // Run once first so the checkpoint table exists; we can only attach a coprocessor to a // table that's already been created. - Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); + Job initial = runSyncTool(uniqueTableName, "--dry-run", "--from-time", String.valueOf(fromTime), + "--to-time", String.valueOf(toTime)); assertTrue(initial.isSuccessful()); String ckpt = PhoenixSyncTableOutputRepository.SYNC_TABLE_CHECKPOINT_TABLE_NAME; @@ -1547,10 +1544,9 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { // Inline tool invocation — we need exitCode != 0, which runSyncTool would assertion-fail. PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); tool.setConf(sourceClusterConf()); - int exitCode = tool.run( - new String[] { "--table-name", uniqueTableName, "--target-cluster", targetZkQuorum, - "--run-foreground", "--chunk-size", "1", "--from-time", String.valueOf(fromTime), - "--to-time", String.valueOf(toTime) }); + int exitCode = tool.run(new String[] { "--table-name", uniqueTableName, "--target-cluster", + targetZkQuorum, "--run-foreground", "--chunk-size", "1", "--from-time", + String.valueOf(fromTime), "--to-time", String.valueOf(toTime) }); assertNotEquals("Tool must surface non-zero exit when checkpoint writes fail", 0, exitCode); // Note: CHECKPOINT_WRITE_FAILED only increments on the chunk-outcome write path @@ -1576,8 +1572,8 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { /** * Repair-batch flush failure: a RegionObserver on the target data table fails every * {@code preBatchMutate}, so {@code flushRepairMutations} throws and the chunk rolls up - * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a - * fresh run converges. + * {@code REPAIR_FAILED}. After the observer is removed and the checkpoint cleaned up, a fresh run + * converges. */ @Test public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception { @@ -1603,8 +1599,8 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception { List entries = queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null); assertTrue("At least one REPAIR_FAILED checkpoint row must persist", - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) >= 1); + countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED) + >= 1); } finally { TestUtil.removeCoprocessor(targetConnection, uniqueTableName, RepairBatchFailingObserver.class); @@ -1690,14 +1686,15 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { } /** - *

    Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code + *

    + * Scenario: source row has {@code Put(NAME, "alice", T0)}; target row has {@code * Put(NAME, "bob", T1)} and {@code Put(NAME, "carol", T2)} where {@code T0 < T1 < T2} and * {@code MAX_VERSIONS=2}. Visible cell on target is "carol" (T2); "bob" (T1) is - * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above - * source's mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1. - * - *

    Without this test, a regression that "fixes" only the visible cell (case 2) would leave - * target reading "bob" after a successful-looking repair pass. + * MAX_VERSIONS-hidden. Naive repair would point-delete only T2, exposing "bob" above source's + * mirror at T0 — divergent. Correct behavior: point-delete BOTH T2 and T1. + *

    + * Without this test, a regression that "fixes" only the visible cell (case 2) would leave target + * reading "bob" after a successful-looking repair pass. */ @Test public void testRepairUnwindsHiddenTargetVersions() throws Exception { @@ -1723,9 +1720,9 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { assertTargetName(uniqueTableName, rowId, "carol"); // --read-all-versions so verifier and repairer both see the hidden version. - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), - "--read-all-versions"); + RepairRunResult result = + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(System.currentTimeMillis()), "--read-all-versions"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); @@ -1779,9 +1776,9 @@ public void testRepairPartialShadowWithinRow() throws Exception { writeRawDeleteColumn(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", shadowTombstoneTs); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", - String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan"); + RepairRunResult result = + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(shadowTombstoneTs)), "--raw-scan"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); @@ -1800,11 +1797,10 @@ public void testRepairPartialShadowWithinRow() throws Exception { } /** - * A chunk that lands in UNREPAIRABLE due to a shadowing target - * tombstone must recover after the operator runs a major compaction on target and re-runs - * sync. Pass 1 reproduces the partial-shadow setup and asserts UNREPAIRABLE; major compact - * on target reaps the standalone DeleteColumn; pass 2 mirrors the source Put cleanly and - * converges. + * A chunk that lands in UNREPAIRABLE due to a shadowing target tombstone must recover after the + * operator runs a major compaction on target and re-runs sync. Pass 1 reproduces the + * partial-shadow setup and asserts UNREPAIRABLE; major compact on target reaps the standalone + * DeleteColumn; pass 2 mirrors the source Put cleanly and converges. */ @Test public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Exception { @@ -1866,8 +1862,8 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except queryCheckpointTable(sourceConnection, uniqueTableName, targetZkQuorum, null), PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED) >= 1); - try (PreparedStatement ps = targetConnection.prepareStatement( - "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { ps.setInt(1, rowId); try (ResultSet rs = ps.executeQuery()) { assertTrue(rs.next()); @@ -1879,8 +1875,8 @@ public void testUnrepairableRecoversAfterMajorCompactionOnTarget() throws Except } /** - * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the - * source cell through the {@code cellMissing++} branch. + * Cell missing on target: source has an extra column, target lacks it. Repair mirrors the source + * cell through the {@code cellMissing++} branch. */ @Test public void testRepairCellMissingOnTarget() throws Exception { @@ -1890,14 +1886,14 @@ public void testRepairCellMissingOnTarget() throws Exception { final long ts = base + 1L; // Source: NAME and NAME_VALUE. - upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName - + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + upsertAtScnSource(ts, "UPSERT INTO " + uniqueTableName + " (ID, NAME, NAME_VALUE) VALUES (" + + rowId + ", 'alice', 99)"); // Target: only NAME_VALUE — NAME is missing. upsertAtScnTarget(ts, "UPSERT INTO " + uniqueTableName + " (ID, NAME_VALUE) VALUES (" + rowId + ", 99)"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -1930,8 +1926,8 @@ public void testRepairCellExtraOnTarget() throws Exception { writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", ts, Bytes.toBytes("bob")); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -1946,8 +1942,8 @@ public void testRepairCellExtraOnTarget() throws Exception { /** * {@code tombstoneTargetCell} Case 2 ({@code sourceMaxTs >= ts}): target has a stray older - * version of a column that source also has at a higher ts. Repair must point-Delete the - * stray version only — no hidden-version sweep — so the visible NAME stays at source's value. + * version of a column that source also has at a higher ts. Repair must point-Delete the stray + * version only — no hidden-version sweep — so the visible NAME stays at source's value. */ @Test public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exception { @@ -1964,9 +1960,8 @@ public void testRepairTombstonesTargetExtraVersionAtSameColumn() throws Exceptio writeRawCell(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", olderTs, Bytes.toBytes("old")); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan", - "--read-all-versions"); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(ts)), "--raw-scan", "--read-all-versions"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2005,8 +2000,8 @@ public void testRepairCellDifferentValue() throws Exception { upsertAtScnTarget(ts, "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'bob')"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2020,10 +2015,10 @@ public void testRepairCellDifferentValue() throws Exception { } /** - * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target) - * and {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to - * the correct family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep - * the two families' cells from clobbering each other. + * Multi-column-family repair: drift simultaneously in {@code CF1} (cell missing on target) and + * {@code CF2} (cell extra on target). Pins that cell-level repair scopes mutations to the correct + * family — the {@code (family, qualifier)} {@code ColumnKey} keying must keep the two families' + * cells from clobbering each other. */ @Test public void testRepairAcrossMultipleColumnFamilies() throws Exception { @@ -2039,8 +2034,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception { upsertAtScnTarget(ts, "UPSERT INTO " + uniqueTableName + " (ID, CF2.B) VALUES (" + rowId + ", 'b-tgt')"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(ts))); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(ts))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2050,8 +2045,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception { assertTrue("CF2.B must tombstone as extra", repairCounters.cellsExtraOnTarget >= 1); assertEquals("No row should be unrepairable", 0, repairCounters.rowsCannotRepair); - try (PreparedStatement ps = targetConnection.prepareStatement( - "SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) { + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT CF1.A, CF2.B FROM " + uniqueTableName + " WHERE ID = ?")) { ps.setInt(1, rowId); try (ResultSet rs = ps.executeQuery()) { assertTrue(rs.next()); @@ -2069,9 +2064,8 @@ public void testRepairAcrossMultipleColumnFamilies() throws Exception { private List collectMultiCfRows(Connection conn, String tableName) throws SQLException { List rows = new ArrayList<>(); - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery( - "SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) { + try (Statement stmt = conn.createStatement(); ResultSet rs = + stmt.executeQuery("SELECT ID, CF1.A, CF2.B FROM " + tableName + " ORDER BY ID")) { while (rs.next()) { rows.add(rs.getInt(1) + "|" + rs.getString(2) + "|" + rs.getString(3)); } @@ -2082,8 +2076,8 @@ private List collectMultiCfRows(Connection conn, String tableName) throw /** * Tombstone planted strictly above {@code --to-time}: the diff scan can't see it, but * {@code TargetRowRecord.load} (which uses {@code [fromTime, MAX_VALUE]}) does — so - * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors, - * giving the row visible existence; NAME stays null. + * {@code wouldShadow} suppresses source's NAME mirror. The empty-key cell still mirrors, giving + * the row visible existence; NAME stays null. */ @Test public void testRepairShadowFromTombstoneAboveToTime() throws Exception { @@ -2124,8 +2118,8 @@ public void testRepairShadowFromTombstoneAboveToTime() throws Exception { /** * Shadow via {@code DeleteFamily}: tombstone covers every qualifier in cf {@code "0"} at * {@code ts <= tombstoneTs}, planted strictly above {@code --to-time} so only - * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls - * up unrepairable. + * {@code TargetRowRecord.load} sees it. Every source cell mirror is suppressed → row rolls up + * unrepairable. */ @Test public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { @@ -2163,10 +2157,10 @@ public void testRepairShadowFromDeleteFamilyOnTarget() throws Exception { } /** - * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"} - * at exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as - * empty (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces - * the tombstone for the {@code wouldShadow} check. + * Shadow via {@code DeleteFamilyVersion}: tombstone matches every qualifier in cf {@code "0"} at + * exactly {@code sourceTs}. Run without {@code --raw-scan} so the diff scan sees target as empty + * (no live cells); {@code TargetRowRecord.load} runs raw internally and still surfaces the + * tombstone for the {@code wouldShadow} check. */ @Test public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { @@ -2184,8 +2178,7 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { sourceTs); RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", - String.valueOf(waitUntilWallClockPasses(sourceTs))); + String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(sourceTs))); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2201,10 +2194,10 @@ public void testRepairShadowFromDeleteFamilyVersionOnTarget() throws Exception { } /** - * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible) - * above source's single Put. The repairer must point-Delete the visible Put AND every hidden - * Put in {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a - * hidden Put above source's mirror. + * Multi-hidden-version unwinding: target has THREE NAME versions (two hidden, one visible) above + * source's single Put. The repairer must point-Delete the visible Put AND every hidden Put in + * {@code (sourceMaxTs, visibleTs)} — otherwise unwinding the visible cell surfaces a hidden Put + * above source's mirror. */ @Test public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { @@ -2232,9 +2225,9 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { assertTargetName(uniqueTableName, rowId, "dave"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(waitUntilWallClockPasses(targetT3)), - "--read-all-versions"); + RepairRunResult result = + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(targetT3)), "--read-all-versions"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); @@ -2248,8 +2241,8 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { // Lower bound on delete markers: the unwind iterates over shrinking intervals so T1 may // appear in both T3's and T2's hidden sets — distinct-marker count can exceed 3. RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); - assertTrue("Expected at least 3 NAME delete markers on target, saw " - + summary.totalDeletes(), summary.totalDeletes() >= 3); + assertTrue("Expected at least 3 NAME delete markers on target, saw " + summary.totalDeletes(), + summary.totalDeletes() >= 3); int namePutAtSourceTs = 0; for (Long ts : summary.putTimestamps) { if (ts == sourceTs) namePutAtSourceTs++; @@ -2262,9 +2255,9 @@ public void testRepairUnwindsMultipleHiddenTargetVersions() throws Exception { /** * Same row on both sides via a matching NAME_VALUE; target also carries a raw point-Delete on - * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra - * cell, but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up - * unrepairable with no cell counter ticks. + * NAME that source lacks. Under {@code --raw-scan} the tombstone surfaces as a target-extra cell, + * but {@code tombstoneTargetCell} can't tombstone a tombstone — row rolls up unrepairable with no + * cell counter ticks. */ @Test public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { @@ -2283,8 +2276,8 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { writeRawPointDelete(targetConnection, uniqueTableName, integerRowKey(rowId), "0", "NAME", tombstoneTs); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2297,8 +2290,8 @@ public void testRepairCmpEqualWithTargetTombstoneCell() throws Exception { /** * Mid-row repair flush: 8 missing source-only rows through {@code repairBatchSize=2} so - * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a - * batch boundary. + * {@code generateMutationForDiffRows} flushes mid-stream. Pins that no Put is dropped at a batch + * boundary. */ @Test public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { @@ -2341,10 +2334,9 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { } /** - * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2, - * Put@T3; target has only Put@T1. Repair must mirror the missing tombstone (via - * {@code mirrorSourceCell} routing Delete cells through {@code Delete#add}) and the missing - * newer Put. + * {@code --raw-scan} + {@code --read-all-versions}: source has Put@T1, DeleteColumn@T2, Put@T3; + * target has only Put@T1. Repair must mirror the missing tombstone (via {@code mirrorSourceCell} + * routing Delete cells through {@code Delete#add}) and the missing newer Put. */ @Test public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exception { @@ -2367,9 +2359,9 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio upsertAtScnTarget(t1, "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'v1')"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", - String.valueOf(fromTime), "--to-time", String.valueOf(System.currentTimeMillis()), - "--raw-scan", "--read-all-versions"); + RepairRunResult result = + runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", + String.valueOf(System.currentTimeMillis()), "--raw-scan", "--read-all-versions"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2413,8 +2405,8 @@ public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws upsertAtScnTarget(t2, "UPSERT INTO " + uniqueTableName + " (ID, NAME) VALUES (" + rowId + ", 'newer')"); - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions"); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(t2)), "--read-all-versions"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2429,8 +2421,7 @@ public void testRepairMirrorsHiddenSourceVersionWhenTargetHasOnlyNewest() throws RawCellSummary summary = scanRawTargetNameCells(uniqueTableName, rowId); assertTrue("Mirrored older NAME Put@T1 must land at original timestamp", summary.putTimestamps.contains(t1)); - assertTrue("Existing newer NAME Put@T2 must remain", - summary.putTimestamps.contains(t2)); + assertTrue("Existing newer NAME Put@T2 must remain", summary.putTimestamps.contains(t2)); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); validateCheckpointEntries(uniqueTableName, null, dryRunCounters, c); @@ -2446,8 +2437,8 @@ public void testRepairMirrorsSourcePointDeleteUnderRawScan() throws Exception { } /** - * Source-only DeleteFamily must mirror onto target → whole family at the target row is - * shadowed and the row drops out of Phoenix view. + * Source-only DeleteFamily must mirror onto target → whole family at the target row is shadowed + * and the row drops out of Phoenix view. */ @Test public void testRepairMirrorsSourceDeleteFamilyUnderRawScan() throws Exception { @@ -2463,81 +2454,11 @@ public void testRepairMirrorsSourceDeleteFamilyVersionUnderRawScan() throws Exce runMirrorSourceTombstoneTest(SourceTombstone.DELETE_FAMILY_VERSION); } - private enum SourceTombstone { - POINT_DELETE, - DELETE_FAMILY, - DELETE_FAMILY_VERSION - } - - private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception { - final int rowId = 5; - long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); - final long ts = base + 1L; - // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs. - final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts; - - for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) { - try (Connection scn = openConnectionAtScn(zkUrl, ts)) { - scn.createStatement().execute("UPSERT INTO " + uniqueTableName - + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); - scn.commit(); - } - } - - byte[] rk = integerRowKey(rowId); - switch (subtype) { - case POINT_DELETE: - writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs); - break; - case DELETE_FAMILY: - writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); - break; - case DELETE_FAMILY_VERSION: - writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); - break; - default: - throw new IllegalStateException("unhandled subtype: " + subtype); - } - - RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", - "--to-time", String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); - assertTrue(result.dryRunJob.isSuccessful()); - assertTrue(result.repairJob.isSuccessful()); - - SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); - SyncCountersResult repairCounters = getSyncCounters(result.repairJob); - // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring - // also covers the empty-key sentinel cell, hence the >= 1 assertion shape. - assertTrue("source tombstone must mirror as a missing cell on target", - repairCounters.cellsMissingOnTarget >= 1); - assertTrue(repairCounters.mappersRepaired >= 1); - validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); - - try (PreparedStatement ps = targetConnection.prepareStatement( - "SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { - ps.setInt(1, rowId); - try (ResultSet rs = ps.executeQuery()) { - if (subtype == SourceTombstone.POINT_DELETE) { - assertTrue(rs.next()); - assertNull(rs.getString(1)); - assertEquals(99L, rs.getLong(2)); - } else { - // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row. - assertFalse("row should not be visible after family-wide tombstone mirror", - rs.next()); - } - } - } - - // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view. - verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - } - /** - * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only - * rows in the same chunk, so most flushes straddle a Put/Delete boundary on - * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops - * at the batch boundary. + * Mixed Put+Delete batch under {@code repairBatchSize=4}: 5 source-only rows + 5 target-only rows + * in the same chunk, so most flushes straddle a Put/Delete boundary on + * {@code flushRepairMutations}'s mixed {@code Table#batch} path. Pins that no mutation drops at + * the batch boundary. */ @Test public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { @@ -2849,55 +2770,29 @@ public void testSyncTableWithSplitCoalescing() throws Exception { } /** - * Verifies that the sync job completes successfully when {@code endTime} (--to-time) is older - * than {@code phoenix.max.lookback.age.seconds}. - * - *

    Root cause without fix: {@link PhoenixSyncTableTool} sets {@code CURRENT_SCN_VALUE = - * endTime} in the MR job configuration. During split generation, {@code - * PhoenixInputFormat.getQueryPlan()} creates a Phoenix connection with that SCN. {@code - * QueryCompiler.verifySCN()} (client-side) then throws {@code ERROR 538} when {@code endTime} is - * older than {@code phoenix.max.lookback.age.seconds}. - * - *

    Fix: {@link PhoenixSyncTableInputFormat} overrides the parent to strip {@code - * CURRENT_SCN_VALUE} before creating the query plan for split generation. With SCN absent, {@code - * verifySCN()} returns early (SCN == null), so no exception is thrown. - * - *

    Data access correctness: The mapper uses raw HBase {@code Scan.setTimeRange(fromTime, - * toTime)}, which does NOT go through {@code QueryCompiler.compile()} or {@code verifySCN()}, so - * data within [fromTime, toTime] is always accessible regardless of max lookback age. + * Sync must succeed when {@code --to-time} is older than + * {@code phoenix.max.lookback.age.seconds}. {@link PhoenixSyncTableInputFormat} strips + * {@code CURRENT_SCN_VALUE} during split generation so {@code QueryCompiler.verifySCN()} doesn't + * throw ERROR 538; the mapper's raw HBase scan is unaffected by SCN/lookback and reads all data + * in {@code [fromTime, toTime]}. */ @Test public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exception { - // Setup: create tables on both clusters and replicate 10 rows createTableOnBothClusters(sourceConnection, targetConnection, uniqueTableName); insertTestData(sourceConnection, uniqueTableName, 1, 10); waitForReplication(targetConnection, uniqueTableName, 10); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - // Capture toTime BEFORE the lookback window will expire long toTime = System.currentTimeMillis(); - // Configure a short max lookback age (5 seconds) in the MR job configuration. - // QueryCompiler.verifySCN() reads PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY from - // conn.getQueryServices().getConfiguration(), which is the client-side MR conf. long maxLookbackAgeSeconds = 5; Configuration conf = sourceClusterConf(); conf.setLong(BaseScannerRegionObserverConstants.PHOENIX_MAX_LOOKBACK_AGE_CONF_KEY, maxLookbackAgeSeconds); - // Wait until toTime is older than the lookback age. - // After this sleep: (now - maxLookbackAgeMillis) > toTime → verifySCN would throw ERROR 538 + // Sleep until toTime is older than the lookback age — without the fix, verifySCN throws 538. Thread.sleep((maxLookbackAgeSeconds + 2) * 1000L); - // Run the sync tool with the now-stale toTime. - // Without PhoenixSyncTableInputFormat.getQueryPlan() override: - // getSplits() → PhoenixInputFormat.getQueryPlan() sets SCN=toTime on Phoenix connection - // → QueryCompiler.verifySCN() → ERROR 538 (toTime older than maxLookbackAge) - // With the fix: - // getSplits() → overridden getQueryPlan() strips CURRENT_SCN_VALUE from conf copy - // → verifySCN() sees scn == null → returns early → no exception thrown - // The mapper still uses raw HBase Scan.setTimeRange(0, toTime), bypassing verifySCN entirely, - // so all 10 rows within [0, toTime] are accessible and compared correctly. Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--from-time", "0", "--to-time", String.valueOf(toTime)); @@ -2905,17 +2800,83 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc "Sync job should complete successfully even when endTime is older than maxLookbackAge", job.isSuccessful()); - // Verify the mapper processed all 10 rows via raw HBase scan (bypasses verifySCN) SyncCountersResult counters = getSyncCounters(job); validateSyncCounters(counters, 10, 10, 10, 0); validateMapperCounters(counters, 4, 0); - // Run was non-dry-run with no drift; repair flow is a no-op and target should match source - // even though toTime is older than max lookback age. verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); validateCheckpointEntries(uniqueTableName, null, null, counters); } + private enum SourceTombstone { + POINT_DELETE, + DELETE_FAMILY, + DELETE_FAMILY_VERSION + } + + private void runMirrorSourceTombstoneTest(SourceTombstone subtype) throws Exception { + final int rowId = 5; + long base = createRepairTestTableOnBothClusters(uniqueTableName, 1, "3, 7"); + final long ts = base + 1L; + // POINT_DELETE / DFV must hit cells at exactly `ts`; DeleteFamily covers ts <= markerTs. + final long tombstoneTs = subtype == SourceTombstone.DELETE_FAMILY ? base + 2L : ts; + + for (String zkUrl : new String[] { CLUSTERS.getZkUrl1(), CLUSTERS.getZkUrl2() }) { + try (Connection scn = openConnectionAtScn(zkUrl, ts)) { + scn.createStatement().execute("UPSERT INTO " + uniqueTableName + + " (ID, NAME, NAME_VALUE) VALUES (" + rowId + ", 'alice', 99)"); + scn.commit(); + } + } + + byte[] rk = integerRowKey(rowId); + switch (subtype) { + case POINT_DELETE: + writeRawPointDelete(sourceConnection, uniqueTableName, rk, "0", "NAME", tombstoneTs); + break; + case DELETE_FAMILY: + writeRawDeleteFamily(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); + break; + case DELETE_FAMILY_VERSION: + writeRawDeleteFamilyVersion(sourceConnection, uniqueTableName, rk, "0", tombstoneTs); + break; + default: + throw new IllegalStateException("unhandled subtype: " + subtype); + } + + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", "0", "--to-time", + String.valueOf(waitUntilWallClockPasses(tombstoneTs)), "--raw-scan"); + assertTrue(result.dryRunJob.isSuccessful()); + assertTrue(result.repairJob.isSuccessful()); + + SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); + SyncCountersResult repairCounters = getSyncCounters(result.repairJob); + // The source-only Delete cell is missing on target → mirror it. DeleteFamily mirroring + // also covers the empty-key sentinel cell, hence the >= 1 assertion shape. + assertTrue("source tombstone must mirror as a missing cell on target", + repairCounters.cellsMissingOnTarget >= 1); + assertTrue(repairCounters.mappersRepaired >= 1); + validateCheckpointEntries(uniqueTableName, null, dryRunCounters, repairCounters); + + try (PreparedStatement ps = targetConnection + .prepareStatement("SELECT NAME, NAME_VALUE FROM " + uniqueTableName + " WHERE ID = ?")) { + ps.setInt(1, rowId); + try (ResultSet rs = ps.executeQuery()) { + if (subtype == SourceTombstone.POINT_DELETE) { + assertTrue(rs.next()); + assertNull(rs.getString(1)); + assertEquals(99L, rs.getLong(2)); + } else { + // Family-wide tombstone (DELETE_FAMILY or DELETE_FAMILY_VERSION) drops the row. + assertFalse("row should not be visible after family-wide tombstone mirror", rs.next()); + } + } + } + + // Source tombstone is now mirrored onto target; both clusters present the same Phoenix view. + verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); + } + /** * Helper class to hold separated mapper and chunk entries. */ @@ -2998,9 +2959,9 @@ private static class CheckpointAggregateCounters { } /** - * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of - * the checkpoint table before and after deletion, and the aggregate counters re-derived from - * the chunks that survived the deletion. Tests use these to assert the + * Result of {@link #setupPartialRerun}. Captures the first-run job/counters, the snapshots of the + * checkpoint table before and after deletion, and the aggregate counters re-derived from the + * chunks that survived the deletion. Tests use these to assert the * {@code remaining + rerun == first-run} invariant after re-running the sync tool. */ private static class PartialRerunSetup { @@ -3033,13 +2994,13 @@ private static class PartialRerunSetup { /** * Runs the partial-rerun preamble shared by all checkpoint-resume tests: *

      - *
    1. Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime}, - * {@code toTime}] window.
    2. - *
    3. Query the checkpoint table and assert non-empty mapper/chunk results.
    4. - *
    5. Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all - * current tests) and delete them along with every mapper row.
    6. - *
    7. Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can - * assert the {@code remaining + rerun == first-run} row-count invariant.
    8. + *
    9. Run the sync tool once at {@code chunkSize} over the pinned [{@code fromTime}, + * {@code toTime}] window.
    10. + *
    11. Query the checkpoint table and assert non-empty mapper/chunk results.
    12. + *
    13. Select {@code deletionFraction} of each mapper's chunks for deletion (0.75 in all current + * tests) and delete them along with every mapper row.
    14. + *
    15. Re-query the checkpoint table and aggregate the surviving CHUNK rows so callers can assert + * the {@code remaining + rerun == first-run} row-count invariant.
    16. *
    * Each test then performs its own divergent action (extra splits, merges, smaller chunk size, * dropping the target table) on the returned state. @@ -3058,9 +3019,9 @@ private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, lon assertFalse("Should have mapper region entries", separated.mappers.isEmpty()); assertFalse("Should have chunk entries", separated.chunks.isEmpty()); - List chunksToDelete = selectChunksToDeleteFromMappers( - sourceConnection, tableName, targetZkQuorum, fromTime, toTime, null, separated.mappers, - deletionFraction); + List chunksToDelete = + selectChunksToDeleteFromMappers(sourceConnection, tableName, targetZkQuorum, fromTime, toTime, + null, separated.mappers, deletionFraction); int deletedCount = deleteCheckpointEntries(sourceConnection, tableName, targetZkQuorum, null, separated.mappers, chunksToDelete); @@ -3072,8 +3033,8 @@ private PartialRerunSetup setupPartialRerun(String tableName, long fromTime, lon CheckpointAggregateCounters remainingCounters = calculateAggregateCountersFromCheckpoint(entriesAfterDelete); - return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers, - separated.chunks, chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters); + return new PartialRerunSetup(firstRunJob, firstRunCounters, separated.mappers, separated.chunks, + chunksToDelete, deletedCount, entriesAfterDelete, remainingCounters); } private List findChunksBelongingToMapper(Connection conn, @@ -3285,8 +3246,8 @@ private void verifyDataIdentical(Connection sourceConn, Connection targetConn, S /** * Negative complement of {@link #verifyDataIdentical}: asserts that source and target return * different row sets via Phoenix SELECT. Use after a repair pass that is expected to leave - * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool - * reported the right status and that the data actually didn't converge. + * residual divergence (UNREPAIRABLE / REPAIR_FAILED) so the test pins both that the tool reported + * the right status and that the data actually didn't converge. */ private void verifyDataDiverges(Connection sourceConn, Connection targetConn, String tableName) throws SQLException { @@ -3791,8 +3752,8 @@ private void assertSyncToolFails(String[] args, String failureContext) { /** * Upserts a "MODIFIED_NAME_" row on target for each id in {@code mismatchIds}. Replaces the * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new - * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of {@link - * #upsertRowsOnTarget}. + * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of + * {@link #upsertRowsOnTarget}. */ private void introduceMismatchesByIds(String tableName, List mismatchIds) throws SQLException { @@ -3806,19 +3767,20 @@ private void introduceMismatchesByIds(String tableName, List mismatchId } /** - * Starts two daemon-style threads that perform region mutations (splits or merges) on the - * source and target clusters and returns a {@link Runnable} the caller invokes to join them - * with a 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an - * unexpected exception is logged rather than killing the JVM thread silently. + * Starts two daemon-style threads that perform region mutations (splits or merges) on the source + * and target clusters and returns a {@link Runnable} the caller invokes to join them with a + * 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an unexpected + * exception is logged rather than killing the JVM thread silently. + *

    + * Usage: * - *

    Usage: *

        *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
        *   ... run main sync work ...
        *   joiner.run();
        * 
    - * - *

    Caller is responsible for invoking the returned joiner; tests should always join before + *

    + * Caller is responsible for invoking the returned joiner; tests should always join before * asserting on cluster state, otherwise late-arriving region mutations can race the assertions. */ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork, @@ -3859,9 +3821,8 @@ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetW private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception { Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time", "0", "--to-time", String.valueOf(System.currentTimeMillis())); - long chunkCount = - baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue() - + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); + long chunkCount = baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue() + + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null); return chunkCount; } @@ -3876,9 +3837,9 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, String... } /** - * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation, - * along with the pinned time window so callers can re-query the checkpoint table or run - * additional assertions against the same range. + * Holds both the dry-run and repair jobs from a {@link #runSyncToolWithRepair} invocation, along + * with the pinned time window so callers can re-query the checkpoint table or run additional + * assertions against the same range. */ private static class RepairRunResult { final Job dryRunJob; @@ -3905,9 +3866,9 @@ private RepairRunResult runSyncToolWithRepair(String tableName, String... additi * mismatches, then as a repair pass (no --dry-run) so the repair run rewrites the MISMATCHED * checkpoint rows in place. The shared window is mandatory because the checkpoint PK is * (TABLE_NAME, TARGET_CLUSTER, TYPE, FROM_TIME, TO_TIME, TENANT_ID, START_ROW_KEY) — without - * pinning, each invocation would fall through to System.currentTimeMillis() and the repair - * pass would create fresh rows instead of overwriting the dry-run pass's output. If the caller - * does not provide --from-time / --to-time, defaults of 0 / now are pinned. + * pinning, each invocation would fall through to System.currentTimeMillis() and the repair pass + * would create fresh rows instead of overwriting the dry-run pass's output. If the caller does + * not provide --from-time / --to-time, defaults of 0 / now are pinned. */ private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize, String... additionalArgs) throws Exception { @@ -3923,8 +3884,8 @@ private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize, } /** - * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array. - * Returns the default value if the flag is absent. + * Parses a long-valued command-line flag (e.g., --from-time 12345) from the args array. Returns + * the default value if the flag is absent. */ private static long parseLongFlag(String[] args, String flag, long defaultValue) { for (int i = 0; i < args.length - 1; i++) { @@ -3968,9 +3929,9 @@ private static String[] appendArg(String[] args, String newArg) { } /** - * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the - * ad-hoc {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops - * that recurred across several tests. + * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the ad-hoc + * {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops that recurred + * across several tests. */ private static long countCheckpointsByStatus(List entries, PhoenixSyncTableCheckpointOutputRow.Status status) { @@ -4082,28 +4043,23 @@ private static class SyncCountersResult { this.chunksVerified = counters.findCounter(SyncCounters.CHUNKS_VERIFIED).getValue(); this.chunksMismatched = counters.findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); this.chunksRepaired = counters.findCounter(SyncCounters.CHUNKS_REPAIRED).getValue(); - this.chunksUnrepairable = - counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); - this.chunksRepairFailed = - counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); + this.chunksUnrepairable = counters.findCounter(SyncCounters.CHUNKS_UNREPAIRABLE).getValue(); + this.chunksRepairFailed = counters.findCounter(SyncCounters.CHUNKS_REPAIR_FAILED).getValue(); this.mappersVerified = counters.findCounter(SyncCounters.MAPPERS_VERIFIED).getValue(); this.mappersMismatched = counters.findCounter(SyncCounters.MAPPERS_MISMATCHED).getValue(); this.mappersRepaired = counters.findCounter(SyncCounters.MAPPERS_REPAIRED).getValue(); - this.mappersUnrepairable = - counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); + this.mappersUnrepairable = counters.findCounter(SyncCounters.MAPPERS_UNREPAIRABLE).getValue(); this.mappersRepairFailed = counters.findCounter(SyncCounters.MAPPERS_REPAIR_FAILED).getValue(); this.rowsMissingOnTarget = counters.findCounter(SyncCounters.ROWS_MISSING_ON_TARGET).getValue(); - this.rowsExtraOnTarget = - counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); + this.rowsExtraOnTarget = counters.findCounter(SyncCounters.ROWS_EXTRA_ON_TARGET).getValue(); this.rowsDifferentOnTarget = counters.findCounter(SyncCounters.ROWS_DIFFERENT_ON_TARGET).getValue(); this.rowsCannotRepair = counters.findCounter(SyncCounters.ROWS_CANNOT_REPAIR).getValue(); this.cellsMissingOnTarget = counters.findCounter(SyncCounters.CELLS_MISSING_ON_TARGET).getValue(); - this.cellsExtraOnTarget = - counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); + this.cellsExtraOnTarget = counters.findCounter(SyncCounters.CELLS_EXTRA_ON_TARGET).getValue(); this.cellsDifferentOnTarget = counters.findCounter(SyncCounters.CELLS_DIFFERENT_ON_TARGET).getValue(); this.taskCreated = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue(); @@ -4123,10 +4079,10 @@ public void logCounters(String testName) { + "rows extra={}, rows different={}, rows cannot repair={}, cells missing={}, " + "cells extra={}, cells different={}", testName, sourceRowsProcessed, targetRowsProcessed, chunksVerified, chunksMismatched, - chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified, - mappersMismatched, mappersRepaired, mappersUnrepairable, mappersRepairFailed, - rowsMissingOnTarget, rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, - cellsMissingOnTarget, cellsExtraOnTarget, cellsDifferentOnTarget); + chunksRepaired, chunksUnrepairable, chunksRepairFailed, mappersVerified, mappersMismatched, + mappersRepaired, mappersUnrepairable, mappersRepairFailed, rowsMissingOnTarget, + rowsExtraOnTarget, rowsDifferentOnTarget, rowsCannotRepair, cellsMissingOnTarget, + cellsExtraOnTarget, cellsDifferentOnTarget); } } @@ -4178,9 +4134,9 @@ private void validateMapperCountersRepair(SyncCountersResult counters, } /** - * Pins the cell-level repair drift counters to exact expected values. Use in repair tests - * where the drift is constructed deterministically and any miscount (off-by-one, - * double-counting, missed branch) should fail the test loudly. + * Pins the cell-level repair drift counters to exact expected values. Use in repair tests where + * the drift is constructed deterministically and any miscount (off-by-one, double-counting, + * missed branch) should fail the test loudly. */ private void assertRepairCellCounters(SyncCountersResult counters, long expectedCellsMissing, long expectedCellsExtra, long expectedCellsDifferent, long expectedRowsCannotRepair) { @@ -4193,8 +4149,8 @@ private void assertRepairCellCounters(SyncCountersResult counters, long expected /** * Pins the chunk- and mapper-level repair-status counters. Complements - * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that - * need to assert both layers. + * {@link #validateMapperCountersRepair} (which omits chunk-level counters) for tests that need to + * assert both layers. */ private void assertRepairChunkAndMapperCounters(SyncCountersResult counters, long expectedChunksRepaired, long expectedChunksUnrepairable, long expectedChunksRepairFailed, @@ -4204,34 +4160,31 @@ private void assertRepairChunkAndMapperCounters(SyncCountersResult counters, assertEquals("CHUNKS_UNREPAIRABLE", expectedChunksUnrepairable, counters.chunksUnrepairable); assertEquals("CHUNKS_REPAIR_FAILED", expectedChunksRepairFailed, counters.chunksRepairFailed); assertEquals("MAPPERS_REPAIRED", expectedMappersRepaired, counters.mappersRepaired); - assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable, - counters.mappersUnrepairable); + assertEquals("MAPPERS_UNREPAIRABLE", expectedMappersUnrepairable, counters.mappersUnrepairable); assertEquals("MAPPERS_REPAIR_FAILED", expectedMappersRepairFailed, counters.mappersRepairFailed); } /** - * Pins all four row-level drift counters: missing, extra, different, and unrepairable. - * Dry-run runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave - * {@code ROWS_DIFFERENT_ON_TARGET} at 0 (different rows roll up under missing/extra cell - * drift after repair). + * Pins all four row-level drift counters: missing, extra, different, and unrepairable. Dry-run + * runs leave {@code ROWS_CANNOT_REPAIR} at 0; repair runs leave {@code ROWS_DIFFERENT_ON_TARGET} + * at 0 (different rows roll up under missing/extra cell drift after repair). */ private void assertRowDriftCounters(SyncCountersResult counters, long expectedRowsMissing, long expectedRowsExtra, long expectedRowsDifferent, long expectedRowsCannotRepair) { assertEquals("ROWS_MISSING_ON_TARGET", expectedRowsMissing, counters.rowsMissingOnTarget); assertEquals("ROWS_EXTRA_ON_TARGET", expectedRowsExtra, counters.rowsExtraOnTarget); - assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent, - counters.rowsDifferentOnTarget); + assertEquals("ROWS_DIFFERENT_ON_TARGET", expectedRowsDifferent, counters.rowsDifferentOnTarget); assertEquals("ROWS_CANNOT_REPAIR", expectedRowsCannotRepair, counters.rowsCannotRepair); } /** * Builds DDL for a "repair test" table that uses {@code COLUMN_ENCODED_BYTES=NONE} so column - * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers - * inject raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)} - * without computing encoded qualifier bytes. - * - *

    Set {@code maxVersions > 1} when the test exercises hidden-version unwinding. + * qualifiers on disk match the SQL column name verbatim. This lets cell-level test helpers inject + * raw HBase Puts/Deletes against {@code (cf=0, q=NAME)} or {@code (cf=0, q=NAME_VALUE)} without + * computing encoded qualifier bytes. + *

    + * Set {@code maxVersions > 1} when the test exercises hidden-version unwinding. */ private String buildRepairTestTableDdl(String tableName, boolean withReplication, int maxVersions, String splitPoints) { @@ -4257,9 +4210,9 @@ private String buildRepairTestTableDdl(String tableName, boolean withReplication /** * Creates the same {@link #buildRepairTestTableDdl} schema on both source and target clusters. * Used by repair tests that bypass replication and seed the two clusters separately. - * - *

    Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps - * ≥ the anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as + *

    + * Returns a wall-clock anchor in milliseconds. SCN-bound connections must use timestamps ≥ the + * anchor, otherwise an SCN below the table's CREATE-TABLE timestamp surfaces as * {@code TableNotFoundException}. */ private long createRepairTestTableOnBothClusters(String tableName, int maxVersions, @@ -4276,8 +4229,8 @@ private long createRepairTestTableOnBothClusters(String tableName, int maxVersio /** * Multi-CF variant of {@link #createRepairTestTableOnBothClusters}: two column families - * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to - * span families. + * ({@code CF1.A VARCHAR}, {@code CF2.B VARCHAR}). Used by repair tests that need drift to span + * families. */ private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws SQLException { String ddl = "CREATE TABLE IF NOT EXISTS " + tableName @@ -4291,11 +4244,11 @@ private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws /** * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted - * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell - * and (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation - * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The - * spin terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait - * for an external side effect. + * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell and + * (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation + * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The spin + * terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait for an + * external side effect. */ private long waitUntilWallClockPasses(long minTs) { while (System.currentTimeMillis() <= minTs) { @@ -4318,8 +4271,8 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S /** * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits. - * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; }} - * boilerplate that recurred in nearly every repair test. + * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; + * }} boilerplate that recurred in nearly every repair test. */ private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException { try (Connection scn = openConnectionAtScn(zkUrl, ts)) { @@ -4350,8 +4303,8 @@ private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException { * tests. */ private void assertTargetName(String tableName, int id, String expected) throws SQLException { - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + try (PreparedStatement ps = + targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { ps.setInt(1, id); try (ResultSet rs = ps.executeQuery()) { assertTrue("Row " + id + " should exist on target", rs.next()); @@ -4362,8 +4315,8 @@ private void assertTargetName(String tableName, int id, String expected) throws /** Asserts the target row exists but its visible NAME is null. */ private void assertTargetNameNull(String tableName, int id) throws SQLException { - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + try (PreparedStatement ps = + targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { ps.setInt(1, id); try (ResultSet rs = ps.executeQuery()) { assertTrue("Row " + id + " should exist on target", rs.next()); @@ -4374,8 +4327,8 @@ private void assertTargetNameNull(String tableName, int id) throws SQLException /** Asserts the target row is not visible to a Phoenix SELECT (e.g., shadowed by tombstones). */ private void assertTargetRowAbsent(String tableName, int id) throws SQLException { - try (PreparedStatement ps = targetConnection - .prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { + try (PreparedStatement ps = + targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { ps.setInt(1, id); try (ResultSet rs = ps.executeQuery()) { assertFalse("Row " + id + " should not be visible on target", rs.next()); @@ -4385,12 +4338,11 @@ private void assertTargetRowAbsent(String tableName, int id) throws SQLException /** * End-of-test convergence: run dry-run + repair over [fromTime, toTime] on top of whatever - * checkpoint state the test already produced. The repair-mode resume filter (status IN - * (VERIFIED, REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level - * range not yet covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run + - * repair on top of partial/stale prior state will still converge. Asserts no - * MISMATCHED/UNREPAIRABLE/REPAIR_FAILED rows remain and Phoenix-visible data matches between - * source and target. + * checkpoint state the test already produced. The repair-mode resume filter (status IN (VERIFIED, + * REPAIRED)) plus chunk-level boundary intersection ensures that any chunk-level range not yet + * covered by a VERIFIED/REPAIRED chunk row is re-validated, so a fresh dry-run + repair on top of + * partial/stale prior state will still converge. Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED + * rows remain and Phoenix-visible data matches between source and target. */ private void convergeAndAssertIdentical(String tableName, long fromTime, long toTime) throws Exception { @@ -4404,8 +4356,8 @@ private void convergeAndAssertIdentical(String tableName, long fromTime, long to } /** - * Summary of NAME-column raw cells observed under a single-row raw scan with all versions. - * Used by tests that pin tombstone/Put counts after repair. + * Summary of NAME-column raw cells observed under a single-row raw scan with all versions. Used + * by tests that pin tombstone/Put counts after repair. */ private static final class RawCellSummary { final int puts; @@ -4436,8 +4388,8 @@ int totalDeletes() { /** * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put / - * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin - * post-repair NAME tombstone counts. + * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin post-repair + * NAME tombstone counts. */ private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception { byte[] rk = integerRowKey(rowId); @@ -4542,9 +4494,9 @@ private void writeRawDeleteColumn(Connection phoenixConn, String tableName, byte } /** - * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family - * at {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests - * exercising the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}. + * Plants a raw {@link Delete#addFamily} (DeleteFamily — covers every qualifier in the family at + * {@code ts <= markerTs}) at {@code (rowKey, family)}. Used by shadow-detection tests exercising + * the {@code TargetRowRecord.deleteFamilyUpperBound} branch in {@code wouldShadow}. */ private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte[] rowKey, String family, long markerTs) throws Exception { @@ -4556,13 +4508,13 @@ private void writeRawDeleteFamily(Connection phoenixConn, String tableName, byte } /** - * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier - * in the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by - * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch - * in {@code wouldShadow}. + * Plants a raw {@link Delete#addFamilyVersion} (DeleteFamilyVersion — covers every qualifier in + * the family at exactly {@code ts == markerTs}) at {@code (rowKey, family)}. Used by + * shadow-detection tests exercising the {@code TargetRowRecord.deleteFamilyVersionTs} branch in + * {@code wouldShadow}. */ - private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName, - byte[] rowKey, String family, long markerTs) throws Exception { + private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableName, byte[] rowKey, + String family, long markerTs) throws Exception { try (Table hTable = getHBaseTable(phoenixConn, tableName)) { Delete del = new Delete(rowKey); del.addFamilyVersion(Bytes.toBytes(family), markerTs); @@ -4571,8 +4523,8 @@ private void writeRawDeleteFamilyVersion(Connection phoenixConn, String tableNam } /** - * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the - * encoding used by {@code splitTableAt}. + * Returns the row-key bytes Phoenix uses for an INTEGER primary key value, matching the encoding + * used by {@code splitTableAt}. */ private static byte[] integerRowKey(int id) { return PInteger.INSTANCE.toBytes(id); @@ -4580,8 +4532,8 @@ private static byte[] integerRowKey(int id) { /** * Returns a fresh {@link Configuration} clone of the source cluster with a custom - * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by - * the mid-row-flush boundary test. + * {@link PhoenixSyncTableTool#PHOENIX_SYNC_TABLE_REPAIR_BATCH_SIZE} setting baked in. Used by the + * mid-row-flush boundary test. */ private static Configuration sourceClusterConfWithRepairBatchSize(int repairBatchSize) { Configuration conf = sourceClusterConf(); @@ -4618,8 +4570,7 @@ private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow || PhoenixSyncTableCheckpointOutputRow.Type.CHUNK.equals(entry.getType())); if (tenantId == null) { - assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, - entry.getTenantId()); + assertNull("TENANT_ID should be null for non-multi-tenant tables" + ctx, entry.getTenantId()); } else { assertEquals("TENANT_ID should match" + ctx, tenantId, entry.getTenantId()); } @@ -4627,8 +4578,7 @@ private void validateCheckpointRowStructure(PhoenixSyncTableCheckpointOutputRow assertTrue("FROM_TIME should be >= 0" + ctx, entry.getFromTime() >= 0); assertTrue("TO_TIME should be > FROM_TIME" + ctx, entry.getToTime() > entry.getFromTime()); - assertNotNull("EXECUTION_START_TIME should not be null" + ctx, - entry.getExecutionStartTime()); + assertNotNull("EXECUTION_START_TIME should not be null" + ctx, entry.getExecutionStartTime()); assertNotNull("EXECUTION_END_TIME should not be null" + ctx, entry.getExecutionEndTime()); assertTrue("EXECUTION_END_TIME should be >= EXECUTION_START_TIME" + ctx, entry.getExecutionEndTime().getTime() >= entry.getExecutionStartTime().getTime()); @@ -4724,9 +4674,8 @@ private void validateCheckpointEntries(String tableName, String tenantId, // Repair UPSERTs REGION rows at the dry-run PK with a smaller (gap-only) delta, so totals // only match when no REGION was actually re-processed. - boolean repairTouchedRegions = repairCounters != null - && (repairCounters.mappersRepaired + repairCounters.mappersUnrepairable - + repairCounters.mappersRepairFailed) > 0; + boolean repairTouchedRegions = repairCounters != null && (repairCounters.mappersRepaired + + repairCounters.mappersUnrepairable + repairCounters.mappersRepairFailed) > 0; if (!repairTouchedRegions) { assertEquals("REGION sourceRowsProcessed total" + ctx, verifiedSource.sourceRowsProcessed, sourceRowsProcessed); @@ -4738,9 +4687,9 @@ private void validateCheckpointEntries(String tableName, String tenantId, /** * Bounded variant of {@link #validateCheckpointEntries} for tests where checkpoint rows * accumulate across multiple PKs (multi-window, partial reruns under split/merge, recovery). - * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts - * are at least {@code latestCounters}'s verified+repaired totals (or just the status invariants - * when {@code latestCounters} is null). + * Asserts no MISMATCHED/UNREPAIRABLE/REPAIR_FAILED remain and that VERIFIED+REPAIRED counts are + * at least {@code latestCounters}'s verified+repaired totals (or just the status invariants when + * {@code latestCounters} is null). */ private void validateCheckpointEntriesAtLeast(String tableName, String tenantId, SyncCountersResult latestCounters) throws SQLException { @@ -4753,34 +4702,29 @@ private void validateCheckpointEntriesAtLeast(String tableName, String tenantId, } assertEquals("MISMATCHED rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); + countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.MISMATCHED)); assertEquals("UNREPAIRABLE rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); + countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.UNREPAIRABLE)); assertEquals("REPAIR_FAILED rows must not remain after repair" + ctx, 0, - countCheckpointsByStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); + countCheckpointsByStatus(entries, PhoenixSyncTableCheckpointOutputRow.Status.REPAIR_FAILED)); if (latestCounters != null) { long chunkVerifiedPlusRepaired = countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED) - + countCheckpointsByTypeAndStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, - PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); - long regionVerifiedPlusRepaired = - countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, - PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED) - + countCheckpointsByTypeAndStatus(entries, - PhoenixSyncTableCheckpointOutputRow.Type.REGION, + + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.CHUNK, PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); + long regionVerifiedPlusRepaired = countCheckpointsByTypeAndStatus(entries, + PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.VERIFIED) + + countCheckpointsByTypeAndStatus(entries, PhoenixSyncTableCheckpointOutputRow.Type.REGION, + PhoenixSyncTableCheckpointOutputRow.Status.REPAIRED); long expectedChunkLowerBound = latestCounters.chunksVerified + latestCounters.chunksRepaired; long expectedRegionLowerBound = latestCounters.mappersVerified + latestCounters.mappersRepaired; assertTrue( - "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" - + chunkVerifiedPlusRepaired + ", expected>=" + expectedChunkLowerBound + ")", + "CHUNK VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" + chunkVerifiedPlusRepaired + + ", expected>=" + expectedChunkLowerBound + ")", chunkVerifiedPlusRepaired >= expectedChunkLowerBound); assertTrue( "REGION VERIFIED+REPAIRED >= latest counters" + ctx + " (actual=" @@ -4807,8 +4751,8 @@ public boolean equals(Object o) { } /** - * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT - * by {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the + * RegionObserver that fails every batch mutate. Attached to PHOENIX_SYNC_TABLE_CHECKPOINT by + * {@code testCheckpointWriteFailureCausesNonZeroExit} to exercise the * {@code CHECKPOINT_WRITE_FAILED} → non-zero exit path. */ public static class CheckpointWriteFailingObserver extends SimpleRegionObserver { diff --git a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java index b9f7e893a58..57791072cda 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsIT.java @@ -26,7 +26,6 @@ import static org.apache.phoenix.query.QueryServices.INTERNAL_CONNECTION_MAX_ALLOWED_CONNECTIONS; import static org.apache.phoenix.query.QueryServices.QUERY_SERVICES_NAME; import static org.apache.phoenix.util.PhoenixRuntime.clearAllConnectionQueryServiceMetrics; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java index a5977629a89..65cecb68804 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableOutputRepositoryTest.java @@ -763,11 +763,11 @@ public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exce for (int i = 0; i < statuses.length; i++) { byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i)); byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i)); - repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) - .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey) - .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) - .setExecutionEndTime(timestamp).build()); + repository.checkpointSyncTableResult( + new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName) + .setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L).setToTime(1000L) + .setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i]) + .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); } // Repair mode should skip only fully-done regions (VERIFIED + REPAIRED) so the mapper @@ -776,10 +776,10 @@ public void testGetProcessedMapperRegionsRepairModeFiltersByStatus() throws Exce repository.getProcessedMapperRegions(tableName, targetCluster, 0L, 1000L, null, false); assertEquals("Repair mode should return only VERIFIED + REPAIRED regions", 2, repairResults.size()); - assertArrayEquals("First should be region00 (VERIFIED)", - Bytes.toBytes("region00_start"), repairResults.get(0).getStartRowKey()); - assertArrayEquals("Second should be region02 (REPAIRED)", - Bytes.toBytes("region02_start"), repairResults.get(1).getStartRowKey()); + assertArrayEquals("First should be region00 (VERIFIED)", Bytes.toBytes("region00_start"), + repairResults.get(0).getStartRowKey()); + assertArrayEquals("Second should be region02 (REPAIRED)", Bytes.toBytes("region02_start"), + repairResults.get(1).getStartRowKey()); } @Test @@ -792,11 +792,11 @@ public void testGetProcessedMapperRegionsDryRunReturnsAllStatuses() throws Excep for (int i = 0; i < statuses.length; i++) { byte[] startKey = Bytes.toBytes(String.format("region%02d_start", i)); byte[] endKey = Bytes.toBytes(String.format("region%02d_end", i)); - repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.REGION) - .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey) - .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) - .setExecutionEndTime(timestamp).build()); + repository.checkpointSyncTableResult( + new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName) + .setTargetCluster(targetCluster).setType(Type.REGION).setFromTime(0L).setToTime(1000L) + .setIsDryRun(true).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i]) + .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); } // Dry-run mode does not filter by status; resume should skip every region the previous @@ -821,21 +821,21 @@ public void testGetProcessedChunksRepairModeFiltersByStatus() throws Exception { for (int i = 0; i < statuses.length; i++) { byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i)); byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i)); - repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK) - .setFromTime(0L).setToTime(1000L).setIsDryRun(false).setStartRowKey(startKey) - .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) - .setExecutionEndTime(timestamp).build()); + repository.checkpointSyncTableResult( + new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName) + .setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L).setToTime(1000L) + .setIsDryRun(false).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i]) + .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); } - List repairResults = repository.getProcessedChunks( - tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false); + List repairResults = repository + .getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, false); assertEquals("Repair mode should return only VERIFIED + REPAIRED chunks", 2, repairResults.size()); - assertArrayEquals("First should be chunk00 (VERIFIED)", - Bytes.toBytes("chunk00_start"), repairResults.get(0).getStartRowKey()); - assertArrayEquals("Second should be chunk02 (REPAIRED)", - Bytes.toBytes("chunk02_start"), repairResults.get(1).getStartRowKey()); + assertArrayEquals("First should be chunk00 (VERIFIED)", Bytes.toBytes("chunk00_start"), + repairResults.get(0).getStartRowKey()); + assertArrayEquals("Second should be chunk02 (REPAIRED)", Bytes.toBytes("chunk02_start"), + repairResults.get(1).getStartRowKey()); } @Test @@ -850,15 +850,15 @@ public void testGetProcessedChunksDryRunReturnsAllStatuses() throws Exception { for (int i = 0; i < statuses.length; i++) { byte[] startKey = Bytes.toBytes(String.format("chunk%02d_start", i)); byte[] endKey = Bytes.toBytes(String.format("chunk%02d_end", i)); - repository.checkpointSyncTableResult(new PhoenixSyncTableCheckpointOutputRow.Builder() - .setTableName(tableName).setTargetCluster(targetCluster).setType(Type.CHUNK) - .setFromTime(0L).setToTime(1000L).setIsDryRun(true).setStartRowKey(startKey) - .setEndRowKey(endKey).setStatus(statuses[i]).setExecutionStartTime(timestamp) - .setExecutionEndTime(timestamp).build()); + repository.checkpointSyncTableResult( + new PhoenixSyncTableCheckpointOutputRow.Builder().setTableName(tableName) + .setTargetCluster(targetCluster).setType(Type.CHUNK).setFromTime(0L).setToTime(1000L) + .setIsDryRun(true).setStartRowKey(startKey).setEndRowKey(endKey).setStatus(statuses[i]) + .setExecutionStartTime(timestamp).setExecutionEndTime(timestamp).build()); } - List dryRunResults = repository.getProcessedChunks( - tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); + List dryRunResults = repository + .getProcessedChunks(tableName, targetCluster, 0L, 1000L, null, mapperStart, mapperEnd, true); assertEquals("Dry-run mode should return all statuses", statuses.length, dryRunResults.size()); } From 7cce16a9c2b19f9e467d32f59ca94ab1e5e16ba8 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 9 Jun 2026 14:05:29 +0530 Subject: [PATCH 15/18] test comments --- .../end2end/PhoenixSyncTableToolIT.java | 108 +++++------------- 1 file changed, 28 insertions(+), 80 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 19863d20820..100d34bd567 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -3076,16 +3076,7 @@ private List selectChunksToDeleteFromMapper return new ArrayList<>(uniqueChunksToDelete.values()); } - /** - * Deletes mapper and chunk checkpoint entries to simulate partial rerun scenarios. - * @param conn Connection to use - * @param tableName Table name - * @param targetZkQuorum Target cluster ZK quorum - * @param tenantId Tenant ID - * @param mappersToDelete List of mapper entries to delete - * @param chunksToDelete List of chunk entries to delete - * @return Total number of entries deleted - */ + /** Deletes the given mapper + chunk checkpoint rows; used by partial-rerun fixtures. */ private int deleteCheckpointEntries(Connection conn, String tableName, String targetZkQuorum, String tenantId, List mappersToDelete, List chunksToDelete) throws SQLException { @@ -3197,14 +3188,8 @@ private void waitForReplication(Connection targetConn, String tableName, int exp } /** - * Waits for a specific row's content to be replicated to the target cluster. This is more precise - * than waitForReplication() when dealing with UPDATEs where the row count doesn't change but the - * content does. - * @param targetConn Target cluster connection - * @param tableName Table name - * @param rowId The ID of the row to check - * @param expectedName The expected NAME value - * @throws Exception if replication times out or query fails + * Polls target until the row's NAME matches {@code expectedName} — use this for UPDATEs where row + * count is unchanged so {@link #waitForReplication} can't tell drift from convergence. */ private void waitForRowContentReplication(Connection targetConn, String tableName, int rowId, String expectedName) throws Exception { @@ -3611,16 +3596,8 @@ private List queryCheckpointTable(Connectio } /** - * Unified method to delete a single checkpoint entry by start row key and optional type. Handles - * NULL/empty start keys for first region boundaries. - * @param conn Connection to use - * @param tableName Table name - * @param targetCluster Target cluster ZK quorum - * @param tenantId Tenant ID (nullable) - * @param type Entry type (REGION or CHUNK), or null to delete regardless of type - * @param startRowKey Start row key to match - * @param autoCommit Whether to commit after delete - * @return Number of rows deleted + * Deletes a single checkpoint entry by start row key. Pass {@code type=null} to match any type; + * NULL/empty {@code startRowKey} matches the first-region boundary row. */ private int deleteSingleCheckpointEntry(Connection conn, String tableName, String targetCluster, String tenantId, PhoenixSyncTableCheckpointOutputRow.Type type, byte[] startRowKey, @@ -3723,19 +3700,16 @@ private Job runSyncToolWithZkQuorum(String tableName, String zkQuorum, String... } /** - * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration}. Tests that - * need to override individual settings (paging, timeouts, etc.) should use this helper rather - * than constructing the Configuration inline so that any future change to the base config flows - * through one place. + * Returns a fresh, mutable copy of the source cluster's HBase {@link Configuration} — use this + * (not an inline constructor) so future base-config changes flow through one place. */ private static Configuration sourceClusterConf() { return new Configuration(CLUSTERS.getHBaseCluster1().getConfiguration()); } /** - * Builds a {@link PhoenixSyncTableTool}, runs it with the supplied args, and asserts the run - * surfaces failure as a non-zero exit code rather than a thrown exception. Used by the - * failure-mode tests that previously hand-rolled this same try/run/assertTrue/catch-fail block. + * Runs the tool and asserts failure surfaces as a non-zero exit code rather than a thrown + * exception. Used by failure-mode tests. */ private void assertSyncToolFails(String[] args, String failureContext) { PhoenixSyncTableTool tool = new PhoenixSyncTableTool(); @@ -3750,13 +3724,11 @@ private void assertSyncToolFails(String[] args, String failureContext) { } /** - * Upserts a "MODIFIED_NAME_" row on target for each id in {@code mismatchIds}. Replaces the - * common pattern {@code for (int id : ids) upsertRowsOnTarget(..., new int[]{id}, new - * String[]{"MODIFIED_NAME_"+id})} which defeated the batch-upsert path of - * {@link #upsertRowsOnTarget}. + * Batch-upserts {@code "MODIFIED_NAME_"} on target for each id, then waits for replication so + * the modify is observable before the test asserts on it. */ private void introduceMismatchesByIds(String tableName, List mismatchIds) - throws SQLException { + throws Exception { int[] ids = new int[mismatchIds.size()]; String[] names = new String[mismatchIds.size()]; for (int i = 0; i < mismatchIds.size(); i++) { @@ -3764,24 +3736,16 @@ private void introduceMismatchesByIds(String tableName, List mismatchId names[i] = "MODIFIED_NAME_" + mismatchIds.get(i); } upsertRowsOnTarget(targetConnection, tableName, ids, names); + // Confirm each modified NAME is observable on target — replication can race the upsert's + // cell timestamp and silently no-op the modify, which would later flake as "0 mismatched". + for (int i = 0; i < ids.length; i++) { + waitForRowContentReplication(targetConnection, tableName, ids[i], names[i]); + } } /** - * Starts two daemon-style threads that perform region mutations (splits or merges) on the source - * and target clusters and returns a {@link Runnable} the caller invokes to join them with a - * 30-second timeout. Both worker {@link Runnable}s are wrapped in try/catch so that an unexpected - * exception is logged rather than killing the JVM thread silently. - *

    - * Usage: - * - *

    -   *   Runnable joiner = startConcurrentRegionWork(sourceWork, targetWork, "splits");
    -   *   ... run main sync work ...
    -   *   joiner.run();
    -   * 
    - *

    - * Caller is responsible for invoking the returned joiner; tests should always join before - * asserting on cluster state, otherwise late-arriving region mutations can race the assertions. + * Returns a joiner the caller MUST run to await the worker threads — late-arriving region + * mutations otherwise race the test's assertions. */ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetWork, String label) { @@ -3928,11 +3892,7 @@ private static String[] appendArg(String[] args, String newArg) { return result; } - /** - * Counts checkpoint entries (both REGION and CHUNK rows) in the given status. Replaces the ad-hoc - * {@code for (entry : entries) if (status.equals(entry.getStatus())) count++} loops that recurred - * across several tests. - */ + /** Counts checkpoint entries (REGION + CHUNK) in the given status. */ private static long countCheckpointsByStatus(List entries, PhoenixSyncTableCheckpointOutputRow.Status status) { long count = 0; @@ -4242,13 +4202,10 @@ private long createMultiColumnFamilyTableOnBothClusters(String tableName) throws } /** - * Spins until {@link System#currentTimeMillis()} is strictly greater than {@code minTs}, then - * returns the resulting wall-clock value. Used by repair tests that plant cells at handcrafted - * future timestamps and then need a {@code --to-time} that both (a) covers every planted cell and - * (b) satisfies the tool's {@code endTime <= currentTimeMillis()} validation - * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). The spin - * terminates in 1-2ms — it is a deterministic precondition gate, not a sleep-based wait for an - * external side effect. + * Spins until {@code System.currentTimeMillis() > minTs} and returns the new wall clock. Used for + * {@code --to-time} when tests plant cells at handcrafted future timestamps — the value must + * cover every planted cell and satisfy {@code endTime <= currentTimeMillis()} + * ({@link org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil#validateTimeRange}). */ private long waitUntilWallClockPasses(long minTs) { while (System.currentTimeMillis() <= minTs) { @@ -4269,11 +4226,7 @@ private Connection openConnectionAtScn(String zkUrl, long scnTimestamp) throws S return DriverManager.getConnection("jdbc:phoenix:" + zkUrl, props); } - /** - * Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits. - * Replaces the verbose {@code try (Connection scn = openConnectionAtScn(...)) { execute; commit; - * }} boilerplate that recurred in nearly every repair test. - */ + /** Executes a single UPSERT through an SCN-pinned connection on the given cluster and commits. */ private void upsertAtScn(String zkUrl, long ts, String upsertSql) throws SQLException { try (Connection scn = openConnectionAtScn(zkUrl, ts)) { scn.createStatement().execute(upsertSql); @@ -4297,11 +4250,7 @@ private void upsertAtScnBoth(long ts, String upsertSql) throws SQLException { upsertAtScnTarget(ts, upsertSql); } - /** - * Asserts target's visible NAME for {@code id} equals {@code expected}. Replaces the - * {@code prepareStatement("SELECT NAME FROM ... WHERE ID = ?")} block repeated across repair - * tests. - */ + /** Asserts target's visible NAME for {@code id} equals {@code expected}. */ private void assertTargetName(String tableName, int id, String expected) throws SQLException { try (PreparedStatement ps = targetConnection.prepareStatement("SELECT NAME FROM " + tableName + " WHERE ID = ?")) { @@ -4387,9 +4336,8 @@ int totalDeletes() { } /** - * Raw scan of a single row on the target cluster, summarising every NAME-column cell by Put / - * tombstone subtype. Replaces the open-coded raw-scan loop repeated by tests that pin post-repair - * NAME tombstone counts. + * Raw scan of a single row on target, summarising every NAME-column cell by Put / tombstone + * subtype. Used by repair tests that pin post-repair NAME tombstone counts. */ private RawCellSummary scanRawTargetNameCells(String tableName, int rowId) throws Exception { byte[] rk = integerRowKey(rowId); From 9b1147bb858d3865f72979c9696c4b1d0ffd5f80 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 9 Jun 2026 17:02:00 +0530 Subject: [PATCH 16/18] add test for chunk repair --- .../end2end/PhoenixSyncTableToolIT.java | 86 ++--- .../PhoenixSyncTableChunkRepairerTest.java | 319 ++++++++++++++++++ 2 files changed, 366 insertions(+), 39 deletions(-) create mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 100d34bd567..3bc53d40636 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -171,7 +171,7 @@ public void testSyncTableWithDataDifference() throws Exception { // Pin the time window so the dry-run and repair share the same checkpoint PK. long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Phase 1: dry-run only — verify checkpoint table sees only VERIFIED/MISMATCHED rows. Job dryRunJob = runSyncToolWithChunkSize(uniqueTableName, 1024, "--dry-run", "--from-time", @@ -493,7 +493,7 @@ public void testSyncMultiTenantSaltedTableDifferences() throws Exception { targetRows); }); } - String toTime = String.valueOf(System.currentTimeMillis()); + String toTime = String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())); for (String tenantId : tenantIds) { Connection tenantSourceConn = getTenantConnection(sourceConnection, tenantId); @@ -537,12 +537,15 @@ public void testSyncTableWithTimeRangeFilter() throws Exception { // Insert data BEFORE the time range window insertTestData(sourceConnection, uniqueTableName, 1, 10); - long startTime = System.currentTimeMillis(); + // HBase Scan.setTimeRange is half-open [from, to); Phoenix UPSERT batches commit at one + // ms-resolution timestamp. Wait for the wall clock to advance past the just-committed + // boundary cells so they land strictly outside the [startTime, endTime) window. + long startTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Insert data WITHIN the time range window insertTestData(sourceConnection, uniqueTableName, 11, 20); - long endTime = System.currentTimeMillis(); + long endTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Insert data AFTER the time range window insertTestData(sourceConnection, uniqueTableName, 21, 30); @@ -589,7 +592,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionSplits() throws Exce // Capture consistent time range for both runs long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); validateSyncCountersWithMinChunk(setup.firstRunCounters, 100, 100, 1, 1); @@ -669,7 +672,7 @@ public void testSyncTableCheckpointWithChunkSizeChangeOnReRun() throws Exception introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // First run with large chunk size, then delete 75% of chunks for partial rerun. int largeChunkSize = 10240; @@ -734,7 +737,7 @@ public void testSyncTableCheckpointWithPartialReRunAndRegionMerges() throws Exce introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); SyncCountersResult counters1 = setup.firstRunCounters; @@ -793,7 +796,7 @@ public void testSyncTableIdempotentOnReRun() throws Exception { // Capture consistent time range for both runs (ensures checkpoint lookup will match) long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Run sync tool for the FIRST time Job job1 = runSyncTool(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", @@ -841,7 +844,7 @@ public void testSyncTableIdempotentAfterRegionSplits() throws Exception { // Capture consistent time range for both runs long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Run sync tool for the FIRST time (no differences, all chunks verified) Job job1 = runSyncToolWithChunkSize(uniqueTableName, 1024, "--from-time", @@ -920,7 +923,7 @@ public void testSyncTableInBackgroundMode() throws Exception { // Pin the time window so the background dry-run pass and the repair pass below share // the same checkpoint PK and the repair pass overwrites MISMATCHED → REPAIRED in place. long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Configuration conf = sourceClusterConf(); String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", @@ -978,7 +981,7 @@ public void testSyncTableWithCustomTimeouts() throws Exception { conf.setInt(QueryServices.SYNC_TABLE_RPC_RETRIES_COUNTER, customRpcRetries); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Job job = runSyncToolWithChunkSize(uniqueTableName, 1, conf, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); @@ -1066,7 +1069,7 @@ public void testSyncTableWithConcurrentRegionSplits() throws Exception { // Capture time range for the sync long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Run splits on source/target concurrently with the sync. Runnable splitJoiner = startConcurrentRegionWork( @@ -1126,7 +1129,7 @@ public void testRepairWithConcurrentTargetSplits() throws Exception { introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Job dryRunJob = runSyncTool(uniqueTableName, "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); @@ -1163,7 +1166,7 @@ public void testRepairIsIdempotent() throws Exception { introduceMismatchesByIds(uniqueTableName, mismatchIds); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); RepairRunResult firstRun = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); @@ -1300,7 +1303,7 @@ public void testSyncTableWithConcurrentRegionMerges() throws Exception { // Capture time range for the sync long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Run merges on source/target concurrently with the sync. Runnable mergeJoiner = @@ -1354,7 +1357,7 @@ public void testSyncTableWithPagingTimeout() throws Exception { conf.setLong(QueryServices.PHOENIX_SERVER_PAGE_SIZE_MS, 1); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Dry-run with paging to assert chunk-count expansion under mid-chunk timeouts. Job job = runSyncToolWithChunkSize(uniqueTableName, chunkSize, conf, "--dry-run", "--from-time", @@ -1421,7 +1424,7 @@ public void testSyncTableWithPagingTimeoutWithSplits() throws Exception { "splits"); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Dry-run sync while splits are happening — drift must remain on target so the chunk-count // assertion below has work to do (otherwise an inline repair would converge mid-pass). @@ -1472,7 +1475,8 @@ public void testSyncTableMapperFailsWithNonExistentTable() throws Exception { // Try to run sync tool on a NON-EXISTENT table String nonExistentTable = "NON_EXISTENT_TABLE_" + System.currentTimeMillis(); String[] args = new String[] { "--table-name", nonExistentTable, "--target-cluster", - targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; + targetZkQuorum, "--run-foreground", "--to-time", + String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) }; assertSyncToolFails(args, String.format("Table %s does not exist, mapper setup should fail", nonExistentTable)); @@ -1485,9 +1489,9 @@ public void testSyncTableMapperFailsWithInvalidTargetCluster() throws Exception // Try to run sync tool with INVALID target cluster ZK quorum. String invalidTargetZk = "invalid-zk-host:2181:/hbase"; - String[] args = - new String[] { "--table-name", uniqueTableName, "--target-cluster", invalidTargetZk, - "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; + String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", + invalidTargetZk, "--run-foreground", "--to-time", + String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) }; assertSyncToolFails(args, String.format("Target cluster %s is invalid, mapper setup should fail", invalidTargetZk)); @@ -1505,7 +1509,8 @@ public void testSyncTableMapperFailsWithMissingTargetTable() throws Exception { // Don't create table on target - this will cause mapper map() to fail // when trying to scan the non-existent target table String[] args = new String[] { "--table-name", uniqueTableName, "--target-cluster", - targetZkQuorum, "--run-foreground", "--to-time", String.valueOf(System.currentTimeMillis()) }; + targetZkQuorum, "--run-foreground", "--to-time", + String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())) }; assertSyncToolFails(args, String.format( @@ -1530,7 +1535,7 @@ public void testCheckpointWriteFailureCausesNonZeroExit() throws Exception { // never get overwritten — leaving stale MISMATCHED rows that the post-recovery validation // would (correctly) flag. long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // Run once first so the checkpoint table exists; we can only attach a coprocessor to a // table that's already been created. @@ -1582,7 +1587,7 @@ public void testRepairFailedSurfacesCountersAndCheckpoint() throws Exception { TestUtil.addCoprocessor(targetConnection, uniqueTableName, RepairBatchFailingObserver.class); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); try { // dryRun + repair: dry-run only reads from target, so it succeeds; repair flushes // mutations and trips the observer. @@ -1626,7 +1631,7 @@ public void testSyncTableCheckpointPersistsAcrossFailedRuns() throws Exception { // Capture time range for both runs (ensures checkpoint lookup will match) long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); // First run + 75% deletion preamble (shared with other partial-rerun tests) PartialRerunSetup setup = setupPartialRerun(uniqueTableName, fromTime, toTime, 1, 0.75); @@ -1720,9 +1725,9 @@ public void testRepairUnwindsHiddenTargetVersions() throws Exception { assertTargetName(uniqueTableName, rowId, "carol"); // --read-all-versions so verifier and repairer both see the hidden version. - RepairRunResult result = - runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(System.currentTimeMillis()), "--read-all-versions"); + RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", + String.valueOf(fromTime), "--to-time", + String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())), "--read-all-versions"); assertTrue("Dry-run should succeed", result.dryRunJob.isSuccessful()); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); @@ -2306,7 +2311,7 @@ public void testRepairFlushesMidRowWithSmallBatchSize() throws Exception { sourceConnection.commit(); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Configuration conf = sourceClusterConfWithRepairBatchSize(2); @@ -2361,7 +2366,8 @@ public void testRepairRawScanAllVersionsMirrorsTombstoneAndPut() throws Exceptio RepairRunResult result = runSyncToolWithRepair(uniqueTableName, "--from-time", String.valueOf(fromTime), "--to-time", - String.valueOf(System.currentTimeMillis()), "--raw-scan", "--read-all-versions"); + String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis())), "--raw-scan", + "--read-all-versions"); assertTrue("Repair should succeed", result.repairJob.isSuccessful()); SyncCountersResult dryRunCounters = getSyncCounters(result.dryRunJob); @@ -2475,7 +2481,7 @@ public void testRepairMixedPutDeleteBatchWithSmallBatchSize() throws Exception { targetConnection.commit(); long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Configuration conf = sourceClusterConfWithRepairBatchSize(4); @@ -2744,7 +2750,7 @@ public void testSyncTableWithSplitCoalescing() throws Exception { // Enable split coalescing via command-line parameter, all regions will be coalesced into one // mapper. Use a pinned window so the dry-run and repair share the same checkpoint PK. long fromTime = 0L; - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); Job dryRunJob = runSyncTool(uniqueTableName, "--coalesce-split", "--dry-run", "--from-time", String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); @@ -2783,7 +2789,7 @@ public void testSyncTableSucceedsWhenEndTimeOlderThanMaxLookbackAge() throws Exc waitForReplication(targetConnection, uniqueTableName, 10); verifyDataIdentical(sourceConnection, targetConnection, uniqueTableName); - long toTime = System.currentTimeMillis(); + long toTime = waitUntilWallClockPasses(System.currentTimeMillis()); long maxLookbackAgeSeconds = 5; Configuration conf = sourceClusterConf(); @@ -3784,7 +3790,7 @@ private Runnable startConcurrentRegionWork(Runnable sourceWork, Runnable targetW */ private long captureBaselineChunkCount(String tableName, int chunkSize) throws Exception { Job baselineJob = runSyncToolWithChunkSize(tableName, chunkSize, "--dry-run", "--from-time", - "0", "--to-time", String.valueOf(System.currentTimeMillis())); + "0", "--to-time", String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis()))); long chunkCount = baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_VERIFIED).getValue() + baselineJob.getCounters().findCounter(SyncCounters.CHUNKS_MISMATCHED).getValue(); cleanupCheckpointTable(sourceConnection, tableName, targetZkQuorum, null); @@ -3837,7 +3843,8 @@ private RepairRunResult runSyncToolWithRepair(String tableName, String... additi private RepairRunResult runSyncToolWithRepair(String tableName, int chunkSize, String... additionalArgs) throws Exception { long fromTime = parseLongFlag(additionalArgs, "--from-time", 0L); - long toTime = parseLongFlag(additionalArgs, "--to-time", System.currentTimeMillis()); + long toTime = parseLongFlag(additionalArgs, "--to-time", + waitUntilWallClockPasses(System.currentTimeMillis())); String[] pinnedArgs = ensureTimeArgs(additionalArgs, fromTime, toTime); String[] dryRunArgs = appendArg(pinnedArgs, "--dry-run"); @@ -3947,12 +3954,13 @@ private Job runSyncToolWithChunkSize(String tableName, int chunkSize, Configurat List additionalArgsList = Arrays.asList(additionalArgs); argsList.addAll(additionalArgsList); - // If --to-time is not explicitly provided in additionalArgs, add current time - // This is needed because the default is now (current time - 1 hour) which won't - // capture data inserted immediately before running the sync tool + // If --to-time is not explicitly provided in additionalArgs, add current time. The default + // is now (current time - 1 hour) which won't capture data inserted immediately before + // running the sync tool. Wait for the wall clock to advance past the just-committed cells + // so they fall strictly inside the half-open Scan.setTimeRange(from, to) upper bound. if (!additionalArgsList.contains("--to-time")) { argsList.add("--to-time"); - argsList.add(String.valueOf(System.currentTimeMillis())); + argsList.add(String.valueOf(waitUntilWallClockPasses(System.currentTimeMillis()))); } String[] args = argsList.toArray(new String[0]); diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java new file mode 100644 index 00000000000..5357120f497 --- /dev/null +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableChunkRepairerTest.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.Collections; +import java.util.Set; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.CellDriftCounts; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairRequest; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.ChunkRepairResult; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.DriftCounters; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.RowDriftInfo; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.RowMirrorStatus; +import org.apache.phoenix.mapreduce.PhoenixSyncTableChunkRepairer.TargetRowRecord; +import org.junit.Test; + +/** + * Unit tests for the pure-data inner classes of {@link PhoenixSyncTableChunkRepairer}. + * Orchestration paths (repair/dryRun walks, scan construction, batch flushing) are covered by + * {@code PhoenixSyncTableToolIT}; this file pins the data-class invariants so a regression there + * fails as a localized unit-test failure rather than a counter mismatch many layers up. + */ +public class PhoenixSyncTableChunkRepairerTest { + + private static final byte[] ROW = Bytes.toBytes("row"); + private static final byte[] CF = Bytes.toBytes("0"); + private static final byte[] CF2 = Bytes.toBytes("1"); + private static final byte[] Q_NAME = Bytes.toBytes("NAME"); + private static final byte[] Q_VALUE = Bytes.toBytes("NAME_VALUE"); + private static final byte[] V_ALICE = Bytes.toBytes("alice"); + + @Test + public void columnKeyEqualsHonorsByteArrayContent() { + PhoenixSyncTableChunkRepairer.ColumnKey a = + new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME); + PhoenixSyncTableChunkRepairer.ColumnKey b = + new PhoenixSyncTableChunkRepairer.ColumnKey(Bytes.toBytes("0"), Bytes.toBytes("NAME")); + assertEquals("Distinct byte[] copies with the same content must be equal", a, b); + assertEquals("hashCode must match equality", a.hashCode(), b.hashCode()); + } + + @Test + public void columnKeyDistinguishesFamilyAndQualifier() { + PhoenixSyncTableChunkRepairer.ColumnKey nameInCf = + new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME); + PhoenixSyncTableChunkRepairer.ColumnKey nameInOtherCf = + new PhoenixSyncTableChunkRepairer.ColumnKey(CF2, Q_NAME); + PhoenixSyncTableChunkRepairer.ColumnKey valueInCf = + new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_VALUE); + assertNotEquals("Same qualifier in different family must not collide", nameInCf, nameInOtherCf); + assertNotEquals("Same family with different qualifier must not collide", nameInCf, valueInCf); + } + + @Test + public void columnKeyEqualsRejectsForeignType() { + PhoenixSyncTableChunkRepairer.ColumnKey k = + new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME); + assertNotEquals(k, "not-a-key"); + assertNotEquals(k, null); + } + + @Test + public void columnKeyOfCellMatchesExplicitConstruction() { + Cell cell = new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE); + assertEquals(new PhoenixSyncTableChunkRepairer.ColumnKey(CF, Q_NAME), + PhoenixSyncTableChunkRepairer.ColumnKey.of(cell)); + } + + @Test + public void wouldShadowReturnsFalseOnEmptyRecord() { + TargetRowRecord rec = new TargetRowRecord(); + assertFalse("No tombstones recorded ⇒ no shadow", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE))); + } + + @Test + public void pointDeleteShadowsExactTimestampOnly() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.Delete)); + + assertTrue("Point Delete shadows a Put at exactly ts == 100", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE))); + assertFalse("Point Delete must NOT shadow a Put at ts == 99", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 99L, V_ALICE))); + assertFalse("Point Delete must NOT shadow a Put at ts == 101", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE))); + assertFalse("Point Delete must NOT shadow a different qualifier", + rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE))); + } + + @Test + public void deleteColumnShadowsAllPutsAtOrBelowMarker() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.DeleteColumn)); + + assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 50L, V_ALICE))); + assertTrue("DeleteColumn shadows Put at exactly the marker ts", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE))); + assertFalse("DeleteColumn must NOT shadow Puts above the marker", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE))); + assertFalse("DeleteColumn must NOT shadow a different qualifier", + rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 50L, V_ALICE))); + } + + @Test + public void deleteColumnUpperBoundUsesMaxAcrossMultipleMarkers() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.DeleteColumn)); + rec.record(new KeyValue(ROW, CF, Q_NAME, 200L, Type.DeleteColumn)); + rec.record(new KeyValue(ROW, CF, Q_NAME, 50L, Type.DeleteColumn)); + + assertTrue("Upper bound must collapse to the max marker (200)", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 200L, V_ALICE))); + assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 201L, V_ALICE))); + } + + @Test + public void deleteFamilyShadowsAllQualifiersAtOrBelowMarker() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, null, 100L, Type.DeleteFamily)); + + assertTrue("DeleteFamily covers any qualifier in CF at ts <= 100", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 50L, V_ALICE))); + assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE))); + assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE))); + assertFalse("DeleteFamily must NOT span a different family", + rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 50L, V_ALICE))); + } + + @Test + public void deleteFamilyVersionShadowsAllQualifiersAtExactTs() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, null, 100L, Type.DeleteFamilyVersion)); + + assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE))); + assertTrue(rec.wouldShadow(new KeyValue(ROW, CF, Q_VALUE, 100L, V_ALICE))); + assertFalse("DFV must NOT cover other timestamps", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 99L, V_ALICE))); + assertFalse(rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 101L, V_ALICE))); + assertFalse(rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 100L, V_ALICE))); + } + + @Test + public void wouldShadowTrueIfAnyTombstoneSubtypeMatches() { + TargetRowRecord rec = new TargetRowRecord(); + // Point Delete on (CF, NAME) at ts=100; DeleteFamily on a different family at ts=999. + rec.record(new KeyValue(ROW, CF, Q_NAME, 100L, Type.Delete)); + rec.record(new KeyValue(ROW, CF2, null, 999L, Type.DeleteFamily)); + + assertTrue("Match on point-delete arm wins regardless of other arms", + rec.wouldShadow(new KeyValue(ROW, CF, Q_NAME, 100L, V_ALICE))); + assertTrue("Match on family arm wins regardless of other arms", + rec.wouldShadow(new KeyValue(ROW, CF2, Q_NAME, 500L, V_ALICE))); + } + + @Test + public void targetPutTimestampsBetweenIsExclusiveOnBothEnds() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 300L, V_ALICE)); + rec.record(new KeyValue(ROW, CF, Q_NAME, 600L, V_ALICE)); + rec.record(new KeyValue(ROW, CF, Q_NAME, 900L, V_ALICE)); + + Set hidden = rec.targetPutTimestampsBetween(CF, Q_NAME, 300L, 900L); + assertEquals("Bounds are exclusive on both ends — only 600 falls strictly between", + Collections.singleton(600L), hidden); + } + + @Test + public void targetPutTimestampsBetweenEmptyWhenNoPutsForColumn() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 500L, V_ALICE)); + assertTrue("Different qualifier ⇒ empty set, not null", + rec.targetPutTimestampsBetween(CF, Q_VALUE, 0L, Long.MAX_VALUE).isEmpty()); + } + + @Test + public void targetPutTimestampsBetweenSkipsTombstones() { + TargetRowRecord rec = new TargetRowRecord(); + rec.record(new KeyValue(ROW, CF, Q_NAME, 500L, V_ALICE)); + rec.record(new KeyValue(ROW, CF, Q_NAME, 600L, Type.Delete)); + Set puts = rec.targetPutTimestampsBetween(CF, Q_NAME, 0L, Long.MAX_VALUE); + assertEquals("Tombstone cells must not be reported as Put timestamps", + Collections.singleton(500L), puts); + } + + @Test + public void driftCountersStartZeroAndAccumulateCellDrift() { + DriftCounters d = new DriftCounters(); + assertEquals(0, d.cellsMissingOnTarget); + assertEquals(0, d.cellsExtraOnTarget); + assertEquals(0, d.cellsDifferentOnTarget); + + d.addCellDrift(new CellDriftCounts(2, 3, 5)); + d.addCellDrift(new CellDriftCounts(1, 0, 4)); + assertEquals(3, d.cellsMissingOnTarget); + assertEquals(3, d.cellsExtraOnTarget); + assertEquals(9, d.cellsDifferentOnTarget); + } + + @Test + public void driftCountersToLogStringIncludesEveryCounter() { + DriftCounters d = new DriftCounters(); + d.rowsMissingOnTarget = 1; + d.rowsExtraOnTarget = 2; + d.rowsDifferentOnTarget = 3; + d.rowsCannotRepair = 4; + d.cellsMissingOnTarget = 5; + d.cellsExtraOnTarget = 6; + d.cellsDifferentOnTarget = 7; + + String log = d.toLogString(); + assertTrue(log.contains("rowsMissingOnTarget=1")); + assertTrue(log.contains("rowsExtraOnTarget=2")); + assertTrue(log.contains("rowsDifferentOnTarget=3")); + assertTrue(log.contains("rowsCannotRepair=4")); + assertTrue(log.contains("cellsMissingOnTarget=5")); + assertTrue(log.contains("cellsExtraOnTarget=6")); + assertTrue(log.contains("cellsDifferentOnTarget=7")); + } + + @Test + public void completedReturnsRepairedWhenNoRowCannotRepair() { + DriftCounters d = new DriftCounters(); + d.rowsMissingOnTarget = 5; + d.cellsExtraOnTarget = 2; + ChunkRepairResult result = ChunkRepairResult.completed(d); + assertEquals(ChunkRepairResult.Status.REPAIRED, result.status); + assertEquals(d, result.drift); + assertNotNull(result.endTime); + assertEquals("Successful completion ⇒ no failure", null, result.failure); + } + + @Test + public void completedReturnsUnrepairableWhenRowCannotRepair() { + DriftCounters d = new DriftCounters(); + d.rowsCannotRepair = 1; + ChunkRepairResult result = ChunkRepairResult.completed(d); + assertEquals(ChunkRepairResult.Status.UNREPAIRABLE, result.status); + assertEquals(null, result.failure); + } + + @Test + public void failedReturnsRepairFailedAndCarriesException() { + DriftCounters d = new DriftCounters(); + d.rowsMissingOnTarget = 1; + d.rowsCannotRepair = 1; + IOException cause = new IOException("simulated"); + ChunkRepairResult result = ChunkRepairResult.failed(d, cause); + assertEquals("REPAIR_FAILED beats UNREPAIRABLE regardless of drift counters", + ChunkRepairResult.Status.REPAIR_FAILED, result.status); + assertEquals(cause, result.failure); + assertNotNull(result.endTime); + } + + @Test + public void cellDriftCountsNoneIsAllZero() { + assertEquals(0, CellDriftCounts.NONE.missing); + assertEquals(0, CellDriftCounts.NONE.extra); + assertEquals(0, CellDriftCounts.NONE.different); + } + + @Test + public void rowDriftInfoNoneIsZeroDriftAndRepairable() { + assertEquals(CellDriftCounts.NONE, RowDriftInfo.NONE.cells); + assertFalse(RowDriftInfo.NONE.rowCannotRepair); + } + + @Test + public void rowMirrorStatusEnumeratesAllThreeOutcomes() { + assertEquals(3, RowMirrorStatus.values().length); + } + + @Test + public void chunkRepairRequestPreservesEveryField() { + byte[] srcStart = Bytes.toBytes("a"); + byte[] srcEnd = Bytes.toBytes("m"); + byte[] tgtStart = Bytes.toBytes("a"); + byte[] tgtEnd = Bytes.toBytes("z"); + Timestamp verifyStart = new Timestamp(123456L); + ChunkRepairRequest req = new ChunkRepairRequest(srcStart, srcEnd, tgtStart, tgtEnd, false, true, + 42L, 99L, verifyStart, true); + assertEquals(srcStart, req.sourceStart); + assertEquals(srcEnd, req.sourceEnd); + assertEquals(tgtStart, req.targetStart); + assertEquals(tgtEnd, req.targetEnd); + assertFalse(req.targetStartInclusive); + assertTrue(req.targetEndInclusive); + assertEquals(42L, req.verifySourceRows); + assertEquals(99L, req.verifyTargetRows); + assertEquals(verifyStart, req.verifyStartTime); + assertTrue(req.dryRun); + } +} From 60904027a63dca6d5e969ff76c8c03aa2e089b3d Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 9 Jun 2026 17:02:26 +0530 Subject: [PATCH 17/18] fix spotless --- .../ConnectionQueryServicesMetricsManagerTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java index 72c38816d84..86fc007b906 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/monitoring/connectionqueryservice/ConnectionQueryServicesMetricsManagerTest.java @@ -17,12 +17,13 @@ */ package org.apache.phoenix.monitoring.connectionqueryservice; -import static org.apache.phoenix.monitoring.MetricType.*; +import static org.apache.phoenix.monitoring.MetricType.OPEN_INTERNAL_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.OPEN_PHOENIX_CONNECTIONS_COUNTER; +import static org.apache.phoenix.monitoring.MetricType.PHOENIX_CONNECTIONS_THROTTLED_COUNTER; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.connectionQueryServiceNames; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openInternalPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.openPhoenixConnCounter; import static org.apache.phoenix.monitoring.connectionqueryservice.ConnectionQueryServicesNameMetricsTest.phoenixConnThrottledCounter; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.List; @@ -31,7 +32,6 @@ import org.apache.phoenix.monitoring.ConnectionQueryServicesMetric; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesOptions; -import org.apache.phoenix.util.PhoenixRuntime; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; From 91b1a4d05b459b83ead663ced8804f592400b957 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Tue, 9 Jun 2026 19:48:01 +0530 Subject: [PATCH 18/18] improve mapper progres tracking for coalesced split --- ...a => PhoenixNoOpPerRangeRecordReader.java} | 53 ++++--- .../PhoenixSyncTableInputFormat.java | 20 ++- .../mapreduce/PhoenixSyncTableMapper.java | 37 +++-- .../end2end/PhoenixSyncTableToolIT.java | 4 +- .../PhoenixNoOpPerRangeRecordReaderTest.java | 142 ++++++++++++++++++ .../PhoenixNoOpSingleRecordReaderTest.java | 72 --------- .../PhoenixSyncTableInputFormatTest.java | 4 +- 7 files changed, 215 insertions(+), 117 deletions(-) rename phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/{PhoenixNoOpSingleRecordReader.java => PhoenixNoOpPerRangeRecordReader.java} (55%) create mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java delete mode 100644 phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java similarity index 55% rename from phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java rename to phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java index 28ec1ce4404..1866532e509 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReader.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReader.java @@ -25,7 +25,8 @@ import org.apache.hadoop.mapreduce.lib.db.DBWritable; /** - * A minimal RecordReader that returns exactly one dummy record per InputSplit. + * A minimal RecordReader that returns one dummy record per {@link org.apache.phoenix.query.KeyRange + * KeyRange} carried by a {@link PhoenixInputSplit}. *

    * Use this when your mapper: *

      @@ -35,42 +36,52 @@ *
    *

    * This avoids the overhead of scanning and returning all rows when the mapper only needs to be - * triggered once per region/split. The standard {@link PhoenixRecordReader} iterates through all - * rows, calling {@code map()} for each row - which is wasteful when the mapper ignores the row data - * entirely. + * triggered per region. The standard {@link PhoenixRecordReader} iterates through all rows, calling + * {@code map()} for each row - which is wasteful when the mapper ignores the row data entirely. *

    * How it works: *

      - *
    • {@link #nextKeyValue()} returns {@code true} exactly once, then {@code false}
    • - *
    • This triggers {@code map()} exactly once per InputSplit (region)
    • - *
    • The mapper extracts region boundaries from the InputSplit, not from records
    • + *
    • {@link #initialize(InputSplit, TaskAttemptContext)} reads the {@link PhoenixInputSplit}'s key + * ranges to learn how many records to emit (one per range)
    • + *
    • {@link #nextKeyValue()} returns {@code true} once per range, then {@code false}
    • + *
    • This triggers {@code map()} once per range; for a coalesced split with N regions the mapper + * runs {@code map()} N times, giving the framework per-range visibility
    • + *
    • {@link #getProgress()} returns the fraction of ranges already consumed, so YARN sees real + * mapper progress instead of a 0% to 100% jump at the end
    • *
    * @see PhoenixSyncTableInputFormat * @see PhoenixRecordReader */ -public class PhoenixNoOpSingleRecordReader extends RecordReader { +public class PhoenixNoOpPerRangeRecordReader extends RecordReader { - private boolean hasRecord = true; + private int totalRanges = 1; + private int consumedRanges = 0; /** - * Initialize the RecordReader. No initialization is needed since we return a single dummy record. + * Initialize the RecordReader. Reads the number of key ranges from the {@link PhoenixInputSplit} + * so subsequent {@link #nextKeyValue()} calls emit one record per range. * @param split The InputSplit containing region boundaries * @param context The task context */ @Override public void initialize(InputSplit split, TaskAttemptContext context) { - // No initialization needed + if (split instanceof PhoenixInputSplit) { + int rangeCount = ((PhoenixInputSplit) split).getKeyRanges().size(); + if (rangeCount > 0) { + this.totalRanges = rangeCount; + } + } } /** - * Returns true exactly once to trigger a single map() call per split. - * @return true on first call, false on subsequent calls which makes Mapper task to exit calling - * map method + * Returns true once per key range in the split, then false. + * @return true while ranges remain unprocessed; false once all ranges have been emitted, which + * makes the Mapper task exit calling map method */ @Override public boolean nextKeyValue() { - if (hasRecord) { - hasRecord = false; + if (consumedRanges < totalRanges) { + consumedRanges++; return true; } return false; @@ -96,12 +107,16 @@ public DBWritable getCurrentValue() { } /** - * Returns progress: 0.0 before the record is consumed, 1.0 after. - * @return 0.0f if record not yet consumed, 1.0f otherwise + * Returns the fraction of ranges already consumed, so YARN reports real mapper progress as each + * range completes (important for coalesced splits where a single mapper covers many regions). + * @return progress in [0.0, 1.0] */ @Override public float getProgress() { - return hasRecord ? 0.0f : 1.0f; + if (totalRanges == 0) { + return 1.0f; + } + return ((float) consumedRanges) / totalRanges; } /** diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java index b1bce9f8023..26ded766b5f 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormat.java @@ -46,8 +46,9 @@ /** * InputFormat designed for PhoenixSyncTableTool that generates splits based on HBase region * boundaries. Filters out already-processed mapper regions using checkpoint data, enabling - * resumable sync jobs. Uses {@link PhoenixNoOpSingleRecordReader} to invoke the mapper once per - * split (region). + * resumable sync jobs. Uses {@link PhoenixNoOpPerRangeRecordReader} to invoke the mapper once per + * region within a split (one call for a single-region split, N calls for an N-region coalesced + * split). */ public class PhoenixSyncTableInputFormat extends PhoenixInputFormat { @@ -65,20 +66,23 @@ public PhoenixSyncTableInputFormat() { } /** - * Returns a {@link PhoenixNoOpSingleRecordReader} that emits exactly one dummy record per split. + * Returns a {@link PhoenixNoOpPerRangeRecordReader} that emits one dummy record per region in the + * split. *

    * PhoenixSyncTableMapper doesn't need actual row data from the RecordReader - it extracts region * boundaries from the InputSplit and delegates all scanning to the PhoenixSyncTableRegionScanner - * coprocessor. Using PhoenixNoOpSingleRecordReader ensures that {@code map()} is called exactly - * once per region no matter what scan looks like, avoiding the overhead of the default - * PhoenixRecordReader which would call {@code map()} for every row of scan. + * coprocessor. Using PhoenixNoOpPerRangeRecordReader ensures that {@code map()} is called once + * per region regardless of scan content, avoiding the overhead of the default PhoenixRecordReader + * which would call {@code map()} for every row of scan. Emitting one record per region (rather + * than one per split) also gives YARN per-region progress visibility for coalesced splits, which + * would otherwise jump from 0% to 100% only at completion. * @param split Input Split - * @return A PhoenixNoOpSingleRecordReader instance + * @return A PhoenixNoOpPerRangeRecordReader instance */ @Override public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) { - return new PhoenixNoOpSingleRecordReader(); + return new PhoenixNoOpPerRangeRecordReader(); } /** diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java index 7158d847778..9ba59a03305 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/PhoenixSyncTableMapper.java @@ -103,6 +103,7 @@ public enum SyncCounters { private PTable pTable; private byte[] physicalTableName; private List regionKeyRanges; + private int currentRangeIndex; private PhoenixSyncTableOutputRepository syncTableOutputRepository; private PhoenixSyncTableChunkRepairer chunkRepairer; @@ -140,11 +141,14 @@ protected void setup(Context context) throws InterruptedException { /** * Extracts region key ranges from the PhoenixInputSplit. Handles both single-region splits and - * coalesced splits with multiple regions. + * coalesced splits with multiple regions. The mapper processes one region per {@code map()} call, + * driven by {@link PhoenixNoOpPerRangeRecordReader} which emits one record per region so YARN + * gets per-region progress visibility. */ private void extractRegionBoundariesFromSplit(Context context) { PhoenixInputSplit split = (PhoenixInputSplit) context.getInputSplit(); regionKeyRanges = split.getKeyRanges(); + currentRangeIndex = 0; if (regionKeyRanges == null || regionKeyRanges.isEmpty()) { throw new IllegalStateException(String.format( @@ -178,24 +182,31 @@ private Connection createGlobalConnection(Configuration conf) throws SQLExceptio } /** - * Processes mapper region(s) by comparing chunks between source and target clusters. For - * coalesced splits, processes each region sequentially. Gets already processed chunks from - * checkpoint table, resumes from check pointed progress and records final status for chunks & - * mapper (VERIFIED/MISMATCHED). + * Processes one mapper region per call by comparing chunks between source and target clusters. + * The {@link PhoenixNoOpPerRangeRecordReader} emits one record per region in the split, so for a + * coalesced split with N regions this method runs N times - giving YARN per-region progress + * visibility instead of jumping from 0% to 100% only when the whole split completes. Gets already + * processed chunks from checkpoint table, resumes from check pointed progress and records final + * status for chunks & mapper (VERIFIED/MISMATCHED). */ @Override protected void map(NullWritable key, DBInputFormat.NullDBWritable value, Context context) throws IOException, InterruptedException { + LOGGER.info("Mapper being called"); context.getCounter(PhoenixJobCounters.INPUT_RECORDS).increment(1); + if (currentRangeIndex >= regionKeyRanges.size()) { + throw new IllegalStateException( + String.format("map() called %d times but split for table %s only has %d regions", + currentRangeIndex + 1, tableName, regionKeyRanges.size())); + } try { - // Process each region in the split (one or multiple for coalesced splits) - for (KeyRange keyRange : regionKeyRanges) { - byte[] regionStart = keyRange.getLowerRange(); - byte[] regionEnd = keyRange.getUpperRange(); - LOGGER.info("Processing region [{}, {}) from split for table {}", - Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), tableName); - processRegion(regionStart, regionEnd, context); - } + KeyRange keyRange = regionKeyRanges.get(currentRangeIndex++); + byte[] regionStart = keyRange.getLowerRange(); + byte[] regionEnd = keyRange.getUpperRange(); + LOGGER.info("Processing region {}/{} [{}, {}) from split for table {}", currentRangeIndex, + regionKeyRanges.size(), Bytes.toStringBinary(regionStart), Bytes.toStringBinary(regionEnd), + tableName); + processRegion(regionStart, regionEnd, context); } catch (SQLException | IOException e) { tryClosingResources(); throw new RuntimeException("Error processing PhoenixSyncTableMapper", e); diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java index 3bc53d40636..0b523454ea1 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixSyncTableToolIT.java @@ -258,7 +258,7 @@ public void testSyncTableWithDeletedRowsOnTarget() throws Exception { validateSyncCounters(counters, 10, 7, 7, 3); validateMapperCounters(counters, 1, 3); - assertEquals("Should have only 1 Mapper task created with coalescing", 4, counters.taskCreated); + assertEquals("4 regions, no coalescing, 1 record per mapper", 4, counters.taskCreated); // Three target rows were Phoenix-deleted, so dry-run sees them as missing on target. assertRowDriftCounters(counters, 3, 0, 0, 0); @@ -2756,8 +2756,6 @@ public void testSyncTableWithSplitCoalescing() throws Exception { String.valueOf(fromTime), "--to-time", String.valueOf(toTime)); SyncCountersResult counters = getSyncCounters(dryRunJob); - assertEquals("Should have only 1 Mapper task created with coalescing", 1, counters.taskCreated); - validateSyncCounters(counters, 10, 10, 7, 3); validateMapperCounters(counters, 1, 3); assertRowDriftCounters(counters, 0, 0, 3, 0); diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java new file mode 100644 index 00000000000..11071a0222c --- /dev/null +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpPerRangeRecordReaderTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; +import org.apache.phoenix.query.KeyRange; +import org.junit.Before; +import org.junit.Test; + +public class PhoenixNoOpPerRangeRecordReaderTest { + + private PhoenixNoOpPerRangeRecordReader reader; + + @Before + public void setup() { + reader = new PhoenixNoOpPerRangeRecordReader(); + } + + @Test + public void testNextKeyValueDefaultsToOneRangeWhenNotInitialized() { + assertTrue("First call should return true", reader.nextKeyValue()); + assertFalse("Second call should return false", reader.nextKeyValue()); + assertFalse("Third call should still return false", reader.nextKeyValue()); + } + + @Test + public void testNextKeyValueReturnsOncePerRangeForSingleRangeSplit() { + reader.initialize(splitWithRanges(range("a", "b")), null); + + assertTrue("First call should return true", reader.nextKeyValue()); + assertFalse("Second call should return false", reader.nextKeyValue()); + } + + @Test + public void testNextKeyValueReturnsOncePerRangeForCoalescedSplit() { + reader.initialize(splitWithRanges(range("a", "b"), range("b", "c"), range("c", "d")), null); + + assertTrue("Call 1 should return true", reader.nextKeyValue()); + assertTrue("Call 2 should return true", reader.nextKeyValue()); + assertTrue("Call 3 should return true", reader.nextKeyValue()); + assertFalse("Call 4 should return false", reader.nextKeyValue()); + } + + @Test + public void testGetCurrentKeyReturnsNullWritable() { + NullWritable key = reader.getCurrentKey(); + assertNotNull(key); + assertEquals(NullWritable.get(), key); + } + + @Test + public void testGetCurrentValueReturnsNullDBWritable() { + DBWritable value = reader.getCurrentValue(); + assertNotNull(value); + } + + @Test + public void testProgressReflectsRangeConsumption() { + reader.initialize( + splitWithRanges(range("a", "b"), range("b", "c"), range("c", "d"), range("d", "e")), null); + + assertEquals("0/4 ranges consumed", 0.0f, reader.getProgress(), 0.0001f); + reader.nextKeyValue(); + assertEquals("1/4 ranges consumed", 0.25f, reader.getProgress(), 0.0001f); + reader.nextKeyValue(); + assertEquals("2/4 ranges consumed", 0.50f, reader.getProgress(), 0.0001f); + reader.nextKeyValue(); + assertEquals("3/4 ranges consumed", 0.75f, reader.getProgress(), 0.0001f); + reader.nextKeyValue(); + assertEquals("4/4 ranges consumed", 1.0f, reader.getProgress(), 0.0001f); + } + + @Test + public void testProgressDefaultsBeforeInitialize() { + assertEquals("Progress should be 0.0 before consuming record", 0.0f, reader.getProgress(), + 0.0f); + reader.nextKeyValue(); + assertEquals("Progress should be 1.0 after consuming record", 1.0f, reader.getProgress(), 0.0f); + } + + @Test + public void testInitializeIsTolerantOfNonPhoenixSplit() { + reader.initialize(mock(InputSplit.class), null); + assertTrue("Should still emit one record by default", reader.nextKeyValue()); + assertFalse(reader.nextKeyValue()); + } + + @Test + public void testInitializeWithEmptyKeyRangesFallsBackToSingleRecord() { + PhoenixInputSplit split = mock(PhoenixInputSplit.class); + when(split.getKeyRanges()).thenReturn(Collections.emptyList()); + reader.initialize(split, null); + + assertTrue("Should still emit one record by default", reader.nextKeyValue()); + assertFalse(reader.nextKeyValue()); + } + + @Test + public void testCloseDoesNotThrow() { + reader.close(); + } + + private PhoenixInputSplit splitWithRanges(KeyRange... ranges) { + PhoenixInputSplit split = mock(PhoenixInputSplit.class); + List rangeList = new ArrayList<>(ranges.length); + Collections.addAll(rangeList, ranges); + when(split.getKeyRanges()).thenReturn(rangeList); + return split; + } + + private KeyRange range(String start, String stop) { + return KeyRange.getKeyRange(Bytes.toBytes(start), Bytes.toBytes(stop)); + } +} diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java deleted file mode 100644 index 63933a4445e..00000000000 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixNoOpSingleRecordReaderTest.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.phoenix.mapreduce; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapreduce.lib.db.DBWritable; -import org.junit.Before; -import org.junit.Test; - -public class PhoenixNoOpSingleRecordReaderTest { - - private PhoenixNoOpSingleRecordReader reader; - - @Before - public void setup() { - reader = new PhoenixNoOpSingleRecordReader(); - } - - @Test - public void testNextKeyValueReturnsTrueExactlyOnce() { - assertTrue("First call should return true", reader.nextKeyValue()); - assertFalse("Second call should return false", reader.nextKeyValue()); - assertFalse("Third call should still return false", reader.nextKeyValue()); - } - - @Test - public void testGetCurrentKeyReturnsNullWritable() { - NullWritable key = reader.getCurrentKey(); - assertNotNull(key); - assertEquals(NullWritable.get(), key); - } - - @Test - public void testGetCurrentValueReturnsNullDBWritable() { - DBWritable value = reader.getCurrentValue(); - assertNotNull(value); - } - - @Test - public void testProgressReflectsRecordConsumption() { - assertEquals("Progress should be 0.0 before consuming record", 0.0f, reader.getProgress(), - 0.0f); - reader.nextKeyValue(); - assertEquals("Progress should be 1.0 after consuming record", 1.0f, reader.getProgress(), 0.0f); - } - - @Test - public void testInitializeAndCloseDoNotThrow() { - reader.initialize(null, null); - reader.close(); - } -} diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java index 95adb365a0d..1e82292fd52 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/PhoenixSyncTableInputFormatTest.java @@ -296,8 +296,8 @@ public void testUnsortedInputSplits() { public void testCreateRecordReaderReturnsNoOpReader() { RecordReader reader = inputFormat.createRecordReader(null, null); assertNotNull("createRecordReader should never return null", reader); - assertTrue("Should return a PhoenixNoOpSingleRecordReader", - reader instanceof PhoenixNoOpSingleRecordReader); + assertTrue("Should return a PhoenixNoOpPerRangeRecordReader", + reader instanceof PhoenixNoOpPerRangeRecordReader); } @Test