diff --git a/openam-authentication/openam-auth-radius/src/test/java/com/sun/identity/authentication/modules/radius/client/RadiusConnSecurityTest.java b/openam-authentication/openam-auth-radius/src/test/java/com/sun/identity/authentication/modules/radius/client/RadiusConnSecurityTest.java index b4a04220bd..c697e5301a 100644 --- a/openam-authentication/openam-auth-radius/src/test/java/com/sun/identity/authentication/modules/radius/client/RadiusConnSecurityTest.java +++ b/openam-authentication/openam-auth-radius/src/test/java/com/sun/identity/authentication/modules/radius/client/RadiusConnSecurityTest.java @@ -65,6 +65,15 @@ public class RadiusConnSecurityTest { @BeforeMethod public void startServerSocket() throws IOException { + // RadiusConn keeps the server-availability map and the health-check timer in static + // fields that outlive any single connection. Left untouched they leak across test + // methods: e.g. failoverToSecondary() permanently records its dead primary as OFFLINE + // and schedules a background RADIUSMonitor that keeps probing. A later test can then + // see getOnlineServer() return null ("No RADIUS server is online.") when an ephemeral + // port number is reused, or have its manually-driven monitor.run() race the background + // one. Reset the shared statics so every method starts from a clean slate. + resetRadiusConnStatics(); + serverSocket = new DatagramSocket(0, InetAddress.getByName("127.0.0.1")); serverSocket.setSoTimeout(5000); serverRunning = true; @@ -84,6 +93,37 @@ public void stopServer() { Thread.currentThread().interrupt(); } } + // Tear down any state/timer this test scheduled so it cannot bleed into the next one. + resetRadiusConnStatics(); + } + + /** + * Cancel any scheduled health-check monitor and clear the static server-status map on + * {@link RadiusConn}, isolating each test method from the shared singleton state. + */ + private static void resetRadiusConnStatics() { + try { + final java.lang.reflect.Field monitorField = RadiusConn.class.getDeclaredField("serverMonitor"); + monitorField.setAccessible(true); + final Object monitor = monitorField.get(null); + if (monitor != null) { + // RADIUSMonitor extends GeneralTaskRunnable, whose cancel() unschedules it from + // the shared SystemTimer so no background thread keeps probing. + monitor.getClass().getMethod("cancel").invoke(monitor); + monitorField.set(null, null); + } + + final java.lang.reflect.Field statusField = RadiusConn.class.getDeclaredField("SERVER_STATUS"); + statusField.setAccessible(true); + @SuppressWarnings("unchecked") + final java.util.Map serverStatus = + (java.util.Map) statusField.get(null); + synchronized (serverStatus) { + serverStatus.clear(); + } + } catch (ReflectiveOperationException roe) { + throw new IllegalStateException("Unable to reset RadiusConn static state for test isolation", roe); + } } private RadiusConn newClient() throws IOException { @@ -93,8 +133,11 @@ private RadiusConn newClient() throws IOException { private RadiusConn newClient(boolean strict) throws IOException { final Set servers = new HashSet<>(); servers.add(new RADIUSServer("127.0.0.1", serverSocket.getLocalPort())); - // 2-second timeout; tests respond synchronously well within that. - return new RadiusConn(servers, Collections.emptySet(), SHARED_SECRET, 2, null, 60, strict); + // 10-second read timeout (defence-in-depth; the real CI de-flake is the responder no + // longer dying on its own read timeout - see startResponder). Every test using this client + // receives a response and returns as soon as it arrives, so a generous timeout never slows + // the happy path; it only adds margin against scheduling jitter on a loaded CI runner. + return new RadiusConn(servers, Collections.emptySet(), SHARED_SECRET, 10, null, 60, strict); } /** Start a background responder that crafts a reply per the supplied lambda. */ @@ -106,6 +149,15 @@ private void startResponder(Responder responder) { final DatagramPacket dp = new DatagramPacket(buf, buf.length); try { serverSocket.receive(dp); + } catch (java.net.SocketTimeoutException ste) { + // The server socket carries a 5s SO_TIMEOUT (see @BeforeMethod). A read + // timeout only means no request has arrived *yet* - it must NOT kill the + // responder. The client can legitimately be slow to send its first packet + // (cold-JVM class loading, or InetAddress.getLocalHost() blocking on reverse + // DNS on a CI host), and if the responder died here it would never answer the + // request that eventually arrives, leaving the client to time out and report + // "No RADIUS server is online." Keep waiting instead. + continue; } catch (IOException e) { if (!serverRunning) { return; diff --git a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/BaseTest.java b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/BaseTest.java index 324add2b71..d0e17b53df 100644 --- a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/BaseTest.java +++ b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/BaseTest.java @@ -32,6 +32,7 @@ import org.testng.annotations.AfterClass; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeClass; +import org.testng.annotations.Listeners; import java.io.BufferedReader; import java.io.File; @@ -42,6 +43,7 @@ import java.time.Duration; import java.util.List; +@Listeners(RetryListener.class) public abstract class BaseTest { WebDriver driver; WebDriverWait wait; diff --git a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/IT_SetupWithOpenDJ.java b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/IT_SetupWithOpenDJ.java index 81ef857826..279e2f228a 100644 --- a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/IT_SetupWithOpenDJ.java +++ b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/IT_SetupWithOpenDJ.java @@ -146,7 +146,12 @@ private void testOpenAmInstallation(String openamUrl, Integer opendjPort) throws wait.until(ExpectedConditions.elementToBeClickable(By.id("writeConfigButton"))).click(); - WebDriverWait waitComplete = new WebDriverWait(driver, Duration.ofSeconds(600)); + // Generous completion budget: this variant installs against an external OpenDJ container and + // (for testSetupWithOpendj) makes OpenAM create the base entry itself, which under CI + // resource contention can push the install past the previous 10-minute ceiling. Combined + // with the suite-wide retry (see RetryListener) this keeps transient infra slowness from + // failing the build. + WebDriverWait waitComplete = new WebDriverWait(driver, Duration.ofSeconds(900)); try { WebElement proceedToConsole = waitComplete.until(visibilityOfAnyElement(By.cssSelector("#confComplete a"))); proceedToConsole.click(); diff --git a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryAnalyzer.java b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryAnalyzer.java new file mode 100644 index 0000000000..d6c12aff7b --- /dev/null +++ b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryAnalyzer.java @@ -0,0 +1,56 @@ +/* + * The contents of this file are subject to the terms of the Common Development and + * Distribution License (the License). You may not use this file except in compliance with the + * License. + * + * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the + * specific language governing permission and limitations under the License. + * + * When distributing Covered Software, include this CDDL Header Notice in each file and include + * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL + * Header, with the fields enclosed by brackets [] replaced by your own identifying + * information: "Portions copyright [year] [name of copyright owner]". + * + * Copyright 2026 3A Systems LLC. + */ + +package org.openidentityplatform.openam.test.integration; + +import org.testng.IRetryAnalyzer; +import org.testng.ITestResult; + +/** + * Retries a failed integration-test method a small number of times before giving up. + * + *

The Selenium/Docker/Cargo integration tests drive a full OpenAM installation through a real + * browser against a Tomcat and an OpenDJ container. They are inherently sensitive to resource + * contention on a shared CI runner: a container that is slow to become healthy, a Tomcat that is + * slow to deploy the heavy OpenAM web app, or a ChromeDriver command that stalls under load can all + * make an otherwise-correct run time out. Rather than fail the whole build on such transient + * infrastructure hiccups, give each test method one more attempt (each retry starts from a clean + * config and a freshly started OpenAM instance - see {@link CargoBaseTest}). + * + *

The retry budget can be overridden with the {@code it.retry.count} system property; it defaults + * to a single retry (two attempts total) so a genuine, reproducible failure still fails the build + * without exploding CI time on a test that takes minutes per attempt. + */ +public class RetryAnalyzer implements IRetryAnalyzer { + + private static final int MAX_RETRIES = + Integer.getInteger("it.retry.count", 1); + + private int attempts = 0; + + @Override + public boolean retry(ITestResult result) { + if (attempts < MAX_RETRIES) { + attempts++; + System.err.println("Retrying " + result.getTestClass().getName() + "." + + result.getMethod().getMethodName() + " (attempt " + (attempts + 1) + + " of " + (MAX_RETRIES + 1) + ") after failure: " + + (result.getThrowable() == null ? "n/a" : result.getThrowable().toString())); + return true; + } + return false; + } +} diff --git a/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryListener.java b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryListener.java new file mode 100644 index 0000000000..f7a4e7f66e --- /dev/null +++ b/openam-server/src/test/java/org/openidentityplatform/openam/test/integration/RetryListener.java @@ -0,0 +1,41 @@ +/* + * The contents of this file are subject to the terms of the Common Development and + * Distribution License (the License). You may not use this file except in compliance with the + * License. + * + * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the + * specific language governing permission and limitations under the License. + * + * When distributing Covered Software, include this CDDL Header Notice in each file and include + * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL + * Header, with the fields enclosed by brackets [] replaced by your own identifying + * information: "Portions copyright [year] [name of copyright owner]". + * + * Copyright 2026 3A Systems LLC. + */ + +package org.openidentityplatform.openam.test.integration; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +import org.testng.IAnnotationTransformer; +import org.testng.annotations.ITestAnnotation; + +/** + * Installs {@link RetryAnalyzer} on every integration-test method automatically, so the flaky-CI + * retry behaviour does not have to be repeated on each {@code @Test}. Registered once via + * {@code @Listeners} on {@link BaseTest}; any test method that already declares its own retry + * analyzer is left untouched. + */ +public class RetryListener implements IAnnotationTransformer { + + @Override + public void transform(ITestAnnotation annotation, Class testClass, Constructor testConstructor, + Method testMethod) { + // TestNG 6.x API: a method with no explicit retry analyzer returns null here. + if (annotation.getRetryAnalyzer() == null) { + annotation.setRetryAnalyzer(RetryAnalyzer.class); + } + } +}