There are a couple of failures in interacting with RemoteDeviceHandler
API (which is completely undocumented).
The API design (and interactions in implementations) assume that:
- onDeviceConnected()/onDeviceDisconnected() manage a reconnecting
process -- i.e. when 'disconnected' it is assumed the device might
come back again
- onDeviceFailed() is the hard alternative, indicating the node is node
coming back
This is then picked up and routed through NetconfTopologyDeviceSalFacade
to NetconfDeviceTopologyAdapter, which makes assumptions and propagates
them to the datastore.
JIRA: NETCONF-1097
Change-Id: Ib148a617b52e775261212aa3a809d1ab450d7d13
Signed-off-by: Ivan Hrasko <[email protected]>
Signed-off-by: Robert Varga <[email protected]>
--- /dev/null
+/*
+ * Copyright (c) 2023 PANTHEON.tech, s.r.o. and others. All rights reserved.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v1.0 which accompanies this distribution,
+ * and is available at http://www.eclipse.org/legal/epl-v10.html
+ */
+package org.opendaylight.netconf.topology.spi;
+
+import static java.util.Objects.requireNonNull;
+
+import org.opendaylight.netconf.client.mdsal.api.RemoteDeviceHandler;
+
+/**
+ * Reported to {@link RemoteDeviceHandler#onDeviceFailed(Throwable)} downstream of {@link NetconfNodeHandler}. Indicates
+ * the corresponding NETCONF topology node will never become connected.
+ */
+public final class ConnectGivenUpException extends Exception {
+ @java.io.Serial
+ private static final long serialVersionUID = 1L;
+
+ public ConnectGivenUpException(final String message) {
+ super(requireNonNull(message));
+ }
+}
}
private void connectComplete(final Future<?> future) {
- final Throwable cause;
-
// Locked manipulation of internal state
synchronized (this) {
// A quick sanity check
}
currentTask = null;
- cause = future.cause();
+ final var cause = future.cause();
if (cause == null || cause instanceof CancellationException) {
// Success or cancellation, nothing else to do.
// In case of success the rest of the setup is driven by RemoteDeviceHandler callbacks
}
// We are invoking callbacks, do not hold locks
- onDeviceFailed(cause);
+ reconnectOrFail();
}
@Override
@Override
public void onDeviceDisconnected() {
delegate.onDeviceDisconnected();
- scheduleReconnect();
+ reconnectOrFail();
}
@Override
public void onDeviceFailed(final Throwable throwable) {
+ // We have not reported onDeviceConnected(), so from the view of delete we are still connecting
LOG.debug("Connection attempt failed", throwable);
- delegate.onDeviceFailed(throwable);
- scheduleReconnect();
+ reconnectOrFail();
}
@Override
delegate.onNotification(domNotification);
}
- private synchronized void scheduleReconnect() {
+ private void reconnectOrFail() {
+ final var ex = scheduleReconnect();
+ if (ex != null) {
+ delegate.onDeviceFailed(ex);
+ }
+ }
+
+ private synchronized Exception scheduleReconnect() {
if (isClosed()) {
- return;
+ return null;
}
final long delayMillis;
// We have exceeded the number of connection attempts
if (maxAttempts > 0 && attempts >= maxAttempts) {
LOG.info("Failed to connect {} after {} attempts, not attempting", deviceId, attempts);
- return;
+ return new ConnectGivenUpException("Given up connecting " + deviceId + " after " + attempts + " attempts");
}
// First connection attempt gets initialized to minimum sleep, each subsequent is exponentially backed off
lastSleep = delayMillis;
LOG.debug("Retrying {} connection attempt {} after {} milliseconds", deviceId, attempts, delayMillis);
- // If we are not sleeping at all, return an already-succeeded future
- if (delayMillis == 0) {
- lockedConnect();
- return;
- }
-
- // Schedule a task for the right time. It will also clear the flag.
+ // Schedule a task for the right time. We always go through the executor to eliminate the special case of
+ // immediate reconnect. While we could check and got to lockedConnect(), it makes for a rare special case.
+ // That special case makes for more code paths to test and introduces additional uncertainty as to whether
+ // the attempt was executed on on this thread or not.
currentTask = eventExecutor.schedule(this::reconnect, delayMillis, TimeUnit.MILLISECONDS);
+ return null;
}
private synchronized void reconnect() {
*/
package org.opendaylight.netconf.topology.spi;
+import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
}
@Before
- public void setUp() {
+ public void before() {
// Instantiate the handler
handler = new NetconfNodeHandler(clientDispatcher, eventExecutor, keepaliveExecutor, BASE_SCHEMAS,
schemaManager, processingExecutor,
assertEquals(0, handler.attempts());
}
+ @Test
+ public void failedSchemaCausesReconnect() {
+ assertSuccessfulConnect();
+ assertEquals(1, handler.attempts());
+
+ // Note: this will count as a second attempt
+ doReturn(scheduleFuture).when(eventExecutor).schedule(scheduleCaptor.capture(), eq(150L),
+ eq(TimeUnit.MILLISECONDS));
+ handler.onDeviceFailed(new AssertionError("schema failure"));
+
+ assertEquals(2, handler.attempts());
+
+ // and when we run the task, we get a clientDispatcher invocation, but attempts are still the same
+ scheduleCaptor.getValue().run();
+ verify(clientDispatcher, times(2)).createClient(any());
+ assertEquals(2, handler.attempts());
+ }
+
@Test
public void downAfterUpCausesReconnect() {
// Let's borrow common bits
handler.connect();
assertEquals(1, handler.attempts());
- // FIXME: NETCONF-1097 remove this stubbing
- final var firstFailure = new AssertionError("first");
- doNothing().when(delegate).onDeviceFailed(firstFailure);
doReturn(scheduleFuture).when(eventExecutor).schedule(scheduleCaptor.capture(), eq(150L),
eq(TimeUnit.MILLISECONDS));
- firstPromise.setFailure(firstFailure);
+ firstPromise.setFailure(new AssertionError("first"));
assertEquals(2, handler.attempts());
assertEquals(2, handler.attempts());
// now report the second failure
- final var secondFailure = new AssertionError("second");
- doNothing().when(delegate).onDeviceFailed(secondFailure);
- secondPromise.setFailure(secondFailure);
+ final var throwableCaptor = ArgumentCaptor.forClass(Throwable.class);
+ doNothing().when(delegate).onDeviceFailed(throwableCaptor.capture());
+ secondPromise.setFailure(new AssertionError("second"));
+ assertThat(throwableCaptor.getValue(), instanceOf(ConnectGivenUpException.class));
// but nothing else happens
assertEquals(2, handler.attempts());