Tear down NETCONF session on keepalive timeout 00/108200/14
authorRobert Varga <robert.varga@pantheon.tech>
Wed, 15 Nov 2023 11:03:53 +0000 (12:03 +0100)
committerRobert Varga <robert.varga@pantheon.tech>
Wed, 15 Nov 2023 11:48:50 +0000 (12:48 +0100)
The keepalive mechanism guards user-level invocations from timeouts, but
does not proactively react to the device failing to respond to the
keepalive request itself.

Fix this by adding a timeout for the keepalive request as well.

JIRA: NETCONF-966
Change-Id: I39f8ba16a3f85c450f601f0ce42201e2bdccc1d0
Signed-off-by: Martin Sunal <martin@paxet.io>
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
plugins/netconf-client-mdsal/src/main/java/org/opendaylight/netconf/client/mdsal/spi/KeepaliveSalFacade.java
plugins/netconf-client-mdsal/src/test/java/org/opendaylight/netconf/client/mdsal/spi/KeepaliveSalFacadeTest.java

index b3945a4536c5dcd46e561758f60c8ef6ffa1c5a5..a974e057732a892e7edf42584fdc7617e962d5b5 100644 (file)
@@ -19,6 +19,7 @@ import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.SettableFuture;
+import java.util.concurrent.CancellationException;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import javax.xml.transform.dom.DOMSource;
@@ -181,11 +182,15 @@ public final class KeepaliveSalFacade implements RemoteDeviceHandler {
 
     private <T> @NonNull ListenableFuture<T> scheduleTimeout(final ListenableFuture<T> invokeFuture) {
         final var timeout = new RequestTimeoutTask<>(invokeFuture);
-        final var timeoutFuture = executor.schedule(timeout, timeoutNanos, TimeUnit.NANOSECONDS);
-        invokeFuture.addListener(() -> timeoutFuture.cancel(false), MoreExecutors.directExecutor());
+        scheduleTimeout(invokeFuture, timeout);
         return timeout.userFuture;
     }
 
+    private void scheduleTimeout(final ListenableFuture<?> future, final TimeoutTask timeout) {
+        final var timeoutFuture = executor.schedule(timeout, timeoutNanos, TimeUnit.NANOSECONDS);
+        future.addListener(() -> timeoutFuture.cancel(false), MoreExecutors.directExecutor());
+    }
+
     /**
      * Invoke keepalive RPC and check the response. In case of any received response the keepalive
      * is considered successful and schedules next keepalive with a fixed delay. If the response is unsuccessful (no
@@ -251,6 +256,8 @@ public final class KeepaliveSalFacade implements RemoteDeviceHandler {
             LOG.trace("{}: Invoking keepalive RPC", id);
             final var deviceFuture = devRpc.invokeNetconf(NETCONF_GET_CONFIG_QNAME, KEEPALIVE_PAYLOAD);
             lastActivity = now;
+
+            scheduleTimeout(deviceFuture, new TimeoutTask(deviceFuture));
             Futures.addCallback(deviceFuture, this, MoreExecutors.directExecutor());
         }
 
@@ -280,7 +287,11 @@ public final class KeepaliveSalFacade implements RemoteDeviceHandler {
 
         @Override
         public void onFailure(final Throwable throwable) {
-            LOG.warn("{}: Keepalive RPC failed. Reconnecting netconf session.", id, throwable);
+            if (throwable instanceof CancellationException) {
+                LOG.warn("{}: Keepalive RPC timed out. Reconnecting netconf session.", id);
+            } else {
+                LOG.warn("{}: Keepalive RPC failed. Reconnecting netconf session.", id, throwable);
+            }
             reconnect();
         }
 
@@ -293,25 +304,32 @@ public final class KeepaliveSalFacade implements RemoteDeviceHandler {
         }
     }
 
+    private static class TimeoutTask implements Runnable {
+        private final ListenableFuture<?> future;
+
+        TimeoutTask(final ListenableFuture<?> future) {
+            this.future = requireNonNull(future);
+        }
+
+        @Override
+        public final void run() {
+            future.cancel(true);
+        }
+    }
+
     /*
      * Request timeout task is called once the requestTimeoutMillis is reached. At that moment, if the request is not
      * yet finished, we cancel it.
      */
-    private final class RequestTimeoutTask<V> implements FutureCallback<V>, Runnable {
+    private final class RequestTimeoutTask<V> extends TimeoutTask implements FutureCallback<V> {
         private final @NonNull SettableFuture<V> userFuture = SettableFuture.create();
-        private final @NonNull ListenableFuture<? extends V> rpcResultFuture;
 
         RequestTimeoutTask(final ListenableFuture<V> rpcResultFuture) {
-            this.rpcResultFuture = requireNonNull(rpcResultFuture);
+            super(rpcResultFuture);
+            // Note: this will also wire run() to onFailure()
             Futures.addCallback(rpcResultFuture, this, MoreExecutors.directExecutor());
         }
 
-        @Override
-        public void run() {
-            // Note: this will loop to onFailure()
-            rpcResultFuture.cancel(true);
-        }
-
         @Override
         public void onSuccess(final V result) {
             // No matter what response we got,
@@ -322,9 +340,12 @@ public final class KeepaliveSalFacade implements RemoteDeviceHandler {
 
         @Override
         public void onFailure(final Throwable throwable) {
-            // User/Application RPC failed (The RPC did not reach the remote device or ...)
-            // FIXME: what other reasons could cause this ?)
-            LOG.warn("{}: Rpc failure detected. Reconnecting netconf session", id, throwable);
+            // User/Application RPC failed (The RPC did not reach the remote device or it has timeed out)
+            if (throwable instanceof CancellationException) {
+                LOG.warn("{}: RPC timed out. Reconnecting netconf session", id);
+            } else {
+                LOG.warn("{}: RPC failed. Reconnecting netconf session", id, throwable);
+            }
             userFuture.setException(throwable);
             // There is no point in keeping this session. Reconnect.
             reconnect();
index ff0e716637bcdeb951dfb9575fc4f6e85d360685..392d83438b30d9e584be635427980756db46dc01 100644 (file)
@@ -20,6 +20,7 @@ import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
 import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.SettableFuture;
 import java.net.InetSocketAddress;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
@@ -137,4 +138,18 @@ public class KeepaliveSalFacadeTest {
 
         verify(listener, times(1)).disconnect();
     }
+
+    @Test
+    public void testKeepaliveRpcResponseTimeout() {
+        final var neverResolvedFuture = SettableFuture.create();
+        doReturn(neverResolvedFuture).when(deviceRpc).invokeNetconf(any(), any());
+
+        keepaliveSalFacade.onDeviceConnected(null, null, new RemoteDeviceServices(deviceRpc, null));
+
+        verify(underlyingSalFacade).onDeviceConnected(isNull(), isNull(), any(RemoteDeviceServices.class));
+
+        // Should disconnect the session because RPC result future is never resolved and keepalive delay is 1 sec
+        verify(listener, timeout(115000).times(1)).disconnect();
+        verify(deviceRpc, times(1)).invokeNetconf(any(), any());
+    }
 }