7942bf191c2225f66389f18e1373c1405d488594
[netconf.git] / netconf / sal-netconf-connector / src / main / java / org / opendaylight / netconf / sal / connect / netconf / sal / KeepaliveSalFacade.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.netconf.sal.connect.netconf.sal;
9
10 import static com.google.common.base.Preconditions.checkState;
11 import static java.util.Objects.requireNonNull;
12 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfBaseOps.getSourceNode;
13 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_GET_CONFIG_NODEID;
14 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_GET_CONFIG_QNAME;
15 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_RUNNING_QNAME;
16
17 import com.google.common.util.concurrent.FutureCallback;
18 import com.google.common.util.concurrent.Futures;
19 import com.google.common.util.concurrent.ListenableFuture;
20 import com.google.common.util.concurrent.MoreExecutors;
21 import com.google.common.util.concurrent.SettableFuture;
22 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
23 import java.util.Collection;
24 import java.util.concurrent.ScheduledExecutorService;
25 import java.util.concurrent.ScheduledFuture;
26 import java.util.concurrent.TimeUnit;
27 import org.checkerframework.checker.lock.qual.GuardedBy;
28 import org.eclipse.jdt.annotation.NonNull;
29 import org.opendaylight.mdsal.dom.api.DOMActionService;
30 import org.opendaylight.mdsal.dom.api.DOMNotification;
31 import org.opendaylight.mdsal.dom.api.DOMRpcAvailabilityListener;
32 import org.opendaylight.mdsal.dom.api.DOMRpcResult;
33 import org.opendaylight.mdsal.dom.api.DOMRpcService;
34 import org.opendaylight.netconf.sal.connect.api.RemoteDeviceHandler;
35 import org.opendaylight.netconf.sal.connect.netconf.NetconfDeviceSchema;
36 import org.opendaylight.netconf.sal.connect.netconf.listener.NetconfDeviceCommunicator;
37 import org.opendaylight.netconf.sal.connect.netconf.listener.NetconfSessionPreferences;
38 import org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil;
39 import org.opendaylight.netconf.sal.connect.util.RemoteDeviceId;
40 import org.opendaylight.yangtools.concepts.ListenerRegistration;
41 import org.opendaylight.yangtools.yang.common.QName;
42 import org.opendaylight.yangtools.yang.data.api.schema.ContainerNode;
43 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
44 import org.slf4j.Logger;
45 import org.slf4j.LoggerFactory;
46
47 /**
48  * SalFacade proxy that invokes keepalive RPCs to prevent session shutdown from remote device
49  * and to detect incorrect session drops (netconf session is inactive, but TCP/SSH connection is still present).
50  * The keepalive RPC is a get-config with empty filter.
51  */
52 public final class KeepaliveSalFacade implements RemoteDeviceHandler {
53     private static final Logger LOG = LoggerFactory.getLogger(KeepaliveSalFacade.class);
54
55     // 2 minutes keepalive delay by default
56     private static final long DEFAULT_DELAY = TimeUnit.MINUTES.toSeconds(2);
57
58     // 1 minute transaction timeout by default
59     private static final long DEFAULT_TRANSACTION_TIMEOUT_MILLI = TimeUnit.MILLISECONDS.toMillis(60000);
60
61     private final KeepaliveTask keepaliveTask = new KeepaliveTask();
62     private final RemoteDeviceHandler salFacade;
63     private final ScheduledExecutorService executor;
64
65     private final long keepaliveDelaySeconds;
66     private final long timeoutNanos;
67     private final long delayNanos;
68
69     private final RemoteDeviceId id;
70
71     private volatile NetconfDeviceCommunicator listener;
72     private volatile DOMRpcService currentDeviceRpc;
73
74     public KeepaliveSalFacade(final RemoteDeviceId id, final RemoteDeviceHandler salFacade,
75             final ScheduledExecutorService executor, final long keepaliveDelaySeconds,
76             final long requestTimeoutMillis) {
77         this.id = id;
78         this.salFacade = salFacade;
79         this.executor = requireNonNull(executor);
80         this.keepaliveDelaySeconds = keepaliveDelaySeconds;
81         delayNanos = TimeUnit.SECONDS.toNanos(keepaliveDelaySeconds);
82         timeoutNanos = TimeUnit.MILLISECONDS.toNanos(requestTimeoutMillis);
83     }
84
85     public KeepaliveSalFacade(final RemoteDeviceId id, final RemoteDeviceHandler salFacade,
86             final ScheduledExecutorService executor) {
87         this(id, salFacade, executor, DEFAULT_DELAY, DEFAULT_TRANSACTION_TIMEOUT_MILLI);
88     }
89
90     /**
91      * Set the netconf session listener whenever ready.
92      *
93      * @param listener netconf session listener
94      */
95     public void setListener(final NetconfDeviceCommunicator listener) {
96         this.listener = listener;
97     }
98
99     /**
100      * Cancel current keepalive and also reset current deviceRpc.
101      */
102     private synchronized void stopKeepalives() {
103         keepaliveTask.disableKeepalive();
104         currentDeviceRpc = null;
105     }
106
107     void reconnect() {
108         checkState(listener != null, "%s: Unable to reconnect, session listener is missing", id);
109         stopKeepalives();
110         LOG.info("{}: Reconnecting inactive netconf session", id);
111         listener.disconnect();
112     }
113
114     @Override
115     public void onDeviceConnected(final NetconfDeviceSchema deviceSchema,
116             final NetconfSessionPreferences netconfSessionPreferences, final DOMRpcService deviceRpc) {
117         onDeviceConnected(deviceSchema, netconfSessionPreferences, deviceRpc, null);
118     }
119
120     @Override
121     public void onDeviceConnected(final NetconfDeviceSchema deviceSchema,
122             final NetconfSessionPreferences netconfSessionPreferences, final DOMRpcService deviceRpc,
123             final DOMActionService deviceAction) {
124         currentDeviceRpc = requireNonNull(deviceRpc);
125         salFacade.onDeviceConnected(deviceSchema, netconfSessionPreferences,
126             new KeepaliveDOMRpcService(deviceRpc), deviceAction);
127
128         LOG.debug("{}: Netconf session initiated, starting keepalives", id);
129         LOG.trace("{}: Scheduling keepalives every {}s", id, keepaliveDelaySeconds);
130         keepaliveTask.enableKeepalive();
131     }
132
133     @Override
134     public void onDeviceDisconnected() {
135         stopKeepalives();
136         salFacade.onDeviceDisconnected();
137     }
138
139     @Override
140     public void onDeviceFailed(final Throwable throwable) {
141         stopKeepalives();
142         salFacade.onDeviceFailed(throwable);
143     }
144
145     @Override
146     public void onNotification(final DOMNotification domNotification) {
147         keepaliveTask.recordActivity();
148         salFacade.onNotification(domNotification);
149     }
150
151     @Override
152     public void close() {
153         stopKeepalives();
154         salFacade.close();
155     }
156
157     // Keepalive RPC static resources
158     private static final @NonNull ContainerNode KEEPALIVE_PAYLOAD =
159         NetconfMessageTransformUtil.wrap(NETCONF_GET_CONFIG_NODEID,
160             getSourceNode(NETCONF_RUNNING_QNAME), NetconfMessageTransformUtil.EMPTY_FILTER);
161
162     /**
163      * Invoke keepalive RPC and check the response. In case of any received response the keepalive
164      * is considered successful and schedules next keepalive with a fixed delay. If the response is unsuccessful (no
165      * response received, or the rcp could not even be sent) immediate reconnect is triggered as netconf session
166      * is considered inactive/failed.
167      */
168     private final class KeepaliveTask implements Runnable, FutureCallback<DOMRpcResult> {
169         private volatile long lastActivity;
170         @GuardedBy("this")
171         private boolean suppressed;
172
173         KeepaliveTask() {
174             suppressed = false;
175         }
176
177         @Override
178         public void run() {
179             final long local = lastActivity;
180             final long now = System.nanoTime();
181             final long inFutureNanos = local + delayNanos - now;
182             if (inFutureNanos > 0) {
183                 reschedule(inFutureNanos);
184             } else {
185                 sendKeepalive(now);
186             }
187         }
188
189         void recordActivity() {
190             lastActivity = System.nanoTime();
191         }
192
193         synchronized void disableKeepalive() {
194             // unsuppressed -> suppressed
195             suppressed = true;
196         }
197
198         synchronized void enableKeepalive() {
199             recordActivity();
200             if (!suppressed) {
201                 // unscheduled -> unsuppressed
202                 reschedule();
203             } else {
204                 // suppressed -> unsuppressed
205                 suppressed = false;
206             }
207         }
208
209         private synchronized void sendKeepalive(final long now) {
210             if (suppressed) {
211                 // suppressed -> unscheduled
212                 suppressed = false;
213                 return;
214             }
215
216             final var deviceRpc = currentDeviceRpc;
217             if (deviceRpc == null) {
218                 // deviceRpc is null, which means we hit the reconnect window and attempted to send keepalive while
219                 // we were reconnecting. Next keepalive will be scheduled after reconnect so no action necessary here.
220                 LOG.debug("{}: Skipping keepalive while reconnecting", id);
221                 return;
222             }
223
224             LOG.trace("{}: Invoking keepalive RPC", id);
225             final var deviceFuture = deviceRpc.invokeRpc(NETCONF_GET_CONFIG_QNAME, KEEPALIVE_PAYLOAD);
226
227             lastActivity = now;
228             Futures.addCallback(deviceFuture, this, MoreExecutors.directExecutor());
229         }
230
231         @SuppressFBWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE",
232                 justification = "Unrecognised NullableDecl")
233         @Override
234         public void onSuccess(final DOMRpcResult result) {
235             // No matter what response we got, rpc-reply or rpc-error,
236             // we got it from device so the netconf session is OK
237             if (result == null) {
238                 LOG.warn("{} Keepalive RPC returned null with response. Reconnecting netconf session", id);
239                 reconnect();
240                 return;
241             }
242
243             if (result.getResult() != null) {
244                 reschedule();
245             } else {
246                 final Collection<?> errors = result.getErrors();
247                 if (!errors.isEmpty()) {
248                     LOG.warn("{}: Keepalive RPC failed with error: {}", id, errors);
249                     reschedule();
250                 } else {
251                     LOG.warn("{} Keepalive RPC returned null with response. Reconnecting netconf session", id);
252                     reconnect();
253                 }
254             }
255         }
256
257         @Override
258         public void onFailure(final Throwable throwable) {
259             LOG.warn("{}: Keepalive RPC failed. Reconnecting netconf session.", id, throwable);
260             reconnect();
261         }
262
263         private void reschedule() {
264             reschedule(delayNanos);
265         }
266
267         private void reschedule(final long delay) {
268             executor.schedule(this, delay, TimeUnit.NANOSECONDS);
269         }
270     }
271
272     /*
273      * Request timeout task is called once the requestTimeoutMillis is reached. At that moment, if the request is not
274      * yet finished, we cancel it.
275      */
276     private final class RequestTimeoutTask implements FutureCallback<DOMRpcResult>, Runnable {
277         private final @NonNull SettableFuture<DOMRpcResult> userFuture = SettableFuture.create();
278         private final @NonNull ListenableFuture<? extends DOMRpcResult> deviceFuture;
279
280         RequestTimeoutTask(final ListenableFuture<? extends DOMRpcResult> rpcResultFuture) {
281             deviceFuture = requireNonNull(rpcResultFuture);
282             Futures.addCallback(deviceFuture, this, MoreExecutors.directExecutor());
283         }
284
285         @Override
286         public void run() {
287             deviceFuture.cancel(true);
288             userFuture.cancel(false);
289             keepaliveTask.enableKeepalive();
290         }
291
292         @Override
293         public void onSuccess(final DOMRpcResult result) {
294             // No matter what response we got,
295             // rpc-reply or rpc-error, we got it from device so the netconf session is OK.
296             userFuture.set(result);
297             keepaliveTask.enableKeepalive();
298         }
299
300         @Override
301         public void onFailure(final Throwable throwable) {
302             // User/Application RPC failed (The RPC did not reach the remote device or ...)
303             // FIXME: what other reasons could cause this ?)
304             LOG.warn("{}: Rpc failure detected. Reconnecting netconf session", id, throwable);
305             userFuture.setException(throwable);
306             // There is no point in keeping this session. Reconnect.
307             reconnect();
308         }
309     }
310
311     /**
312      * DOMRpcService proxy that attaches reset-keepalive-task and schedule
313      * request-timeout-task to each RPC invocation.
314      */
315     public final class KeepaliveDOMRpcService implements DOMRpcService {
316         private final @NonNull DOMRpcService deviceRpc;
317
318         KeepaliveDOMRpcService(final DOMRpcService deviceRpc) {
319             this.deviceRpc = requireNonNull(deviceRpc);
320         }
321
322         public @NonNull DOMRpcService getDeviceRpc() {
323             return deviceRpc;
324         }
325
326         @Override
327         public ListenableFuture<? extends DOMRpcResult> invokeRpc(final QName type, final NormalizedNode input) {
328             keepaliveTask.disableKeepalive();
329             final ListenableFuture<? extends DOMRpcResult> deviceFuture = deviceRpc.invokeRpc(type, input);
330
331             final RequestTimeoutTask timeout = new RequestTimeoutTask(deviceFuture);
332             final ScheduledFuture<?> timeoutFuture = executor.schedule(timeout, timeoutNanos, TimeUnit.NANOSECONDS);
333             deviceFuture.addListener(() -> timeoutFuture.cancel(false), MoreExecutors.directExecutor());
334
335             return timeout.userFuture;
336         }
337
338         @Override
339         public <T extends DOMRpcAvailabilityListener> ListenerRegistration<T> registerRpcListener(final T rpcListener) {
340             // There is no real communication with the device (yet), hence recordActivity() or anything
341             return deviceRpc.registerRpcListener(rpcListener);
342         }
343     }
344 }