NETCONF-608 - Change Netconf keepalives to not send during large payload replies
[netconf.git] / netconf / sal-netconf-connector / src / main / java / org / opendaylight / netconf / sal / connect / netconf / sal / KeepaliveSalFacade.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.netconf.sal.connect.netconf.sal;
9
10 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfBaseOps.getSourceNode;
11 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_GET_CONFIG_NODEID;
12 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_GET_CONFIG_PATH;
13 import static org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil.NETCONF_RUNNING_QNAME;
14
15 import com.google.common.base.Preconditions;
16 import com.google.common.util.concurrent.FluentFuture;
17 import com.google.common.util.concurrent.FutureCallback;
18 import com.google.common.util.concurrent.Futures;
19 import com.google.common.util.concurrent.MoreExecutors;
20 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
21 import java.util.concurrent.ScheduledExecutorService;
22 import java.util.concurrent.ScheduledFuture;
23 import java.util.concurrent.TimeUnit;
24 import java.util.concurrent.atomic.AtomicBoolean;
25 import javax.annotation.Nonnull;
26 import javax.annotation.Nullable;
27 import org.eclipse.jdt.annotation.NonNull;
28 import org.opendaylight.mdsal.dom.api.DOMActionService;
29 import org.opendaylight.mdsal.dom.api.DOMNotification;
30 import org.opendaylight.mdsal.dom.api.DOMRpcAvailabilityListener;
31 import org.opendaylight.mdsal.dom.api.DOMRpcResult;
32 import org.opendaylight.mdsal.dom.api.DOMRpcService;
33 import org.opendaylight.netconf.sal.connect.api.RemoteDeviceHandler;
34 import org.opendaylight.netconf.sal.connect.netconf.listener.NetconfDeviceCommunicator;
35 import org.opendaylight.netconf.sal.connect.netconf.listener.NetconfSessionPreferences;
36 import org.opendaylight.netconf.sal.connect.netconf.util.NetconfMessageTransformUtil;
37 import org.opendaylight.netconf.sal.connect.util.RemoteDeviceId;
38 import org.opendaylight.yangtools.concepts.ListenerRegistration;
39 import org.opendaylight.yangtools.yang.data.api.schema.ContainerNode;
40 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
41 import org.opendaylight.yangtools.yang.model.api.SchemaContext;
42 import org.opendaylight.yangtools.yang.model.api.SchemaPath;
43 import org.slf4j.Logger;
44 import org.slf4j.LoggerFactory;
45
46 /**
47  * SalFacade proxy that invokes keepalive RPCs to prevent session shutdown from remote device
48  * and to detect incorrect session drops (netconf session is inactive, but TCP/SSH connection is still present).
49  * The keepalive RPC is a get-config with empty filter.
50  */
51 public final class KeepaliveSalFacade implements RemoteDeviceHandler<NetconfSessionPreferences> {
52
53     private static final Logger LOG = LoggerFactory.getLogger(KeepaliveSalFacade.class);
54
55     // 2 minutes keepalive delay by default
56     private static final long DEFAULT_DELAY = TimeUnit.MINUTES.toSeconds(2);
57
58     // 1 minute transaction timeout by default
59     private static final long DEFAULT_TRANSACTION_TIMEOUT_MILLI = TimeUnit.MILLISECONDS.toMillis(60000);
60
61     private final RemoteDeviceId id;
62     private final RemoteDeviceHandler<NetconfSessionPreferences> salFacade;
63     private final ScheduledExecutorService executor;
64     private final long keepaliveDelaySeconds;
65     private final ResetKeepalive resetKeepaliveTask;
66     private final long defaultRequestTimeoutMillis;
67
68     private volatile NetconfDeviceCommunicator listener;
69     private volatile ScheduledFuture<?> currentKeepalive;
70     private volatile DOMRpcService currentDeviceRpc;
71     private final AtomicBoolean lastKeepAliveSucceeded = new AtomicBoolean(false);
72
73     public KeepaliveSalFacade(final RemoteDeviceId id, final RemoteDeviceHandler<NetconfSessionPreferences> salFacade,
74                               final ScheduledExecutorService executor, final long keepaliveDelaySeconds,
75                               final long defaultRequestTimeoutMillis) {
76         this.id = id;
77         this.salFacade = salFacade;
78         this.executor = executor;
79         this.keepaliveDelaySeconds = keepaliveDelaySeconds;
80         this.defaultRequestTimeoutMillis = defaultRequestTimeoutMillis;
81         this.resetKeepaliveTask = new ResetKeepalive();
82     }
83
84     public KeepaliveSalFacade(final RemoteDeviceId id, final RemoteDeviceHandler<NetconfSessionPreferences> salFacade,
85                               final ScheduledExecutorService executor) {
86         this(id, salFacade, executor, DEFAULT_DELAY, DEFAULT_TRANSACTION_TIMEOUT_MILLI);
87     }
88
89     /**
90      * Set the netconf session listener whenever ready.
91      *
92      * @param listener netconf session listener
93      */
94     public void setListener(final NetconfDeviceCommunicator listener) {
95         this.listener = listener;
96     }
97
98     /**
99      * Just cancel current keepalive task.
100      * If its already started, let it finish ... not such a big deal.
101      *
102      * <p>
103      * Then schedule next keepalive.
104      */
105     void resetKeepalive() {
106         LOG.trace("{}: Resetting netconf keepalive timer", id);
107         if (currentKeepalive != null) {
108             currentKeepalive.cancel(false);
109         }
110         scheduleKeepalives();
111     }
112
113     /**
114      * Cancel current keepalive and also reset current deviceRpc.
115      */
116     private void stopKeepalives() {
117         if (currentKeepalive != null) {
118             currentKeepalive.cancel(false);
119         }
120         currentDeviceRpc = null;
121     }
122
123     void reconnect() {
124         Preconditions.checkState(listener != null, "%s: Unable to reconnect, session listener is missing", id);
125         stopKeepalives();
126         LOG.info("{}: Reconnecting inactive netconf session", id);
127         listener.disconnect();
128     }
129
130     @Override
131     public void onDeviceConnected(final SchemaContext remoteSchemaContext,
132                           final NetconfSessionPreferences netconfSessionPreferences, final DOMRpcService deviceRpc) {
133         onDeviceConnected(remoteSchemaContext, netconfSessionPreferences, deviceRpc, null);
134     }
135
136     @Override
137     public void onDeviceConnected(final SchemaContext remoteSchemaContext,
138             final NetconfSessionPreferences netconfSessionPreferences, final DOMRpcService deviceRpc,
139             final DOMActionService deviceAction) {
140         this.currentDeviceRpc = deviceRpc;
141         final DOMRpcService deviceRpc1 =
142                 new KeepaliveDOMRpcService(deviceRpc, resetKeepaliveTask, defaultRequestTimeoutMillis, executor,
143                         new ResponseWaitingScheduler());
144
145         salFacade.onDeviceConnected(remoteSchemaContext, netconfSessionPreferences, deviceRpc1, deviceAction);
146
147         LOG.debug("{}: Netconf session initiated, starting keepalives", id);
148         scheduleKeepalives();
149     }
150
151     private void scheduleKeepalives() {
152         lastKeepAliveSucceeded.set(true);
153         Preconditions.checkState(currentDeviceRpc != null);
154         LOG.trace("{}: Scheduling keepalives every  {} {}", id, keepaliveDelaySeconds, TimeUnit.SECONDS);
155         currentKeepalive = executor.scheduleWithFixedDelay(new Keepalive(),
156           keepaliveDelaySeconds, keepaliveDelaySeconds, TimeUnit.SECONDS);
157     }
158
159     @Override
160     public void onDeviceDisconnected() {
161         stopKeepalives();
162         salFacade.onDeviceDisconnected();
163     }
164
165     @Override
166     public void onDeviceFailed(final Throwable throwable) {
167         stopKeepalives();
168         salFacade.onDeviceFailed(throwable);
169     }
170
171     @Override
172     public void onNotification(final DOMNotification domNotification) {
173         resetKeepalive();
174         salFacade.onNotification(domNotification);
175     }
176
177     @Override
178     public void close() {
179         stopKeepalives();
180         salFacade.close();
181     }
182
183     // Keepalive RPC static resources
184     private static final ContainerNode KEEPALIVE_PAYLOAD = NetconfMessageTransformUtil.wrap(NETCONF_GET_CONFIG_NODEID,
185             getSourceNode(NETCONF_RUNNING_QNAME), NetconfMessageTransformUtil.EMPTY_FILTER);
186
187     /**
188      * Invoke keepalive RPC and check the response. In case of any received response the keepalive
189      * is considered successful and schedules next keepalive with a fixed delay. If the response is unsuccessful (no
190      * response received, or the rcp could not even be sent) immediate reconnect is triggered as netconf session
191      * is considered inactive/failed.
192      */
193     private class Keepalive implements Runnable, FutureCallback<DOMRpcResult> {
194
195         @Override
196         public void run() {
197             LOG.trace("{}: Invoking keepalive RPC", id);
198
199             try {
200                 final boolean lastJobSucceeded = lastKeepAliveSucceeded.getAndSet(false);
201                 if (!lastJobSucceeded) {
202                     onFailure(new IllegalStateException("Previous keepalive timed out"));
203                 } else {
204                     Futures.addCallback(currentDeviceRpc.invokeRpc(NETCONF_GET_CONFIG_PATH, KEEPALIVE_PAYLOAD), this,
205                                         MoreExecutors.directExecutor());
206                 }
207             } catch (final NullPointerException e) {
208                 LOG.debug("{}: Skipping keepalive while reconnecting", id);
209                 // Empty catch block intentional
210                 // Do nothing. The currentDeviceRpc was null and it means we hit the reconnect window and
211                 // attempted to send keepalive while we were reconnecting. Next keepalive will be scheduled
212                 // after reconnect so no action necessary here.
213             }
214         }
215
216         @SuppressFBWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE",
217                 justification = "Unrecognised NullableDecl")
218         @Override
219         public void onSuccess(final DOMRpcResult result) {
220             // No matter what response we got, rpc-reply or rpc-error,
221             // we got it from device so the netconf session is OK
222             if (result != null && result.getResult() != null) {
223                 lastKeepAliveSucceeded.set(true);
224             }  else if (result != null && result.getErrors() != null) {
225                 LOG.warn("{}: Keepalive RPC failed with error: {}", id, result.getErrors());
226                 lastKeepAliveSucceeded.set(true);
227             } else {
228                 LOG.warn("{} Keepalive RPC returned null with response. Reconnecting netconf session", id);
229                 reconnect();
230             }
231         }
232
233         @Override
234         public void onFailure(@Nonnull final Throwable throwable) {
235             LOG.warn("{}: Keepalive RPC failed. Reconnecting netconf session.", id, throwable);
236             reconnect();
237         }
238     }
239
240     /**
241      * Reset keepalive after each RPC response received.
242      */
243     private class ResetKeepalive implements FutureCallback<DOMRpcResult> {
244         @Override
245         public void onSuccess(@Nullable final DOMRpcResult result) {
246             // No matter what response we got,
247             // rpc-reply or rpc-error, we got it from device so the netconf session is OK.
248             resetKeepalive();
249         }
250
251         @Override
252         public void onFailure(@Nonnull final Throwable throwable) {
253             // User/Application RPC failed (The RPC did not reach the remote device or ..
254             // TODO what other reasons could cause this ?)
255             // There is no point in keeping this session. Reconnect.
256             LOG.warn("{}: Rpc failure detected. Reconnecting netconf session", id, throwable);
257             reconnect();
258         }
259     }
260
261     private final class ResponseWaitingScheduler {
262
263         private ScheduledFuture<?> schedule;
264
265         public void initScheduler(final Runnable runnable) {
266             if (currentKeepalive != null) {
267                 currentKeepalive.cancel(true);
268             } else {
269                 LOG.trace("Keepalive does not exist.");
270             }
271             scheduleKeepalives();
272             //Listening on the result should be done before the keepalive rpc will be send
273             final long delay = (keepaliveDelaySeconds * 1000) - 500;
274             schedule = executor.schedule(runnable, delay, TimeUnit.MILLISECONDS);
275         }
276
277         public void stopScheduler() {
278             if (schedule != null) {
279                 schedule.cancel(true);
280             } else {
281                 LOG.trace("Scheduler does not exist.");
282             }
283         }
284     }
285
286     private static final class ResponseWaiting implements Runnable {
287
288         private final FluentFuture<DOMRpcResult> rpcResultFuture;
289         private final ResponseWaitingScheduler responseWaitingScheduler;
290
291         ResponseWaiting(final ResponseWaitingScheduler responseWaitingScheduler,
292                 final FluentFuture<DOMRpcResult> rpcResultFuture) {
293             this.responseWaitingScheduler = responseWaitingScheduler;
294             this.rpcResultFuture = rpcResultFuture;
295         }
296
297         public void start() {
298             LOG.trace("Start to waiting for result.");
299             responseWaitingScheduler.initScheduler(this);
300         }
301
302         public void stop() {
303             LOG.info("Stop to waiting for result.");
304             responseWaitingScheduler.stopScheduler();
305         }
306
307         @Override
308         public void run() {
309             if (!rpcResultFuture.isCancelled() && !rpcResultFuture.isDone()) {
310                 LOG.trace("Waiting for result");
311                 responseWaitingScheduler.initScheduler(this);
312             } else {
313                 LOG.trace("Result has been cancelled or done.");
314             }
315         }
316     }
317
318     /*
319      * Request timeout task is called once the defaultRequestTimeoutMillis is
320      * reached. At this moment, if the request is not yet finished, we cancel
321      * it.
322      */
323     private static final class RequestTimeoutTask implements Runnable {
324
325         private final FluentFuture<DOMRpcResult> rpcResultFuture;
326         private final ResponseWaiting responseWaiting;
327
328         RequestTimeoutTask(final FluentFuture<DOMRpcResult> rpcResultFuture, final ResponseWaiting responseWaiting) {
329             this.rpcResultFuture = rpcResultFuture;
330             this.responseWaiting = responseWaiting;
331         }
332
333         @Override
334         public void run() {
335             if (!rpcResultFuture.isDone()) {
336                 rpcResultFuture.cancel(true);
337             }
338             if (responseWaiting != null) {
339                 responseWaiting.stop();
340             }
341         }
342     }
343
344     /**
345      * DOMRpcService proxy that attaches reset-keepalive-task and schedule
346      * request-timeout-task to each RPC invocation.
347      */
348     public static final class KeepaliveDOMRpcService implements DOMRpcService {
349
350         private final DOMRpcService deviceRpc;
351         private final ResetKeepalive resetKeepaliveTask;
352         private final long defaultRequestTimeoutMillis;
353         private final ScheduledExecutorService executor;
354         private final ResponseWaitingScheduler responseWaitingScheduler;
355
356         KeepaliveDOMRpcService(final DOMRpcService deviceRpc, final ResetKeepalive resetKeepaliveTask,
357                 final long defaultRequestTimeoutMillis, final ScheduledExecutorService executor,
358                 final ResponseWaitingScheduler responseWaitingScheduler) {
359             this.deviceRpc = deviceRpc;
360             this.resetKeepaliveTask = resetKeepaliveTask;
361             this.defaultRequestTimeoutMillis = defaultRequestTimeoutMillis;
362             this.executor = executor;
363             this.responseWaitingScheduler = responseWaitingScheduler;
364         }
365
366         public DOMRpcService getDeviceRpc() {
367             return deviceRpc;
368         }
369
370         @Nonnull
371         @Override
372         public @NonNull FluentFuture<DOMRpcResult> invokeRpc(@Nonnull final SchemaPath type,
373                                                                       final NormalizedNode<?, ?> input) {
374             final FluentFuture<DOMRpcResult> rpcResultFuture = deviceRpc.invokeRpc(type, input);
375             final ResponseWaiting responseWaiting = new ResponseWaiting(responseWaitingScheduler, rpcResultFuture);
376             responseWaiting.start();
377             rpcResultFuture.addCallback(resetKeepaliveTask, MoreExecutors.directExecutor());
378
379             final RequestTimeoutTask timeoutTask = new RequestTimeoutTask(rpcResultFuture, responseWaiting);
380             executor.schedule(timeoutTask, defaultRequestTimeoutMillis, TimeUnit.MILLISECONDS);
381
382             return rpcResultFuture;
383         }
384
385         @Override
386         public <T extends DOMRpcAvailabilityListener> ListenerRegistration<T> registerRpcListener(
387                 @Nonnull final T listener) {
388             // There is no real communication with the device (yet), no reset here
389             return deviceRpc.registerRpcListener(listener);
390         }
391     }
392 }