Bug 7499 - ensure statistics scheduler does not die and keep trying while the control...
[openflowplugin.git] / openflowplugin-impl / src / main / java / org / opendaylight / openflowplugin / impl / statistics / StatisticsManagerImpl.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8
9 package org.opendaylight.openflowplugin.impl.statistics;
10
11 import com.google.common.annotations.VisibleForTesting;
12 import com.google.common.base.Preconditions;
13 import com.google.common.base.Verify;
14 import com.google.common.collect.Iterators;
15 import com.google.common.util.concurrent.FutureCallback;
16 import com.google.common.util.concurrent.Futures;
17 import com.google.common.util.concurrent.ListenableFuture;
18 import io.netty.util.HashedWheelTimer;
19 import io.netty.util.Timeout;
20 import io.netty.util.TimerTask;
21 import java.util.Iterator;
22 import java.util.Map;
23 import java.util.Optional;
24 import java.util.concurrent.ConcurrentHashMap;
25 import java.util.concurrent.ConcurrentMap;
26 import java.util.concurrent.Future;
27 import java.util.concurrent.Semaphore;
28 import java.util.concurrent.TimeUnit;
29 import javax.annotation.Nonnull;
30 import org.opendaylight.controller.sal.binding.api.BindingAwareBroker;
31 import org.opendaylight.controller.sal.binding.api.RpcProviderRegistry;
32 import org.opendaylight.openflowplugin.api.ConnectionException;
33 import org.opendaylight.openflowplugin.api.openflow.OFPContext;
34 import org.opendaylight.openflowplugin.api.openflow.device.DeviceContext;
35 import org.opendaylight.openflowplugin.api.openflow.device.DeviceInfo;
36 import org.opendaylight.openflowplugin.api.openflow.device.DeviceState;
37 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceInitializationPhaseHandler;
38 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceTerminationPhaseHandler;
39 import org.opendaylight.openflowplugin.api.openflow.lifecycle.LifecycleService;
40 import org.opendaylight.openflowplugin.api.openflow.rpc.ItemLifeCycleSource;
41 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsContext;
42 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsManager;
43 import org.opendaylight.openflowplugin.openflow.md.core.sal.convertor.ConvertorExecutor;
44 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.ChangeStatisticsWorkModeInput;
45 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutput;
46 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutputBuilder;
47 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsManagerControlService;
48 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsWorkMode;
49 import org.opendaylight.yangtools.yang.common.RpcError;
50 import org.opendaylight.yangtools.yang.common.RpcResult;
51 import org.opendaylight.yangtools.yang.common.RpcResultBuilder;
52 import org.slf4j.Logger;
53 import org.slf4j.LoggerFactory;
54
55 public class StatisticsManagerImpl implements StatisticsManager, StatisticsManagerControlService {
56
57     private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagerImpl.class);
58
59     private static final long DEFAULT_STATS_TIMEOUT_SEC = 50L;
60     private final ConvertorExecutor converterExecutor;
61
62     private DeviceInitializationPhaseHandler deviceInitPhaseHandler;
63     private DeviceTerminationPhaseHandler deviceTerminationPhaseHandler;
64
65     private final ConcurrentMap<DeviceInfo, StatisticsContext> contexts = new ConcurrentHashMap<>();
66
67     private static long basicTimerDelay;
68     private static long currentTimerDelay;
69     private static long maximumTimerDelay; //wait time for next statistics
70
71     private StatisticsWorkMode workMode = StatisticsWorkMode.COLLECTALL;
72     private final Semaphore workModeGuard = new Semaphore(1, true);
73     private boolean isStatisticsPollingOn;
74     private BindingAwareBroker.RpcRegistration<StatisticsManagerControlService> controlServiceRegistration;
75
76     private final HashedWheelTimer hashedWheelTimer;
77
78     @Override
79     public void setDeviceInitializationPhaseHandler(final DeviceInitializationPhaseHandler handler) {
80         deviceInitPhaseHandler = handler;
81     }
82
83     public StatisticsManagerImpl(final RpcProviderRegistry rpcProviderRegistry,
84                                  final boolean isStatisticsPollingOn,
85                                  final HashedWheelTimer hashedWheelTimer,
86                                  final ConvertorExecutor convertorExecutor,
87                                  final long basicTimerDelay,
88                                  final long maximumTimerDelay) {
89         Preconditions.checkArgument(rpcProviderRegistry != null);
90             this.converterExecutor = convertorExecutor;
91         this.controlServiceRegistration = Preconditions.checkNotNull(
92                 rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this)
93         );
94         this.isStatisticsPollingOn = isStatisticsPollingOn;
95         this.basicTimerDelay = basicTimerDelay;
96         this.currentTimerDelay = basicTimerDelay;
97         this.maximumTimerDelay = maximumTimerDelay;
98         this.hashedWheelTimer = hashedWheelTimer;
99     }
100
101     @Override
102     public void onDeviceContextLevelUp(final DeviceInfo deviceInfo,
103                                        final LifecycleService lifecycleService) throws Exception {
104
105         final StatisticsContext statisticsContext =
106                 new StatisticsContextImpl(
107                         deviceInfo,
108                         isStatisticsPollingOn,
109                         lifecycleService,
110                         converterExecutor,
111                         this);
112
113         Verify.verify(
114                 contexts.putIfAbsent(deviceInfo, statisticsContext) == null,
115                 "StatisticsCtx still not closed for Node {}", deviceInfo.getLOGValue()
116         );
117
118         lifecycleService.setStatContext(statisticsContext);
119         lifecycleService.registerDeviceRemovedHandler(this);
120         deviceInitPhaseHandler.onDeviceContextLevelUp(deviceInfo, lifecycleService);
121     }
122
123     @VisibleForTesting
124     void pollStatistics(final DeviceState deviceState,
125                         final StatisticsContext statisticsContext,
126                         final TimeCounter timeCounter,
127                         final DeviceInfo deviceInfo) {
128
129         if (!statisticsContext.isSchedulingEnabled()) {
130             if (LOG.isDebugEnabled()) {
131                 LOG.debug("Disabled statistics scheduling for device: {}", deviceInfo.getNodeId().getValue());
132             }
133             return;
134         }
135
136         if (LOG.isDebugEnabled()) {
137             LOG.debug("POLLING ALL STATISTICS for device: {}", deviceInfo.getNodeId());
138         }
139
140         timeCounter.markStart();
141         final ListenableFuture<Boolean> deviceStatisticsCollectionFuture = statisticsContext.gatherDynamicData();
142         Futures.addCallback(deviceStatisticsCollectionFuture, new FutureCallback<Boolean>() {
143             @Override
144             public void onSuccess(final Boolean o) {
145                 timeCounter.addTimeMark();
146                 calculateTimerDelay(timeCounter);
147                 scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
148             }
149
150             @Override
151             public void onFailure(@Nonnull final Throwable throwable) {
152                 timeCounter.addTimeMark();
153                 LOG.warn("Statistics gathering for single node {} was not successful: {}", deviceInfo.getLOGValue(),
154                         throwable.getMessage());
155                 if (LOG.isTraceEnabled()) {
156                     LOG.trace("Gathering for node {} failure: ", deviceInfo.getLOGValue(), throwable);
157                 }
158                 calculateTimerDelay(timeCounter);
159                 if (throwable instanceof ConnectionException) {
160                     // ConnectionException is raised by StatisticsContextImpl class when the connections
161                     // move to RIP state. In this particular case, there is no need to reschedule
162                     // because this statistics manager should be closed soon
163                     LOG.warn("Node {} is no more connected, stopping the statistics collection",
164                             deviceInfo.getLOGValue(),throwable);
165                     stopScheduling(deviceInfo);
166                 } else {
167                     LOG.warn("Unexpected error occurred during statistics collection for node {}, rescheduling " +
168                             "statistics collections", deviceInfo.getLOGValue(),throwable);
169                     scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
170                 }
171             }
172         });
173
174         final long averageTime = TimeUnit.MILLISECONDS.toSeconds(timeCounter.getAverageTimeBetweenMarks());
175         final long statsTimeoutSec = averageTime > 0 ? 3 * averageTime : DEFAULT_STATS_TIMEOUT_SEC;
176         final TimerTask timerTask = timeout -> {
177             if (!deviceStatisticsCollectionFuture.isDone()) {
178                 LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceInfo.getLOGValue(), statsTimeoutSec);
179                 deviceStatisticsCollectionFuture.cancel(true);
180             }
181         };
182
183         hashedWheelTimer.newTimeout(timerTask, statsTimeoutSec, TimeUnit.SECONDS);
184     }
185
186     private void scheduleNextPolling(final DeviceState deviceState,
187                                      final DeviceInfo deviceInfo,
188                                      final StatisticsContext statisticsContext,
189                                      final TimeCounter timeCounter) {
190         if (LOG.isDebugEnabled()) {
191             LOG.debug("SCHEDULING NEXT STATISTICS POLLING for device: {}", deviceInfo.getNodeId());
192         }
193         if (isStatisticsPollingOn) {
194             final Timeout pollTimeout = hashedWheelTimer.newTimeout(
195                     timeout -> pollStatistics(
196                             deviceState,
197                             statisticsContext,
198                             timeCounter,
199                             deviceInfo),
200                     currentTimerDelay,
201                     TimeUnit.MILLISECONDS);
202             statisticsContext.setPollTimeout(pollTimeout);
203         }
204     }
205
206     @VisibleForTesting
207     void calculateTimerDelay(final TimeCounter timeCounter) {
208         final long averageStatisticsGatheringTime = timeCounter.getAverageTimeBetweenMarks();
209         if (averageStatisticsGatheringTime > currentTimerDelay) {
210             currentTimerDelay *= 2;
211             if (currentTimerDelay > maximumTimerDelay) {
212                 currentTimerDelay = maximumTimerDelay;
213             }
214         } else {
215             if (currentTimerDelay > basicTimerDelay) {
216                 currentTimerDelay /= 2;
217             } else {
218                 currentTimerDelay = basicTimerDelay;
219             }
220         }
221     }
222
223     @VisibleForTesting
224     static long getCurrentTimerDelay() {
225         return currentTimerDelay;
226     }
227
228     @Override
229     public void onDeviceContextLevelDown(final DeviceInfo deviceInfo) {
230         Optional.ofNullable(contexts.get(deviceInfo)).ifPresent(OFPContext::close);
231         deviceTerminationPhaseHandler.onDeviceContextLevelDown(deviceInfo);
232     }
233
234     @Override
235     public Future<RpcResult<GetStatisticsWorkModeOutput>> getStatisticsWorkMode() {
236         final GetStatisticsWorkModeOutputBuilder smModeOutputBld = new GetStatisticsWorkModeOutputBuilder();
237         smModeOutputBld.setMode(workMode);
238         return RpcResultBuilder.success(smModeOutputBld.build()).buildFuture();
239     }
240
241     @Override
242     public Future<RpcResult<Void>> changeStatisticsWorkMode(ChangeStatisticsWorkModeInput input) {
243         final Future<RpcResult<Void>> result;
244         // acquire exclusive access
245         if (workModeGuard.tryAcquire()) {
246             final StatisticsWorkMode targetWorkMode = input.getMode();
247             if (!workMode.equals(targetWorkMode)) {
248                 isStatisticsPollingOn = !(StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode));
249                 // iterate through stats-ctx: propagate mode
250                 for (Map.Entry<DeviceInfo, StatisticsContext> entry : contexts.entrySet()) {
251                     final DeviceInfo deviceInfo = entry.getKey();
252                     final StatisticsContext statisticsContext = entry.getValue();
253                     final DeviceContext deviceContext = statisticsContext.gainDeviceContext();
254                     switch (targetWorkMode) {
255                         case COLLECTALL:
256                             scheduleNextPolling(statisticsContext.gainDeviceState(), deviceInfo, statisticsContext, new TimeCounter());
257                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
258                                 lifeCycleSource.setItemLifecycleListener(null);
259                             }
260                             break;
261                         case FULLYDISABLED:
262                             final Optional<Timeout> pollTimeout = statisticsContext.getPollTimeout();
263                             if (pollTimeout.isPresent()) {
264                                 pollTimeout.get().cancel();
265                             }
266                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
267                                 lifeCycleSource.setItemLifecycleListener(statisticsContext.getItemLifeCycleListener());
268                             }
269                             break;
270                         default:
271                             LOG.warn("Statistics work mode not supported: {}", targetWorkMode);
272                     }
273                 }
274                 workMode = targetWorkMode;
275             }
276             workModeGuard.release();
277             result = RpcResultBuilder.<Void>success().buildFuture();
278         } else {
279             result = RpcResultBuilder.<Void>failed()
280                     .withError(RpcError.ErrorType.APPLICATION, "mode change already in progress")
281                     .buildFuture();
282         }
283         return result;
284     }
285
286     @Override
287     public void startScheduling(final DeviceInfo deviceInfo) {
288         if (!isStatisticsPollingOn) {
289             LOG.info("Statistics are shutdown for device: {}", deviceInfo.getNodeId());
290             return;
291         }
292
293         final StatisticsContext statisticsContext = contexts.get(deviceInfo);
294
295         if (statisticsContext == null) {
296             LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
297             return;
298         }
299
300         if (statisticsContext.isSchedulingEnabled()) {
301             LOG.debug("Statistics scheduling is already enabled for device: {}", deviceInfo.getNodeId());
302             return;
303         }
304
305         LOG.info("Scheduling statistics poll for device: {}", deviceInfo.getNodeId());
306
307         statisticsContext.setSchedulingEnabled(true);
308         scheduleNextPolling(
309                 statisticsContext.gainDeviceState(),
310                 deviceInfo,
311                 statisticsContext,
312                 new TimeCounter()
313         );
314     }
315
316     @Override
317     public void stopScheduling(final DeviceInfo deviceInfo) {
318         if (LOG.isDebugEnabled()) {
319             LOG.debug("Stopping statistics scheduling for device: {}", deviceInfo.getNodeId());
320         }
321
322         final StatisticsContext statisticsContext = contexts.get(deviceInfo);
323
324         if (statisticsContext == null) {
325             LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
326             return;
327         }
328
329         statisticsContext.setSchedulingEnabled(false);
330     }
331
332     @Override
333     public void close() {
334         if (controlServiceRegistration != null) {
335             controlServiceRegistration.close();
336             controlServiceRegistration = null;
337         }
338
339         for (final Iterator<StatisticsContext> iterator = Iterators.consumingIterator(contexts.values().iterator());
340                 iterator.hasNext();) {
341             iterator.next().close();
342         }
343     }
344
345     @Override
346     public void setDeviceTerminationPhaseHandler(final DeviceTerminationPhaseHandler handler) {
347         this.deviceTerminationPhaseHandler = handler;
348     }
349
350     @Override
351     public void setIsStatisticsPollingOn(boolean isStatisticsPollingOn){
352         this.isStatisticsPollingOn = isStatisticsPollingOn;
353     }
354
355     public void onDeviceRemoved(DeviceInfo deviceInfo) {
356         contexts.remove(deviceInfo);
357         LOG.debug("Statistics context removed for node {}", deviceInfo.getLOGValue());
358     }
359
360     @Override
361     public void setBasicTimerDelay(final long basicTimerDelay) {
362         this.basicTimerDelay = basicTimerDelay;
363     }
364
365     @Override
366     public void setMaximumTimerDelay(final long maximumTimerDelay) {
367         this.maximumTimerDelay = maximumTimerDelay;
368     }
369 }