Bug 7011 - Race condition in statistics collection related transaction chain handling
[openflowplugin.git] / openflowplugin-impl / src / main / java / org / opendaylight / openflowplugin / impl / statistics / StatisticsManagerImpl.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8
9 package org.opendaylight.openflowplugin.impl.statistics;
10
11 import com.google.common.annotations.VisibleForTesting;
12 import com.google.common.base.Preconditions;
13 import com.google.common.base.Verify;
14 import com.google.common.collect.Iterators;
15 import com.google.common.util.concurrent.FutureCallback;
16 import com.google.common.util.concurrent.Futures;
17 import com.google.common.util.concurrent.ListenableFuture;
18 import io.netty.util.HashedWheelTimer;
19 import io.netty.util.Timeout;
20 import io.netty.util.TimerTask;
21 import java.util.Iterator;
22 import java.util.Map;
23 import java.util.Optional;
24 import java.util.concurrent.CancellationException;
25 import java.util.concurrent.ConcurrentHashMap;
26 import java.util.concurrent.ConcurrentMap;
27 import java.util.concurrent.Future;
28 import java.util.concurrent.Semaphore;
29 import java.util.concurrent.TimeUnit;
30 import javax.annotation.Nonnull;
31 import org.opendaylight.controller.sal.binding.api.BindingAwareBroker;
32 import org.opendaylight.controller.sal.binding.api.RpcProviderRegistry;
33 import org.opendaylight.openflowplugin.api.openflow.device.DeviceContext;
34 import org.opendaylight.openflowplugin.api.openflow.device.DeviceInfo;
35 import org.opendaylight.openflowplugin.api.openflow.device.DeviceState;
36 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceInitializationPhaseHandler;
37 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceTerminationPhaseHandler;
38 import org.opendaylight.openflowplugin.api.openflow.lifecycle.LifecycleService;
39 import org.opendaylight.openflowplugin.api.openflow.rpc.ItemLifeCycleSource;
40 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsContext;
41 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsManager;
42 import org.opendaylight.openflowplugin.openflow.md.core.sal.convertor.ConvertorExecutor;
43 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.ChangeStatisticsWorkModeInput;
44 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutput;
45 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutputBuilder;
46 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsManagerControlService;
47 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsWorkMode;
48 import org.opendaylight.yangtools.yang.common.RpcError;
49 import org.opendaylight.yangtools.yang.common.RpcResult;
50 import org.opendaylight.yangtools.yang.common.RpcResultBuilder;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53
54 public class StatisticsManagerImpl implements StatisticsManager, StatisticsManagerControlService {
55
56     private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagerImpl.class);
57
58     private static final long DEFAULT_STATS_TIMEOUT_SEC = 50L;
59     private final ConvertorExecutor converterExecutor;
60
61     private DeviceInitializationPhaseHandler deviceInitPhaseHandler;
62     private DeviceTerminationPhaseHandler deviceTerminPhaseHandler;
63
64     private final ConcurrentMap<DeviceInfo, StatisticsContext> contexts = new ConcurrentHashMap<>();
65
66     private static final long basicTimerDelay = 3000;
67     private static long currentTimerDelay = basicTimerDelay;
68     private static final long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
69
70     private StatisticsWorkMode workMode = StatisticsWorkMode.COLLECTALL;
71     private final Semaphore workModeGuard = new Semaphore(1, true);
72     private boolean isStatisticsPollingOff;
73     private BindingAwareBroker.RpcRegistration<StatisticsManagerControlService> controlServiceRegistration;
74
75     private final HashedWheelTimer hashedWheelTimer;
76
77     @Override
78     public void setDeviceInitializationPhaseHandler(final DeviceInitializationPhaseHandler handler) {
79         deviceInitPhaseHandler = handler;
80     }
81
82     public StatisticsManagerImpl(final RpcProviderRegistry rpcProviderRegistry,
83                                  final boolean isStatisticsPollingOff,
84                                  final HashedWheelTimer hashedWheelTimer,
85                                  final ConvertorExecutor convertorExecutor) {
86         Preconditions.checkArgument(rpcProviderRegistry != null);
87             this.converterExecutor = convertorExecutor;
88         this.controlServiceRegistration = Preconditions.checkNotNull(
89                 rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this)
90         );
91         this.isStatisticsPollingOff = isStatisticsPollingOff;
92         this.hashedWheelTimer = hashedWheelTimer;
93     }
94
95     @Override
96     public void onDeviceContextLevelUp(final DeviceInfo deviceInfo,
97                                        final LifecycleService lifecycleService) throws Exception {
98
99         final StatisticsContext statisticsContext =
100                 new StatisticsContextImpl(
101                         deviceInfo,
102                         isStatisticsPollingOff,
103                         lifecycleService,
104                         converterExecutor,
105                         this);
106         Verify.verify(
107                 contexts.putIfAbsent(deviceInfo, statisticsContext) == null,
108                 "StatisticsCtx still not closed for Node {}", deviceInfo.getLOGValue()
109         );
110         lifecycleService.setStatContext(statisticsContext);
111         deviceInitPhaseHandler.onDeviceContextLevelUp(deviceInfo, lifecycleService);
112     }
113
114     @VisibleForTesting
115     void pollStatistics(final DeviceState deviceState,
116                         final StatisticsContext statisticsContext,
117                         final TimeCounter timeCounter,
118                         final DeviceInfo deviceInfo) {
119
120         if (!statisticsContext.isSchedulingEnabled()) {
121             if (LOG.isDebugEnabled()) {
122                 LOG.debug("Disabled statistics scheduling for device: {}", deviceInfo.getNodeId().getValue());
123             }
124             return;
125         }
126
127         if (LOG.isDebugEnabled()) {
128             LOG.debug("POLLING ALL STATISTICS for device: {}", deviceInfo.getNodeId());
129         }
130         timeCounter.markStart();
131         final ListenableFuture<Boolean> deviceStatisticsCollectionFuture = statisticsContext.gatherDynamicData();
132         Futures.addCallback(deviceStatisticsCollectionFuture, new FutureCallback<Boolean>() {
133             @Override
134             public void onSuccess(final Boolean o) {
135                 timeCounter.addTimeMark();
136                 calculateTimerDelay(timeCounter);
137                 scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
138             }
139
140             @Override
141             public void onFailure(@Nonnull final Throwable throwable) {
142                 timeCounter.addTimeMark();
143                 LOG.warn("Statistics gathering for single node {} was not successful: {}", deviceInfo.getLOGValue(),
144                         throwable.getMessage());
145                 if (LOG.isTraceEnabled()) {
146                     LOG.trace("Gathering for node {} failure: ", deviceInfo.getLOGValue(), throwable);
147                 }
148                 calculateTimerDelay(timeCounter);
149                 if (throwable instanceof IllegalStateException) {
150                     stopScheduling(deviceInfo);
151                 } else {
152                     scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
153                 }
154             }
155         });
156
157         final long averageTime = TimeUnit.MILLISECONDS.toSeconds(timeCounter.getAverageTimeBetweenMarks());
158         final long statsTimeoutSec = averageTime > 0 ? 3 * averageTime : DEFAULT_STATS_TIMEOUT_SEC;
159         final TimerTask timerTask = timeout -> {
160             if (!deviceStatisticsCollectionFuture.isDone()) {
161                 LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceInfo.getLOGValue(), statsTimeoutSec);
162                 deviceStatisticsCollectionFuture.cancel(true);
163             }
164         };
165
166         hashedWheelTimer.newTimeout(timerTask, statsTimeoutSec, TimeUnit.SECONDS);
167     }
168
169     private void scheduleNextPolling(final DeviceState deviceState,
170                                      final DeviceInfo deviceInfo,
171                                      final StatisticsContext statisticsContext,
172                                      final TimeCounter timeCounter) {
173         if (LOG.isDebugEnabled()) {
174             LOG.debug("SCHEDULING NEXT STATISTICS POLLING for device: {}", deviceInfo.getNodeId());
175         }
176         if (!isStatisticsPollingOff) {
177             final Timeout pollTimeout = hashedWheelTimer.newTimeout(
178                     timeout -> pollStatistics(
179                             deviceState,
180                             statisticsContext,
181                             timeCounter,
182                             deviceInfo),
183                     currentTimerDelay,
184                     TimeUnit.MILLISECONDS);
185             statisticsContext.setPollTimeout(pollTimeout);
186         }
187     }
188
189     @VisibleForTesting
190     void calculateTimerDelay(final TimeCounter timeCounter) {
191         final long averageStatisticsGatheringTime = timeCounter.getAverageTimeBetweenMarks();
192         if (averageStatisticsGatheringTime > currentTimerDelay) {
193             currentTimerDelay *= 2;
194             if (currentTimerDelay > maximumTimerDelay) {
195                 currentTimerDelay = maximumTimerDelay;
196             }
197         } else {
198             if (currentTimerDelay > basicTimerDelay) {
199                 currentTimerDelay /= 2;
200             } else {
201                 currentTimerDelay = basicTimerDelay;
202             }
203         }
204     }
205
206     @VisibleForTesting
207     static long getCurrentTimerDelay() {
208         return currentTimerDelay;
209     }
210
211     @Override
212     public void onDeviceContextLevelDown(final DeviceInfo deviceInfo) {
213         final StatisticsContext statisticsContext = contexts.remove(deviceInfo);
214         if (null != statisticsContext) {
215             LOG.debug("Removing device context from stack. No more statistics gathering for device: {}", deviceInfo.getLOGValue());
216             statisticsContext.close();
217         }
218         deviceTerminPhaseHandler.onDeviceContextLevelDown(deviceInfo);
219     }
220
221     @Override
222     public Future<RpcResult<GetStatisticsWorkModeOutput>> getStatisticsWorkMode() {
223         final GetStatisticsWorkModeOutputBuilder smModeOutputBld = new GetStatisticsWorkModeOutputBuilder();
224         smModeOutputBld.setMode(workMode);
225         return RpcResultBuilder.success(smModeOutputBld.build()).buildFuture();
226     }
227
228     @Override
229     public Future<RpcResult<Void>> changeStatisticsWorkMode(ChangeStatisticsWorkModeInput input) {
230         final Future<RpcResult<Void>> result;
231         // acquire exclusive access
232         if (workModeGuard.tryAcquire()) {
233             final StatisticsWorkMode targetWorkMode = input.getMode();
234             if (!workMode.equals(targetWorkMode)) {
235                 isStatisticsPollingOff = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
236                 // iterate through stats-ctx: propagate mode
237                 for (Map.Entry<DeviceInfo, StatisticsContext> entry : contexts.entrySet()) {
238                     final DeviceInfo deviceInfo = entry.getKey();
239                     final StatisticsContext statisticsContext = entry.getValue();
240                     final DeviceContext deviceContext = statisticsContext.gainDeviceContext();
241                     switch (targetWorkMode) {
242                         case COLLECTALL:
243                             scheduleNextPolling(statisticsContext.gainDeviceState(), deviceInfo, statisticsContext, new TimeCounter());
244                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
245                                 lifeCycleSource.setItemLifecycleListener(null);
246                             }
247                             break;
248                         case FULLYDISABLED:
249                             final Optional<Timeout> pollTimeout = statisticsContext.getPollTimeout();
250                             if (pollTimeout.isPresent()) {
251                                 pollTimeout.get().cancel();
252                             }
253                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
254                                 lifeCycleSource.setItemLifecycleListener(statisticsContext.getItemLifeCycleListener());
255                             }
256                             break;
257                         default:
258                             LOG.warn("Statistics work mode not supported: {}", targetWorkMode);
259                     }
260                 }
261                 workMode = targetWorkMode;
262             }
263             workModeGuard.release();
264             result = RpcResultBuilder.<Void>success().buildFuture();
265         } else {
266             result = RpcResultBuilder.<Void>failed()
267                     .withError(RpcError.ErrorType.APPLICATION, "mode change already in progress")
268                     .buildFuture();
269         }
270         return result;
271     }
272
273     @Override
274     public void startScheduling(final DeviceInfo deviceInfo) {
275         if (isStatisticsPollingOff) {
276             LOG.info("Statistics are shutdown for device: {}", deviceInfo.getNodeId());
277             return;
278         }
279
280         final StatisticsContext statisticsContext = contexts.get(deviceInfo);
281
282         if (statisticsContext == null) {
283             LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
284             return;
285         }
286
287         if (statisticsContext.isSchedulingEnabled()) {
288             LOG.debug("Statistics scheduling is already enabled for device: {}", deviceInfo.getNodeId());
289             return;
290         }
291
292         LOG.info("Scheduling statistics poll for device: {}", deviceInfo.getNodeId());
293
294         statisticsContext.setSchedulingEnabled(true);
295         scheduleNextPolling(
296                 statisticsContext.gainDeviceState(),
297                 deviceInfo,
298                 statisticsContext,
299                 new TimeCounter()
300         );
301     }
302
303     @Override
304     public void stopScheduling(final DeviceInfo deviceInfo) {
305         if (LOG.isDebugEnabled()) {
306             LOG.debug("Stopping statistics scheduling for device: {}", deviceInfo.getNodeId());
307         }
308         final StatisticsContext statisticsContext = contexts.get(deviceInfo);
309
310         if (statisticsContext == null) {
311             LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
312             return;
313         }
314         statisticsContext.setSchedulingEnabled(false);
315     }
316
317     @Override
318     public void close() {
319         if (controlServiceRegistration != null) {
320             controlServiceRegistration.close();
321             controlServiceRegistration = null;
322         }
323         for (final Iterator<StatisticsContext> iterator = Iterators.consumingIterator(contexts.values().iterator());
324                 iterator.hasNext();) {
325             iterator.next().close();
326         }
327     }
328
329     @Override
330     public void setDeviceTerminationPhaseHandler(final DeviceTerminationPhaseHandler handler) {
331         this.deviceTerminPhaseHandler = handler;
332     }
333
334     @Override
335     public void setIsStatisticsPollingOff(boolean isStatisticsPollingOff){
336         this.isStatisticsPollingOff = isStatisticsPollingOff;
337     }
338
339 }