Bug-4957: Double candidate, clean up
[openflowplugin.git] / openflowplugin-impl / src / main / java / org / opendaylight / openflowplugin / impl / statistics / StatisticsManagerImpl.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8
9 package org.opendaylight.openflowplugin.impl.statistics;
10
11 import java.util.Map;
12 import java.util.concurrent.ConcurrentHashMap;
13 import java.util.concurrent.ExecutionException;
14 import java.util.concurrent.Future;
15 import java.util.concurrent.Semaphore;
16 import java.util.concurrent.TimeUnit;
17 import java.util.concurrent.TimeoutException;
18
19 import com.google.common.annotations.VisibleForTesting;
20 import com.google.common.base.Optional;
21 import com.google.common.util.concurrent.FutureCallback;
22 import com.google.common.util.concurrent.Futures;
23 import com.google.common.util.concurrent.ListenableFuture;
24 import io.netty.util.HashedWheelTimer;
25 import io.netty.util.Timeout;
26 import io.netty.util.TimerTask;
27 import org.opendaylight.controller.sal.binding.api.BindingAwareBroker;
28 import org.opendaylight.controller.sal.binding.api.RpcProviderRegistry;
29 import org.opendaylight.openflowplugin.api.openflow.device.DeviceContext;
30 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceInitializationPhaseHandler;
31 import org.opendaylight.openflowplugin.api.openflow.rpc.ItemLifeCycleSource;
32 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsContext;
33 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsManager;
34 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.ChangeStatisticsWorkModeInput;
35 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutput;
36 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutputBuilder;
37 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsManagerControlService;
38 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsWorkMode;
39 import org.opendaylight.yang.gen.v1.urn.opendaylight.role.service.rev150727.OfpRole;
40 import org.opendaylight.yangtools.yang.common.RpcError;
41 import org.opendaylight.yangtools.yang.common.RpcResult;
42 import org.opendaylight.yangtools.yang.common.RpcResultBuilder;
43 import org.slf4j.Logger;
44 import org.slf4j.LoggerFactory;
45
46 /**
47  * Created by Martin Bobak <mbobak@cisco.com> on 1.4.2015.
48  */
49 public class StatisticsManagerImpl implements StatisticsManager, StatisticsManagerControlService {
50
51     private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagerImpl.class);
52     private final RpcProviderRegistry rpcProviderRegistry;
53
54     private DeviceInitializationPhaseHandler deviceInitPhaseHandler;
55
56     private HashedWheelTimer hashedWheelTimer;
57
58     private final ConcurrentHashMap<DeviceContext, StatisticsContext> contexts = new ConcurrentHashMap<>();
59
60     private static final long basicTimerDelay = 3000;
61     private static long currentTimerDelay = basicTimerDelay;
62     private static long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
63
64     private StatisticsWorkMode workMode = StatisticsWorkMode.COLLECTALL;
65     private Semaphore workModeGuard = new Semaphore(1, true);
66     private boolean shuttingDownStatisticsPolling;
67     private BindingAwareBroker.RpcRegistration<StatisticsManagerControlService> controlServiceRegistration;
68
69     @Override
70     public void setDeviceInitializationPhaseHandler(final DeviceInitializationPhaseHandler handler) {
71         deviceInitPhaseHandler = handler;
72     }
73
74     public StatisticsManagerImpl(RpcProviderRegistry rpcProviderRegistry) {
75         this.rpcProviderRegistry = rpcProviderRegistry;
76         controlServiceRegistration = rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this);
77     }
78
79     public StatisticsManagerImpl(RpcProviderRegistry rpcProviderRegistry, final boolean shuttingDownStatisticsPolling) {
80         this(rpcProviderRegistry);
81         this.shuttingDownStatisticsPolling = shuttingDownStatisticsPolling;
82     }
83
84     @Override
85     public void onDeviceContextLevelUp(final DeviceContext deviceContext) throws Exception {
86         LOG.debug("Node:{}, deviceContext.getDeviceState().getRole():{}", deviceContext.getDeviceState().getNodeId(),
87                 deviceContext.getDeviceState().getRole());
88         if (null == hashedWheelTimer) {
89             LOG.trace("This is first device that delivered timer. Starting statistics polling immediately.");
90             hashedWheelTimer = deviceContext.getTimer();
91         }
92
93         LOG.info("Starting Statistics for master role for node:{}", deviceContext.getDeviceState().getNodeId());
94
95         final StatisticsContext statisticsContext = new StatisticsContextImpl(deviceContext);
96         deviceContext.addDeviceContextClosedHandler(this);
97
98         if (deviceContext.getDeviceState().getRole() == OfpRole.BECOMESLAVE) {
99             // if slave, we dont poll for statistics and jump to rpc initialization
100             LOG.info("Skipping Statistics for slave role for node:{}", deviceContext.getDeviceState().getNodeId());
101             scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
102             deviceInitPhaseHandler.onDeviceContextLevelUp(deviceContext);
103             return;
104         }
105
106         final ListenableFuture<Boolean> weHaveDynamicData = statisticsContext.gatherDynamicData();
107         Futures.addCallback(weHaveDynamicData, new FutureCallback<Boolean>() {
108             @Override
109             public void onSuccess(final Boolean statisticsGathered) {
110                 if (statisticsGathered) {
111                     //there are some statistics on device worth gathering
112                     contexts.put(deviceContext, statisticsContext);
113                     final TimeCounter timeCounter = new TimeCounter();
114                     scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
115                     LOG.trace("Device dynamic info collecting done. Going to announce raise to next level.");
116                     try {
117                         deviceInitPhaseHandler.onDeviceContextLevelUp(deviceContext);
118                     } catch (Exception e) {
119                         LOG.info("failed to complete levelUp on next handler for device {}", deviceContext.getDeviceState().getNodeId());
120                         deviceContext.close();
121                         return;
122                     }
123                     deviceContext.getDeviceState().setDeviceSynchronized(true);
124                 } else {
125                     final String deviceAdress = deviceContext.getPrimaryConnectionContext().getConnectionAdapter().getRemoteAddress().toString();
126                     try {
127                         deviceContext.close();
128                     } catch (Exception e) {
129                         LOG.info("Statistics for device {} could not be gathered. Closing its device context.", deviceAdress);
130                     }
131                 }
132             }
133
134             @Override
135             public void onFailure(final Throwable throwable) {
136                 LOG.warn("Statistics manager was not able to collect dynamic info for device.", deviceContext.getDeviceState().getNodeId(), throwable);
137                 try {
138                     deviceContext.close();
139                 } catch (Exception e) {
140                     LOG.warn("Error closing device context.", e);
141                 }
142             }
143         });
144     }
145
146     private void pollStatistics(final DeviceContext deviceContext,
147                                 final StatisticsContext statisticsContext,
148                                 final TimeCounter timeCounter) {
149         
150         if (!deviceContext.getDeviceState().isValid()) {
151             LOG.debug("Session for device {} is not valid.", deviceContext.getDeviceState().getNodeId().getValue());
152             return;
153         }
154         LOG.debug("POLLING ALL STATS for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
155         if (OfpRole.BECOMESLAVE.equals(deviceContext.getDeviceState().getRole())) {
156             LOG.debug("Role is SLAVE so we don't want to poll any stat for device: {}", deviceContext.getDeviceState().getNodeId());
157             scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
158             return;
159         }
160         timeCounter.markStart();
161         ListenableFuture<Boolean> deviceStatisticsCollectionFuture = statisticsContext.gatherDynamicData();
162         Futures.addCallback(deviceStatisticsCollectionFuture, new FutureCallback<Boolean>() {
163             @Override
164             public void onSuccess(final Boolean o) {
165                 timeCounter.addTimeMark();
166                 calculateTimerDelay(timeCounter);
167                 scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
168             }
169
170             @Override
171             public void onFailure(final Throwable throwable) {
172                 timeCounter.addTimeMark();
173                 LOG.info("Statistics gathering for single node was not successful: {}", throwable.getMessage());
174                 LOG.debug("Statistics gathering for single node was not successful.. ", throwable);
175                 calculateTimerDelay(timeCounter);
176                 scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
177             }
178         });
179
180         final long STATS_TIMEOUT_SEC = 20L;
181         try {
182             deviceStatisticsCollectionFuture.get(STATS_TIMEOUT_SEC, TimeUnit.SECONDS);
183         } catch (InterruptedException | ExecutionException e) {
184             LOG.warn("Statistics collection for node {} failed", deviceContext.getDeviceState().getNodeId(), e);
185         } catch (final TimeoutException e) {
186             LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceContext.getDeviceState().getNodeId(), STATS_TIMEOUT_SEC);
187         }
188     }
189
190     private void scheduleNextPolling(final DeviceContext deviceContext,
191                                      final StatisticsContext statisticsContext,
192                                      final TimeCounter timeCounter) {
193         if (null != hashedWheelTimer) {
194             LOG.debug("SCHEDULING NEXT STATS POLLING for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
195             if (!shuttingDownStatisticsPolling) {
196                 Timeout pollTimeout = hashedWheelTimer.newTimeout(new TimerTask() {
197                     @Override
198                     public void run(final Timeout timeout) throws Exception {
199                         pollStatistics(deviceContext, statisticsContext, timeCounter);
200                     }
201                 }, currentTimerDelay, TimeUnit.MILLISECONDS);
202                 statisticsContext.setPollTimeout(pollTimeout);
203             }
204         } else {
205             LOG.debug("#!NOT SCHEDULING NEXT STATS POLLING for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
206         }
207     }
208
209     @VisibleForTesting
210     protected void calculateTimerDelay(final TimeCounter timeCounter) {
211         long averageStatisticsGatheringTime = timeCounter.getAverageTimeBetweenMarks();
212         if (averageStatisticsGatheringTime > currentTimerDelay) {
213             currentTimerDelay *= 2;
214             if (currentTimerDelay > maximumTimerDelay) {
215                 currentTimerDelay = maximumTimerDelay;
216             }
217         } else {
218             if (currentTimerDelay > basicTimerDelay) {
219                 currentTimerDelay /= 2;
220             } else {
221                 currentTimerDelay = basicTimerDelay;
222             }
223         }
224     }
225
226     @VisibleForTesting
227     protected static long getCurrentTimerDelay() {
228         return currentTimerDelay;
229     }
230
231     @Override
232     public void onDeviceContextClosed(final DeviceContext deviceContext) {
233         StatisticsContext statisticsContext = contexts.remove(deviceContext);
234         if (null != statisticsContext) {
235             LOG.trace("Removing device context from stack. No more statistics gathering for node {}", deviceContext.getDeviceState().getNodeId());
236             try {
237                 statisticsContext.close();
238             } catch (Exception e) {
239                 LOG.debug("Error closing statistic context for node {}.", deviceContext.getDeviceState().getNodeId());
240             }
241         }
242     }
243
244     @Override
245     public Future<RpcResult<GetStatisticsWorkModeOutput>> getStatisticsWorkMode() {
246         GetStatisticsWorkModeOutputBuilder smModeOutputBld = new GetStatisticsWorkModeOutputBuilder();
247         smModeOutputBld.setMode(workMode);
248         return RpcResultBuilder.success(smModeOutputBld.build()).buildFuture();
249     }
250
251     @Override
252     public Future<RpcResult<Void>> changeStatisticsWorkMode(ChangeStatisticsWorkModeInput input) {
253         final Future<RpcResult<Void>> result;
254         // acquire exclusive access
255         if (workModeGuard.tryAcquire()) {
256             final StatisticsWorkMode targetWorkMode = input.getMode();
257             if (!workMode.equals(targetWorkMode)) {
258                 shuttingDownStatisticsPolling = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
259                 // iterate through stats-ctx: propagate mode
260                 for (Map.Entry<DeviceContext, StatisticsContext> contextEntry : contexts.entrySet()) {
261                     final DeviceContext deviceContext = contextEntry.getKey();
262                     final StatisticsContext statisticsContext = contextEntry.getValue();
263                     switch (targetWorkMode) {
264                         case COLLECTALL:
265                             scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
266                             for (ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
267                                 lifeCycleSource.setItemLifecycleListener(null);
268                             }
269                             break;
270                         case FULLYDISABLED:
271                             final Optional<Timeout> pollTimeout = statisticsContext.getPollTimeout();
272                             if (pollTimeout.isPresent()) {
273                                 pollTimeout.get().cancel();
274                             }
275                             for (ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
276                                 lifeCycleSource.setItemLifecycleListener(statisticsContext.getItemLifeCycleListener());
277                             }
278                             break;
279                         default:
280                             LOG.warn("statistics work mode not supported: {}", targetWorkMode);
281                     }
282                 }
283                 workMode = targetWorkMode;
284             }
285             workModeGuard.release();
286             result = RpcResultBuilder.<Void>success().buildFuture();
287         } else {
288             result = RpcResultBuilder.<Void>failed()
289                     .withError(RpcError.ErrorType.APPLICATION, "mode change already in progress")
290                     .buildFuture();
291         }
292         return result;
293     }
294
295     @Override
296     public void close() {
297         if (controlServiceRegistration != null) {
298             controlServiceRegistration.close();
299             controlServiceRegistration = null;
300         }
301     }
302 }