Bug 7011 - Race condition in statistics collection related transaction chain handling
[openflowplugin.git] / openflowplugin-impl / src / main / java / org / opendaylight / openflowplugin / impl / statistics / StatisticsManagerImpl.java
index 992275175eb9cdbb86a5d6dc1e55f46c4925029e..f0bcbdcce43c73509569ac126d932591d1be5ea0 100644 (file)
@@ -8,17 +8,7 @@
 
 package org.opendaylight.openflowplugin.impl.statistics;
 
-import javax.annotation.CheckForNull;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Verify;
 import com.google.common.collect.Iterators;
@@ -30,149 +20,113 @@ import io.netty.util.Timeout;
 import io.netty.util.TimerTask;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.concurrent.CancellationException;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.Future;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
-import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
 import org.opendaylight.controller.sal.binding.api.BindingAwareBroker;
 import org.opendaylight.controller.sal.binding.api.RpcProviderRegistry;
 import org.opendaylight.openflowplugin.api.openflow.device.DeviceContext;
+import org.opendaylight.openflowplugin.api.openflow.device.DeviceInfo;
+import org.opendaylight.openflowplugin.api.openflow.device.DeviceState;
 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceInitializationPhaseHandler;
+import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceTerminationPhaseHandler;
+import org.opendaylight.openflowplugin.api.openflow.lifecycle.LifecycleService;
 import org.opendaylight.openflowplugin.api.openflow.rpc.ItemLifeCycleSource;
 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsContext;
 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsManager;
+import org.opendaylight.openflowplugin.openflow.md.core.sal.convertor.ConvertorExecutor;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.ChangeStatisticsWorkModeInput;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutput;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutputBuilder;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsManagerControlService;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsWorkMode;
-import org.opendaylight.yang.gen.v1.urn.opendaylight.role.service.rev150727.OfpRole;
 import org.opendaylight.yangtools.yang.common.RpcError;
 import org.opendaylight.yangtools.yang.common.RpcResult;
 import org.opendaylight.yangtools.yang.common.RpcResultBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * Created by Martin Bobak <mbobak@cisco.com> on 1.4.2015.
- */
 public class StatisticsManagerImpl implements StatisticsManager, StatisticsManagerControlService {
 
     private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagerImpl.class);
 
     private static final long DEFAULT_STATS_TIMEOUT_SEC = 50L;
+    private final ConvertorExecutor converterExecutor;
 
     private DeviceInitializationPhaseHandler deviceInitPhaseHandler;
+    private DeviceTerminationPhaseHandler deviceTerminPhaseHandler;
 
-    private HashedWheelTimer hashedWheelTimer;
-
-    private final ConcurrentMap<DeviceContext, StatisticsContext> contexts = new ConcurrentHashMap<>();
+    private final ConcurrentMap<DeviceInfo, StatisticsContext> contexts = new ConcurrentHashMap<>();
 
     private static final long basicTimerDelay = 3000;
     private static long currentTimerDelay = basicTimerDelay;
-    private static long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
+    private static final long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
 
     private StatisticsWorkMode workMode = StatisticsWorkMode.COLLECTALL;
     private final Semaphore workModeGuard = new Semaphore(1, true);
-    private boolean shuttingDownStatisticsPolling;
+    private boolean isStatisticsPollingOff;
     private BindingAwareBroker.RpcRegistration<StatisticsManagerControlService> controlServiceRegistration;
 
+    private final HashedWheelTimer hashedWheelTimer;
+
     @Override
     public void setDeviceInitializationPhaseHandler(final DeviceInitializationPhaseHandler handler) {
         deviceInitPhaseHandler = handler;
     }
 
-    public StatisticsManagerImpl(@CheckForNull final RpcProviderRegistry rpcProviderRegistry, final boolean shuttingDownStatisticsPolling) {
+    public StatisticsManagerImpl(final RpcProviderRegistry rpcProviderRegistry,
+                                 final boolean isStatisticsPollingOff,
+                                 final HashedWheelTimer hashedWheelTimer,
+                                 final ConvertorExecutor convertorExecutor) {
         Preconditions.checkArgument(rpcProviderRegistry != null);
-        controlServiceRegistration = rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this);
-        this.shuttingDownStatisticsPolling = shuttingDownStatisticsPolling;
+           this.converterExecutor = convertorExecutor;
+        this.controlServiceRegistration = Preconditions.checkNotNull(
+                rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this)
+        );
+        this.isStatisticsPollingOff = isStatisticsPollingOff;
+        this.hashedWheelTimer = hashedWheelTimer;
     }
 
     @Override
-    public void onDeviceContextLevelUp(final DeviceContext deviceContext) throws Exception {
-        LOG.debug("Node:{}, deviceContext.getDeviceState().getRole():{}", deviceContext.getDeviceState().getNodeId(),
-                deviceContext.getDeviceState().getRole());
-        if (null == hashedWheelTimer) {
-            LOG.trace("This is first device that delivered timer. Starting statistics polling immediately.");
-            hashedWheelTimer = deviceContext.getTimer();
-        }
-        final StatisticsContext statisticsContext = new StatisticsContextImpl(deviceContext, shuttingDownStatisticsPolling);
-
-        Verify.verify(contexts.putIfAbsent(deviceContext, statisticsContext) == null, "StatisticsCtx still not closed for Node {}",deviceContext.getDeviceState().getNodeId());
-        deviceContext.addDeviceContextClosedHandler(this);
+    public void onDeviceContextLevelUp(final DeviceInfo deviceInfo,
+                                       final LifecycleService lifecycleService) throws Exception {
 
-        if (shuttingDownStatisticsPolling) {
-            LOG.info("Statistics is shutdown for node:{}", deviceContext.getDeviceState().getNodeId());
-        } else {
-            LOG.info("Schedule Statistics poll for node:{}", deviceContext.getDeviceState().getNodeId());
-            if (OfpRole.BECOMEMASTER.equals(deviceContext.getDeviceState().getRole())) {
-                initialStatPollForMaster(statisticsContext, deviceContext);
-                /* we want to wait for initial statCollecting response */
-                return;
-            }
-            scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
-        }
-        deviceContext.getDeviceState().setDeviceSynchronized(true);
-        deviceInitPhaseHandler.onDeviceContextLevelUp(deviceContext);
+        final StatisticsContext statisticsContext =
+                new StatisticsContextImpl(
+                        deviceInfo,
+                        isStatisticsPollingOff,
+                        lifecycleService,
+                        converterExecutor,
+                        this);
+        Verify.verify(
+                contexts.putIfAbsent(deviceInfo, statisticsContext) == null,
+                "StatisticsCtx still not closed for Node {}", deviceInfo.getLOGValue()
+        );
+        lifecycleService.setStatContext(statisticsContext);
+        deviceInitPhaseHandler.onDeviceContextLevelUp(deviceInfo, lifecycleService);
     }
 
-    private void initialStatPollForMaster(final StatisticsContext statisticsContext, final DeviceContext deviceContext) {
-        final ListenableFuture<Boolean> weHaveDynamicData = statisticsContext.gatherDynamicData();
-        Futures.addCallback(weHaveDynamicData, new FutureCallback<Boolean>() {
-            @Override
-            public void onSuccess(final Boolean statisticsGathered) {
-                if (statisticsGathered) {
-                    //there are some statistics on device worth gathering
-                    final TimeCounter timeCounter = new TimeCounter();
-                    deviceContext.getDeviceState().setStatisticsPollingEnabledProp(true);
-                    scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
-                    LOG.trace("Device dynamic info collecting done. Going to announce raise to next level.");
-                    try {
-                        deviceInitPhaseHandler.onDeviceContextLevelUp(deviceContext);
-                    } catch (final Exception e) {
-                        LOG.info("failed to complete levelUp on next handler for device {}", deviceContext.getDeviceState().getNodeId());
-                        deviceContext.close();
-                        return;
-                    }
-                    deviceContext.getDeviceState().setDeviceSynchronized(true);
-                } else {
-                    final String deviceAddress = deviceContext.getPrimaryConnectionContext().getConnectionAdapter().getRemoteAddress().toString();
-                    LOG.info("Statistics for device {} could not be gathered. Closing its device context.", deviceAddress);
-                    deviceContext.close();
-                }
-            }
+    @VisibleForTesting
+    void pollStatistics(final DeviceState deviceState,
+                        final StatisticsContext statisticsContext,
+                        final TimeCounter timeCounter,
+                        final DeviceInfo deviceInfo) {
 
-            @Override
-            public void onFailure(final Throwable throwable) {
-                LOG.warn("Statistics manager was not able to collect dynamic info for device.", deviceContext.getDeviceState().getNodeId(), throwable);
-                deviceContext.close();
+        if (!statisticsContext.isSchedulingEnabled()) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Disabled statistics scheduling for device: {}", deviceInfo.getNodeId().getValue());
             }
-        });
-    }
-
-    private void pollStatistics(final DeviceContext deviceContext,
-                                final StatisticsContext statisticsContext,
-                                final TimeCounter timeCounter) {
-        
-        if (!deviceContext.getDeviceState().isValid()) {
-            LOG.debug("Session for device {} is not valid.", deviceContext.getDeviceState().getNodeId().getValue());
-            return;
-        }
-        if (!deviceContext.getDeviceState().isStatisticsPollingEnabled()) {
-            LOG.debug("StatisticsPolling is disabled for device: {} , try later", deviceContext.getDeviceState().getNodeId());
-            scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
-            return;
-        }
-        if (OfpRole.BECOMESLAVE.equals(deviceContext.getDeviceState().getRole())) {
-            LOG.debug("Role is SLAVE so we don't want to poll any stat for device: {}", deviceContext.getDeviceState().getNodeId());
-            scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
             return;
         }
 
-        LOG.debug("POLLING ALL STATS for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("POLLING ALL STATISTICS for device: {}", deviceInfo.getNodeId());
+        }
         timeCounter.markStart();
         final ListenableFuture<Boolean> deviceStatisticsCollectionFuture = statisticsContext.gatherDynamicData();
         Futures.addCallback(deviceStatisticsCollectionFuture, new FutureCallback<Boolean>() {
@@ -180,56 +134,60 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
             public void onSuccess(final Boolean o) {
                 timeCounter.addTimeMark();
                 calculateTimerDelay(timeCounter);
-                scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
+                scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
             }
 
             @Override
-            public void onFailure(final Throwable throwable) {
+            public void onFailure(@Nonnull final Throwable throwable) {
                 timeCounter.addTimeMark();
-                LOG.info("Statistics gathering for single node was not successful: {}", throwable.getMessage());
-                LOG.debug("Statistics gathering for single node was not successful.. ", throwable);
+                LOG.warn("Statistics gathering for single node {} was not successful: {}", deviceInfo.getLOGValue(),
+                        throwable.getMessage());
+                if (LOG.isTraceEnabled()) {
+                    LOG.trace("Gathering for node {} failure: ", deviceInfo.getLOGValue(), throwable);
+                }
                 calculateTimerDelay(timeCounter);
-                scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
+                if (throwable instanceof IllegalStateException) {
+                    stopScheduling(deviceInfo);
+                } else {
+                    scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
+                }
             }
         });
 
-        final long averangeTime = TimeUnit.MILLISECONDS.toSeconds(timeCounter.getAverageTimeBetweenMarks());
-        final long STATS_TIMEOUT_SEC = averangeTime > 0 ? 3 * averangeTime : DEFAULT_STATS_TIMEOUT_SEC;
-        final TimerTask timerTask = new TimerTask() {
-
-            @Override
-            public void run(final Timeout timeout) throws Exception {
-                if (!deviceStatisticsCollectionFuture.isDone()) {
-                    LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceContext
-                            .getDeviceState().getNodeId(), STATS_TIMEOUT_SEC);
-                    deviceStatisticsCollectionFuture.cancel(true);
-                }
+        final long averageTime = TimeUnit.MILLISECONDS.toSeconds(timeCounter.getAverageTimeBetweenMarks());
+        final long statsTimeoutSec = averageTime > 0 ? 3 * averageTime : DEFAULT_STATS_TIMEOUT_SEC;
+        final TimerTask timerTask = timeout -> {
+            if (!deviceStatisticsCollectionFuture.isDone()) {
+                LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceInfo.getLOGValue(), statsTimeoutSec);
+                deviceStatisticsCollectionFuture.cancel(true);
             }
         };
-        deviceContext.getTimer().newTimeout(timerTask, STATS_TIMEOUT_SEC, TimeUnit.SECONDS);
+
+        hashedWheelTimer.newTimeout(timerTask, statsTimeoutSec, TimeUnit.SECONDS);
     }
 
-    private void scheduleNextPolling(final DeviceContext deviceContext,
+    private void scheduleNextPolling(final DeviceState deviceState, // not read here; only forwarded to pollStatistics
+                                     final DeviceInfo deviceInfo,
                                      final StatisticsContext statisticsContext,
                                      final TimeCounter timeCounter) {
-        if (null != hashedWheelTimer) {
-            LOG.debug("SCHEDULING NEXT STATS POLLING for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
-            if (!shuttingDownStatisticsPolling) {
-                final Timeout pollTimeout = hashedWheelTimer.newTimeout(new TimerTask() {
-                    @Override
-                    public void run(final Timeout timeout) throws Exception {
-                        pollStatistics(deviceContext, statisticsContext, timeCounter);
-                    }
-                }, currentTimerDelay, TimeUnit.MILLISECONDS);
-                statisticsContext.setPollTimeout(pollTimeout);
-            }
-        } else {
-            LOG.debug("#!NOT SCHEDULING NEXT STATS POLLING for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("SCHEDULING NEXT STATISTICS POLLING for device: {}", deviceInfo.getNodeId());
+        }
+        if (!isStatisticsPollingOff) { // when polling is switched off no timeout is armed at all
+            final Timeout pollTimeout = hashedWheelTimer.newTimeout(
+                    timeout -> pollStatistics(
+                            deviceState,
+                            statisticsContext,
+                            timeCounter,
+                            deviceInfo),
+                    currentTimerDelay, // shared static delay, interpreted as milliseconds by the unit below
+                    TimeUnit.MILLISECONDS);
+            statisticsContext.setPollTimeout(pollTimeout); // hand the pending timeout to the context
         }
     }
 
     @VisibleForTesting
-    protected void calculateTimerDelay(final TimeCounter timeCounter) {
+    void calculateTimerDelay(final TimeCounter timeCounter) {
         final long averageStatisticsGatheringTime = timeCounter.getAverageTimeBetweenMarks();
         if (averageStatisticsGatheringTime > currentTimerDelay) {
             currentTimerDelay *= 2;
@@ -246,17 +204,18 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
     }
 
     @VisibleForTesting
-    protected static long getCurrentTimerDelay() {
+    static long getCurrentTimerDelay() { // package-private test accessor for the static polling delay (used as milliseconds in scheduleNextPolling)
         return currentTimerDelay;
     }
 
     @Override
-    public void onDeviceContextClosed(final DeviceContext deviceContext) {
-        final StatisticsContext statisticsContext = contexts.remove(deviceContext);
+    public void onDeviceContextLevelDown(final DeviceInfo deviceInfo) { // unregisters and closes the device's statistics context, then forwards the event
+        final StatisticsContext statisticsContext = contexts.remove(deviceInfo); // null when nothing is registered for this device
         if (null != statisticsContext) {
-            LOG.trace("Removing device context from stack. No more statistics gathering for node {}", deviceContext.getDeviceState().getNodeId());
+            LOG.debug("Removing device context from stack. No more statistics gathering for device: {}", deviceInfo.getLOGValue());
             statisticsContext.close();
         }
+        deviceTerminPhaseHandler.onDeviceContextLevelDown(deviceInfo); // forwarded even when no context was found; NOTE(review): NPE if setDeviceTerminationPhaseHandler was never called - confirm wiring
     }
 
     @Override
@@ -273,14 +232,15 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
         if (workModeGuard.tryAcquire()) {
             final StatisticsWorkMode targetWorkMode = input.getMode();
             if (!workMode.equals(targetWorkMode)) {
-                shuttingDownStatisticsPolling = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
+                isStatisticsPollingOff = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
                 // iterate through stats-ctx: propagate mode
-                for (final Map.Entry<DeviceContext, StatisticsContext> contextEntry : contexts.entrySet()) {
-                    final DeviceContext deviceContext = contextEntry.getKey();
-                    final StatisticsContext statisticsContext = contextEntry.getValue();
+                for (Map.Entry<DeviceInfo, StatisticsContext> entry : contexts.entrySet()) {
+                    final DeviceInfo deviceInfo = entry.getKey();
+                    final StatisticsContext statisticsContext = entry.getValue();
+                    final DeviceContext deviceContext = statisticsContext.gainDeviceContext();
                     switch (targetWorkMode) {
                         case COLLECTALL:
-                            scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
+                            scheduleNextPolling(statisticsContext.gainDeviceState(), deviceInfo, statisticsContext, new TimeCounter());
                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
                                 lifeCycleSource.setItemLifecycleListener(null);
                             }
@@ -295,7 +255,7 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
                             }
                             break;
                         default:
-                            LOG.warn("statistics work mode not supported: {}", targetWorkMode);
+                            LOG.warn("Statistics work mode not supported: {}", targetWorkMode);
                     }
                 }
                 workMode = targetWorkMode;
@@ -310,15 +270,70 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
         return result;
     }
 
+    @Override
+    public void startScheduling(final DeviceInfo deviceInfo) {
+        if (isStatisticsPollingOff) {
+            LOG.info("Statistics are shutdown for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        final StatisticsContext statisticsContext = contexts.get(deviceInfo);
+
+        if (statisticsContext == null) {
+            LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        if (statisticsContext.isSchedulingEnabled()) {
+            LOG.debug("Statistics scheduling is already enabled for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        LOG.info("Scheduling statistics poll for device: {}", deviceInfo.getNodeId());
+
+        statisticsContext.setSchedulingEnabled(true);
+        scheduleNextPolling(
+                statisticsContext.gainDeviceState(),
+                deviceInfo,
+                statisticsContext,
+                new TimeCounter()
+        );
+    }
+
+    @Override
+    public void stopScheduling(final DeviceInfo deviceInfo) {
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("Stopping statistics scheduling for device: {}", deviceInfo.getNodeId());
+        }
+        final StatisticsContext statisticsContext = contexts.get(deviceInfo);
+
+        if (statisticsContext == null) {
+            LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+        statisticsContext.setSchedulingEnabled(false);
+    }
+
     @Override
     public void close() {
         if (controlServiceRegistration != null) {
             controlServiceRegistration.close();
             controlServiceRegistration = null;
         }
-        for (final Iterator<Entry<DeviceContext, StatisticsContext>> iterator = Iterators
-                .consumingIterator(contexts.entrySet().iterator()); iterator.hasNext();) {
-            iterator.next().getValue().close();
+        for (final Iterator<StatisticsContext> iterator = Iterators.consumingIterator(contexts.values().iterator()); // Guava consuming iterator: removes each element from the backing collection as it is returned
+                iterator.hasNext();) {
+            iterator.next().close(); // close every statistics context that is still registered
         }
     }
+
+    @Override
+    public void setDeviceTerminationPhaseHandler(final DeviceTerminationPhaseHandler handler) {
+        this.deviceTerminPhaseHandler = handler;
+    }
+
+    @Override
+    public void setIsStatisticsPollingOff(boolean isStatisticsPollingOff){
+        this.isStatisticsPollingOff = isStatisticsPollingOff;
+    }
+
 }