Bug 7011 - Race condition in statistics collection related transaction chain handling
[openflowplugin.git] / openflowplugin-impl / src / main / java / org / opendaylight / openflowplugin / impl / statistics / StatisticsManagerImpl.java
index 07e4b6df929c12f80a0b3e32fb8e780930cc75f6..f0bcbdcce43c73509569ac126d932591d1be5ea0 100644 (file)
@@ -8,51 +8,43 @@
 
 package org.opendaylight.openflowplugin.impl.statistics;
 
-import javax.annotation.CheckForNull;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Verify;
 import com.google.common.collect.Iterators;
 import com.google.common.util.concurrent.FutureCallback;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
+import io.netty.util.HashedWheelTimer;
 import io.netty.util.Timeout;
 import io.netty.util.TimerTask;
 import java.util.Iterator;
+import java.util.Map;
+import java.util.Optional;
 import java.util.concurrent.CancellationException;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.Future;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
-import javax.annotation.CheckForNull;
 import javax.annotation.Nonnull;
-
 import org.opendaylight.controller.sal.binding.api.BindingAwareBroker;
 import org.opendaylight.controller.sal.binding.api.RpcProviderRegistry;
 import org.opendaylight.openflowplugin.api.openflow.device.DeviceContext;
+import org.opendaylight.openflowplugin.api.openflow.device.DeviceInfo;
+import org.opendaylight.openflowplugin.api.openflow.device.DeviceState;
 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceInitializationPhaseHandler;
 import org.opendaylight.openflowplugin.api.openflow.device.handlers.DeviceTerminationPhaseHandler;
-import org.opendaylight.openflowplugin.api.openflow.lifecycle.LifecycleConductor;
+import org.opendaylight.openflowplugin.api.openflow.lifecycle.LifecycleService;
 import org.opendaylight.openflowplugin.api.openflow.rpc.ItemLifeCycleSource;
 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsContext;
 import org.opendaylight.openflowplugin.api.openflow.statistics.StatisticsManager;
-import org.opendaylight.yang.gen.v1.urn.opendaylight.inventory.rev130819.NodeId;
+import org.opendaylight.openflowplugin.openflow.md.core.sal.convertor.ConvertorExecutor;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.ChangeStatisticsWorkModeInput;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutput;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.GetStatisticsWorkModeOutputBuilder;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsManagerControlService;
 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.openflowplugin.sm.control.rev150812.StatisticsWorkMode;
-import org.opendaylight.yang.gen.v1.urn.opendaylight.role.service.rev150727.OfpRole;
 import org.opendaylight.yangtools.yang.common.RpcError;
 import org.opendaylight.yangtools.yang.common.RpcResult;
 import org.opendaylight.yangtools.yang.common.RpcResultBuilder;
@@ -64,77 +56,77 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
     private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagerImpl.class);
 
     private static final long DEFAULT_STATS_TIMEOUT_SEC = 50L;
+    private final ConvertorExecutor converterExecutor;
 
     private DeviceInitializationPhaseHandler deviceInitPhaseHandler;
     private DeviceTerminationPhaseHandler deviceTerminPhaseHandler;
 
-    private final ConcurrentMap<NodeId, StatisticsContext> contexts = new ConcurrentHashMap<>();
+    private final ConcurrentMap<DeviceInfo, StatisticsContext> contexts = new ConcurrentHashMap<>();
 
     private static final long basicTimerDelay = 3000;
     private static long currentTimerDelay = basicTimerDelay;
-    private static long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
+    private static final long maximumTimerDelay = 900000; //wait max 15 minutes for next statistics
 
     private StatisticsWorkMode workMode = StatisticsWorkMode.COLLECTALL;
     private final Semaphore workModeGuard = new Semaphore(1, true);
-    private boolean shuttingDownStatisticsPolling;
+    private boolean isStatisticsPollingOff;
     private BindingAwareBroker.RpcRegistration<StatisticsManagerControlService> controlServiceRegistration;
 
-    private final LifecycleConductor conductor;
+    private final HashedWheelTimer hashedWheelTimer;
 
     @Override
     public void setDeviceInitializationPhaseHandler(final DeviceInitializationPhaseHandler handler) {
         deviceInitPhaseHandler = handler;
     }
 
-    public StatisticsManagerImpl(@CheckForNull final RpcProviderRegistry rpcProviderRegistry,
-                                 final boolean shuttingDownStatisticsPolling,
-                                 final LifecycleConductor lifecycleConductor) {
+    public StatisticsManagerImpl(final RpcProviderRegistry rpcProviderRegistry,
+                                 final boolean isStatisticsPollingOff,
+                                 final HashedWheelTimer hashedWheelTimer,
+                                 final ConvertorExecutor convertorExecutor) {
         Preconditions.checkArgument(rpcProviderRegistry != null);
-        this.controlServiceRegistration = Preconditions.checkNotNull(rpcProviderRegistry.addRpcImplementation(
-                StatisticsManagerControlService.class, this));
-        this.shuttingDownStatisticsPolling = shuttingDownStatisticsPolling;
-        this.conductor = lifecycleConductor;
+           this.converterExecutor = convertorExecutor;
+        this.controlServiceRegistration = Preconditions.checkNotNull(
+                rpcProviderRegistry.addRpcImplementation(StatisticsManagerControlService.class, this)
+        );
+        this.isStatisticsPollingOff = isStatisticsPollingOff;
+        this.hashedWheelTimer = hashedWheelTimer;
     }
 
     @Override
-    public void onDeviceContextLevelUp(final NodeId nodeId) throws Exception {
-
-        final DeviceContext deviceContext = Preconditions.checkNotNull(conductor.getDeviceContext(nodeId));
-
-        final StatisticsContext statisticsContext = new StatisticsContextImpl(nodeId, shuttingDownStatisticsPolling, conductor);
-        Verify.verify(contexts.putIfAbsent(nodeId, statisticsContext) == null, "StatisticsCtx still not closed for Node {}", nodeId);
-
-        if (shuttingDownStatisticsPolling) {
-            LOG.info("Statistics is shutdown for node:{}", nodeId);
-        } else {
-            LOG.info("Schedule Statistics poll for node:{}", nodeId);
-            scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
-        }
-
-        deviceContext.getDeviceState().setDeviceSynchronized(true);
-        deviceInitPhaseHandler.onDeviceContextLevelUp(nodeId);
+    public void onDeviceContextLevelUp(final DeviceInfo deviceInfo,
+                                       final LifecycleService lifecycleService) throws Exception {
+
+        final StatisticsContext statisticsContext =
+                new StatisticsContextImpl(
+                        deviceInfo,
+                        isStatisticsPollingOff,
+                        lifecycleService,
+                        converterExecutor,
+                        this);
+        Verify.verify(
+                contexts.putIfAbsent(deviceInfo, statisticsContext) == null,
+                "StatisticsCtx still not closed for Node {}", deviceInfo.getLOGValue()
+        );
+        lifecycleService.setStatContext(statisticsContext);
+        deviceInitPhaseHandler.onDeviceContextLevelUp(deviceInfo, lifecycleService);
     }
 
-    private void pollStatistics(final DeviceContext deviceContext,
-                                final StatisticsContext statisticsContext,
-                                final TimeCounter timeCounter) {
-        
-        if (!deviceContext.getDeviceState().isValid()) {
-            LOG.debug("Session for device {} is not valid.", deviceContext.getDeviceState().getNodeId().getValue());
-            return;
-        }
-        if (!deviceContext.getDeviceState().isStatisticsPollingEnabled()) {
-            LOG.debug("StatisticsPolling is disabled for device: {} , try later", deviceContext.getDeviceState().getNodeId());
-            scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
+    @VisibleForTesting
+    void pollStatistics(final DeviceState deviceState,
+                        final StatisticsContext statisticsContext,
+                        final TimeCounter timeCounter,
+                        final DeviceInfo deviceInfo) {
+
+        if (!statisticsContext.isSchedulingEnabled()) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Disabled statistics scheduling for device: {}", deviceInfo.getNodeId().getValue());
+            }
             return;
         }
 
-        if (!OfpRole.BECOMEMASTER.equals(deviceContext.getDeviceState().getRole())) {
-            LOG.debug("Role is not Master so we don't want to poll any stat for device: {}", deviceContext.getDeviceState().getNodeId());
-            scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
-            return;
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("POLLING ALL STATISTICS for device: {}", deviceInfo.getNodeId());
         }
-        LOG.debug("POLLING ALL STATS for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
         timeCounter.markStart();
         final ListenableFuture<Boolean> deviceStatisticsCollectionFuture = statisticsContext.gatherDynamicData();
         Futures.addCallback(deviceStatisticsCollectionFuture, new FutureCallback<Boolean>() {
@@ -142,51 +134,54 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
             public void onSuccess(final Boolean o) {
                 timeCounter.addTimeMark();
                 calculateTimerDelay(timeCounter);
-                scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
+                scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
             }
 
             @Override
             public void onFailure(@Nonnull final Throwable throwable) {
                 timeCounter.addTimeMark();
-                LOG.warn("Statistics gathering for single node was not successful: {}", throwable.getMessage());
-                LOG.trace("Statistics gathering for single node was not successful.. ", throwable);
+                LOG.warn("Statistics gathering for single node {} was not successful: {}", deviceInfo.getLOGValue(),
+                        throwable.getMessage());
+                if (LOG.isTraceEnabled()) {
+                    LOG.trace("Gathering for node {} failure: ", deviceInfo.getLOGValue(), throwable);
+                }
                 calculateTimerDelay(timeCounter);
-                if (throwable instanceof CancellationException) {
-                    /** This often happens when something wrong with akka or DS, so closing connection will help to restart device **/
-                    conductor.closeConnection(deviceContext.getDeviceState().getNodeId());
+                if (throwable instanceof IllegalStateException) {
+                    stopScheduling(deviceInfo);
                 } else {
-                    scheduleNextPolling(deviceContext, statisticsContext, timeCounter);
+                    scheduleNextPolling(deviceState, deviceInfo, statisticsContext, timeCounter);
                 }
             }
         });
 
         final long averageTime = TimeUnit.MILLISECONDS.toSeconds(timeCounter.getAverageTimeBetweenMarks());
-        final long STATS_TIMEOUT_SEC = averageTime > 0 ? 3 * averageTime : DEFAULT_STATS_TIMEOUT_SEC;
-        final TimerTask timerTask = new TimerTask() {
-
-            @Override
-            public void run(final Timeout timeout) throws Exception {
-                if (!deviceStatisticsCollectionFuture.isDone()) {
-                    LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceContext
-                            .getDeviceState().getNodeId(), STATS_TIMEOUT_SEC);
-                    deviceStatisticsCollectionFuture.cancel(true);
-                }
+        final long statsTimeoutSec = averageTime > 0 ? 3 * averageTime : DEFAULT_STATS_TIMEOUT_SEC;
+        final TimerTask timerTask = timeout -> {
+            if (!deviceStatisticsCollectionFuture.isDone()) {
+                LOG.info("Statistics collection for node {} still in progress even after {} secs", deviceInfo.getLOGValue(), statsTimeoutSec);
+                deviceStatisticsCollectionFuture.cancel(true);
             }
         };
-        conductor.newTimeout(timerTask, STATS_TIMEOUT_SEC, TimeUnit.SECONDS);
+
+        hashedWheelTimer.newTimeout(timerTask, statsTimeoutSec, TimeUnit.SECONDS);
     }
 
-    private void scheduleNextPolling(final DeviceContext deviceContext,
+    private void scheduleNextPolling(final DeviceState deviceState,
+                                     final DeviceInfo deviceInfo,
                                      final StatisticsContext statisticsContext,
                                      final TimeCounter timeCounter) {
-        LOG.debug("SCHEDULING NEXT STATS POLLING for device: {}", deviceContext.getDeviceState().getNodeId().getValue());
-        if (!shuttingDownStatisticsPolling) {
-            final Timeout pollTimeout = conductor.newTimeout(new TimerTask() {
-                @Override
-                public void run(final Timeout timeout) throws Exception {
-                    pollStatistics(deviceContext, statisticsContext, timeCounter);
-                }
-            }, currentTimerDelay, TimeUnit.MILLISECONDS);
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("SCHEDULING NEXT STATISTICS POLLING for device: {}", deviceInfo.getNodeId());
+        }
+        if (!isStatisticsPollingOff) {
+            final Timeout pollTimeout = hashedWheelTimer.newTimeout(
+                    timeout -> pollStatistics(
+                            deviceState,
+                            statisticsContext,
+                            timeCounter,
+                            deviceInfo),
+                    currentTimerDelay,
+                    TimeUnit.MILLISECONDS);
             statisticsContext.setPollTimeout(pollTimeout);
         }
     }
@@ -214,13 +209,13 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
     }
 
     /**
      * Removes the statistics context registered for the device, closes it if one was
      * present, and then notifies the next termination phase handler.
      */
     @Override
-    public void onDeviceContextLevelDown(final DeviceContext deviceContext) {
-        final StatisticsContext statisticsContext = contexts.remove(deviceContext.getDeviceState().getNodeId());
+    public void onDeviceContextLevelDown(final DeviceInfo deviceInfo) {
+        final StatisticsContext statisticsContext = contexts.remove(deviceInfo);
         if (null != statisticsContext) {
-            LOG.trace("Removing device context from stack. No more statistics gathering for node {}", deviceContext.getDeviceState().getNodeId());
+            LOG.debug("Removing device context from stack. No more statistics gathering for device: {}", deviceInfo.getLOGValue());
             statisticsContext.close();
         }
-        deviceTerminPhaseHandler.onDeviceContextLevelDown(deviceContext);
+        deviceTerminPhaseHandler.onDeviceContextLevelDown(deviceInfo); // delegated even when no context was registered
     }
 
     @Override
@@ -237,13 +232,15 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
         if (workModeGuard.tryAcquire()) {
             final StatisticsWorkMode targetWorkMode = input.getMode();
             if (!workMode.equals(targetWorkMode)) {
-                shuttingDownStatisticsPolling = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
+                isStatisticsPollingOff = StatisticsWorkMode.FULLYDISABLED.equals(targetWorkMode);
                 // iterate through stats-ctx: propagate mode
-                for (final StatisticsContext statisticsContext : contexts.values()) {
-                    final DeviceContext deviceContext = statisticsContext.getDeviceContext();
+                for (Map.Entry<DeviceInfo, StatisticsContext> entry : contexts.entrySet()) {
+                    final DeviceInfo deviceInfo = entry.getKey();
+                    final StatisticsContext statisticsContext = entry.getValue();
+                    final DeviceContext deviceContext = statisticsContext.gainDeviceContext();
                     switch (targetWorkMode) {
                         case COLLECTALL:
-                            scheduleNextPolling(deviceContext, statisticsContext, new TimeCounter());
+                            scheduleNextPolling(statisticsContext.gainDeviceState(), deviceInfo, statisticsContext, new TimeCounter());
                             for (final ItemLifeCycleSource lifeCycleSource : deviceContext.getItemLifeCycleSourceRegistry().getLifeCycleSources()) {
                                 lifeCycleSource.setItemLifecycleListener(null);
                             }
@@ -258,7 +255,7 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
                             }
                             break;
                         default:
-                            LOG.warn("statistics work mode not supported: {}", targetWorkMode);
+                            LOG.warn("Statistics work mode not supported: {}", targetWorkMode);
                     }
                 }
                 workMode = targetWorkMode;
@@ -273,6 +270,50 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
         return result;
     }
 
+    @Override
+    public void startScheduling(final DeviceInfo deviceInfo) {
+        if (isStatisticsPollingOff) {
+            LOG.info("Statistics are shutdown for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        final StatisticsContext statisticsContext = contexts.get(deviceInfo);
+
+        if (statisticsContext == null) {
+            LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        if (statisticsContext.isSchedulingEnabled()) {
+            LOG.debug("Statistics scheduling is already enabled for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+
+        LOG.info("Scheduling statistics poll for device: {}", deviceInfo.getNodeId());
+
+        statisticsContext.setSchedulingEnabled(true);
+        scheduleNextPolling(
+                statisticsContext.gainDeviceState(),
+                deviceInfo,
+                statisticsContext,
+                new TimeCounter()
+        );
+    }
+
+    @Override
+    public void stopScheduling(final DeviceInfo deviceInfo) {
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("Stopping statistics scheduling for device: {}", deviceInfo.getNodeId());
+        }
+        final StatisticsContext statisticsContext = contexts.get(deviceInfo);
+
+        if (statisticsContext == null) {
+            LOG.warn("Statistics context not found for device: {}", deviceInfo.getNodeId());
+            return;
+        }
+        statisticsContext.setSchedulingEnabled(false);
+    }
+
     @Override
     public void close() {
         if (controlServiceRegistration != null) {
@@ -289,4 +330,10 @@ public class StatisticsManagerImpl implements StatisticsManager, StatisticsManag
     public void setDeviceTerminationPhaseHandler(final DeviceTerminationPhaseHandler handler) {
         this.deviceTerminPhaseHandler = handler;
     }
+
+    @Override
+    public void setIsStatisticsPollingOff(boolean isStatisticsPollingOff){
+        this.isStatisticsPollingOff = isStatisticsPollingOff;
+    }
+
 }