Fix issues during connection flap with scaled pcc sessions. 39/92039/5
authorDeepthi V V <dvv@luminanetworks.com>
Tue, 11 Aug 2020 12:31:55 +0000 (18:01 +0530)
committerOlivier Dugeon <olivier.dugeon@orange.com>
Fri, 30 Apr 2021 10:48:49 +0000 (10:48 +0000)
Back to back pcep sessions results in exceptions and sessions cannot
be established further.

1. Close tx chain when Topology Node State is released
2. Deadlock occurs in TopologyStatsProviderImpl.
T1: Unbind call creates a tx using tx chain and commits it. This
triggers onTxChainFailed call. Lock acquired order:
TopologyStatsProviderImpl -> PingPongTransactionChain
T2: 2nd Unbind call locks on TopologyStatsProviderImpl and tries to
create tx for which lock on PingPongTransactionChain is required.
Thus resulting in deadlock.

Fix: init, closed and recreateTxChain will synhronise on TopologyStatsProviderImpl.
All other tx will be synchronised on txChain.

JIRA: BGPCEP-920
Change-Id: I1f727121843b45a5e6fc722a94319538c7522deb
Signed-off-by: Deepthi V V <dvv@luminanetworks.com>
pcep/topology/topology-provider/src/main/java/org/opendaylight/bgpcep/pcep/topology/provider/AbstractTopologySessionListener.java
pcep/topology/topology-provider/src/main/java/org/opendaylight/bgpcep/pcep/topology/provider/ServerSessionManager.java
pcep/topology/topology-provider/src/main/java/org/opendaylight/bgpcep/pcep/topology/provider/TopologyNodeState.java
pcep/topology/topology-provider/src/main/java/org/opendaylight/bgpcep/pcep/topology/provider/session/stats/SessionStateImpl.java
pcep/topology/topology-stats/src/main/java/org/opendaylight/bgpcep/pcep/topology/stats/provider/TopologyStatsProviderImpl.java

index a85730af250a2b05b5eabbd4779c02d7bd420e76..706b34f958dc91cd68374eca7b98264830333428 100644 (file)
@@ -343,6 +343,7 @@ public abstract class AbstractTopologySessionListener<S, L> implements TopologyS
     private void clearNodeState() {
         if (this.nodeState != null) {
             this.serverSessionManager.unbind(this.nodeState.getNodeId());
+            LOG.info("Clear Node state: {}", this.nodeState.getNodeId().getKey().getNodeId().getValue());
             this.nodeState = null;
         }
     }
index eaa343a295560f4dece1d21127cdea09212ad5ad..58b1eb6d7366d3241e72bf27ea731fd13713de61 100644 (file)
@@ -127,6 +127,7 @@ final class ServerSessionManager implements PCEPSessionListenerFactory, Topology
             return;
         }
         this.nodes.remove(createNodeId(session.getRemoteAddress()));
+        this.state.remove(createNodeId(session.getRemoteAddress()));
         if (nodeState != null) {
             LOG.debug("Node {} unbound", nodeState.getNodeId());
             nodeState.released(persistNode);
index 63abe010df709615013064193ed4e86f3b05483e..3dd2ccc1b0f6a4f0f8fb14a248be0cbc507ba958 100644 (file)
@@ -16,6 +16,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.checkerframework.checker.lock.qual.GuardedBy;
 import org.eclipse.jdt.annotation.NonNull;
 import org.opendaylight.mdsal.binding.api.DataBroker;
@@ -44,6 +45,7 @@ import org.slf4j.LoggerFactory;
 // This class is thread-safe
 final class TopologyNodeState implements AutoCloseable, TransactionChainListener {
     private static final Logger LOG = LoggerFactory.getLogger(TopologyNodeState.class);
+    private final AtomicBoolean closed = new AtomicBoolean(false);
     private final Map<String, Metadata> metadata = new HashMap<>();
     private final KeyedInstanceIdentifier<Node, NodeKey> nodeId;
     private final TransactionChain chain;
@@ -101,6 +103,7 @@ final class TopologyNodeState implements AutoCloseable, TransactionChainListener
             }, MoreExecutors.directExecutor());
         }
 
+        close();
         this.lastReleased = System.nanoTime();
     }
 
@@ -150,6 +153,9 @@ final class TopologyNodeState implements AutoCloseable, TransactionChainListener
         // FIXME: flip internal state, so that the next attempt to update fails, triggering node reconnect
         LOG.error("Unexpected transaction failure in node {} transaction {}",
                 this.nodeId, transaction.getIdentifier(), cause);
+        if (closed.compareAndSet(false, true)) {
+            pchain.close();
+        }
     }
 
     @Override
@@ -159,7 +165,9 @@ final class TopologyNodeState implements AutoCloseable, TransactionChainListener
 
     @Override
     public synchronized void close() {
-        this.chain.close();
+        if (closed.compareAndSet(false, true)) {
+            this.chain.close();
+        }
     }
 
     private synchronized void putTopologyNode() {
@@ -176,7 +184,7 @@ final class TopologyNodeState implements AutoCloseable, TransactionChainListener
 
             @Override
             public void onFailure(final Throwable throwable) {
-                LOG.trace("Put topology Node failed {}, value {}", TopologyNodeState.this.nodeId, node, throwable);
+                LOG.error("Put topology Node failed {}, value {}", TopologyNodeState.this.nodeId, node, throwable);
             }
         }, MoreExecutors.directExecutor());
     }
index 3e2cc294da045e8caa9f15dfd86e20fcefee2647..6ecbf6eaff2aee388039272e33eda14185cae44c 100644 (file)
@@ -135,6 +135,9 @@ public final class SessionStateImpl implements PcepSessionState {
 
     @Override
     public Messages getMessages() {
+        if (pcepSessionState == null) {
+            return null;
+        }
         return new MessagesBuilder(this.pcepSessionState.getMessages())
                 .setReplyTime(setReplyTime())
                 .addAugmentation(createStatefulMessages())
index 5546c116d339baabedcac4602ad949f32e0de85b..e0d7a6d54997978856f21bad6bb08f2268214be0 100644 (file)
@@ -78,35 +78,53 @@ public final class TopologyStatsProviderImpl implements TransactionChainListener
     @SuppressWarnings("checkstyle:IllegalCatch")
     @SuppressFBWarnings(value = "UPM_UNCALLED_PRIVATE_METHOD",
             justification = "https://github.com/spotbugs/spotbugs/issues/811")
-    private synchronized void updatePcepStats() {
-        final WriteTransaction tx = TopologyStatsProviderImpl.this.transactionChain.newWriteOnlyTransaction();
-
-        try {
-            for (final Map.Entry<KeyedInstanceIdentifier<Node, NodeKey>, PcepSessionState> entry
-                    : this.statsMap.entrySet()) {
-                if (this.statsPendingDelete.contains(entry.getKey())) {
-                    continue;
+    private void updatePcepStats() {
+        synchronized (this.transactionChain) {
+            WriteTransaction tx = null;
+
+            try {
+                tx = TopologyStatsProviderImpl.this.transactionChain.newWriteOnlyTransaction();
+                for (final Map.Entry<KeyedInstanceIdentifier<Node, NodeKey>, PcepSessionState> entry
+                        : this.statsMap.entrySet()) {
+                    if (this.statsPendingDelete.contains(entry.getKey())) {
+                        continue;
+                    }
+                    final PcepTopologyNodeStatsAug nodeStatsAug = new PcepTopologyNodeStatsAugBuilder()
+                            .setPcepSessionState(new PcepSessionStateBuilder(entry.getValue()).build()).build();
+                    final InstanceIdentifier<PcepTopologyNodeStatsAug> statId =
+                            entry.getKey().augmentation(PcepTopologyNodeStatsAug.class);
+                    tx.put(LogicalDatastoreType.OPERATIONAL, statId, nodeStatsAug);
                 }
-                final PcepTopologyNodeStatsAug nodeStatsAug = new PcepTopologyNodeStatsAugBuilder()
-                        .setPcepSessionState(new PcepSessionStateBuilder(entry.getValue()).build()).build();
-                final InstanceIdentifier<PcepTopologyNodeStatsAug> statId =
-                        entry.getKey().augmentation(PcepTopologyNodeStatsAug.class);
-                tx.put(LogicalDatastoreType.OPERATIONAL, statId, nodeStatsAug);
-            }
-            tx.commit().addCallback(new FutureCallback<CommitInfo>() {
-                @Override
-                public void onSuccess(final CommitInfo result) {
-                    LOG.debug("Successfully committed Topology stats update");
+                tx.commit().addCallback(new FutureCallback<CommitInfo>() {
+                    @Override
+                    public void onSuccess(final CommitInfo result) {
+                        LOG.debug("Successfully committed Topology stats update");
+                    }
+
+                    @Override
+                    public void onFailure(final Throwable ex) {
+                        LOG.error("Failed to commit Topology stats update", ex);
+                    }
+                }, MoreExecutors.directExecutor());
+            } catch (final Exception e) {
+                if (tx != null) {
+                    LOG.warn("Failed to prepare Tx {} for PCEP stats update", tx.getIdentifier(), e);
+                    tx.cancel();
+                    this.transactionChain.close();
+                    recreateTxChain();
                 }
+            }
+        }
+    }
 
-                @Override
-                public void onFailure(final Throwable ex) {
-                    LOG.error("Failed to commit Topology stats update", ex);
-                }
-            }, MoreExecutors.directExecutor());
+    @SuppressWarnings("checkstyle:IllegalCatch")
+    private synchronized void recreateTxChain() {
+        try {
+            if (!closed.get()) {
+                transactionChain = dataBroker.createMergingTransactionChain(this);
+            }
         } catch (final Exception e) {
-            LOG.warn("Failed to prepare Tx for PCEP stats update", e);
-            tx.cancel();
+            LOG.error("Failed to recreate transaction chain {}", transactionChain, e);
         }
     }
 
@@ -115,58 +133,75 @@ public final class TopologyStatsProviderImpl implements TransactionChainListener
         if (closed.compareAndSet(false, true)) {
             LOG.info("Closing TopologyStatsProvider service.");
             this.scheduleTask.cancel(true);
-            final WriteTransaction wTx = this.transactionChain.newWriteOnlyTransaction();
+            this.transactionChain.close();
+            final WriteTransaction wTx = this.dataBroker.newWriteOnlyTransaction();
             for (final KeyedInstanceIdentifier<Node, NodeKey> statId : this.statsMap.keySet()) {
                 wTx.delete(LogicalDatastoreType.OPERATIONAL, statId);
             }
             wTx.commit().get();
             this.statsMap.clear();
-            this.transactionChain.close();
             this.scheduler.shutdown();
         }
     }
 
     @Override
-    public synchronized void onTransactionChainFailed(final TransactionChain chain,
+    public void onTransactionChainFailed(final TransactionChain chain,
             final Transaction transaction, final Throwable cause) {
         LOG.error("Transaction chain {} failed for tx {}",
                 chain, transaction != null ? transaction.getIdentifier() : null, cause);
-
-        if (!closed.get()) {
-            transactionChain.close();
-            transactionChain = dataBroker.createMergingTransactionChain(this);
+        chain.close();
+        if (chain == this.transactionChain) {
+            recreateTxChain();
         }
     }
 
     @Override
-    public synchronized void onTransactionChainSuccessful(final TransactionChain chain) {
+    public void onTransactionChainSuccessful(final TransactionChain chain) {
         LOG.debug("Transaction chain {} successful.", chain);
     }
 
     @Override
-    public synchronized void bind(final KeyedInstanceIdentifier<Node, NodeKey> nodeId,
+    public void bind(final KeyedInstanceIdentifier<Node, NodeKey> nodeId,
             final PcepSessionState sessionState) {
-        this.statsMap.put(nodeId, sessionState);
+        LOG.info("Bind: {}", nodeId.getKey().getNodeId());
+        synchronized (this.transactionChain) {
+            this.statsMap.put(nodeId, sessionState);
+        }
     }
 
+    @SuppressWarnings("checkstyle:IllegalCatch")
     @Override
-    public synchronized void unbind(final KeyedInstanceIdentifier<Node, NodeKey> nodeId) {
-        this.statsMap.remove(nodeId);
-        this.statsPendingDelete.add(nodeId);
-        final WriteTransaction wTx = this.transactionChain.newWriteOnlyTransaction();
-        wTx.delete(LogicalDatastoreType.OPERATIONAL, nodeId);
-        wTx.commit().addCallback(new FutureCallback<CommitInfo>() {
-            @Override
-            public void onSuccess(final CommitInfo result) {
-                LOG.debug("Successfully removed Pcep Node stats {}.", nodeId.getKey().getNodeId());
-                TopologyStatsProviderImpl.this.statsPendingDelete.remove(nodeId);
-            }
-
-            @Override
-            public void onFailure(final Throwable ex) {
-                LOG.warn("Failed to remove Pcep Node stats {}.", nodeId.getKey().getNodeId(), ex);
-                TopologyStatsProviderImpl.this.statsPendingDelete.remove(nodeId);
+    public void unbind(final KeyedInstanceIdentifier<Node, NodeKey> nodeId) {
+        synchronized (this.transactionChain) {
+            LOG.info("Unbind: {}", nodeId.getKey().getNodeId());
+            this.statsMap.remove(nodeId);
+            this.statsPendingDelete.add(nodeId);
+            WriteTransaction tx = null;
+            try {
+                tx = this.transactionChain.newWriteOnlyTransaction();
+                tx.delete(LogicalDatastoreType.OPERATIONAL, nodeId);
+                tx.commit().addCallback(new FutureCallback<CommitInfo>() {
+                    @Override
+                    public void onSuccess(final CommitInfo result) {
+                        LOG.info("Successfully removed Pcep Node stats {}.", nodeId.getKey().getNodeId());
+                        TopologyStatsProviderImpl.this.statsPendingDelete.remove(nodeId);
+                    }
+
+                    @Override
+                    public void onFailure(final Throwable ex) {
+                        LOG.warn("Failed to remove Pcep Node stats {}.", nodeId.getKey().getNodeId(), ex);
+                        TopologyStatsProviderImpl.this.statsPendingDelete.remove(nodeId);
+                    }
+                }, MoreExecutors.directExecutor());
+            } catch (final Exception e) {
+                if (tx != null) {
+                    LOG.warn("Failed to prepare Tx {} for Pcep Node stats delete for node {}.", tx.getIdentifier(),
+                            nodeId.getKey().getNodeId(), e);
+                    tx.cancel();
+                    this.transactionChain.close();
+                    recreateTxChain();
+                }
             }
-        }, MoreExecutors.directExecutor());
+        }
     }
 }