Apply a workaround for the isolation of quarantined node 79/90279/8
author Tejas Nevrekar <tejas.nevrekar@gmail.com>
Thu, 4 Jun 2020 12:44:23 +0000 (18:14 +0530)
committer Robert Varga <nite@hq.sk>
Fri, 19 Jun 2020 10:30:27 +0000 (10:30 +0000)
Listen to the AssociationErrorEvent in addition to
the ThisActorSystemQuarantinedEvent so that in cases
like isolate followed by unisolate, the quarantine
restart is triggered.

JIRA: CONTROLLER-1941
Change-Id: I937a9411c90fbbbaad73769efbc4e996369448a8
Signed-off-by: Tejas Nevrekar <tejas.nevrekar@gmail.com>
opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/common/actor/QuarantinedMonitorActor.java
opendaylight/md-sal/sal-clustering-commons/src/test/java/org/opendaylight/controller/cluster/common/actor/QuarantinedMonitorActorTest.java

index 52df6ab7b388314879e72160890a624fc0104529..ff8bfc8ff62575e9e5af8271324a96501a15c90d 100644 (file)
@@ -8,10 +8,15 @@
 
 package org.opendaylight.controller.cluster.common.actor;
 
+import akka.actor.Address;
 import akka.actor.Props;
 import akka.actor.UntypedAbstractActor;
 import akka.japi.Effect;
+import akka.remote.AssociationErrorEvent;
+import akka.remote.RemotingLifecycleEvent;
 import akka.remote.ThisActorSystemQuarantinedEvent;
+import java.util.HashSet;
+import java.util.Set;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -24,20 +29,23 @@ import org.slf4j.LoggerFactory;
  *
  */
 public class QuarantinedMonitorActor extends UntypedAbstractActor {
+    public static final String ADDRESS = "quarantined-monitor";
 
     private static final Logger LOG = LoggerFactory.getLogger(QuarantinedMonitorActor.class);
-
-    public static final String ADDRESS = "quarantined-monitor";
+    private static final Integer MESSAGE_THRESHOLD = 10;
 
     private final Effect callback;
     private boolean quarantined;
 
+    private Set<Address> addressSet = new HashSet<>();
+    private int count = 0;
+
     protected QuarantinedMonitorActor(final Effect callback) { // callback is stored here and run via callback.apply() once quarantine is detected
         this.callback = callback;
 
         LOG.debug("Created QuarantinedMonitorActor");
 
-        getContext().system().eventStream().subscribe(getSelf(), ThisActorSystemQuarantinedEvent.class);
+        getContext().system().eventStream().subscribe(getSelf(), RemotingLifecycleEvent.class); // broader subscription so AssociationErrorEvent also reaches onReceive
     }
 
     @Override
@@ -62,6 +70,28 @@ public class QuarantinedMonitorActor extends UntypedAbstractActor {
 
             // execute the callback
             callback.apply();
+        } else  if (message instanceof AssociationErrorEvent) {
+            String errorMessage = message.toString();
+            LOG.trace("errorMessage:{}", errorMessage);
+            if (errorMessage.contains("The remote system has a UID that has been quarantined")) {
+                Address address = ((AssociationErrorEvent) message).getRemoteAddress();
+                addressSet.add(address);
+                count++;
+                LOG.trace("address:{} addressSet: {} count:{}", address, addressSet, count);
+                if (count >= MESSAGE_THRESHOLD && addressSet.size() > 1) {
+                    count = 0;
+                    addressSet.clear();
+                    final AssociationErrorEvent event = (AssociationErrorEvent) message;
+                    LOG.warn("Got quarantined via AssociationEvent by {}", event.remoteAddress());
+                    quarantined = true;
+
+                    // execute the callback
+                    callback.apply();
+                }
+            } else if (errorMessage.contains("The remote system explicitly disassociated")) {
+                count = 0;
+                addressSet.clear();
+            }
         }
     }
 
index ceb21e13418c916236d3b3df91bb98f3bc5baac7..caffa698f14311949657021ac69216ad2088b7f2 100644 (file)
@@ -58,6 +58,28 @@ public class QuarantinedMonitorActorTest {
         verify(callback, timeout(1000)).apply();
     }
 
+    @Test // Quarantine reported through repeated AssociationErrorEvents must trigger the callback.
+    public void testOnReceiveQuarantinedAsAssociation() throws Exception {
+        for (int i = 0; i < 9; i++) { // nine quarantine errors from REMOTE: one short of the actor's MESSAGE_THRESHOLD of 10
+            final Throwable t =
+                    new RuntimeException("The remote system has a UID that has been quarantined. Association aborted.");
+            final InvalidAssociation cause = InvalidAssociation.apply(LOCAL, REMOTE, t, Option.apply(null));
+            final AssociationErrorEvent event =
+                    new AssociationErrorEvent(cause, LOCAL, REMOTE, true, Logging.ErrorLevel());
+            actor.tell(event, ActorRef.noSender());
+        }
+
+        final Address local1 = Address.apply("http", "local1");
+        final Address remote1 = Address.apply("http", "remote1"); // second remote address; presumably differs from REMOTE (TODO confirm) so the actor's address set exceeds one entry
+        final Throwable t1 =
+                new RuntimeException("The remote system has a UID that has been quarantined. Association aborted.");
+        final InvalidAssociation cause1 = InvalidAssociation.apply(local1, remote1, t1, Option.apply(null));
+        final AssociationErrorEvent event1 =
+                new AssociationErrorEvent(cause1, local1, remote1, true, Logging.ErrorLevel());
+        actor.tell(event1, ActorRef.noSender()); // tenth quarantine error overall, reaching the threshold
+        verify(callback, timeout(1000)).apply(); // the actor processes messages asynchronously, so wait (up to timeout(1000)) for the callback
+    }
+
     @Test
     public void testOnReceiveAnother() throws Exception {
         final Address local = Address.apply("http", "local");