Fix default schema cache failures 75/72775/1
authorTom Pantelis <tompantelis@gmail.com>
Fri, 8 Jun 2018 02:45:55 +0000 (22:45 -0400)
committerTom Pantelis <tompantelis@gmail.com>
Fri, 8 Jun 2018 02:45:55 +0000 (22:45 -0400)
We've these seen intermittent failures on jenkins:

 Caused by: java.lang.IllegalArgumentException: Unable to create cache directory at cache/schema
     at com.google.common.base.Preconditions.checkArgument(Preconditions.java:210)
     at org.opendaylight.yangtools.yang.model.repo.util.FilesystemSchemaSourceCache.<init>(FilesystemSchemaSourceCache.java:74)
     at org.opendaylight.netconf.topology.AbstractNetconfTopology.<clinit>(AbstractNetconfTopology.java:149)

In FilesystemSchemaSourceCache"

  if (!storageDirectory.exists()) {
      checkArgument(storageDirectory.mkdirs(), "Unable to create cache directory at %s",
              storageDirectory);
  }

mkdirs returns false if the dir/file already exists - this seems the likely reason
in this case even though it checks exists() just prior. This scenario could happen
if there's a race where some other component interleaves and creates the dir in
between the exists and mkdirs calls.

To protect against this sceniario, AbstractNetconfTopology and NetconfTopologyUtils
were changed to retry if FilesystemSchemaSourceCache throws an IAE. Also they no
longer fail class initialization on failure.

Change-Id: I9f7ec134e7fd817aa753f0db175bf5620cc52ff4
Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
netconf/netconf-topology-singleton/src/main/java/org/opendaylight/netconf/topology/singleton/impl/utils/NetconfTopologyUtils.java
netconf/netconf-topology/src/main/java/org/opendaylight/netconf/topology/AbstractNetconfTopology.java

index eb34152e937e75de09e00651849ba617a6fc6618..a881db4c2f6d31754d308c29e4b75bfcd8711c88 100644 (file)
@@ -9,11 +9,13 @@
 package org.opendaylight.netconf.topology.singleton.impl.utils;
 
 import com.google.common.base.Strings;
+import com.google.common.util.concurrent.Uninterruptibles;
 import java.io.File;
 import java.math.BigDecimal;
 import java.net.InetSocketAddress;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import org.opendaylight.netconf.api.DocumentedException;
 import org.opendaylight.netconf.sal.connect.netconf.NetconfDevice;
 import org.opendaylight.netconf.sal.connect.netconf.NetconfStateSchemasResolverImpl;
@@ -73,12 +75,6 @@ public final class NetconfTopologyUtils {
     public static final SharedSchemaRepository DEFAULT_SCHEMA_REPOSITORY =
             new SharedSchemaRepository(DEFAULT_SCHEMA_REPOSITORY_NAME);
 
-
-     // The default <code>FilesystemSchemaSourceCache</code>, which stores cached files in <code>cache/schema</code>.
-    public static final FilesystemSchemaSourceCache<YangTextSchemaSource> DEFAULT_CACHE =
-            new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class,
-                    new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY));
-
     public static final InMemorySchemaSourceCache<ASTSchemaSource> DEFAULT_AST_CACHE =
             InMemorySchemaSourceCache.createSoftCache(DEFAULT_SCHEMA_REPOSITORY, ASTSchemaSource.class);
 
@@ -102,10 +98,33 @@ public final class NetconfTopologyUtils {
         SCHEMA_RESOURCES_DTO_MAP.put(DEFAULT_CACHE_DIRECTORY,
                 new NetconfDevice.SchemaResourcesDTO(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY,
                         DEFAULT_SCHEMA_CONTEXT_FACTORY, new NetconfStateSchemasResolverImpl()));
-        DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_CACHE);
         DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_AST_CACHE);
         DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(
                 TextToASTTransformer.create(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY));
+
+        /*
+         * Create the default <code>FilesystemSchemaSourceCache</code>, which stores cached files
+         * in <code>cache/schema</code>. Try up to 3 times - we've seen intermittent failures on jenkins where
+         * FilesystemSchemaSourceCache throws an IAE due to mkdirs failure. The theory is that there's a race
+         * creating the dir and it already exists when mkdirs is called (mkdirs returns false in this case). In this
+         * scenario, a retry should succeed.
+         */
+        int tries = 1;
+        while (true) {
+            try {
+                FilesystemSchemaSourceCache<YangTextSchemaSource> defaultCache =
+                        new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class,
+                                new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY));
+                DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(defaultCache);
+                break;
+            } catch (IllegalArgumentException e) {
+                if (tries++ >= 3) {
+                    LOG.error("Error creating default schema cache", e);
+                    break;
+                }
+                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+            }
+        }
     }
 
     private NetconfTopologyUtils() {
index b5f6a172fd8f005e0946579bbf0f94974a66d6db..8300eee947cc170fb2891d431f4248a5de6275ff 100644 (file)
@@ -17,6 +17,7 @@ import com.google.common.util.concurrent.FutureCallback;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.MoreExecutors;
+import com.google.common.util.concurrent.Uninterruptibles;
 import io.netty.handler.ssl.SslHandler;
 import io.netty.util.concurrent.EventExecutor;
 import java.io.File;
@@ -31,6 +32,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 import javax.net.ssl.KeyManagerFactory;
 import javax.net.ssl.SSLContext;
 import javax.net.ssl.SSLEngine;
@@ -143,13 +145,6 @@ public abstract class AbstractNetconfTopology implements NetconfTopology {
     private static final SharedSchemaRepository DEFAULT_SCHEMA_REPOSITORY =
             new SharedSchemaRepository(DEFAULT_SCHEMA_REPOSITORY_NAME);
 
-    /**
-     * The default <code>FilesystemSchemaSourceCache</code>, which stores cached files in <code>cache/schema</code>.
-     */
-    private static final FilesystemSchemaSourceCache<YangTextSchemaSource> DEFAULT_CACHE =
-            new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class,
-                    new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY));
-
     public static final InMemorySchemaSourceCache<ASTSchemaSource> DEFAULT_AST_CACHE =
             InMemorySchemaSourceCache.createSoftCache(DEFAULT_SCHEMA_REPOSITORY, ASTSchemaSource.class);
 
@@ -176,10 +171,33 @@ public abstract class AbstractNetconfTopology implements NetconfTopology {
                 new NetconfDevice.SchemaResourcesDTO(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY,
                         DEFAULT_SCHEMA_CONTEXT_FACTORY,
                         new NetconfStateSchemasResolverImpl()));
-        DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_CACHE);
         DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_AST_CACHE);
         DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(
                 TextToASTTransformer.create(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY));
+
+        /*
+         * Create the default <code>FilesystemSchemaSourceCache</code>, which stores cached files
+         * in <code>cache/schema</code>. Try up to 3 times - we've seen intermittent failures on jenkins where
+         * FilesystemSchemaSourceCache throws an IAE due to mkdirs failure. The theory is that there's a race
+         * creating the dir and it already exists when mkdirs is called (mkdirs returns false in this case). In this
+         * scenario, a retry should succeed.
+         */
+        int tries = 1;
+        while (true) {
+            try {
+                FilesystemSchemaSourceCache<YangTextSchemaSource> defaultCache =
+                        new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class,
+                                new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY));
+                DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(defaultCache);
+                break;
+            } catch (IllegalArgumentException e) {
+                if (tries++ >= 3) {
+                    LOG.error("Error creating default schema cache", e);
+                    break;
+                }
+                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+            }
+        }
     }
 
     protected final String topologyId;