From: Tom Pantelis Date: Fri, 8 Jun 2018 02:45:55 +0000 (-0400) Subject: Fix default schema cache failures X-Git-Tag: release/fluorine~58 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=commitdiff_plain;h=29900137ce29276cecae2a95f758205887f67da9;p=netconf.git Fix default schema cache failures We've seen these intermittent failures on jenkins: Caused by: java.lang.IllegalArgumentException: Unable to create cache directory at cache/schema at com.google.common.base.Preconditions.checkArgument(Preconditions.java:210) at org.opendaylight.yangtools.yang.model.repo.util.FilesystemSchemaSourceCache.<init>(FilesystemSchemaSourceCache.java:74) at org.opendaylight.netconf.topology.AbstractNetconfTopology.<clinit>(AbstractNetconfTopology.java:149) In FilesystemSchemaSourceCache: if (!storageDirectory.exists()) { checkArgument(storageDirectory.mkdirs(), "Unable to create cache directory at %s", storageDirectory); } mkdirs returns false if the dir/file already exists - this seems the likely reason in this case even though it checks exists() just prior. This scenario could happen if there's a race where some other component interleaves and creates the dir in between the exists and mkdirs calls. To protect against this scenario, AbstractNetconfTopology and NetconfTopologyUtils were changed to retry if FilesystemSchemaSourceCache throws an IAE. Also they no longer fail class initialization on failure. 
Change-Id: I9f7ec134e7fd817aa753f0db175bf5620cc52ff4 Signed-off-by: Tom Pantelis --- diff --git a/netconf/netconf-topology-singleton/src/main/java/org/opendaylight/netconf/topology/singleton/impl/utils/NetconfTopologyUtils.java b/netconf/netconf-topology-singleton/src/main/java/org/opendaylight/netconf/topology/singleton/impl/utils/NetconfTopologyUtils.java index eb34152e93..a881db4c2f 100644 --- a/netconf/netconf-topology-singleton/src/main/java/org/opendaylight/netconf/topology/singleton/impl/utils/NetconfTopologyUtils.java +++ b/netconf/netconf-topology-singleton/src/main/java/org/opendaylight/netconf/topology/singleton/impl/utils/NetconfTopologyUtils.java @@ -9,11 +9,13 @@ package org.opendaylight.netconf.topology.singleton.impl.utils; import com.google.common.base.Strings; +import com.google.common.util.concurrent.Uninterruptibles; import java.io.File; import java.math.BigDecimal; import java.net.InetSocketAddress; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; import org.opendaylight.netconf.api.DocumentedException; import org.opendaylight.netconf.sal.connect.netconf.NetconfDevice; import org.opendaylight.netconf.sal.connect.netconf.NetconfStateSchemasResolverImpl; @@ -73,12 +75,6 @@ public final class NetconfTopologyUtils { public static final SharedSchemaRepository DEFAULT_SCHEMA_REPOSITORY = new SharedSchemaRepository(DEFAULT_SCHEMA_REPOSITORY_NAME); - - // The default FilesystemSchemaSourceCache, which stores cached files in cache/schema. 
- public static final FilesystemSchemaSourceCache DEFAULT_CACHE = - new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class, - new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY)); - public static final InMemorySchemaSourceCache DEFAULT_AST_CACHE = InMemorySchemaSourceCache.createSoftCache(DEFAULT_SCHEMA_REPOSITORY, ASTSchemaSource.class); @@ -102,10 +98,33 @@ public final class NetconfTopologyUtils { SCHEMA_RESOURCES_DTO_MAP.put(DEFAULT_CACHE_DIRECTORY, new NetconfDevice.SchemaResourcesDTO(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_CONTEXT_FACTORY, new NetconfStateSchemasResolverImpl())); - DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_CACHE); DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_AST_CACHE); DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener( TextToASTTransformer.create(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY)); + + /* + * Create the default FilesystemSchemaSourceCache, which stores cached files + * in cache/schema. Try up to 3 times - we've seen intermittent failures on jenkins where + * FilesystemSchemaSourceCache throws an IAE due to mkdirs failure. The theory is that there's a race + * creating the dir and it already exists when mkdirs is called (mkdirs returns false in this case). In this + * scenario, a retry should succeed. 
+ */ + int tries = 1; + while (true) { + try { + FilesystemSchemaSourceCache defaultCache = + new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class, + new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY)); + DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(defaultCache); + break; + } catch (IllegalArgumentException e) { + if (tries++ >= 3) { + LOG.error("Error creating default schema cache", e); + break; + } + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + } + } } private NetconfTopologyUtils() { diff --git a/netconf/netconf-topology/src/main/java/org/opendaylight/netconf/topology/AbstractNetconfTopology.java b/netconf/netconf-topology/src/main/java/org/opendaylight/netconf/topology/AbstractNetconfTopology.java index b5f6a172fd..8300eee947 100644 --- a/netconf/netconf-topology/src/main/java/org/opendaylight/netconf/topology/AbstractNetconfTopology.java +++ b/netconf/netconf-topology/src/main/java/org/opendaylight/netconf/topology/AbstractNetconfTopology.java @@ -17,6 +17,7 @@ import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; +import com.google.common.util.concurrent.Uninterruptibles; import io.netty.handler.ssl.SslHandler; import io.netty.util.concurrent.EventExecutor; import java.io.File; @@ -31,6 +32,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import javax.net.ssl.KeyManagerFactory; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLEngine; @@ -143,13 +145,6 @@ public abstract class AbstractNetconfTopology implements NetconfTopology { private static final SharedSchemaRepository DEFAULT_SCHEMA_REPOSITORY = new SharedSchemaRepository(DEFAULT_SCHEMA_REPOSITORY_NAME); - /** - * The default FilesystemSchemaSourceCache, which stores cached files in 
cache/schema. - */ - private static final FilesystemSchemaSourceCache DEFAULT_CACHE = - new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class, - new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY)); - public static final InMemorySchemaSourceCache DEFAULT_AST_CACHE = InMemorySchemaSourceCache.createSoftCache(DEFAULT_SCHEMA_REPOSITORY, ASTSchemaSource.class); @@ -176,10 +171,33 @@ public abstract class AbstractNetconfTopology implements NetconfTopology { new NetconfDevice.SchemaResourcesDTO(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_CONTEXT_FACTORY, new NetconfStateSchemasResolverImpl())); - DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_CACHE); DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(DEFAULT_AST_CACHE); DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener( TextToASTTransformer.create(DEFAULT_SCHEMA_REPOSITORY, DEFAULT_SCHEMA_REPOSITORY)); + + /* + * Create the default FilesystemSchemaSourceCache, which stores cached files + * in cache/schema. Try up to 3 times - we've seen intermittent failures on jenkins where + * FilesystemSchemaSourceCache throws an IAE due to mkdirs failure. The theory is that there's a race + * creating the dir and it already exists when mkdirs is called (mkdirs returns false in this case). In this + * scenario, a retry should succeed. + */ + int tries = 1; + while (true) { + try { + FilesystemSchemaSourceCache defaultCache = + new FilesystemSchemaSourceCache<>(DEFAULT_SCHEMA_REPOSITORY, YangTextSchemaSource.class, + new File(QUALIFIED_DEFAULT_CACHE_DIRECTORY)); + DEFAULT_SCHEMA_REPOSITORY.registerSchemaSourceListener(defaultCache); + break; + } catch (IllegalArgumentException e) { + if (tries++ >= 3) { + LOG.error("Error creating default schema cache", e); + break; + } + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + } + } } protected final String topologyId;