First was fix in AsyncSshHandlerReader but there was another
deadlock after that. Therefore a change in AsyncSshHandler is also
needed. Both ABBA deadlockw occur when AsyncSshHandler.disconnect
(..) and AsyncSshHandlerReader.invokeDisconnect(..) methods are
executed concurrently within the same SSH session that is going
to be closed.
Note: Snippets from stack-traces are from Oxygen distribution, so
the line numbers don't have to match exactly.
Deadlock #1:
"nioEventLoopGroupCloseable-3-20":
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandlerReader.close(AsyncSshHandlerReader.java:96)
- waiting to lock <0x000000072ecffff8> (a org.opendaylight
.netconf.nettyutil.handler.ssh.client.AsyncSshHandlerReader)
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandler.disconnect(AsyncSshHandler.java:235)
- locked <0x000000072ed00020> (a org.opendaylight.netconf
.nettyutil.handler.ssh.client.AsyncSshHandler)
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandler.close(AsyncSshHandler.java:215)
...
"sshd-SshClient[
57f78db1]-nio2-thread-1":
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandler.disconnect(AsyncSshHandler.java:221)
- waiting to lock <0x000000072ed00020> (a org.opendaylight
.netconf.nettyutil.handler.ssh.client.AsyncSshHandler)
...
org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandlerReader.operationComplete(AsyncSshHandlerReader
.java:64)
- locked <0x000000072ecffff8> (a org.opendaylight.netconf
.nettyutil.handler.ssh.client.AsyncSshHandlerReader)
...
Deadlock #2:
"nioEventLoopGroupCloseable-3-19":
at org.apache.sshd.client.session.ClientSessionImpl
.signalAuthFailure(ClientSessionImpl.java:138)
- waiting to lock <0x000000072c800218> (a java.lang.Object)
at org.apache.sshd.client.session.ClientSessionImpl
.preClose(ClientSessionImpl.java:126)
at org.apache.sshd.common.util.closeable.AbstractCloseable
.close(AbstractCloseable.java:95)
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandler.disconnect(AsyncSshHandler.java:251)
- locked <0x000000072c800480> (a org.opendaylight.netconf
.nettyutil.handler.ssh.client.AsyncSshHandler)
...
"sshd-SshClient[
411a406d]-nio2-thread-8":
at org.opendaylight.netconf.nettyutil.handler.ssh.client
.AsyncSshHandler.disconnect(AsyncSshHandler.java:221)
- waiting to lock <0x000000072c800480> (a org.opendaylight
.netconf.nettyutil.handler.ssh.client.AsyncSshHandler)
...
org.apache.sshd.common.session.helpers.AbstractSession
.handleMessage(AbstractSession.java:499)
- locked <0x000000072c800218> (a java.lang.Object)
...
Change-Id: I42260213cad82250832451539d6ce11035ba37e2
Signed-off-by: Martin Sunal <msunal@frinx.io>
Signed-off-by: Jaroslav Tóth <jtoth@frinx.io>
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
import io.netty.util.concurrent.GenericFutureListener;
import java.io.IOException;
import java.net.SocketAddress;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.sshd.client.SshClient;
import org.apache.sshd.client.channel.ClientChannel;
import org.apache.sshd.client.future.AuthFuture;
private final AuthenticationHandler authenticationHandler;
private final SshClient sshClient;
+ private final AtomicBoolean isDisconnected = new AtomicBoolean();
private Future<?> negotiationFuture;
private AsyncSshHandlerReader sshReadAsyncListener;
disconnect(ctx, promise);
}
- @SuppressWarnings("checkstyle:IllegalCatch")
@Override
- public synchronized void disconnect(final ChannelHandlerContext ctx, final ChannelPromise promise) {
+ public void disconnect(final ChannelHandlerContext ctx, final ChannelPromise promise) {
+ if (isDisconnected.compareAndSet(false, true)) {
+ safelyDisconnect(ctx, promise);
+ }
+ }
+
+ @SuppressWarnings("checkstyle:IllegalCatch")
+ private synchronized void safelyDisconnect(final ChannelHandlerContext ctx, final ChannelPromise promise) {
LOG.trace("Closing SSH session on channel: {} with connect promise in state: {}",
- ctx.channel(),connectPromise);
+ ctx.channel(), connectPromise);
// If we have already succeeded and the session was dropped after,
// we need to fire inactive to notify reconnect logic
promise.setSuccess();
LOG.debug("SSH session closed on channel: {}", ctx.channel());
}
-
}
}
@Override
- public synchronized void operationComplete(final IoReadFuture future) {
- if (future.getException() != null) {
+ public void operationComplete(final IoReadFuture future) {
+ if (checkDisconnect(future)) {
+ invokeDisconnect();
+ }
+ }
+ private synchronized boolean checkDisconnect(final IoReadFuture future) {
+ if (future.getException() != null) {
//if asyncout is already set to null by close method, do nothing
if (asyncOut == null) {
- return;
+ return false;
}
if (asyncOut.isClosed() || asyncOut.isClosing()) {
} else {
LOG.warn("Exception while reading from SSH remote on channel {}", channelId, future.getException());
}
- invokeDisconnect();
- return;
- }
-
- if (future.getRead() > 0) {
+ return true;
+ } else if (future.getRead() > 0) {
final ByteBuf msg = Unpooled.wrappedBuffer(buf.array(), 0, future.getRead());
if (LOG.isTraceEnabled()) {
LOG.trace("Reading message on channel: {}, message: {}",
currentReadFuture = asyncOut.read(buf);
currentReadFuture.addListener(this);
}
+ return false;
}
+ /**
+ * Closing of the {@link AsyncSshHandlerReader}. This method should never be called with any locks held since
+ * call to {@link AutoCloseable#close()} can be a source of ABBA deadlock.
+ */
@SuppressWarnings("checkstyle:IllegalCatch")
private void invokeDisconnect() {
try {