From 457556047a855fdcf5fcc0570bc94b2d322997c2 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Wed, 21 Dec 2016 16:11:09 +0100 Subject: [PATCH] BUG-5280: fix race proxy creation and reconnect ProxyHistory map insertions have to happen-before or happen-after the reconnect attempt. This is mostly take care of by client's inversible lock, except for the race window when the connection lookup succeeds and the history map is updated. Resolve this by introducing a StampedLock, which is used to establish order of these operations, covering the entire createHistoryProxy() method. Change-Id: Ibdee739c94f3a48bef3f7fb19cd2f041c0061fe2 Signed-off-by: Robert Varga --- .../actors/dds/AbstractClientHistory.java | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractClientHistory.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractClientHistory.java index 1be8464335..47283843d4 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractClientHistory.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractClientHistory.java @@ -14,6 +14,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.concurrent.locks.StampedLock; import javax.annotation.concurrent.GuardedBy; import org.opendaylight.controller.cluster.access.client.AbstractClientConnection; import org.opendaylight.controller.cluster.access.client.ConnectedClientConnection; @@ -53,7 +54,10 @@ abstract class AbstractClientHistory extends LocalAbortable implements Identifia @GuardedBy("this") private final Map readyTransactions = new HashMap<>(); + @GuardedBy("lock") private final Map histories = new ConcurrentHashMap<>(); + private final StampedLock lock = new StampedLock(); + private final AbstractDataStoreClientBehavior client; private final LocalHistoryIdentifier identifier; @@ -113,6 +117,7 @@ abstract class AbstractClientHistory extends LocalAbortable implements Identifia * * @throws InversibleLockException if the shard is being reconnected */ + @GuardedBy("lock") private ProxyHistory createHistoryProxy(final Long shard) { final AbstractClientConnection connection = client.getConnection(shard); final LocalHistoryIdentifier proxyId = new LocalHistoryIdentifier(identifier.getClientId(), @@ -139,7 +144,14 @@ abstract class AbstractClientHistory extends LocalAbortable implements Identifia private ProxyHistory ensureHistoryProxy(final TransactionIdentifier transactionId, final Long shard) { while (true) { try { - return histories.computeIfAbsent(shard, this::createHistoryProxy); + // Short-lived lock to ensure exclusion of createHistoryProxy and the lookup phase in startReconnect, + // see comments in startReconnect() for details. + final long stamp = lock.readLock(); + try { + return histories.computeIfAbsent(shard, this::createHistoryProxy); + } finally { + lock.unlockRead(stamp); + } } catch (InversibleLockException e) { LOG.trace("Waiting for transaction {} shard {} connection to resolve", transactionId, shard); e.awaitResolution(); @@ -248,7 +260,26 @@ abstract class AbstractClientHistory extends LocalAbortable implements Identifia } HistoryReconnectCohort startReconnect(final ConnectedClientConnection newConn) { - final ProxyHistory oldProxy = histories.get(newConn.cookie()); + /* + * This looks ugly and unusual and there is a reason for that, as the locking involved is in multiple places. + * + * We need to make sure that a new proxy is not created while we are reconnecting, which is partially satisfied + * by client.getConnection() throwing InversibleLockException by the time this method is invoked. That does + * not cover the case when createHistoryProxy() has already acquired the connection, but has not yet populated + * the history map. + * + * Hence we need to make sure no potential computation is happening concurrently with us looking at the history + * map. Once we have performed that lookup, though, we can release the lock immediately, as all creation + * requests are established to happen either before or after the reconnect attempt. + */ + final ProxyHistory oldProxy; + final long stamp = lock.writeLock(); + try { + oldProxy = histories.get(newConn.cookie()); + } finally { + lock.unlockWrite(stamp); + } + if (oldProxy == null) { return null; } -- 2.36.6