Netty Replicator - improve the reconnection and keepalive mechanisms
[mdsal.git] / replicate / mdsal-replicate-netty / src / main / java / org / opendaylight / mdsal / replicate / netty / SinkSingletonService.java
1 /*
2  * Copyright (c) 2020 PANTHEON.tech, s.r.o. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.mdsal.replicate.netty;
9
10 import static java.util.Objects.requireNonNull;
11
12 import com.google.common.util.concurrent.ListenableFuture;
13 import io.netty.bootstrap.Bootstrap;
14 import io.netty.buffer.ByteBuf;
15 import io.netty.buffer.ByteBufOutputStream;
16 import io.netty.buffer.Unpooled;
17 import io.netty.channel.Channel;
18 import io.netty.channel.ChannelFuture;
19 import io.netty.channel.ChannelFutureListener;
20 import io.netty.channel.ChannelInitializer;
21 import io.netty.channel.ChannelOption;
22 import io.netty.channel.socket.SocketChannel;
23 import io.netty.handler.timeout.IdleStateHandler;
24 import java.io.IOException;
25 import java.net.InetSocketAddress;
26 import java.time.Duration;
27 import java.util.concurrent.ScheduledExecutorService;
28 import java.util.concurrent.TimeUnit;
29 import org.checkerframework.checker.lock.qual.GuardedBy;
30 import org.checkerframework.checker.lock.qual.Holding;
31 import org.opendaylight.mdsal.common.api.LogicalDatastoreType;
32 import org.opendaylight.mdsal.dom.api.DOMDataBroker;
33 import org.opendaylight.mdsal.dom.api.DOMDataTreeIdentifier;
34 import org.opendaylight.mdsal.singleton.common.api.ClusterSingletonService;
35 import org.opendaylight.mdsal.singleton.common.api.ServiceGroupIdentifier;
36 import org.opendaylight.yangtools.util.concurrent.FluentFutures;
37 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
38 import org.opendaylight.yangtools.yang.data.codec.binfmt.NormalizedNodeDataOutput;
39 import org.opendaylight.yangtools.yang.data.codec.binfmt.NormalizedNodeStreamVersion;
40 import org.slf4j.Logger;
41 import org.slf4j.LoggerFactory;
42
43 final class SinkSingletonService extends ChannelInitializer<SocketChannel> implements ClusterSingletonService {
44     private static final Logger LOG = LoggerFactory.getLogger(SinkSingletonService.class);
45     private static final ServiceGroupIdentifier SGID =
46             ServiceGroupIdentifier.create(SinkSingletonService.class.getName());
47     // TODO: allow different trees?
48     private static final DOMDataTreeIdentifier TREE = new DOMDataTreeIdentifier(LogicalDatastoreType.CONFIGURATION,
49         YangInstanceIdentifier.empty());
50     private static long CHANNEL_CLOSE_TIMEOUT_S = 10;
51     private static final ByteBuf TREE_REQUEST;
52
53     static {
54         try {
55             TREE_REQUEST = Unpooled.unreleasableBuffer(requestTree(TREE));
56         } catch (IOException e) {
57             throw new ExceptionInInitializerError(e);
58         }
59     }
60
61     private final BootstrapSupport bootstrapSupport;
62     private final DOMDataBroker dataBroker;
63     private final InetSocketAddress sourceAddress;
64     private final Duration reconnectDelay;
65     private final int maxMissedKeepalives;
66     private final Duration keepaliveInterval;
67
68     @GuardedBy("this")
69     private ChannelFuture futureChannel;
70     private boolean closingInstance;
71     private Bootstrap bs;
72
73     SinkSingletonService(final BootstrapSupport bootstrapSupport, final DOMDataBroker dataBroker,
74             final InetSocketAddress sourceAddress, final Duration reconnectDelay, final Duration keepaliveInterval,
75             final int maxMissedKeepalives) {
76         this.bootstrapSupport = requireNonNull(bootstrapSupport);
77         this.dataBroker = requireNonNull(dataBroker);
78         this.sourceAddress = requireNonNull(sourceAddress);
79         this.reconnectDelay = requireNonNull(reconnectDelay);
80         this.keepaliveInterval = requireNonNull(keepaliveInterval);
81         this.maxMissedKeepalives = maxMissedKeepalives;
82         LOG.info("Replication sink from {} waiting for cluster-wide mastership", sourceAddress);
83     }
84
85     @Override
86     public ServiceGroupIdentifier getIdentifier() {
87         return SGID;
88     }
89
90     @Override
91     public synchronized void instantiateServiceInstance() {
92         LOG.info("Replication sink started with source {}", sourceAddress);
93         this.bs = bootstrapSupport.newBootstrap();
94         doConnect();
95     }
96
97     @Holding("this")
98     private void doConnect() {
99         LOG.info("Connecting to Source");
100         final ScheduledExecutorService group = bs.config().group();
101
102         futureChannel = bs
103             .option(ChannelOption.SO_KEEPALIVE, true)
104             .handler(this)
105             .connect(sourceAddress, null);
106         futureChannel.addListener((ChannelFutureListener) future -> channelResolved(future, group));
107     }
108
109     @Override
110     public synchronized ListenableFuture<?> closeServiceInstance() {
111         closingInstance = true;
112         if (futureChannel == null) {
113             return FluentFutures.immediateNullFluentFuture();
114         }
115
116         return FluentFutures.immediateBooleanFluentFuture(disconnect());
117     }
118
119     private synchronized void reconnect() {
120         disconnect();
121         doConnect();
122     }
123
124     private synchronized boolean disconnect() {
125         boolean shutdownSuccess = true;
126         final Channel channel = futureChannel.channel();
127         if (channel != null && channel.isActive()) {
128             try {
129                 // close the resulting channel. Even when this triggers the closeFuture, it won't try to reconnect since
130                 // the closingInstance flag is set
131                 channel.close().await(CHANNEL_CLOSE_TIMEOUT_S, TimeUnit.SECONDS);
132             } catch (InterruptedException e) {
133                 LOG.error("The channel didn't close properly within {} seconds", CHANNEL_CLOSE_TIMEOUT_S);
134                 shutdownSuccess = false;
135             }
136         }
137         shutdownSuccess &= futureChannel.cancel(true);
138         futureChannel = null;
139         return shutdownSuccess;
140     }
141
142     @Override
143     protected void initChannel(final SocketChannel ch) {
144         ch.pipeline()
145             .addLast("frameDecoder", new MessageFrameDecoder())
146             .addLast("idleStateHandler", new IdleStateHandler(
147                 keepaliveInterval.toNanos() * maxMissedKeepalives, 0, 0, TimeUnit.NANOSECONDS))
148             .addLast("keepaliveHandler", new SinkKeepaliveHandler())
149             .addLast("requestHandler", new SinkRequestHandler(TREE, dataBroker.createMergingTransactionChain(
150                 new SinkTransactionChainListener(ch))))
151             .addLast("frameEncoder", MessageFrameEncoder.INSTANCE);
152     }
153
154     private synchronized void channelResolved(final ChannelFuture completedFuture,
155         final ScheduledExecutorService group) {
156         if (futureChannel != null && futureChannel.channel() == completedFuture.channel()) {
157             if (completedFuture.isSuccess()) {
158                 final Channel ch = completedFuture.channel();
159                 LOG.info("Channel {} established", ch);
160                 ch.closeFuture().addListener((ChannelFutureListener) future -> channelClosed(future, group));
161                 ch.writeAndFlush(TREE_REQUEST);
162             } else {
163                 LOG.info("Failed to connect to source {}, reconnecting in {}", sourceAddress,
164                     reconnectDelay.getSeconds(), completedFuture.cause());
165                 group.schedule(() -> {
166                     reconnect();
167                 }, reconnectDelay.toNanos(), TimeUnit.NANOSECONDS);
168             }
169         }
170     }
171
172     private synchronized void channelClosed(final ChannelFuture completedFuture, final ScheduledExecutorService group) {
173         if (futureChannel != null && futureChannel.channel() == completedFuture.channel()) {
174             if (!closingInstance) {
175                 LOG.info("Channel {} lost connection to source {}, reconnecting in {}", completedFuture.channel(),
176                     sourceAddress, reconnectDelay.getSeconds());
177                 group.schedule(() -> {
178                     reconnect();
179                 }, reconnectDelay.toNanos(), TimeUnit.NANOSECONDS);
180             }
181         }
182     }
183
184     private static ByteBuf requestTree(final DOMDataTreeIdentifier tree) throws IOException {
185         final ByteBuf ret = Unpooled.buffer();
186
187         try (ByteBufOutputStream stream = new ByteBufOutputStream(ret)) {
188             stream.writeByte(Constants.MSG_SUBSCRIBE_REQ);
189             try (NormalizedNodeDataOutput output = NormalizedNodeStreamVersion.current().newDataOutput(stream)) {
190                 tree.getDatastoreType().writeTo(output);
191                 output.writeYangInstanceIdentifier(tree.getRootIdentifier());
192             }
193         }
194
195         return ret;
196     }
197 }