blob: fac3c216fa3fcd2b1d98028472b17ae9d44904fe [file] [log] [blame]
tom1d416c52014-09-29 20:55:24 -07001package org.onlab.onos.store.cluster.impl;
2
3import org.onlab.onos.cluster.DefaultControllerNode;
4import org.onlab.onos.cluster.NodeId;
5import org.onlab.onos.store.cluster.messaging.ClusterMessage;
6import org.onlab.onos.store.cluster.messaging.ClusterMessageStream;
7import org.onlab.onos.store.cluster.messaging.HelloMessage;
8import org.onlab.onos.store.cluster.messaging.SerializationService;
9import org.slf4j.Logger;
10import org.slf4j.LoggerFactory;
11
12import java.io.IOException;
13import java.net.InetSocketAddress;
14import java.net.SocketAddress;
15import java.nio.channels.SocketChannel;
16import java.util.ArrayList;
17import java.util.HashSet;
18import java.util.List;
19import java.util.Map;
20import java.util.Set;
21import java.util.Timer;
22import java.util.TimerTask;
23import java.util.concurrent.ConcurrentHashMap;
24import java.util.concurrent.ExecutorService;
25import java.util.concurrent.Executors;
26
27import static java.net.InetAddress.getByAddress;
28import static org.onlab.util.Tools.namedThreads;
29
30/**
31 * Manages connections to other controller cluster nodes.
32 */
33public class ConnectionManager implements MessageSender {
34
35 private final Logger log = LoggerFactory.getLogger(getClass());
36
37 private static final long CONNECTION_CUSTODIAN_DELAY = 1000L;
38 private static final long CONNECTION_CUSTODIAN_FREQUENCY = 5000;
39
40 private static final long START_TIMEOUT = 1000;
41 private static final int WORKERS = 3;
42
43 private ClusterConnectionListener connectionListener;
44 private List<ClusterIOWorker> workers = new ArrayList<>(WORKERS);
45
46 private final DefaultControllerNode localNode;
47 private final ClusterNodesDelegate nodesDelegate;
48 private final CommunicationsDelegate commsDelegate;
49 private final SerializationService serializationService;
50
51 // Nodes to be monitored to make sure they have a connection.
52 private final Set<DefaultControllerNode> nodes = new HashSet<>();
53
54 // Means to track message streams to other nodes.
55 private final Map<NodeId, ClusterMessageStream> streams = new ConcurrentHashMap<>();
56
57 // Executor pools for listening and managing connections to other nodes.
58 private final ExecutorService listenExecutor =
59 Executors.newSingleThreadExecutor(namedThreads("onos-comm-listen"));
60 private final ExecutorService commExecutors =
61 Executors.newFixedThreadPool(WORKERS, namedThreads("onos-comm-cluster"));
62 private final ExecutorService heartbeatExecutor =
63 Executors.newSingleThreadExecutor(namedThreads("onos-comm-heartbeat"));
64
65 private final Timer timer = new Timer("onos-comm-initiator");
66 private final TimerTask connectionCustodian = new ConnectionCustodian();
67
68 private final WorkerFinder workerFinder = new LeastUtilitiedWorkerFinder();
69
70
71 /**
72 * Creates a new connection manager.
73 */
74 ConnectionManager(DefaultControllerNode localNode,
75 ClusterNodesDelegate nodesDelegate,
76 CommunicationsDelegate commsDelegate,
77 SerializationService serializationService) {
78 this.localNode = localNode;
79 this.nodesDelegate = nodesDelegate;
80 this.commsDelegate = commsDelegate;
81 this.serializationService = serializationService;
82
83 commsDelegate.setSender(this);
84 startCommunications();
85 startListening();
86 startInitiating();
87 log.info("Started");
88 }
89
90 /**
91 * Shuts down the connection manager.
92 */
93 void shutdown() {
94 connectionListener.shutdown();
95 for (ClusterIOWorker worker : workers) {
96 worker.shutdown();
97 }
98 log.info("Stopped");
99 }
100
101 /**
102 * Adds the node to the list of monitored nodes.
103 *
104 * @param node node to be added
105 */
106 void addNode(DefaultControllerNode node) {
107 nodes.add(node);
108 }
109
110 /**
111 * Removes the node from the list of monitored nodes.
112 *
113 * @param node node to be removed
114 */
115 void removeNode(DefaultControllerNode node) {
116 nodes.remove(node);
117 ClusterMessageStream stream = streams.remove(node.id());
118 if (stream != null) {
119 stream.close();
120 }
121 }
122
123 /**
124 * Removes the stream associated with the specified node.
125 *
126 * @param node node whose stream to remove
127 */
128 void removeNodeStream(DefaultControllerNode node) {
129 nodesDelegate.nodeVanished(node);
130 streams.remove(node.id());
131 }
132
133 @Override
134 public boolean send(NodeId nodeId, ClusterMessage message) {
135 ClusterMessageStream stream = streams.get(nodeId);
136 if (stream != null) {
137 try {
138 stream.write(message);
139 return true;
140 } catch (IOException e) {
141 log.warn("Unable to send a message about {} to node {}",
142 message.subject(), nodeId);
143 }
144 }
145 return false;
146 }
147
148 /**
149 * Kicks off the IO loops and waits for them to startup.
150 */
151 private void startCommunications() {
152 HelloMessage hello = new HelloMessage(localNode.id(), localNode.ip(),
153 localNode.tcpPort());
154 for (int i = 0; i < WORKERS; i++) {
155 try {
156 ClusterIOWorker worker =
157 new ClusterIOWorker(this, commsDelegate,
158 serializationService, hello);
159 workers.add(worker);
160 commExecutors.execute(worker);
161 } catch (IOException e) {
162 log.warn("Unable to start communication worker", e);
163 }
164 }
165
166 // Wait for the IO loops to start
167 for (ClusterIOWorker loop : workers) {
168 if (!loop.awaitStart(START_TIMEOUT)) {
169 log.warn("Comm loop did not start on-time; moving on...");
170 }
171 }
172 }
173
174 /**
175 * Starts listening for connections from peer cluster members.
176 */
177 private void startListening() {
178 try {
179 connectionListener =
180 new ClusterConnectionListener(localNode.ip(), localNode.tcpPort(),
181 workerFinder);
182 listenExecutor.execute(connectionListener);
183 if (!connectionListener.awaitStart(START_TIMEOUT)) {
184 log.warn("Listener did not start on-time; moving on...");
185 }
186 } catch (IOException e) {
187 log.error("Unable to listen for cluster connections", e);
188 }
189 }
190
191 /**
192 * Initiates open connection request and registers the pending socket
193 * channel with the given IO loop.
194 *
195 * @param loop loop with which the channel should be registered
196 * @throws java.io.IOException if the socket could not be open or connected
197 */
198 private void initiateConnection(DefaultControllerNode node,
199 ClusterIOWorker loop) throws IOException {
200 SocketAddress sa = new InetSocketAddress(getByAddress(node.ip().toOctets()), node.tcpPort());
201 SocketChannel ch = SocketChannel.open();
202 ch.configureBlocking(false);
203 ch.connect(sa);
204 loop.connectStream(ch);
205 }
206
207
208 /**
209 * Attempts to connect to any nodes that do not have an associated connection.
210 */
211 private void startInitiating() {
212 timer.schedule(connectionCustodian, CONNECTION_CUSTODIAN_DELAY,
213 CONNECTION_CUSTODIAN_FREQUENCY);
214 }
215
216 // Sweeps through all controller nodes and attempts to open connection to
217 // those that presently do not have one.
218 private class ConnectionCustodian extends TimerTask {
219 @Override
220 public void run() {
221 for (DefaultControllerNode node : nodes) {
222 if (node != localNode && !streams.containsKey(node.id())) {
223 try {
224 initiateConnection(node, workerFinder.findWorker());
225 } catch (IOException e) {
226 log.debug("Unable to connect", e);
227 }
228 }
229 }
230 }
231 }
232
233 // Finds the least utilitied IO loop.
234 private class LeastUtilitiedWorkerFinder implements WorkerFinder {
235
236 @Override
237 public ClusterIOWorker findWorker() {
238 ClusterIOWorker leastUtilized = null;
239 int minCount = Integer.MAX_VALUE;
240 for (ClusterIOWorker worker : workers) {
241 int count = worker.streamCount();
242 if (count == 0) {
243 return worker;
244 }
245
246 if (count < minCount) {
247 leastUtilized = worker;
248 minCount = count;
249 }
250 }
251 return leastUtilized;
252 }
253 }
254
255}