package org.onlab.onos.store.cluster.impl;

import org.onlab.onos.cluster.DefaultControllerNode;
import org.onlab.onos.cluster.NodeId;
import org.onlab.onos.store.cluster.messaging.ClusterMessage;
import org.onlab.onos.store.cluster.messaging.ClusterMessageStream;
import org.onlab.onos.store.cluster.messaging.HelloMessage;
import org.onlab.onos.store.cluster.messaging.SerializationService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.channels.SocketChannel;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static java.net.InetAddress.getByAddress;
import static org.onlab.util.Tools.namedThreads;

| 30 | /** |
| 31 | * Manages connections to other controller cluster nodes. |
| 32 | */ |
| 33 | public class ConnectionManager implements MessageSender { |
| 34 | |
| 35 | private final Logger log = LoggerFactory.getLogger(getClass()); |
| 36 | |
| 37 | private static final long CONNECTION_CUSTODIAN_DELAY = 1000L; |
| 38 | private static final long CONNECTION_CUSTODIAN_FREQUENCY = 5000; |
| 39 | |
| 40 | private static final long START_TIMEOUT = 1000; |
| 41 | private static final int WORKERS = 3; |
| 42 | |
| 43 | private ClusterConnectionListener connectionListener; |
| 44 | private List<ClusterIOWorker> workers = new ArrayList<>(WORKERS); |
| 45 | |
| 46 | private final DefaultControllerNode localNode; |
| 47 | private final ClusterNodesDelegate nodesDelegate; |
| 48 | private final CommunicationsDelegate commsDelegate; |
| 49 | private final SerializationService serializationService; |
| 50 | |
| 51 | // Nodes to be monitored to make sure they have a connection. |
| 52 | private final Set<DefaultControllerNode> nodes = new HashSet<>(); |
| 53 | |
| 54 | // Means to track message streams to other nodes. |
| 55 | private final Map<NodeId, ClusterMessageStream> streams = new ConcurrentHashMap<>(); |
| 56 | |
| 57 | // Executor pools for listening and managing connections to other nodes. |
| 58 | private final ExecutorService listenExecutor = |
| 59 | Executors.newSingleThreadExecutor(namedThreads("onos-comm-listen")); |
| 60 | private final ExecutorService commExecutors = |
| 61 | Executors.newFixedThreadPool(WORKERS, namedThreads("onos-comm-cluster")); |
| 62 | private final ExecutorService heartbeatExecutor = |
| 63 | Executors.newSingleThreadExecutor(namedThreads("onos-comm-heartbeat")); |
| 64 | |
| 65 | private final Timer timer = new Timer("onos-comm-initiator"); |
| 66 | private final TimerTask connectionCustodian = new ConnectionCustodian(); |
| 67 | |
| 68 | private final WorkerFinder workerFinder = new LeastUtilitiedWorkerFinder(); |
| 69 | |
| 70 | |
| 71 | /** |
| 72 | * Creates a new connection manager. |
| 73 | */ |
| 74 | ConnectionManager(DefaultControllerNode localNode, |
| 75 | ClusterNodesDelegate nodesDelegate, |
| 76 | CommunicationsDelegate commsDelegate, |
| 77 | SerializationService serializationService) { |
| 78 | this.localNode = localNode; |
| 79 | this.nodesDelegate = nodesDelegate; |
| 80 | this.commsDelegate = commsDelegate; |
| 81 | this.serializationService = serializationService; |
| 82 | |
| 83 | commsDelegate.setSender(this); |
| 84 | startCommunications(); |
| 85 | startListening(); |
| 86 | startInitiating(); |
| 87 | log.info("Started"); |
| 88 | } |
| 89 | |
| 90 | /** |
| 91 | * Shuts down the connection manager. |
| 92 | */ |
| 93 | void shutdown() { |
| 94 | connectionListener.shutdown(); |
| 95 | for (ClusterIOWorker worker : workers) { |
| 96 | worker.shutdown(); |
| 97 | } |
| 98 | log.info("Stopped"); |
| 99 | } |
| 100 | |
| 101 | /** |
| 102 | * Adds the node to the list of monitored nodes. |
| 103 | * |
| 104 | * @param node node to be added |
| 105 | */ |
| 106 | void addNode(DefaultControllerNode node) { |
| 107 | nodes.add(node); |
| 108 | } |
| 109 | |
| 110 | /** |
| 111 | * Removes the node from the list of monitored nodes. |
| 112 | * |
| 113 | * @param node node to be removed |
| 114 | */ |
| 115 | void removeNode(DefaultControllerNode node) { |
| 116 | nodes.remove(node); |
| 117 | ClusterMessageStream stream = streams.remove(node.id()); |
| 118 | if (stream != null) { |
| 119 | stream.close(); |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | /** |
| 124 | * Removes the stream associated with the specified node. |
| 125 | * |
| 126 | * @param node node whose stream to remove |
| 127 | */ |
| 128 | void removeNodeStream(DefaultControllerNode node) { |
| 129 | nodesDelegate.nodeVanished(node); |
| 130 | streams.remove(node.id()); |
| 131 | } |
| 132 | |
| 133 | @Override |
| 134 | public boolean send(NodeId nodeId, ClusterMessage message) { |
| 135 | ClusterMessageStream stream = streams.get(nodeId); |
| 136 | if (stream != null) { |
| 137 | try { |
| 138 | stream.write(message); |
| 139 | return true; |
| 140 | } catch (IOException e) { |
| 141 | log.warn("Unable to send a message about {} to node {}", |
| 142 | message.subject(), nodeId); |
| 143 | } |
| 144 | } |
| 145 | return false; |
| 146 | } |
| 147 | |
| 148 | /** |
| 149 | * Kicks off the IO loops and waits for them to startup. |
| 150 | */ |
| 151 | private void startCommunications() { |
| 152 | HelloMessage hello = new HelloMessage(localNode.id(), localNode.ip(), |
| 153 | localNode.tcpPort()); |
| 154 | for (int i = 0; i < WORKERS; i++) { |
| 155 | try { |
| 156 | ClusterIOWorker worker = |
| 157 | new ClusterIOWorker(this, commsDelegate, |
| 158 | serializationService, hello); |
| 159 | workers.add(worker); |
| 160 | commExecutors.execute(worker); |
| 161 | } catch (IOException e) { |
| 162 | log.warn("Unable to start communication worker", e); |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | // Wait for the IO loops to start |
| 167 | for (ClusterIOWorker loop : workers) { |
| 168 | if (!loop.awaitStart(START_TIMEOUT)) { |
| 169 | log.warn("Comm loop did not start on-time; moving on..."); |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | /** |
| 175 | * Starts listening for connections from peer cluster members. |
| 176 | */ |
| 177 | private void startListening() { |
| 178 | try { |
| 179 | connectionListener = |
| 180 | new ClusterConnectionListener(localNode.ip(), localNode.tcpPort(), |
| 181 | workerFinder); |
| 182 | listenExecutor.execute(connectionListener); |
| 183 | if (!connectionListener.awaitStart(START_TIMEOUT)) { |
| 184 | log.warn("Listener did not start on-time; moving on..."); |
| 185 | } |
| 186 | } catch (IOException e) { |
| 187 | log.error("Unable to listen for cluster connections", e); |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | /** |
| 192 | * Initiates open connection request and registers the pending socket |
| 193 | * channel with the given IO loop. |
| 194 | * |
| 195 | * @param loop loop with which the channel should be registered |
| 196 | * @throws java.io.IOException if the socket could not be open or connected |
| 197 | */ |
| 198 | private void initiateConnection(DefaultControllerNode node, |
| 199 | ClusterIOWorker loop) throws IOException { |
| 200 | SocketAddress sa = new InetSocketAddress(getByAddress(node.ip().toOctets()), node.tcpPort()); |
| 201 | SocketChannel ch = SocketChannel.open(); |
| 202 | ch.configureBlocking(false); |
| 203 | ch.connect(sa); |
| 204 | loop.connectStream(ch); |
| 205 | } |
| 206 | |
| 207 | |
| 208 | /** |
| 209 | * Attempts to connect to any nodes that do not have an associated connection. |
| 210 | */ |
| 211 | private void startInitiating() { |
| 212 | timer.schedule(connectionCustodian, CONNECTION_CUSTODIAN_DELAY, |
| 213 | CONNECTION_CUSTODIAN_FREQUENCY); |
| 214 | } |
| 215 | |
| 216 | // Sweeps through all controller nodes and attempts to open connection to |
| 217 | // those that presently do not have one. |
| 218 | private class ConnectionCustodian extends TimerTask { |
| 219 | @Override |
| 220 | public void run() { |
| 221 | for (DefaultControllerNode node : nodes) { |
| 222 | if (node != localNode && !streams.containsKey(node.id())) { |
| 223 | try { |
| 224 | initiateConnection(node, workerFinder.findWorker()); |
| 225 | } catch (IOException e) { |
| 226 | log.debug("Unable to connect", e); |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | // Finds the least utilitied IO loop. |
| 234 | private class LeastUtilitiedWorkerFinder implements WorkerFinder { |
| 235 | |
| 236 | @Override |
| 237 | public ClusterIOWorker findWorker() { |
| 238 | ClusterIOWorker leastUtilized = null; |
| 239 | int minCount = Integer.MAX_VALUE; |
| 240 | for (ClusterIOWorker worker : workers) { |
| 241 | int count = worker.streamCount(); |
| 242 | if (count == 0) { |
| 243 | return worker; |
| 244 | } |
| 245 | |
| 246 | if (count < minCount) { |
| 247 | leastUtilized = worker; |
| 248 | minCount = count; |
| 249 | } |
| 250 | } |
| 251 | return leastUtilized; |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | } |