Working on the cluster i/o
diff --git a/core/store/dist/src/main/java/org/onlab/onos/store/cluster/impl/DistributedClusterStore.java b/core/store/dist/src/main/java/org/onlab/onos/store/cluster/impl/DistributedClusterStore.java
index 5cd9d9e..ae04226 100644
--- a/core/store/dist/src/main/java/org/onlab/onos/store/cluster/impl/DistributedClusterStore.java
+++ b/core/store/dist/src/main/java/org/onlab/onos/store/cluster/impl/DistributedClusterStore.java
@@ -4,10 +4,9 @@
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.Service;
-import org.onlab.nio.AcceptorLoop;
-import org.onlab.nio.IOLoop;
-import org.onlab.nio.MessageStream;
import org.onlab.onos.cluster.ClusterEvent;
import org.onlab.onos.cluster.ClusterStore;
import org.onlab.onos.cluster.ClusterStoreDelegate;
@@ -15,33 +14,18 @@
import org.onlab.onos.cluster.DefaultControllerNode;
import org.onlab.onos.cluster.NodeId;
import org.onlab.onos.store.AbstractStore;
+import org.onlab.onos.store.cluster.messaging.SerializationService;
import org.onlab.packet.IpPrefix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.net.Socket;
-import java.net.SocketAddress;
-import java.nio.channels.ByteChannel;
-import java.nio.channels.SelectionKey;
-import java.nio.channels.ServerSocketChannel;
-import java.nio.channels.SocketChannel;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.Set;
-import java.util.Timer;
-import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import static java.net.InetAddress.getByAddress;
import static org.onlab.onos.cluster.ControllerNode.State;
import static org.onlab.packet.IpPrefix.valueOf;
-import static org.onlab.util.Tools.namedThreads;
/**
* Distributed implementation of the cluster nodes store.
@@ -52,146 +36,69 @@
extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
implements ClusterStore {
- private static final int HELLO_MSG = 1;
- private static final int ECHO_MSG = 2;
-
private final Logger log = LoggerFactory.getLogger(getClass());
- private static final long CONNECTION_CUSTODIAN_DELAY = 1000L;
- private static final long CONNECTION_CUSTODIAN_FREQUENCY = 5000;
-
- private static final long START_TIMEOUT = 1000;
- private static final long SELECT_TIMEOUT = 50;
- private static final int WORKERS = 3;
- private static final int COMM_BUFFER_SIZE = 32 * 1024;
- private static final int COMM_IDLE_TIME = 500;
-
- private static final boolean SO_NO_DELAY = false;
- private static final int SO_SEND_BUFFER_SIZE = COMM_BUFFER_SIZE;
- private static final int SO_RCV_BUFFER_SIZE = COMM_BUFFER_SIZE;
-
- private DefaultControllerNode self;
+ private DefaultControllerNode localNode;
private final Map<NodeId, DefaultControllerNode> nodes = new ConcurrentHashMap<>();
private final Map<NodeId, State> states = new ConcurrentHashMap<>();
- // Means to track message streams to other nodes.
- private final Map<NodeId, TLVMessageStream> streams = new ConcurrentHashMap<>();
- private final Map<SocketChannel, DefaultControllerNode> nodesByChannel = new ConcurrentHashMap<>();
+ @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
+ private CommunicationsDelegate commsDelegate;
- // Executor pools for listening and managing connections to other nodes.
- private final ExecutorService listenExecutor =
- Executors.newSingleThreadExecutor(namedThreads("onos-comm-listen"));
- private final ExecutorService commExecutors =
- Executors.newFixedThreadPool(WORKERS, namedThreads("onos-comm-cluster"));
- private final ExecutorService heartbeatExecutor =
- Executors.newSingleThreadExecutor(namedThreads("onos-comm-heartbeat"));
+ @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
+ private SerializationService serializationService;
- private final Timer timer = new Timer("onos-comm-initiator");
- private final TimerTask connectionCustodian = new ConnectionCustodian();
-
- private ListenLoop listenLoop;
- private List<CommLoop> commLoops = new ArrayList<>(WORKERS);
+ private final ClusterNodesDelegate nodesDelegate = new InnerNodesDelegate();
+ private ConnectionManager connectionManager;
@Activate
public void activate() {
loadClusterDefinition();
- startCommunications();
- startListening();
- startInitiating();
+ establishSelfIdentity();
+ connectionManager = new ConnectionManager(localNode, nodesDelegate,
+ commsDelegate, serializationService);
log.info("Started");
}
@Deactivate
public void deactivate() {
- listenLoop.shutdown();
- for (CommLoop loop : commLoops) {
- loop.shutdown();
- }
log.info("Stopped");
}
- // Loads the cluster definition file
+ /**
+ * Loads the cluster definition file.
+ */
private void loadClusterDefinition() {
-// ClusterDefinitionStore cds = new ClusterDefinitionStore("../config/cluster.json");
-// try {
-// Set<DefaultControllerNode> storedNodes = cds.read();
-// for (DefaultControllerNode node : storedNodes) {
-// nodes.put(node.id(), node);
-// }
-// } catch (IOException e) {
-// log.error("Unable to read cluster definitions", e);
-// }
-
- // Establishes the controller's own identity.
- IpPrefix ip = valueOf(System.getProperty("onos.ip", "127.0.1.1"));
- self = nodes.get(new NodeId(ip.toString()));
-
- // As a fall-back, let's make sure we at least know who we are.
- if (self == null) {
- self = new DefaultControllerNode(new NodeId(ip.toString()), ip);
- nodes.put(self.id(), self);
- states.put(self.id(), State.ACTIVE);
- }
- }
-
- // Kicks off the IO loops.
- private void startCommunications() {
- for (int i = 0; i < WORKERS; i++) {
- try {
- CommLoop loop = new CommLoop();
- commLoops.add(loop);
- commExecutors.execute(loop);
- } catch (IOException e) {
- log.warn("Unable to start comm IO loop", e);
- }
- }
-
- // Wait for the IO loops to start
- for (CommLoop loop : commLoops) {
- if (!loop.awaitStart(START_TIMEOUT)) {
- log.warn("Comm loop did not start on-time; moving on...");
- }
- }
- }
-
- // Starts listening for connections from peer cluster members.
- private void startListening() {
+ ClusterDefinitionStore cds = new ClusterDefinitionStore("../config/cluster.json");
try {
- listenLoop = new ListenLoop(self.ip(), self.tcpPort());
- listenExecutor.execute(listenLoop);
- if (!listenLoop.awaitStart(START_TIMEOUT)) {
- log.warn("Listen loop did not start on-time; moving on...");
+ Set<DefaultControllerNode> storedNodes = cds.read();
+ for (DefaultControllerNode node : storedNodes) {
+ nodes.put(node.id(), node);
}
} catch (IOException e) {
- log.error("Unable to listen for cluster connections", e);
+ log.error("Unable to read cluster definitions", e);
}
}
/**
- * Initiates open connection request and registers the pending socket
- * channel with the given IO loop.
- *
- * @param loop loop with which the channel should be registered
- * @throws java.io.IOException if the socket could not be open or connected
+ * Determines who the local controller node is.
*/
- private void openConnection(DefaultControllerNode node, CommLoop loop) throws IOException {
- SocketAddress sa = new InetSocketAddress(getByAddress(node.ip().toOctets()), node.tcpPort());
- SocketChannel ch = SocketChannel.open();
- nodesByChannel.put(ch, node);
- ch.configureBlocking(false);
- ch.connect(sa);
- loop.connectStream(ch);
- }
+ private void establishSelfIdentity() {
+ // Establishes the controller's own identity.
+ IpPrefix ip = valueOf(System.getProperty("onos.ip", "127.0.1.1"));
+ localNode = nodes.get(new NodeId(ip.toString()));
-
- // Attempts to connect to any nodes that do not have an associated connection.
- private void startInitiating() {
- timer.schedule(connectionCustodian, CONNECTION_CUSTODIAN_DELAY, CONNECTION_CUSTODIAN_FREQUENCY);
+ // As a fall-back, let's make sure we at least know who we are.
+ if (localNode == null) {
+ localNode = new DefaultControllerNode(new NodeId(ip.toString()), ip);
+ nodes.put(localNode.id(), localNode);
+ states.put(localNode.id(), State.ACTIVE);
+ }
}
@Override
public ControllerNode getLocalNode() {
- return self;
+ return localNode;
}
@Override
@@ -215,179 +122,29 @@
public ControllerNode addNode(NodeId nodeId, IpPrefix ip, int tcpPort) {
DefaultControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
nodes.put(nodeId, node);
+ connectionManager.addNode(node);
return node;
}
@Override
public void removeNode(NodeId nodeId) {
- nodes.remove(nodeId);
- TLVMessageStream stream = streams.remove(nodeId);
- if (stream != null) {
- stream.close();
+ DefaultControllerNode node = nodes.remove(nodeId);
+ if (node != null) {
+ connectionManager.removeNode(node);
}
}
- // Listens and accepts inbound connections from other cluster nodes.
- private class ListenLoop extends AcceptorLoop {
- ListenLoop(IpPrefix ip, int tcpPort) throws IOException {
- super(SELECT_TIMEOUT, new InetSocketAddress(getByAddress(ip.toOctets()), tcpPort));
+ // Entity to handle back calls from the connection manager.
+ private class InnerNodesDelegate implements ClusterNodesDelegate {
+ @Override
+ public void nodeDetected(DefaultControllerNode node) {
+ nodes.put(node.id(), node);
+ states.put(node.id(), State.ACTIVE);
}
@Override
- protected void acceptConnection(ServerSocketChannel channel) throws IOException {
- SocketChannel sc = channel.accept();
- sc.configureBlocking(false);
-
- Socket so = sc.socket();
- so.setTcpNoDelay(SO_NO_DELAY);
- so.setReceiveBufferSize(SO_RCV_BUFFER_SIZE);
- so.setSendBufferSize(SO_SEND_BUFFER_SIZE);
-
- findLeastUtilizedLoop().acceptStream(sc);
+ public void nodeVanished(DefaultControllerNode node) {
+ states.put(node.id(), State.INACTIVE);
}
}
-
- private class CommLoop extends IOLoop<TLVMessage, TLVMessageStream> {
- CommLoop() throws IOException {
- super(SELECT_TIMEOUT);
- }
-
- @Override
- protected TLVMessageStream createStream(ByteChannel byteChannel) {
- return new TLVMessageStream(this, byteChannel, COMM_BUFFER_SIZE, COMM_IDLE_TIME);
- }
-
- @Override
- protected void processMessages(List<TLVMessage> messages, MessageStream<TLVMessage> stream) {
- TLVMessageStream tlvStream = (TLVMessageStream) stream;
- for (TLVMessage message : messages) {
- // TODO: add type-based dispatching here... this is just a hack to get going
- if (message.type() == HELLO_MSG) {
- processHello(message, tlvStream);
- } else if (message.type() == ECHO_MSG) {
- processEcho(message, tlvStream);
- } else {
- log.info("Deal with other messages");
- }
- }
- }
-
- @Override
- public TLVMessageStream acceptStream(SocketChannel channel) {
- TLVMessageStream stream = super.acceptStream(channel);
- try {
- InetSocketAddress sa = (InetSocketAddress) channel.getRemoteAddress();
- log.info("Accepted connection from node {}", valueOf(sa.getAddress().getAddress()));
- stream.write(createHello(self));
-
- } catch (IOException e) {
- log.warn("Unable to accept connection from an unknown end-point", e);
- }
- return stream;
- }
-
- @Override
- public TLVMessageStream connectStream(SocketChannel channel) {
- TLVMessageStream stream = super.connectStream(channel);
- DefaultControllerNode node = nodesByChannel.get(channel);
- if (node != null) {
- log.debug("Opened connection to node {}", node.id());
- nodesByChannel.remove(channel);
- }
- return stream;
- }
-
- @Override
- protected void connect(SelectionKey key) throws IOException {
- try {
- super.connect(key);
- TLVMessageStream stream = (TLVMessageStream) key.attachment();
- send(stream, createHello(self));
- } catch (IOException e) {
- if (!Objects.equals(e.getMessage(), "Connection refused")) {
- throw e;
- }
- }
- }
-
- @Override
- protected void removeStream(MessageStream<TLVMessage> stream) {
- DefaultControllerNode node = ((TLVMessageStream) stream).node();
- if (node != null) {
- log.info("Closed connection to node {}", node.id());
- states.put(node.id(), State.INACTIVE);
- streams.remove(node.id());
- }
- super.removeStream(stream);
- }
- }
-
- // Processes a HELLO message from a peer controller node.
- private void processHello(TLVMessage message, TLVMessageStream stream) {
- // FIXME: pure hack for now
- String data = new String(message.data());
- String[] fields = data.split(":");
- DefaultControllerNode node = new DefaultControllerNode(new NodeId(fields[0]),
- valueOf(fields[1]),
- Integer.parseInt(fields[2]));
- stream.setNode(node);
- nodes.put(node.id(), node);
- streams.put(node.id(), stream);
- states.put(node.id(), State.ACTIVE);
- }
-
- // Processes an ECHO message from a peer controller node.
- private void processEcho(TLVMessage message, TLVMessageStream tlvStream) {
- // TODO: implement heart-beat refresh
- log.info("Dealing with echoes...");
- }
-
- // Sends message to the specified stream.
- private void send(TLVMessageStream stream, TLVMessage message) {
- try {
- stream.write(message);
- } catch (IOException e) {
- log.warn("Unable to send message to {}", stream.node().id());
- }
- }
-
- // Creates a hello message to be sent to a peer controller node.
- private TLVMessage createHello(DefaultControllerNode self) {
- return new TLVMessage(HELLO_MSG, (self.id() + ":" + self.ip() + ":" + self.tcpPort()).getBytes());
- }
-
- // Sweeps through all controller nodes and attempts to open connection to
- // those that presently do not have one.
- private class ConnectionCustodian extends TimerTask {
- @Override
- public void run() {
- for (DefaultControllerNode node : nodes.values()) {
- if (node != self && !streams.containsKey(node.id())) {
- try {
- openConnection(node, findLeastUtilizedLoop());
- } catch (IOException e) {
- log.debug("Unable to connect", e);
- }
- }
- }
- }
- }
-
- // Finds the least utilities IO loop.
- private CommLoop findLeastUtilizedLoop() {
- CommLoop leastUtilized = null;
- int minCount = Integer.MAX_VALUE;
- for (CommLoop loop : commLoops) {
- int count = loop.streamCount();
- if (count == 0) {
- return loop;
- }
-
- if (count < minCount) {
- leastUtilized = loop;
- minCount = count;
- }
- }
- return leastUtilized;
- }
}