blob: 0472cff30bb1e3c70b062941c41f7ac72a826dc0 [file] [log] [blame]
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07001/*
2 * Copyright 2014 Open Networking Laboratory
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070018import com.google.common.collect.ImmutableSet;
19import com.google.common.collect.Maps;
20import com.hazelcast.util.AddressUtil;
tom2d7c65f2014-09-23 01:09:35 -070021import org.apache.felix.scr.annotations.Activate;
22import org.apache.felix.scr.annotations.Component;
23import org.apache.felix.scr.annotations.Deactivate;
tom2d7c65f2014-09-23 01:09:35 -070024import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070025import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080026import org.onlab.netty.Endpoint;
27import org.onlab.netty.Message;
28import org.onlab.netty.MessageHandler;
29import org.onlab.netty.NettyMessagingService;
30import org.onlab.packet.IpAddress;
31import org.onlab.util.KryoNamespace;
Brian O'Connorabafb502014-12-02 22:26:20 -080032import org.onosproject.cluster.ClusterEvent;
33import org.onosproject.cluster.ClusterStore;
34import org.onosproject.cluster.ClusterStoreDelegate;
35import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080036import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070037import org.onosproject.cluster.DefaultControllerNode;
Brian O'Connorabafb502014-12-02 22:26:20 -080038import org.onosproject.cluster.NodeId;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080039import org.onosproject.store.AbstractStore;
40import org.onosproject.store.serializers.KryoNamespaces;
41import org.onosproject.store.serializers.KryoSerializer;
42import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070043
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070044import java.io.File;
45import java.io.IOException;
46import java.net.InetAddress;
47import java.net.NetworkInterface;
48import java.net.SocketException;
49import java.util.Enumeration;
50import java.util.Map;
51import java.util.Set;
52import java.util.concurrent.ExecutorService;
53import java.util.concurrent.Executors;
54import java.util.concurrent.ScheduledExecutorService;
55import java.util.concurrent.TimeUnit;
56import java.util.stream.Collectors;
57
58import static com.google.common.base.Preconditions.checkArgument;
59import static com.google.common.base.Preconditions.checkNotNull;
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070060import static java.net.NetworkInterface.getNetworkInterfaces;
61import static java.util.Collections.list;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070062import static org.onlab.util.Tools.groupedThreads;
63import static org.slf4j.LoggerFactory.getLogger;
tom2d7c65f2014-09-23 01:09:35 -070064
tom2d7c65f2014-09-23 01:09:35 -070065@Component(immediate = true)
66@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080067/**
68 * Distributed cluster nodes store that employs an accrual failure
69 * detector to identify cluster member up/down status.
70 */
tom0755a362014-09-24 11:54:43 -070071public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080072 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070073 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070074
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070075 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070076
Ayaka Koshibedd91b842015-03-02 14:48:47 -080077 // TODO: make these configurable.
78 private static final int HEARTBEAT_FD_PORT = 2419;
79 private static final int HEARTBEAT_INTERVAL_MS = 100;
80 private static final int PHI_FAILURE_THRESHOLD = 10;
tom2d7c65f2014-09-23 01:09:35 -070081
Ayaka Koshibedd91b842015-03-02 14:48:47 -080082 private static final String CONFIG_DIR = "../config";
83 private static final String CLUSTER_DEFINITION_FILE = "cluster.json";
84 private static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
Yuta HIGUCHId1a63e92014-12-02 13:14:28 -080085
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070086 public static final int DEFAULT_PORT = 9876;
87
Ayaka Koshibedd91b842015-03-02 14:48:47 -080088 private static final KryoSerializer SERIALIZER = new KryoSerializer() {
89 @Override
90 protected void setupKryoPool() {
91 serializerPool = KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070092 .register(KryoNamespaces.API)
93 .register(HeartbeatMessage.class)
94 .build()
95 .populate(1);
Ayaka Koshibedd91b842015-03-02 14:48:47 -080096 }
97 };
98
99 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
100
101 private ClusterDefinition clusterDefinition;
102
103 private Set<ControllerNode> seedNodes;
104 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
105 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Madan Jampani7d2fab22015-03-18 17:21:57 -0700106 private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800107 private NettyMessagingService messagingService = new NettyMessagingService();
108 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
109 groupedThreads("onos/cluster/membership", "heartbeat-sender"));
110 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
111 groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
112
113 private PhiAccrualFailureDetector failureDetector;
114
115 private ControllerNode localNode;
116
tom2d7c65f2014-09-23 01:09:35 -0700117 @Activate
118 public void activate() {
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700119 File clusterDefinitionFile = new File(CONFIG_DIR, CLUSTER_DEFINITION_FILE);
120 ClusterDefinitionStore clusterDefinitionStore =
121 new ClusterDefinitionStore(clusterDefinitionFile.getPath());
122
123 if (!clusterDefinitionFile.exists()) {
124 createDefaultClusterDefinition(clusterDefinitionStore);
125 }
tom2d7c65f2014-09-23 01:09:35 -0700126
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800127 try {
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700128 clusterDefinition = clusterDefinitionStore.read();
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800129 seedNodes = ImmutableSet
130 .copyOf(clusterDefinition.getNodes())
131 .stream()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700132 .map(nodeInfo -> new DefaultControllerNode(new NodeId(nodeInfo.getId()),
133 IpAddress.valueOf(nodeInfo.getIp()),
134 nodeInfo.getTcpPort()))
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800135 .collect(Collectors.toSet());
136 } catch (IOException e) {
137 throw new IllegalStateException(
138 "Failed to read cluster definition.", e);
139 }
tomb41d1ac2014-09-24 01:51:24 -0700140
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800141 seedNodes.forEach(node -> {
142 allNodes.put(node.id(), node);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700143 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800144 });
145
146 establishSelfIdentity();
147
148 messagingService = new NettyMessagingService(HEARTBEAT_FD_PORT);
149
150 try {
151 messagingService.activate();
152 } catch (InterruptedException e) {
153 Thread.currentThread().interrupt();
154 throw new IllegalStateException(
155 "Failed to cleanly initialize membership and"
156 + " failure detector communication channel.", e);
157 }
158 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700159 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800160
161 failureDetector = new PhiAccrualFailureDetector();
162
163 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700164 HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700165
166 log.info("Started");
167 }
168
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700169 private void createDefaultClusterDefinition(ClusterDefinitionStore store) {
170 // Assumes IPv4 is returned.
171 String ip = DistributedClusterStore.getSiteLocalAddress();
172 String ipPrefix = ip.replaceFirst("\\.[0-9]*$", ".*");
173 NodeInfo node = NodeInfo.from(ip, ip, DEFAULT_PORT);
174 try {
175 store.write(ClusterDefinition.from(ImmutableSet.of(node), ipPrefix));
176 } catch (IOException e) {
177 log.warn("Unable to write default cluster definition", e);
178 }
179 }
180
181 /**
182 * Returns the site local address if one can be found, loopback otherwise.
183 *
184 * @return site-local address in string form
185 */
186 public static String getSiteLocalAddress() {
187 try {
188 for (NetworkInterface nif : list(getNetworkInterfaces())) {
189 for (InetAddress address : list(nif.getInetAddresses())) {
190 if (address.getAddress()[0] == (byte) 0xC0) {
191 return address.toString().substring(1);
192 }
193 }
194 }
195 return InetAddress.getLoopbackAddress().toString().substring(1);
196
197 } catch (SocketException e) {
198 log.error("Unable to get network interfaces", e);
199 }
200
201 return null;
202 }
203
tom2d7c65f2014-09-23 01:09:35 -0700204 @Deactivate
205 public void deactivate() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800206 try {
207 messagingService.deactivate();
208 } catch (Exception e) {
209 log.trace("Failed to cleanly shutdown cluster membership messaging", e);
210 }
211
212 heartBeatSender.shutdownNow();
213 heartBeatMessageHandler.shutdownNow();
214
tom2d7c65f2014-09-23 01:09:35 -0700215 log.info("Stopped");
216 }
217
218 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800219 public void setDelegate(ClusterStoreDelegate delegate) {
220 checkNotNull(delegate, "Delegate cannot be null");
221 this.delegate = delegate;
222 }
223
224 @Override
225 public void unsetDelegate(ClusterStoreDelegate delegate) {
226 this.delegate = null;
227 }
228
229 @Override
230 public boolean hasDelegate() {
231 return this.delegate != null;
232 }
233
234 @Override
tom2d7c65f2014-09-23 01:09:35 -0700235 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800236 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700237 }
238
239 @Override
240 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800241 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700242 }
243
244 @Override
245 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800246 checkNotNull(nodeId, INSTANCE_ID_NULL);
247 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700248 }
249
250 @Override
tomb41d1ac2014-09-24 01:51:24 -0700251 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800252 checkNotNull(nodeId, INSTANCE_ID_NULL);
253 return nodeStates.get(nodeId);
tomb41d1ac2014-09-24 01:51:24 -0700254 }
255
256 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700257 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800258 checkNotNull(nodeId, INSTANCE_ID_NULL);
259 checkNotNull(ip, "IP address must not be null");
260 checkArgument(tcpPort > 5000, "Tcp port must be greater than 5000");
261 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
262 allNodes.put(node.id(), node);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700263 updateState(nodeId, State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800264 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
265 return node;
tomee49c372014-09-26 15:14:50 -0700266 }
267
268 @Override
tomb41d1ac2014-09-24 01:51:24 -0700269 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800270 checkNotNull(nodeId, INSTANCE_ID_NULL);
271 ControllerNode node = allNodes.remove(nodeId);
272 if (node != null) {
273 nodeStates.remove(nodeId);
274 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700275 }
276 }
277
Madan Jampani7d2fab22015-03-18 17:21:57 -0700278 private void updateState(NodeId nodeId, State newState) {
279 nodeStates.put(nodeId, newState);
280 nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
281 }
282
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800283 private void establishSelfIdentity() {
284 try {
285 IpAddress ip = findLocalIp();
286 localNode = new DefaultControllerNode(new NodeId(ip.toString()), ip);
287 allNodes.put(localNode.id(), localNode);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700288 updateState(localNode.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800289 log.info("Local Node: {}", localNode);
290 } catch (SocketException e) {
291 throw new IllegalStateException("Cannot determine local IP", e);
292 }
tom2d7c65f2014-09-23 01:09:35 -0700293 }
294
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800295 private void heartbeat() {
296 try {
297 Set<ControllerNode> peers = allNodes.values()
298 .stream()
299 .filter(node -> !(node.id().equals(localNode.id())))
300 .collect(Collectors.toSet());
301 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, peers));
302 peers.forEach((node) -> {
303 heartbeatToPeer(hbMessagePayload, node);
304 State currentState = nodeStates.get(node.id());
305 double phi = failureDetector.phi(node.id());
306 if (phi >= PHI_FAILURE_THRESHOLD) {
307 if (currentState == State.ACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700308 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800309 notifyStateChange(node.id(), State.ACTIVE, State.INACTIVE);
310 }
311 } else {
312 if (currentState == State.INACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700313 updateState(node.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800314 notifyStateChange(node.id(), State.INACTIVE, State.ACTIVE);
315 }
316 }
317 });
318 } catch (Exception e) {
319 log.debug("Failed to send heartbeat", e);
320 }
tomb41d1ac2014-09-24 01:51:24 -0700321 }
322
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800323 private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
324 ControllerNode node = allNodes.get(nodeId);
325 if (newState == State.ACTIVE) {
326 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_ACTIVATED, node));
327 } else {
328 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_DEACTIVATED, node));
329 }
tomb41d1ac2014-09-24 01:51:24 -0700330 }
331
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800332 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
333 Endpoint remoteEp = new Endpoint(peer.ip(), HEARTBEAT_FD_PORT);
334 try {
335 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload);
336 } catch (IOException e) {
Jonathan Hart4a4d18f2015-03-26 12:16:16 -0700337 log.trace("Sending heartbeat to {} failed", remoteEp, e);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800338 }
339 }
340
341 private IpAddress findLocalIp() throws SocketException {
342 Enumeration<NetworkInterface> interfaces =
343 NetworkInterface.getNetworkInterfaces();
344 while (interfaces.hasMoreElements()) {
345 NetworkInterface iface = interfaces.nextElement();
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700346 Enumeration<InetAddress> inetAddresses = iface.getInetAddresses();
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800347 while (inetAddresses.hasMoreElements()) {
348 IpAddress ip = IpAddress.valueOf(inetAddresses.nextElement());
349 if (AddressUtil.matchInterface(ip.toString(), clusterDefinition.getIpPrefix())) {
350 return ip;
351 }
352 }
353 }
354 throw new IllegalStateException("Unable to determine local ip");
355 }
356
357 private class HeartbeatMessageHandler implements MessageHandler {
tomb41d1ac2014-09-24 01:51:24 -0700358 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800359 public void handle(Message message) throws IOException {
360 HeartbeatMessage hb = SERIALIZER.decode(message.payload());
361 failureDetector.report(hb.source().id());
362 hb.knownPeers().forEach(node -> {
363 allNodes.put(node.id(), node);
364 });
tomb41d1ac2014-09-24 01:51:24 -0700365 }
tom2d7c65f2014-09-23 01:09:35 -0700366 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800367
368 private static class HeartbeatMessage {
369 private ControllerNode source;
370 private Set<ControllerNode> knownPeers;
371
372 public HeartbeatMessage(ControllerNode source, Set<ControllerNode> members) {
373 this.source = source;
374 this.knownPeers = ImmutableSet.copyOf(members);
375 }
376
377 public ControllerNode source() {
378 return source;
379 }
380
381 public Set<ControllerNode> knownPeers() {
382 return knownPeers;
383 }
384 }
385
Madan Jampani7d2fab22015-03-18 17:21:57 -0700386 @Override
387 public DateTime getLastUpdated(NodeId nodeId) {
388 return nodeStateLastUpdatedTimes.get(nodeId);
389 }
Jonathan Hart4a4d18f2015-03-26 12:16:16 -0700390}