blob: 90fbd60002efa296655f7d1f1bd2c86ff4794550 [file] [log] [blame]
/*
 * Copyright 2014 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
Ayaka Koshibedd91b842015-03-02 14:48:47 -080018import static com.google.common.base.Preconditions.checkArgument;
19import static com.google.common.base.Preconditions.checkNotNull;
20import static org.onlab.util.Tools.groupedThreads;
21import static org.slf4j.LoggerFactory.getLogger;
22
23import java.io.File;
24import java.io.IOException;
25import java.net.InetAddress;
26import java.net.NetworkInterface;
27import java.net.SocketException;
28import java.util.Enumeration;
29import java.util.Map;
30import java.util.Set;
31import java.util.concurrent.ExecutorService;
32import java.util.concurrent.Executors;
33import java.util.concurrent.ScheduledExecutorService;
34import java.util.concurrent.TimeUnit;
35import java.util.stream.Collectors;
Yuta HIGUCHIb5df76d2014-09-27 20:54:00 -070036
tom2d7c65f2014-09-23 01:09:35 -070037import org.apache.felix.scr.annotations.Activate;
38import org.apache.felix.scr.annotations.Component;
39import org.apache.felix.scr.annotations.Deactivate;
tom2d7c65f2014-09-23 01:09:35 -070040import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070041import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080042import org.onlab.netty.Endpoint;
43import org.onlab.netty.Message;
44import org.onlab.netty.MessageHandler;
45import org.onlab.netty.NettyMessagingService;
46import org.onlab.packet.IpAddress;
47import org.onlab.util.KryoNamespace;
Brian O'Connorabafb502014-12-02 22:26:20 -080048import org.onosproject.cluster.ClusterEvent;
49import org.onosproject.cluster.ClusterStore;
50import org.onosproject.cluster.ClusterStoreDelegate;
51import org.onosproject.cluster.ControllerNode;
52import org.onosproject.cluster.DefaultControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080053import org.onosproject.cluster.ControllerNode.State;
Brian O'Connorabafb502014-12-02 22:26:20 -080054import org.onosproject.cluster.NodeId;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080055import org.onosproject.store.AbstractStore;
56import org.onosproject.store.serializers.KryoNamespaces;
57import org.onosproject.store.serializers.KryoSerializer;
58import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070059
Ayaka Koshibedd91b842015-03-02 14:48:47 -080060import com.google.common.collect.ImmutableSet;
61import com.google.common.collect.Maps;
62import com.hazelcast.util.AddressUtil;
tom2d7c65f2014-09-23 01:09:35 -070063
tom2d7c65f2014-09-23 01:09:35 -070064@Component(immediate = true)
65@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080066/**
67 * Distributed cluster nodes store that employs an accrual failure
68 * detector to identify cluster member up/down status.
69 */
tom0755a362014-09-24 11:54:43 -070070public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080071 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070072 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070073
Ayaka Koshibedd91b842015-03-02 14:48:47 -080074 private final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070075
Ayaka Koshibedd91b842015-03-02 14:48:47 -080076 // TODO: make these configurable.
77 private static final int HEARTBEAT_FD_PORT = 2419;
78 private static final int HEARTBEAT_INTERVAL_MS = 100;
79 private static final int PHI_FAILURE_THRESHOLD = 10;
tom2d7c65f2014-09-23 01:09:35 -070080
Ayaka Koshibedd91b842015-03-02 14:48:47 -080081 private static final String CONFIG_DIR = "../config";
82 private static final String CLUSTER_DEFINITION_FILE = "cluster.json";
83 private static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
Yuta HIGUCHId1a63e92014-12-02 13:14:28 -080084
Ayaka Koshibedd91b842015-03-02 14:48:47 -080085 private static final KryoSerializer SERIALIZER = new KryoSerializer() {
86 @Override
87 protected void setupKryoPool() {
88 serializerPool = KryoNamespace.newBuilder()
89 .register(KryoNamespaces.API)
90 .register(HeartbeatMessage.class)
91 .build()
92 .populate(1);
93 }
94 };
95
96 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
97
98 private ClusterDefinition clusterDefinition;
99
100 private Set<ControllerNode> seedNodes;
101 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
102 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Madan Jampani7d2fab22015-03-18 17:21:57 -0700103 private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800104 private NettyMessagingService messagingService = new NettyMessagingService();
105 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
106 groupedThreads("onos/cluster/membership", "heartbeat-sender"));
107 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
108 groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
109
110 private PhiAccrualFailureDetector failureDetector;
111
112 private ControllerNode localNode;
113
tom2d7c65f2014-09-23 01:09:35 -0700114 @Activate
115 public void activate() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800116 File clusterDefinitionFile = new File(CONFIG_DIR,
117 CLUSTER_DEFINITION_FILE);
tom2d7c65f2014-09-23 01:09:35 -0700118
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800119 try {
120 clusterDefinition = new ClusterDefinitionStore(
121 clusterDefinitionFile.getPath()).read();
122 seedNodes = ImmutableSet
123 .copyOf(clusterDefinition.getNodes())
124 .stream()
125 .map(nodeInfo -> new DefaultControllerNode(new NodeId(
126 nodeInfo.getId()), IpAddress.valueOf(nodeInfo
127 .getIp()), nodeInfo.getTcpPort()))
128 .collect(Collectors.toSet());
129 } catch (IOException e) {
130 throw new IllegalStateException(
131 "Failed to read cluster definition.", e);
132 }
tomb41d1ac2014-09-24 01:51:24 -0700133
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800134 seedNodes.forEach(node -> {
135 allNodes.put(node.id(), node);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700136 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800137 });
138
139 establishSelfIdentity();
140
141 messagingService = new NettyMessagingService(HEARTBEAT_FD_PORT);
142
143 try {
144 messagingService.activate();
145 } catch (InterruptedException e) {
146 Thread.currentThread().interrupt();
147 throw new IllegalStateException(
148 "Failed to cleanly initialize membership and"
149 + " failure detector communication channel.", e);
150 }
151 messagingService.registerHandler(HEARTBEAT_MESSAGE,
152 new HeartbeatMessageHandler(), heartBeatMessageHandler);
153
154 failureDetector = new PhiAccrualFailureDetector();
155
156 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
157 HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700158
159 log.info("Started");
160 }
161
tom2d7c65f2014-09-23 01:09:35 -0700162 @Deactivate
163 public void deactivate() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800164 try {
165 messagingService.deactivate();
166 } catch (Exception e) {
167 log.trace("Failed to cleanly shutdown cluster membership messaging", e);
168 }
169
170 heartBeatSender.shutdownNow();
171 heartBeatMessageHandler.shutdownNow();
172
tom2d7c65f2014-09-23 01:09:35 -0700173 log.info("Stopped");
174 }
175
176 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800177 public void setDelegate(ClusterStoreDelegate delegate) {
178 checkNotNull(delegate, "Delegate cannot be null");
179 this.delegate = delegate;
180 }
181
182 @Override
183 public void unsetDelegate(ClusterStoreDelegate delegate) {
184 this.delegate = null;
185 }
186
187 @Override
188 public boolean hasDelegate() {
189 return this.delegate != null;
190 }
191
192 @Override
tom2d7c65f2014-09-23 01:09:35 -0700193 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800194 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700195 }
196
197 @Override
198 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800199 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700200 }
201
202 @Override
203 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800204 checkNotNull(nodeId, INSTANCE_ID_NULL);
205 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700206 }
207
208 @Override
tomb41d1ac2014-09-24 01:51:24 -0700209 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800210 checkNotNull(nodeId, INSTANCE_ID_NULL);
211 return nodeStates.get(nodeId);
tomb41d1ac2014-09-24 01:51:24 -0700212 }
213
214 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700215 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800216 checkNotNull(nodeId, INSTANCE_ID_NULL);
217 checkNotNull(ip, "IP address must not be null");
218 checkArgument(tcpPort > 5000, "Tcp port must be greater than 5000");
219 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
220 allNodes.put(node.id(), node);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700221 updateState(nodeId, State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800222 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
223 return node;
tomee49c372014-09-26 15:14:50 -0700224 }
225
226 @Override
tomb41d1ac2014-09-24 01:51:24 -0700227 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800228 checkNotNull(nodeId, INSTANCE_ID_NULL);
229 ControllerNode node = allNodes.remove(nodeId);
230 if (node != null) {
231 nodeStates.remove(nodeId);
232 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700233 }
234 }
235
Madan Jampani7d2fab22015-03-18 17:21:57 -0700236 private void updateState(NodeId nodeId, State newState) {
237 nodeStates.put(nodeId, newState);
238 nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
239 }
240
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800241 private void establishSelfIdentity() {
242 try {
243 IpAddress ip = findLocalIp();
244 localNode = new DefaultControllerNode(new NodeId(ip.toString()), ip);
245 allNodes.put(localNode.id(), localNode);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700246 updateState(localNode.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800247 log.info("Local Node: {}", localNode);
248 } catch (SocketException e) {
249 throw new IllegalStateException("Cannot determine local IP", e);
250 }
tom2d7c65f2014-09-23 01:09:35 -0700251 }
252
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800253 private void heartbeat() {
254 try {
255 Set<ControllerNode> peers = allNodes.values()
256 .stream()
257 .filter(node -> !(node.id().equals(localNode.id())))
258 .collect(Collectors.toSet());
259 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, peers));
260 peers.forEach((node) -> {
261 heartbeatToPeer(hbMessagePayload, node);
262 State currentState = nodeStates.get(node.id());
263 double phi = failureDetector.phi(node.id());
264 if (phi >= PHI_FAILURE_THRESHOLD) {
265 if (currentState == State.ACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700266 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800267 notifyStateChange(node.id(), State.ACTIVE, State.INACTIVE);
268 }
269 } else {
270 if (currentState == State.INACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700271 updateState(node.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800272 notifyStateChange(node.id(), State.INACTIVE, State.ACTIVE);
273 }
274 }
275 });
276 } catch (Exception e) {
277 log.debug("Failed to send heartbeat", e);
278 }
tomb41d1ac2014-09-24 01:51:24 -0700279 }
280
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800281 private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
282 ControllerNode node = allNodes.get(nodeId);
283 if (newState == State.ACTIVE) {
284 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_ACTIVATED, node));
285 } else {
286 delegate.notify(new ClusterEvent(ClusterEvent.Type.INSTANCE_DEACTIVATED, node));
287 }
tomb41d1ac2014-09-24 01:51:24 -0700288 }
289
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800290 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
291 Endpoint remoteEp = new Endpoint(peer.ip(), HEARTBEAT_FD_PORT);
292 try {
293 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload);
294 } catch (IOException e) {
295 log.debug("Sending heartbeat to {} failed", remoteEp, e);
296 }
297 }
298
299 private IpAddress findLocalIp() throws SocketException {
300 Enumeration<NetworkInterface> interfaces =
301 NetworkInterface.getNetworkInterfaces();
302 while (interfaces.hasMoreElements()) {
303 NetworkInterface iface = interfaces.nextElement();
304 Enumeration<InetAddress> inetAddresses = iface.getInetAddresses();
305 while (inetAddresses.hasMoreElements()) {
306 IpAddress ip = IpAddress.valueOf(inetAddresses.nextElement());
307 if (AddressUtil.matchInterface(ip.toString(), clusterDefinition.getIpPrefix())) {
308 return ip;
309 }
310 }
311 }
312 throw new IllegalStateException("Unable to determine local ip");
313 }
314
315 private class HeartbeatMessageHandler implements MessageHandler {
tomb41d1ac2014-09-24 01:51:24 -0700316 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800317 public void handle(Message message) throws IOException {
318 HeartbeatMessage hb = SERIALIZER.decode(message.payload());
319 failureDetector.report(hb.source().id());
320 hb.knownPeers().forEach(node -> {
321 allNodes.put(node.id(), node);
322 });
tomb41d1ac2014-09-24 01:51:24 -0700323 }
tom2d7c65f2014-09-23 01:09:35 -0700324 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800325
326 private static class HeartbeatMessage {
327 private ControllerNode source;
328 private Set<ControllerNode> knownPeers;
329
330 public HeartbeatMessage(ControllerNode source, Set<ControllerNode> members) {
331 this.source = source;
332 this.knownPeers = ImmutableSet.copyOf(members);
333 }
334
335 public ControllerNode source() {
336 return source;
337 }
338
339 public Set<ControllerNode> knownPeers() {
340 return knownPeers;
341 }
342 }
343
Madan Jampani7d2fab22015-03-18 17:21:57 -0700344 @Override
345 public DateTime getLastUpdated(NodeId nodeId) {
346 return nodeStateLastUpdatedTimes.get(nodeId);
347 }
348}