blob: b2ee8327e59281d13e55ccbd7b3d04c98ca71bb9 [file] [log] [blame]
/*
 * Copyright 2014-2015 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
HIGUCHI Yuta1979f552015-12-28 21:24:26 -080018import com.google.common.base.MoreObjects;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070019import com.google.common.collect.ImmutableSet;
20import com.google.common.collect.Maps;
Madan Jampanic26eede2015-04-16 11:42:16 -070021
tom2d7c65f2014-09-23 01:09:35 -070022import org.apache.felix.scr.annotations.Activate;
23import org.apache.felix.scr.annotations.Component;
24import org.apache.felix.scr.annotations.Deactivate;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070025import org.apache.felix.scr.annotations.Reference;
26import org.apache.felix.scr.annotations.ReferenceCardinality;
tom2d7c65f2014-09-23 01:09:35 -070027import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070028import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080029import org.onlab.packet.IpAddress;
30import org.onlab.util.KryoNamespace;
Brian O'Connorabafb502014-12-02 22:26:20 -080031import org.onosproject.cluster.ClusterEvent;
Madan Jampaniec1df022015-10-13 21:23:03 -070032import org.onosproject.cluster.ClusterMetadataService;
Brian O'Connorabafb502014-12-02 22:26:20 -080033import org.onosproject.cluster.ClusterStore;
34import org.onosproject.cluster.ClusterStoreDelegate;
35import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080036import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070037import org.onosproject.cluster.DefaultControllerNode;
Brian O'Connorabafb502014-12-02 22:26:20 -080038import org.onosproject.cluster.NodeId;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080039import org.onosproject.store.AbstractStore;
Madan Jampanic26eede2015-04-16 11:42:16 -070040import org.onosproject.store.cluster.messaging.Endpoint;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070041import org.onosproject.store.cluster.messaging.MessagingService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080042import org.onosproject.store.serializers.KryoNamespaces;
43import org.onosproject.store.serializers.KryoSerializer;
44import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070045
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070046import java.util.Map;
47import java.util.Set;
48import java.util.concurrent.ExecutorService;
49import java.util.concurrent.Executors;
50import java.util.concurrent.ScheduledExecutorService;
51import java.util.concurrent.TimeUnit;
Madan Jampanic26eede2015-04-16 11:42:16 -070052import java.util.function.Consumer;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070053import java.util.stream.Collectors;
54
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070055import static com.google.common.base.Preconditions.checkNotNull;
56import static org.onlab.util.Tools.groupedThreads;
57import static org.slf4j.LoggerFactory.getLogger;
tom2d7c65f2014-09-23 01:09:35 -070058
tom2d7c65f2014-09-23 01:09:35 -070059@Component(immediate = true)
60@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080061/**
62 * Distributed cluster nodes store that employs an accrual failure
63 * detector to identify cluster member up/down status.
64 */
tom0755a362014-09-24 11:54:43 -070065public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080066 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070067 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070068
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070069 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070070
Thomas Vachuskade563cf2015-04-01 00:28:50 -070071 public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
72
Ayaka Koshibedd91b842015-03-02 14:48:47 -080073 // TODO: make these configurable.
Ayaka Koshibedd91b842015-03-02 14:48:47 -080074 private static final int HEARTBEAT_INTERVAL_MS = 100;
75 private static final int PHI_FAILURE_THRESHOLD = 10;
tom2d7c65f2014-09-23 01:09:35 -070076
Ayaka Koshibedd91b842015-03-02 14:48:47 -080077 private static final KryoSerializer SERIALIZER = new KryoSerializer() {
78 @Override
79 protected void setupKryoPool() {
80 serializerPool = KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070081 .register(KryoNamespaces.API)
82 .register(HeartbeatMessage.class)
83 .build()
84 .populate(1);
Ayaka Koshibedd91b842015-03-02 14:48:47 -080085 }
86 };
87
88 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
89
Ayaka Koshibedd91b842015-03-02 14:48:47 -080090 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
91 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Madan Jampani7d2fab22015-03-18 17:21:57 -070092 private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
Ayaka Koshibedd91b842015-03-02 14:48:47 -080093 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
94 groupedThreads("onos/cluster/membership", "heartbeat-sender"));
95 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
96 groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
97
98 private PhiAccrualFailureDetector failureDetector;
99
100 private ControllerNode localNode;
101
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700102 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Madan Jampaniec1df022015-10-13 21:23:03 -0700103 protected ClusterMetadataService clusterMetadataService;
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700104
105 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
106 protected MessagingService messagingService;
107
tom2d7c65f2014-09-23 01:09:35 -0700108 @Activate
109 public void activate() {
Madan Jampaniec1df022015-10-13 21:23:03 -0700110 localNode = clusterMetadataService.getLocalNode();
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700111
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800112 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700113 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800114
115 failureDetector = new PhiAccrualFailureDetector();
116
117 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700118 HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700119
120 log.info("Started");
121 }
122
tom2d7c65f2014-09-23 01:09:35 -0700123 @Deactivate
124 public void deactivate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700125 messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800126 heartBeatSender.shutdownNow();
127 heartBeatMessageHandler.shutdownNow();
128
tom2d7c65f2014-09-23 01:09:35 -0700129 log.info("Stopped");
130 }
131
132 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800133 public void setDelegate(ClusterStoreDelegate delegate) {
134 checkNotNull(delegate, "Delegate cannot be null");
135 this.delegate = delegate;
136 }
137
138 @Override
139 public void unsetDelegate(ClusterStoreDelegate delegate) {
140 this.delegate = null;
141 }
142
143 @Override
144 public boolean hasDelegate() {
145 return this.delegate != null;
146 }
147
148 @Override
tom2d7c65f2014-09-23 01:09:35 -0700149 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800150 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700151 }
152
153 @Override
154 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800155 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700156 }
157
158 @Override
159 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800160 checkNotNull(nodeId, INSTANCE_ID_NULL);
161 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700162 }
163
164 @Override
tomb41d1ac2014-09-24 01:51:24 -0700165 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800166 checkNotNull(nodeId, INSTANCE_ID_NULL);
HIGUCHI Yuta1979f552015-12-28 21:24:26 -0800167 return MoreObjects.firstNonNull(nodeStates.get(nodeId), State.INACTIVE);
tomb41d1ac2014-09-24 01:51:24 -0700168 }
169
170 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700171 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800172 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700173 addNode(node);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800174 return node;
tomee49c372014-09-26 15:14:50 -0700175 }
176
177 @Override
tomb41d1ac2014-09-24 01:51:24 -0700178 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800179 checkNotNull(nodeId, INSTANCE_ID_NULL);
180 ControllerNode node = allNodes.remove(nodeId);
181 if (node != null) {
182 nodeStates.remove(nodeId);
Jonathan Hartf1141262015-04-23 11:27:07 -0700183 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700184 }
185 }
186
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700187 private void addNode(ControllerNode node) {
188 allNodes.put(node.id(), node);
Madan Jampaniec1df022015-10-13 21:23:03 -0700189 updateState(node.id(), node.equals(localNode) ? State.ACTIVE : State.INACTIVE);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700190 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700191 }
192
Madan Jampani7d2fab22015-03-18 17:21:57 -0700193 private void updateState(NodeId nodeId, State newState) {
194 nodeStates.put(nodeId, newState);
195 nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
196 }
197
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800198 private void heartbeat() {
199 try {
200 Set<ControllerNode> peers = allNodes.values()
201 .stream()
202 .filter(node -> !(node.id().equals(localNode.id())))
203 .collect(Collectors.toSet());
204 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, peers));
205 peers.forEach((node) -> {
206 heartbeatToPeer(hbMessagePayload, node);
207 State currentState = nodeStates.get(node.id());
208 double phi = failureDetector.phi(node.id());
209 if (phi >= PHI_FAILURE_THRESHOLD) {
210 if (currentState == State.ACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700211 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800212 notifyStateChange(node.id(), State.ACTIVE, State.INACTIVE);
213 }
214 } else {
215 if (currentState == State.INACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700216 updateState(node.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800217 notifyStateChange(node.id(), State.INACTIVE, State.ACTIVE);
218 }
219 }
220 });
221 } catch (Exception e) {
222 log.debug("Failed to send heartbeat", e);
223 }
tomb41d1ac2014-09-24 01:51:24 -0700224 }
225
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800226 private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
227 ControllerNode node = allNodes.get(nodeId);
228 if (newState == State.ACTIVE) {
Jonathan Hartf1141262015-04-23 11:27:07 -0700229 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ACTIVATED, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800230 } else {
Jonathan Hartf1141262015-04-23 11:27:07 -0700231 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_DEACTIVATED, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800232 }
tomb41d1ac2014-09-24 01:51:24 -0700233 }
234
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800235 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700236 Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
Madan Jampani175e8fd2015-05-20 14:10:45 -0700237 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
238 if (error != null) {
239 log.trace("Sending heartbeat to {} failed", remoteEp, error);
240 }
241 });
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800242 }
243
Madan Jampanic26eede2015-04-16 11:42:16 -0700244 private class HeartbeatMessageHandler implements Consumer<byte[]> {
tomb41d1ac2014-09-24 01:51:24 -0700245 @Override
Madan Jampanic26eede2015-04-16 11:42:16 -0700246 public void accept(byte[] message) {
247 HeartbeatMessage hb = SERIALIZER.decode(message);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800248 failureDetector.report(hb.source().id());
249 hb.knownPeers().forEach(node -> {
250 allNodes.put(node.id(), node);
251 });
tomb41d1ac2014-09-24 01:51:24 -0700252 }
tom2d7c65f2014-09-23 01:09:35 -0700253 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800254
255 private static class HeartbeatMessage {
256 private ControllerNode source;
257 private Set<ControllerNode> knownPeers;
258
259 public HeartbeatMessage(ControllerNode source, Set<ControllerNode> members) {
260 this.source = source;
261 this.knownPeers = ImmutableSet.copyOf(members);
262 }
263
264 public ControllerNode source() {
265 return source;
266 }
267
268 public Set<ControllerNode> knownPeers() {
269 return knownPeers;
270 }
271 }
272
Madan Jampani7d2fab22015-03-18 17:21:57 -0700273 @Override
274 public DateTime getLastUpdated(NodeId nodeId) {
275 return nodeStateLastUpdatedTimes.get(nodeId);
276 }
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700277
Jonathan Hart4a4d18f2015-03-26 12:16:16 -0700278}