blob: ac8692b71a01fea4899452ddb8c5061bf3cccde4 [file] [log] [blame]
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07001/*
Ray Milkey34c95902015-04-15 09:47:53 -07002 * Copyright 2014-2015 Open Networking Laboratory
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
HIGUCHI Yuta1979f552015-12-28 21:24:26 -080018import com.google.common.base.MoreObjects;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070019import com.google.common.collect.ImmutableSet;
20import com.google.common.collect.Maps;
tom2d7c65f2014-09-23 01:09:35 -070021import org.apache.felix.scr.annotations.Activate;
22import org.apache.felix.scr.annotations.Component;
23import org.apache.felix.scr.annotations.Deactivate;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070024import org.apache.felix.scr.annotations.Reference;
25import org.apache.felix.scr.annotations.ReferenceCardinality;
tom2d7c65f2014-09-23 01:09:35 -070026import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070027import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080028import org.onlab.packet.IpAddress;
29import org.onlab.util.KryoNamespace;
Brian O'Connorabafb502014-12-02 22:26:20 -080030import org.onosproject.cluster.ClusterEvent;
Madan Jampaniec1df022015-10-13 21:23:03 -070031import org.onosproject.cluster.ClusterMetadataService;
Brian O'Connorabafb502014-12-02 22:26:20 -080032import org.onosproject.cluster.ClusterStore;
33import org.onosproject.cluster.ClusterStoreDelegate;
34import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080035import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070036import org.onosproject.cluster.DefaultControllerNode;
Brian O'Connorabafb502014-12-02 22:26:20 -080037import org.onosproject.cluster.NodeId;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080038import org.onosproject.store.AbstractStore;
Madan Jampanic26eede2015-04-16 11:42:16 -070039import org.onosproject.store.cluster.messaging.Endpoint;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070040import org.onosproject.store.cluster.messaging.MessagingService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080041import org.onosproject.store.serializers.KryoNamespaces;
42import org.onosproject.store.serializers.KryoSerializer;
43import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070044
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070045import java.util.Map;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070046import java.util.Objects;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070047import java.util.Set;
48import java.util.concurrent.ExecutorService;
49import java.util.concurrent.Executors;
50import java.util.concurrent.ScheduledExecutorService;
51import java.util.concurrent.TimeUnit;
Madan Jampanid36def02016-01-13 11:21:56 -080052import java.util.function.BiConsumer;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070053import java.util.stream.Collectors;
54
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070055import static com.google.common.base.Preconditions.checkNotNull;
56import static org.onlab.util.Tools.groupedThreads;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070057import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_ACTIVATED;
58import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_DEACTIVATED;
59import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_READY;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070060import static org.slf4j.LoggerFactory.getLogger;
tom2d7c65f2014-09-23 01:09:35 -070061
tom2d7c65f2014-09-23 01:09:35 -070062@Component(immediate = true)
63@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080064/**
65 * Distributed cluster nodes store that employs an accrual failure
66 * detector to identify cluster member up/down status.
67 */
tom0755a362014-09-24 11:54:43 -070068public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080069 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070070 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070071
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070072 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070073
Thomas Vachuskade563cf2015-04-01 00:28:50 -070074 public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
75
Ayaka Koshibedd91b842015-03-02 14:48:47 -080076 // TODO: make these configurable.
Ayaka Koshibedd91b842015-03-02 14:48:47 -080077 private static final int HEARTBEAT_INTERVAL_MS = 100;
78 private static final int PHI_FAILURE_THRESHOLD = 10;
tom2d7c65f2014-09-23 01:09:35 -070079
Ayaka Koshibedd91b842015-03-02 14:48:47 -080080 private static final KryoSerializer SERIALIZER = new KryoSerializer() {
81 @Override
82 protected void setupKryoPool() {
83 serializerPool = KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070084 .register(KryoNamespaces.API)
85 .register(HeartbeatMessage.class)
86 .build()
87 .populate(1);
Ayaka Koshibedd91b842015-03-02 14:48:47 -080088 }
89 };
90
91 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
92
Ayaka Koshibedd91b842015-03-02 14:48:47 -080093 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
94 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Madan Jampani7d2fab22015-03-18 17:21:57 -070095 private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
Thomas Vachuska7a8de842016-03-07 20:56:35 -080096
Ayaka Koshibedd91b842015-03-02 14:48:47 -080097 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
98 groupedThreads("onos/cluster/membership", "heartbeat-sender"));
99 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
100 groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
101
102 private PhiAccrualFailureDetector failureDetector;
103
104 private ControllerNode localNode;
105
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700106 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Madan Jampaniec1df022015-10-13 21:23:03 -0700107 protected ClusterMetadataService clusterMetadataService;
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700108
109 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
110 protected MessagingService messagingService;
111
tom2d7c65f2014-09-23 01:09:35 -0700112 @Activate
113 public void activate() {
Madan Jampaniec1df022015-10-13 21:23:03 -0700114 localNode = clusterMetadataService.getLocalNode();
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700115
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800116 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700117 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800118
119 failureDetector = new PhiAccrualFailureDetector();
120
121 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700122 HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700123
124 log.info("Started");
125 }
126
tom2d7c65f2014-09-23 01:09:35 -0700127 @Deactivate
128 public void deactivate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700129 messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800130 heartBeatSender.shutdownNow();
131 heartBeatMessageHandler.shutdownNow();
132
tom2d7c65f2014-09-23 01:09:35 -0700133 log.info("Stopped");
134 }
135
136 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800137 public void setDelegate(ClusterStoreDelegate delegate) {
138 checkNotNull(delegate, "Delegate cannot be null");
139 this.delegate = delegate;
140 }
141
142 @Override
143 public void unsetDelegate(ClusterStoreDelegate delegate) {
144 this.delegate = null;
145 }
146
147 @Override
148 public boolean hasDelegate() {
149 return this.delegate != null;
150 }
151
152 @Override
tom2d7c65f2014-09-23 01:09:35 -0700153 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800154 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700155 }
156
157 @Override
158 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800159 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700160 }
161
162 @Override
163 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800164 checkNotNull(nodeId, INSTANCE_ID_NULL);
165 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700166 }
167
168 @Override
tomb41d1ac2014-09-24 01:51:24 -0700169 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800170 checkNotNull(nodeId, INSTANCE_ID_NULL);
HIGUCHI Yuta1979f552015-12-28 21:24:26 -0800171 return MoreObjects.firstNonNull(nodeStates.get(nodeId), State.INACTIVE);
tomb41d1ac2014-09-24 01:51:24 -0700172 }
173
174 @Override
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800175 public void markFullyStarted(boolean started) {
176 updateState(localNode.id(), started ? State.READY : State.ACTIVE);
177 }
178
179 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700180 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800181 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700182 addNode(node);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800183 return node;
tomee49c372014-09-26 15:14:50 -0700184 }
185
186 @Override
tomb41d1ac2014-09-24 01:51:24 -0700187 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800188 checkNotNull(nodeId, INSTANCE_ID_NULL);
189 ControllerNode node = allNodes.remove(nodeId);
190 if (node != null) {
191 nodeStates.remove(nodeId);
Jonathan Hartf1141262015-04-23 11:27:07 -0700192 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700193 }
194 }
195
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700196 private void addNode(ControllerNode node) {
197 allNodes.put(node.id(), node);
Madan Jampaniec1df022015-10-13 21:23:03 -0700198 updateState(node.id(), node.equals(localNode) ? State.ACTIVE : State.INACTIVE);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700199 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700200 }
201
Madan Jampani7d2fab22015-03-18 17:21:57 -0700202 private void updateState(NodeId nodeId, State newState) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700203 State currentState = nodeStates.get(nodeId);
204 if (!Objects.equals(currentState, newState)) {
205 nodeStates.put(nodeId, newState);
206 nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
207 notifyStateChange(nodeId, currentState, newState);
208 }
Madan Jampani7d2fab22015-03-18 17:21:57 -0700209 }
210
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800211 private void heartbeat() {
212 try {
213 Set<ControllerNode> peers = allNodes.values()
214 .stream()
215 .filter(node -> !(node.id().equals(localNode.id())))
216 .collect(Collectors.toSet());
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800217 State state = nodeStates.get(localNode.id());
218 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, state, peers));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800219 peers.forEach((node) -> {
220 heartbeatToPeer(hbMessagePayload, node);
221 State currentState = nodeStates.get(node.id());
222 double phi = failureDetector.phi(node.id());
223 if (phi >= PHI_FAILURE_THRESHOLD) {
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800224 if (currentState.isActive()) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700225 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800226 }
227 } else {
228 if (currentState == State.INACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700229 updateState(node.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800230 }
231 }
232 });
233 } catch (Exception e) {
234 log.debug("Failed to send heartbeat", e);
235 }
tomb41d1ac2014-09-24 01:51:24 -0700236 }
237
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800238 private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700239 if (oldState != newState) {
240 ControllerNode node = allNodes.get(nodeId);
241 ClusterEvent.Type type = newState == State.READY ? INSTANCE_READY :
242 newState == State.ACTIVE ? INSTANCE_ACTIVATED :
243 INSTANCE_DEACTIVATED;
244 notifyDelegate(new ClusterEvent(type, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800245 }
tomb41d1ac2014-09-24 01:51:24 -0700246 }
247
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800248 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700249 Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
Madan Jampani175e8fd2015-05-20 14:10:45 -0700250 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
251 if (error != null) {
252 log.trace("Sending heartbeat to {} failed", remoteEp, error);
253 }
254 });
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800255 }
256
Madan Jampanid36def02016-01-13 11:21:56 -0800257 private class HeartbeatMessageHandler implements BiConsumer<Endpoint, byte[]> {
tomb41d1ac2014-09-24 01:51:24 -0700258 @Override
Madan Jampanid36def02016-01-13 11:21:56 -0800259 public void accept(Endpoint sender, byte[] message) {
Madan Jampanic26eede2015-04-16 11:42:16 -0700260 HeartbeatMessage hb = SERIALIZER.decode(message);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800261 failureDetector.report(hb.source().id());
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800262 updateState(hb.source().id(), hb.state);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800263 hb.knownPeers().forEach(node -> {
264 allNodes.put(node.id(), node);
265 });
tomb41d1ac2014-09-24 01:51:24 -0700266 }
tom2d7c65f2014-09-23 01:09:35 -0700267 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800268
269 private static class HeartbeatMessage {
270 private ControllerNode source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800271 private State state;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800272 private Set<ControllerNode> knownPeers;
273
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800274 public HeartbeatMessage(ControllerNode source, State state, Set<ControllerNode> members) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800275 this.source = source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800276 this.state = state != null ? state : State.ACTIVE;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800277 this.knownPeers = ImmutableSet.copyOf(members);
278 }
279
280 public ControllerNode source() {
281 return source;
282 }
283
284 public Set<ControllerNode> knownPeers() {
285 return knownPeers;
286 }
287 }
288
Madan Jampani7d2fab22015-03-18 17:21:57 -0700289 @Override
290 public DateTime getLastUpdated(NodeId nodeId) {
291 return nodeStateLastUpdatedTimes.get(nodeId);
292 }
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700293
Jonathan Hart4a4d18f2015-03-26 12:16:16 -0700294}