blob: 859efebf1adb8dffbf40660dcf0a8728c02675e2 [file] [log] [blame]
/*
 * Copyright 2014-2015 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070018import com.google.common.collect.ImmutableSet;
19import com.google.common.collect.Maps;
Madan Jampanic26eede2015-04-16 11:42:16 -070020
tom2d7c65f2014-09-23 01:09:35 -070021import org.apache.felix.scr.annotations.Activate;
22import org.apache.felix.scr.annotations.Component;
23import org.apache.felix.scr.annotations.Deactivate;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070024import org.apache.felix.scr.annotations.Reference;
25import org.apache.felix.scr.annotations.ReferenceCardinality;
tom2d7c65f2014-09-23 01:09:35 -070026import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070027import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080028import org.onlab.packet.IpAddress;
29import org.onlab.util.KryoNamespace;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070030import org.onosproject.cluster.ClusterDefinitionService;
Brian O'Connorabafb502014-12-02 22:26:20 -080031import org.onosproject.cluster.ClusterEvent;
32import org.onosproject.cluster.ClusterStore;
33import org.onosproject.cluster.ClusterStoreDelegate;
34import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080035import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070036import org.onosproject.cluster.DefaultControllerNode;
Brian O'Connorabafb502014-12-02 22:26:20 -080037import org.onosproject.cluster.NodeId;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080038import org.onosproject.store.AbstractStore;
Madan Jampanic26eede2015-04-16 11:42:16 -070039import org.onosproject.store.cluster.messaging.Endpoint;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070040import org.onosproject.store.cluster.messaging.MessagingService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080041import org.onosproject.store.serializers.KryoNamespaces;
42import org.onosproject.store.serializers.KryoSerializer;
43import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070044
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070045import java.util.Map;
46import java.util.Set;
47import java.util.concurrent.ExecutorService;
48import java.util.concurrent.Executors;
49import java.util.concurrent.ScheduledExecutorService;
50import java.util.concurrent.TimeUnit;
Madan Jampanic26eede2015-04-16 11:42:16 -070051import java.util.function.Consumer;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070052import java.util.stream.Collectors;
53
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070054import static com.google.common.base.Preconditions.checkNotNull;
55import static org.onlab.util.Tools.groupedThreads;
56import static org.slf4j.LoggerFactory.getLogger;
tom2d7c65f2014-09-23 01:09:35 -070057
tom2d7c65f2014-09-23 01:09:35 -070058@Component(immediate = true)
59@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080060/**
61 * Distributed cluster nodes store that employs an accrual failure
62 * detector to identify cluster member up/down status.
63 */
tom0755a362014-09-24 11:54:43 -070064public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080065 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070066 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070067
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070068 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070069
Thomas Vachuskade563cf2015-04-01 00:28:50 -070070 public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
71
Ayaka Koshibedd91b842015-03-02 14:48:47 -080072 // TODO: make these configurable.
Ayaka Koshibedd91b842015-03-02 14:48:47 -080073 private static final int HEARTBEAT_INTERVAL_MS = 100;
74 private static final int PHI_FAILURE_THRESHOLD = 10;
tom2d7c65f2014-09-23 01:09:35 -070075
Ayaka Koshibedd91b842015-03-02 14:48:47 -080076 private static final KryoSerializer SERIALIZER = new KryoSerializer() {
77 @Override
78 protected void setupKryoPool() {
79 serializerPool = KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070080 .register(KryoNamespaces.API)
81 .register(HeartbeatMessage.class)
82 .build()
83 .populate(1);
Ayaka Koshibedd91b842015-03-02 14:48:47 -080084 }
85 };
86
87 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
88
Ayaka Koshibedd91b842015-03-02 14:48:47 -080089 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
90 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Madan Jampani7d2fab22015-03-18 17:21:57 -070091 private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
Ayaka Koshibedd91b842015-03-02 14:48:47 -080092 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
93 groupedThreads("onos/cluster/membership", "heartbeat-sender"));
94 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
95 groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
96
97 private PhiAccrualFailureDetector failureDetector;
98
99 private ControllerNode localNode;
100
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700101 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
102 protected ClusterDefinitionService clusterDefinitionService;
103
104 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
105 protected MessagingService messagingService;
106
tom2d7c65f2014-09-23 01:09:35 -0700107 @Activate
108 public void activate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700109 localNode = clusterDefinitionService.localNode();
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700110
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800111 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700112 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800113
114 failureDetector = new PhiAccrualFailureDetector();
115
116 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700117 HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700118
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700119 addNode(localNode);
120 updateState(localNode.id(), State.ACTIVE);
121
tomb41d1ac2014-09-24 01:51:24 -0700122 log.info("Started");
123 }
124
tom2d7c65f2014-09-23 01:09:35 -0700125 @Deactivate
126 public void deactivate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700127 messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800128 heartBeatSender.shutdownNow();
129 heartBeatMessageHandler.shutdownNow();
130
tom2d7c65f2014-09-23 01:09:35 -0700131 log.info("Stopped");
132 }
133
134 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800135 public void setDelegate(ClusterStoreDelegate delegate) {
136 checkNotNull(delegate, "Delegate cannot be null");
137 this.delegate = delegate;
138 }
139
140 @Override
141 public void unsetDelegate(ClusterStoreDelegate delegate) {
142 this.delegate = null;
143 }
144
145 @Override
146 public boolean hasDelegate() {
147 return this.delegate != null;
148 }
149
150 @Override
tom2d7c65f2014-09-23 01:09:35 -0700151 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800152 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700153 }
154
155 @Override
156 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800157 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700158 }
159
160 @Override
161 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800162 checkNotNull(nodeId, INSTANCE_ID_NULL);
163 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700164 }
165
166 @Override
tomb41d1ac2014-09-24 01:51:24 -0700167 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800168 checkNotNull(nodeId, INSTANCE_ID_NULL);
169 return nodeStates.get(nodeId);
tomb41d1ac2014-09-24 01:51:24 -0700170 }
171
172 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700173 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800174 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700175 addNode(node);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800176 return node;
tomee49c372014-09-26 15:14:50 -0700177 }
178
179 @Override
tomb41d1ac2014-09-24 01:51:24 -0700180 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800181 checkNotNull(nodeId, INSTANCE_ID_NULL);
182 ControllerNode node = allNodes.remove(nodeId);
183 if (node != null) {
184 nodeStates.remove(nodeId);
Jonathan Hartf1141262015-04-23 11:27:07 -0700185 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700186 }
187 }
188
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700189 private void addNode(ControllerNode node) {
190 allNodes.put(node.id(), node);
191 updateState(node.id(), State.INACTIVE);
192 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700193 }
194
Madan Jampani7d2fab22015-03-18 17:21:57 -0700195 private void updateState(NodeId nodeId, State newState) {
196 nodeStates.put(nodeId, newState);
197 nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
198 }
199
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800200 private void heartbeat() {
201 try {
202 Set<ControllerNode> peers = allNodes.values()
203 .stream()
204 .filter(node -> !(node.id().equals(localNode.id())))
205 .collect(Collectors.toSet());
206 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, peers));
207 peers.forEach((node) -> {
208 heartbeatToPeer(hbMessagePayload, node);
209 State currentState = nodeStates.get(node.id());
210 double phi = failureDetector.phi(node.id());
211 if (phi >= PHI_FAILURE_THRESHOLD) {
212 if (currentState == State.ACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700213 updateState(node.id(), State.INACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800214 notifyStateChange(node.id(), State.ACTIVE, State.INACTIVE);
215 }
216 } else {
217 if (currentState == State.INACTIVE) {
Madan Jampani7d2fab22015-03-18 17:21:57 -0700218 updateState(node.id(), State.ACTIVE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800219 notifyStateChange(node.id(), State.INACTIVE, State.ACTIVE);
220 }
221 }
222 });
223 } catch (Exception e) {
224 log.debug("Failed to send heartbeat", e);
225 }
tomb41d1ac2014-09-24 01:51:24 -0700226 }
227
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800228 private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
229 ControllerNode node = allNodes.get(nodeId);
230 if (newState == State.ACTIVE) {
Jonathan Hartf1141262015-04-23 11:27:07 -0700231 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ACTIVATED, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800232 } else {
Jonathan Hartf1141262015-04-23 11:27:07 -0700233 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_DEACTIVATED, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800234 }
tomb41d1ac2014-09-24 01:51:24 -0700235 }
236
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800237 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700238 Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
Madan Jampani175e8fd2015-05-20 14:10:45 -0700239 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
240 if (error != null) {
241 log.trace("Sending heartbeat to {} failed", remoteEp, error);
242 }
243 });
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800244 }
245
Madan Jampanic26eede2015-04-16 11:42:16 -0700246 private class HeartbeatMessageHandler implements Consumer<byte[]> {
tomb41d1ac2014-09-24 01:51:24 -0700247 @Override
Madan Jampanic26eede2015-04-16 11:42:16 -0700248 public void accept(byte[] message) {
249 HeartbeatMessage hb = SERIALIZER.decode(message);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800250 failureDetector.report(hb.source().id());
251 hb.knownPeers().forEach(node -> {
252 allNodes.put(node.id(), node);
253 });
tomb41d1ac2014-09-24 01:51:24 -0700254 }
tom2d7c65f2014-09-23 01:09:35 -0700255 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800256
257 private static class HeartbeatMessage {
258 private ControllerNode source;
259 private Set<ControllerNode> knownPeers;
260
261 public HeartbeatMessage(ControllerNode source, Set<ControllerNode> members) {
262 this.source = source;
263 this.knownPeers = ImmutableSet.copyOf(members);
264 }
265
266 public ControllerNode source() {
267 return source;
268 }
269
270 public Set<ControllerNode> knownPeers() {
271 return knownPeers;
272 }
273 }
274
Madan Jampani7d2fab22015-03-18 17:21:57 -0700275 @Override
276 public DateTime getLastUpdated(NodeId nodeId) {
277 return nodeStateLastUpdatedTimes.get(nodeId);
278 }
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700279
Jonathan Hart4a4d18f2015-03-26 12:16:16 -0700280}