blob: af1cffeb47805bcc5118296ec76865b97e3aa6ff [file] [log] [blame]
/*
 * Copyright 2014-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070018import com.google.common.collect.ImmutableSet;
19import com.google.common.collect.Maps;
Madan Jampanie3375062016-04-19 16:32:10 -070020
tom2d7c65f2014-09-23 01:09:35 -070021import org.apache.felix.scr.annotations.Activate;
22import org.apache.felix.scr.annotations.Component;
23import org.apache.felix.scr.annotations.Deactivate;
sangyun-hanf98df542016-03-24 20:28:03 +090024import org.apache.felix.scr.annotations.Modified;
25import org.apache.felix.scr.annotations.Property;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070026import org.apache.felix.scr.annotations.Reference;
27import org.apache.felix.scr.annotations.ReferenceCardinality;
Deepa Vaddireddy6690f592017-03-12 03:31:48 +053028import org.apache.felix.scr.annotations.ReferencePolicy;
tom2d7c65f2014-09-23 01:09:35 -070029import org.apache.felix.scr.annotations.Service;
Madan Jampani7d2fab22015-03-18 17:21:57 -070030import org.joda.time.DateTime;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080031import org.onlab.packet.IpAddress;
32import org.onlab.util.KryoNamespace;
Deepa Vaddireddy6690f592017-03-12 03:31:48 +053033import org.onosproject.cfg.ConfigProperty;
sangyun-han9f0af2d2016-08-04 13:04:59 +090034import org.onosproject.cfg.ComponentConfigService;
Brian O'Connorabafb502014-12-02 22:26:20 -080035import org.onosproject.cluster.ClusterEvent;
Madan Jampaniec1df022015-10-13 21:23:03 -070036import org.onosproject.cluster.ClusterMetadataService;
Brian O'Connorabafb502014-12-02 22:26:20 -080037import org.onosproject.cluster.ClusterStore;
38import org.onosproject.cluster.ClusterStoreDelegate;
39import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080040import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070041import org.onosproject.cluster.DefaultControllerNode;
Brian O'Connorabafb502014-12-02 22:26:20 -080042import org.onosproject.cluster.NodeId;
Jordan Haltermanf70bf462017-07-29 13:12:00 -070043import org.onosproject.core.Version;
44import org.onosproject.core.VersionService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080045import org.onosproject.store.AbstractStore;
Madan Jampanic26eede2015-04-16 11:42:16 -070046import org.onosproject.store.cluster.messaging.Endpoint;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070047import org.onosproject.store.cluster.messaging.MessagingService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080048import org.onosproject.store.serializers.KryoNamespaces;
Jordan Haltermanc6c6ef22017-08-20 17:11:41 -070049import org.onosproject.store.service.Serializer;
sangyun-hanf98df542016-03-24 20:28:03 +090050import org.osgi.service.component.ComponentContext;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080051import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070052
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070053import java.util.Map;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070054import java.util.Objects;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070055import java.util.Set;
56import java.util.concurrent.ExecutorService;
57import java.util.concurrent.Executors;
58import java.util.concurrent.ScheduledExecutorService;
59import java.util.concurrent.TimeUnit;
Madan Jampanid36def02016-01-13 11:21:56 -080060import java.util.function.BiConsumer;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070061import java.util.stream.Collectors;
62
Jordan Haltermanf70bf462017-07-29 13:12:00 -070063import static com.google.common.base.MoreObjects.firstNonNull;
sangyun-hanf98df542016-03-24 20:28:03 +090064import static com.google.common.base.Preconditions.checkArgument;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070065import static com.google.common.base.Preconditions.checkNotNull;
Deepa Vaddireddy6690f592017-03-12 03:31:48 +053066import static com.google.common.base.Strings.isNullOrEmpty;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070067import static org.onlab.util.Tools.groupedThreads;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070068import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_ACTIVATED;
69import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_DEACTIVATED;
70import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_READY;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070071import static org.slf4j.LoggerFactory.getLogger;
tom2d7c65f2014-09-23 01:09:35 -070072
tom2d7c65f2014-09-23 01:09:35 -070073@Component(immediate = true)
74@Service
Ayaka Koshibedd91b842015-03-02 14:48:47 -080075/**
76 * Distributed cluster nodes store that employs an accrual failure
77 * detector to identify cluster member up/down status.
78 */
tom0755a362014-09-24 11:54:43 -070079public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080080 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070081 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070082
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070083 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070084
Thomas Vachuskade563cf2015-04-01 00:28:50 -070085 public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
86
sangyun-hanf98df542016-03-24 20:28:03 +090087 private static final int DEFAULT_HEARTBEAT_INTERVAL = 100;
88 @Property(name = "heartbeatInterval", intValue = DEFAULT_HEARTBEAT_INTERVAL,
89 label = "Interval time to send heartbeat to other controller nodes (millisecond)")
90 private int heartbeatInterval = DEFAULT_HEARTBEAT_INTERVAL;
91
92 private static final int DEFAULT_PHI_FAILURE_THRESHOLD = 10;
93 @Property(name = "phiFailureThreshold", intValue = DEFAULT_PHI_FAILURE_THRESHOLD,
94 label = "the value of Phi threshold to detect accrual failure")
95 private int phiFailureThreshold = DEFAULT_PHI_FAILURE_THRESHOLD;
tom2d7c65f2014-09-23 01:09:35 -070096
Jordan Haltermanc6c6ef22017-08-20 17:11:41 -070097 private static final Serializer SERIALIZER = Serializer.using(
Jordan Haltermanf70bf462017-07-29 13:12:00 -070098 KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070099 .register(KryoNamespaces.API)
HIGUCHI Yutae7290652016-05-18 11:29:01 -0700100 .nextId(KryoNamespaces.BEGIN_USER_CUSTOM_ID)
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700101 .register(HeartbeatMessage.class)
HIGUCHI Yutae7290652016-05-18 11:29:01 -0700102 .build("ClusterStore"));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800103
104 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
105
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800106 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
107 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700108 private final Map<NodeId, Version> nodeVersions = Maps.newConcurrentMap();
109 private final Map<NodeId, DateTime> nodeLastUpdatedTimes = Maps.newConcurrentMap();
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800110
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800111 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700112 groupedThreads("onos/cluster/membership", "heartbeat-sender", log));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800113 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700114 groupedThreads("onos/cluster/membership", "heartbeat-receiver", log));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800115
116 private PhiAccrualFailureDetector failureDetector;
117
118 private ControllerNode localNode;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700119 private Version localVersion;
120
121 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
122 protected VersionService versionService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800123
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700124 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Madan Jampaniec1df022015-10-13 21:23:03 -0700125 protected ClusterMetadataService clusterMetadataService;
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700126
127 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
128 protected MessagingService messagingService;
129
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700130 // This must be optional to avoid a cyclic dependency
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530131 @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700132 bind = "bindComponentConfigService",
133 unbind = "unbindComponentConfigService",
134 policy = ReferencePolicy.DYNAMIC)
sangyun-han9f0af2d2016-08-04 13:04:59 +0900135 protected ComponentConfigService cfgService;
136
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700137 /**
138 * Hook for wiring up optional reference to a service.
139 *
140 * @param service service being announced
141 */
142 protected void bindComponentConfigService(ComponentConfigService service) {
143 if (cfgService == null) {
144 cfgService = service;
145 cfgService.registerProperties(getClass());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530146 readComponentConfiguration();
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700147 }
148 }
149
150 /**
151 * Hook for unwiring optional reference to a service.
152 *
153 * @param service service being withdrawn
154 */
155 protected void unbindComponentConfigService(ComponentConfigService service) {
156 if (cfgService == service) {
157 cfgService.unregisterProperties(getClass(), false);
158 cfgService = null;
159 }
160 }
161
tom2d7c65f2014-09-23 01:09:35 -0700162 @Activate
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700163 public void activate() {
Madan Jampaniec1df022015-10-13 21:23:03 -0700164 localNode = clusterMetadataService.getLocalNode();
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700165 localVersion = versionService.version();
166 nodeVersions.put(localNode.id(), localVersion);
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700167
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800168 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700169 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800170
171 failureDetector = new PhiAccrualFailureDetector();
172
173 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700174 heartbeatInterval, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700175
176 log.info("Started");
177 }
178
tom2d7c65f2014-09-23 01:09:35 -0700179 @Deactivate
180 public void deactivate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700181 messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800182 heartBeatSender.shutdownNow();
183 heartBeatMessageHandler.shutdownNow();
184
tom2d7c65f2014-09-23 01:09:35 -0700185 log.info("Stopped");
186 }
187
sangyun-hanf98df542016-03-24 20:28:03 +0900188 @Modified
189 public void modified(ComponentContext context) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530190 readComponentConfiguration();
sangyun-hanf98df542016-03-24 20:28:03 +0900191 }
192
tom2d7c65f2014-09-23 01:09:35 -0700193 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800194 public void setDelegate(ClusterStoreDelegate delegate) {
195 checkNotNull(delegate, "Delegate cannot be null");
196 this.delegate = delegate;
197 }
198
199 @Override
200 public void unsetDelegate(ClusterStoreDelegate delegate) {
201 this.delegate = null;
202 }
203
204 @Override
205 public boolean hasDelegate() {
206 return this.delegate != null;
207 }
208
209 @Override
tom2d7c65f2014-09-23 01:09:35 -0700210 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800211 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700212 }
213
214 @Override
215 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800216 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700217 }
218
219 @Override
220 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800221 checkNotNull(nodeId, INSTANCE_ID_NULL);
222 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700223 }
224
225 @Override
tomb41d1ac2014-09-24 01:51:24 -0700226 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800227 checkNotNull(nodeId, INSTANCE_ID_NULL);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700228 return firstNonNull(nodeStates.get(nodeId), State.INACTIVE);
229 }
230
231 @Override
232 public Version getVersion(NodeId nodeId) {
233 checkNotNull(nodeId, INSTANCE_ID_NULL);
234 return nodeVersions.get(nodeId);
tomb41d1ac2014-09-24 01:51:24 -0700235 }
236
237 @Override
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800238 public void markFullyStarted(boolean started) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700239 updateNode(localNode.id(), started ? State.READY : State.ACTIVE, null);
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800240 }
241
242 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700243 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
sangyun-hanf98df542016-03-24 20:28:03 +0900244 checkNotNull(nodeId, INSTANCE_ID_NULL);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800245 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700246 addNode(node);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800247 return node;
tomee49c372014-09-26 15:14:50 -0700248 }
249
250 @Override
tomb41d1ac2014-09-24 01:51:24 -0700251 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800252 checkNotNull(nodeId, INSTANCE_ID_NULL);
253 ControllerNode node = allNodes.remove(nodeId);
254 if (node != null) {
255 nodeStates.remove(nodeId);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700256 nodeVersions.remove(nodeId);
Jonathan Hartf1141262015-04-23 11:27:07 -0700257 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700258 }
259 }
260
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700261 private void addNode(ControllerNode node) {
262 allNodes.put(node.id(), node);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700263 updateNode(node.id(), node.equals(localNode) ? State.ACTIVE : State.INACTIVE, null);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700264 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700265 }
266
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700267 private void updateNode(NodeId nodeId, State newState, Version newVersion) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700268 State currentState = nodeStates.get(nodeId);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700269 Version currentVersion = nodeVersions.get(nodeId);
Jordan Haltermaneabae962017-08-01 17:29:36 -0700270 if (!Objects.equals(currentState, newState)
271 || (newVersion != null && !Objects.equals(currentVersion, newVersion))) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700272 nodeStates.put(nodeId, newState);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700273 if (newVersion != null) {
274 nodeVersions.put(nodeId, newVersion);
275 }
276 nodeLastUpdatedTimes.put(nodeId, DateTime.now());
277 notifyChange(nodeId, currentState, newState, currentVersion, newVersion);
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700278 }
Madan Jampani7d2fab22015-03-18 17:21:57 -0700279 }
280
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800281 private void heartbeat() {
282 try {
283 Set<ControllerNode> peers = allNodes.values()
284 .stream()
285 .filter(node -> !(node.id().equals(localNode.id())))
286 .collect(Collectors.toSet());
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800287 State state = nodeStates.get(localNode.id());
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700288 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, state, localVersion));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800289 peers.forEach((node) -> {
290 heartbeatToPeer(hbMessagePayload, node);
291 State currentState = nodeStates.get(node.id());
292 double phi = failureDetector.phi(node.id());
sangyun-hanf98df542016-03-24 20:28:03 +0900293 if (phi >= phiFailureThreshold) {
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800294 if (currentState.isActive()) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700295 updateNode(node.id(), State.INACTIVE, null);
Jordan Halterman03f4c692018-01-22 11:22:46 -0800296 failureDetector.reset(node.id());
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800297 }
298 } else {
299 if (currentState == State.INACTIVE) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700300 updateNode(node.id(), State.ACTIVE, null);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800301 }
302 }
303 });
304 } catch (Exception e) {
305 log.debug("Failed to send heartbeat", e);
306 }
tomb41d1ac2014-09-24 01:51:24 -0700307 }
308
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700309 private void notifyChange(NodeId nodeId, State oldState, State newState, Version oldVersion, Version newVersion) {
310 if (oldState != newState || !Objects.equals(oldVersion, newVersion)) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700311 ControllerNode node = allNodes.get(nodeId);
Jon Hall1f13d642016-05-13 17:53:01 -0700312 // Either this node or that node is no longer part of the same cluster
313 if (node == null) {
Jon Hall66870baa2016-06-20 12:12:10 -0700314 log.debug("Could not find node {} in the cluster, ignoring state change", nodeId);
Jon Hall1f13d642016-05-13 17:53:01 -0700315 return;
316 }
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700317 ClusterEvent.Type type = newState == State.READY ? INSTANCE_READY :
318 newState == State.ACTIVE ? INSTANCE_ACTIVATED :
319 INSTANCE_DEACTIVATED;
320 notifyDelegate(new ClusterEvent(type, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800321 }
tomb41d1ac2014-09-24 01:51:24 -0700322 }
323
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800324 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700325 Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
Madan Jampani175e8fd2015-05-20 14:10:45 -0700326 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
327 if (error != null) {
328 log.trace("Sending heartbeat to {} failed", remoteEp, error);
329 }
330 });
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800331 }
332
Madan Jampanid36def02016-01-13 11:21:56 -0800333 private class HeartbeatMessageHandler implements BiConsumer<Endpoint, byte[]> {
tomb41d1ac2014-09-24 01:51:24 -0700334 @Override
Madan Jampanid36def02016-01-13 11:21:56 -0800335 public void accept(Endpoint sender, byte[] message) {
Madan Jampanic26eede2015-04-16 11:42:16 -0700336 HeartbeatMessage hb = SERIALIZER.decode(message);
Madan Jampanie3375062016-04-19 16:32:10 -0700337 if (clusterMetadataService.getClusterMetadata().getNodes().contains(hb.source())) {
Jordan Halterman03f4c692018-01-22 11:22:46 -0800338 // Avoid reporting heartbeats that have been enqueued by setting a minimum interval.
339 long heartbeatTime = System.currentTimeMillis();
340 long lastHeartbeatTime = failureDetector.getLastHeartbeatTime(hb.source().id());
341 if (heartbeatTime - lastHeartbeatTime > heartbeatInterval / 2) {
342 failureDetector.report(hb.source().id(), heartbeatTime);
Jordan Halterman5b54dad2018-01-16 19:41:31 -0800343 }
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700344 updateNode(hb.source().id(), hb.state, hb.version);
Madan Jampanie3375062016-04-19 16:32:10 -0700345 }
tomb41d1ac2014-09-24 01:51:24 -0700346 }
tom2d7c65f2014-09-23 01:09:35 -0700347 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800348
349 private static class HeartbeatMessage {
350 private ControllerNode source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800351 private State state;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700352 private Version version;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800353
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700354 public HeartbeatMessage(ControllerNode source, State state, Version version) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800355 this.source = source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800356 this.state = state != null ? state : State.ACTIVE;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700357 this.version = version;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800358 }
359
360 public ControllerNode source() {
361 return source;
362 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800363 }
364
Madan Jampani7d2fab22015-03-18 17:21:57 -0700365 @Override
366 public DateTime getLastUpdated(NodeId nodeId) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700367 return nodeLastUpdatedTimes.get(nodeId);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700368 }
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700369
sangyun-hanf98df542016-03-24 20:28:03 +0900370 /**
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530371 * Extracts properties from the component configuration.
sangyun-hanf98df542016-03-24 20:28:03 +0900372 *
sangyun-hanf98df542016-03-24 20:28:03 +0900373 */
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530374 private void readComponentConfiguration() {
375 Set<ConfigProperty> configProperties = cfgService.getProperties(getClass().getName());
376 for (ConfigProperty property : configProperties) {
Jon Halla3fcf672017-03-28 16:53:22 -0700377 if ("heartbeatInterval".equals(property.name())) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530378 String s = property.value();
379 if (s == null) {
380 setHeartbeatInterval(DEFAULT_HEARTBEAT_INTERVAL);
381 log.info("Heartbeat interval time is not configured, default value is {}",
382 DEFAULT_HEARTBEAT_INTERVAL);
383 } else {
384 int newHeartbeatInterval = isNullOrEmpty(s) ? DEFAULT_HEARTBEAT_INTERVAL
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700385 : Integer.parseInt(s.trim());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530386 if (newHeartbeatInterval > 0 && heartbeatInterval != newHeartbeatInterval) {
387 heartbeatInterval = newHeartbeatInterval;
388 restartHeartbeatSender();
389 }
390 log.info("Configured. Heartbeat interval time is configured to {}",
391 heartbeatInterval);
392 }
393 }
Jon Halla3fcf672017-03-28 16:53:22 -0700394 if ("phiFailureThreshold".equals(property.name())) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530395 String s = property.value();
396 if (s == null) {
397 setPhiFailureThreshold(DEFAULT_PHI_FAILURE_THRESHOLD);
398 log.info("Phi failure threshold is not configured, default value is {}",
399 DEFAULT_PHI_FAILURE_THRESHOLD);
400 } else {
401 int newPhiFailureThreshold = isNullOrEmpty(s) ? DEFAULT_HEARTBEAT_INTERVAL
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700402 : Integer.parseInt(s.trim());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530403 setPhiFailureThreshold(newPhiFailureThreshold);
404 log.info("Configured. Phi failure threshold is configured to {}",
405 phiFailureThreshold);
406 }
407 }
sangyun-hanf98df542016-03-24 20:28:03 +0900408 }
409 }
410
411 /**
412 * Sets heartbeat interval between the termination of one execution of heartbeat
413 * and the commencement of the next.
414 *
415 * @param interval term between each heartbeat
416 */
417 private void setHeartbeatInterval(int interval) {
418 try {
419 checkArgument(interval > 0, "Interval must be greater than zero");
420 heartbeatInterval = interval;
421 } catch (IllegalArgumentException e) {
422 log.warn(e.getMessage());
423 heartbeatInterval = DEFAULT_HEARTBEAT_INTERVAL;
424 }
425 }
426
427 /**
428 * Sets Phi failure threshold.
429 * Phi is based on a paper titled: "The φ Accrual Failure Detector" by Hayashibara, et al.
430 *
431 * @param threshold
432 */
433 private void setPhiFailureThreshold(int threshold) {
434 phiFailureThreshold = threshold;
435 }
436
437 /**
438 * Restarts heartbeatSender executor.
sangyun-hanf98df542016-03-24 20:28:03 +0900439 */
440 private void restartHeartbeatSender() {
441 try {
442 ScheduledExecutorService prevSender = heartBeatSender;
443 heartBeatSender = Executors.newSingleThreadScheduledExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700444 groupedThreads("onos/cluster/membership", "heartbeat-sender-%d", log));
sangyun-hanf98df542016-03-24 20:28:03 +0900445 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700446 heartbeatInterval, TimeUnit.MILLISECONDS);
sangyun-hanf98df542016-03-24 20:28:03 +0900447 prevSender.shutdown();
448 } catch (Exception e) {
449 log.warn(e.getMessage());
450 }
451 }
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700452}