blob: 823bcba23f2ded94c28d135621dbc414068f3399 [file] [log] [blame]
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07001/*
Brian O'Connora09fe5b2017-08-03 21:12:30 -07002 * Copyright 2014-present Open Networking Foundation
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.store.cluster.impl;
tom2d7c65f2014-09-23 01:09:35 -070017
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070018import com.google.common.collect.ImmutableSet;
19import com.google.common.collect.Maps;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080020import org.onlab.packet.IpAddress;
21import org.onlab.util.KryoNamespace;
sangyun-han9f0af2d2016-08-04 13:04:59 +090022import org.onosproject.cfg.ComponentConfigService;
Ray Milkeyd84f89b2018-08-17 14:54:17 -070023import org.onosproject.cfg.ConfigProperty;
Brian O'Connorabafb502014-12-02 22:26:20 -080024import org.onosproject.cluster.ClusterEvent;
Madan Jampaniec1df022015-10-13 21:23:03 -070025import org.onosproject.cluster.ClusterMetadataService;
Brian O'Connorabafb502014-12-02 22:26:20 -080026import org.onosproject.cluster.ClusterStore;
27import org.onosproject.cluster.ClusterStoreDelegate;
28import org.onosproject.cluster.ControllerNode;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080029import org.onosproject.cluster.ControllerNode.State;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070030import org.onosproject.cluster.DefaultControllerNode;
Jordan Halterman00e92da2018-05-22 23:05:52 -070031import org.onosproject.cluster.Node;
Brian O'Connorabafb502014-12-02 22:26:20 -080032import org.onosproject.cluster.NodeId;
Jordan Haltermanf70bf462017-07-29 13:12:00 -070033import org.onosproject.core.Version;
34import org.onosproject.core.VersionService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080035import org.onosproject.store.AbstractStore;
Madan Jampanic26eede2015-04-16 11:42:16 -070036import org.onosproject.store.cluster.messaging.Endpoint;
Madan Jampaniafeebbd2015-05-19 15:26:01 -070037import org.onosproject.store.cluster.messaging.MessagingService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080038import org.onosproject.store.serializers.KryoNamespaces;
Jordan Halterman2c83a102017-08-20 17:11:41 -070039import org.onosproject.store.service.Serializer;
sangyun-hanf98df542016-03-24 20:28:03 +090040import org.osgi.service.component.ComponentContext;
Ray Milkeyd84f89b2018-08-17 14:54:17 -070041import org.osgi.service.component.annotations.Activate;
42import org.osgi.service.component.annotations.Component;
43import org.osgi.service.component.annotations.Deactivate;
44import org.osgi.service.component.annotations.Modified;
45import org.osgi.service.component.annotations.Reference;
46import org.osgi.service.component.annotations.ReferenceCardinality;
47import org.osgi.service.component.annotations.ReferencePolicy;
Ayaka Koshibedd91b842015-03-02 14:48:47 -080048import org.slf4j.Logger;
tom2d7c65f2014-09-23 01:09:35 -070049
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -070050import java.time.Instant;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070051import java.util.Map;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070052import java.util.Objects;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070053import java.util.Set;
54import java.util.concurrent.ExecutorService;
55import java.util.concurrent.Executors;
56import java.util.concurrent.ScheduledExecutorService;
57import java.util.concurrent.TimeUnit;
Madan Jampanid36def02016-01-13 11:21:56 -080058import java.util.function.BiConsumer;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070059import java.util.stream.Collectors;
60
Jordan Haltermanf70bf462017-07-29 13:12:00 -070061import static com.google.common.base.MoreObjects.firstNonNull;
sangyun-hanf98df542016-03-24 20:28:03 +090062import static com.google.common.base.Preconditions.checkArgument;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070063import static com.google.common.base.Preconditions.checkNotNull;
Deepa Vaddireddy6690f592017-03-12 03:31:48 +053064import static com.google.common.base.Strings.isNullOrEmpty;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070065import static org.onlab.util.Tools.groupedThreads;
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -070066import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_ACTIVATED;
67import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_DEACTIVATED;
68import static org.onosproject.cluster.ClusterEvent.Type.INSTANCE_READY;
Jonathan Hart4a4d18f2015-03-26 12:16:16 -070069import static org.slf4j.LoggerFactory.getLogger;
Ray Milkeyb5646e62018-10-16 11:42:18 -070070import static org.onosproject.store.OsgiPropertyConstants.*;
71
Ayaka Koshibedd91b842015-03-02 14:48:47 -080072/**
73 * Distributed cluster nodes store that employs an accrual failure
74 * detector to identify cluster member up/down status.
75 */
Ray Milkeyb5646e62018-10-16 11:42:18 -070076
77@Component(
78 enabled = false,
79 service = ClusterStore.class,
80 property = {
81 HEARTBEAT_INTERVAL + "=" + HEARTBEAT_INTERVAL_DEFAULT,
82 PHI_FAILURE_THRESHOLD + "=" + PHI_FAILURE_THRESHOLD_DEFAULT,
83 MIN_STANDARD_DEVIATION_MILLIS + "=" + MIN_STANDARD_DEVIATION_MILLIS_DEFAULT
84 }
85)
86
tom0755a362014-09-24 11:54:43 -070087public class DistributedClusterStore
Ayaka Koshibedd91b842015-03-02 14:48:47 -080088 extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
tomb41d1ac2014-09-24 01:51:24 -070089 implements ClusterStore {
tom2d7c65f2014-09-23 01:09:35 -070090
Thomas Vachuska8dc1a692015-03-31 01:01:37 -070091 private static final Logger log = getLogger(DistributedClusterStore.class);
tom2d7c65f2014-09-23 01:09:35 -070092
Thomas Vachuskade563cf2015-04-01 00:28:50 -070093 public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
94
Ray Milkeyd84f89b2018-08-17 14:54:17 -070095 //@Property(name = "heartbeatInterval", intValue = DEFAULT_HEARTBEAT_INTERVAL,
96 // label = "Interval time to send heartbeat to other controller nodes (millisecond)")
Ray Milkeyb5646e62018-10-16 11:42:18 -070097 private int heartbeatInterval = HEARTBEAT_INTERVAL_DEFAULT;
sangyun-hanf98df542016-03-24 20:28:03 +090098
Ray Milkeyd84f89b2018-08-17 14:54:17 -070099 //@Property(name = "phiFailureThreshold", intValue = DEFAULT_PHI_FAILURE_THRESHOLD,
100 // label = "the value of Phi threshold to detect accrual failure")
Ray Milkeyb5646e62018-10-16 11:42:18 -0700101 private int phiFailureThreshold = PHI_FAILURE_THRESHOLD_DEFAULT;
tom2d7c65f2014-09-23 01:09:35 -0700102
Ray Milkeyd84f89b2018-08-17 14:54:17 -0700103 //@Property(name = "minStandardDeviationMillis", longValue = DEFAULT_MIN_STANDARD_DEVIATION_MILLIS,
104 // label = "The minimum standard deviation to take into account when computing the Phi value")
Ray Milkeyb5646e62018-10-16 11:42:18 -0700105 private long minStandardDeviationMillis = MIN_STANDARD_DEVIATION_MILLIS_DEFAULT;
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700106
Jordan Halterman2c83a102017-08-20 17:11:41 -0700107 private static final Serializer SERIALIZER = Serializer.using(
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700108 KryoNamespace.newBuilder()
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700109 .register(KryoNamespaces.API)
HIGUCHI Yutae7290652016-05-18 11:29:01 -0700110 .nextId(KryoNamespaces.BEGIN_USER_CUSTOM_ID)
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700111 .register(HeartbeatMessage.class)
HIGUCHI Yutae7290652016-05-18 11:29:01 -0700112 .build("ClusterStore"));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800113
114 private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
115
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800116 private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
117 private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700118 private final Map<NodeId, Version> nodeVersions = Maps.newConcurrentMap();
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700119 private final Map<NodeId, Instant> nodeLastUpdatedTimes = Maps.newConcurrentMap();
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800120
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800121 private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700122 groupedThreads("onos/cluster/membership", "heartbeat-sender", log));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800123 private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700124 groupedThreads("onos/cluster/membership", "heartbeat-receiver", log));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800125
126 private PhiAccrualFailureDetector failureDetector;
127
128 private ControllerNode localNode;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700129 private Version localVersion;
130
Ray Milkeyd84f89b2018-08-17 14:54:17 -0700131 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700132 protected VersionService versionService;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800133
Ray Milkeyd84f89b2018-08-17 14:54:17 -0700134 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Madan Jampaniec1df022015-10-13 21:23:03 -0700135 protected ClusterMetadataService clusterMetadataService;
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700136
Ray Milkeyd84f89b2018-08-17 14:54:17 -0700137 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700138 protected MessagingService messagingService;
139
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700140 // This must be optional to avoid a cyclic dependency
Ray Milkeyd84f89b2018-08-17 14:54:17 -0700141 @Reference(cardinality = ReferenceCardinality.OPTIONAL,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700142 bind = "bindComponentConfigService",
143 unbind = "unbindComponentConfigService",
144 policy = ReferencePolicy.DYNAMIC)
sangyun-han9f0af2d2016-08-04 13:04:59 +0900145 protected ComponentConfigService cfgService;
146
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700147 /**
148 * Hook for wiring up optional reference to a service.
149 *
150 * @param service service being announced
151 */
152 protected void bindComponentConfigService(ComponentConfigService service) {
153 if (cfgService == null) {
154 cfgService = service;
155 cfgService.registerProperties(getClass());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530156 readComponentConfiguration();
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700157 }
158 }
159
160 /**
161 * Hook for unwiring optional reference to a service.
162 *
163 * @param service service being withdrawn
164 */
165 protected void unbindComponentConfigService(ComponentConfigService service) {
166 if (cfgService == service) {
167 cfgService.unregisterProperties(getClass(), false);
168 cfgService = null;
169 }
170 }
171
tom2d7c65f2014-09-23 01:09:35 -0700172 @Activate
Thomas Vachuska1ca7e9f2016-08-05 10:21:41 -0700173 public void activate() {
Madan Jampaniec1df022015-10-13 21:23:03 -0700174 localNode = clusterMetadataService.getLocalNode();
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700175 localVersion = versionService.version();
176 nodeVersions.put(localNode.id(), localVersion);
Thomas Vachuska8dc1a692015-03-31 01:01:37 -0700177
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800178 messagingService.registerHandler(HEARTBEAT_MESSAGE,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700179 new HeartbeatMessageHandler(), heartBeatMessageHandler);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800180
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700181 failureDetector = new PhiAccrualFailureDetector(minStandardDeviationMillis);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800182
183 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700184 heartbeatInterval, TimeUnit.MILLISECONDS);
tomb41d1ac2014-09-24 01:51:24 -0700185
186 log.info("Started");
187 }
188
tom2d7c65f2014-09-23 01:09:35 -0700189 @Deactivate
190 public void deactivate() {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700191 messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800192 heartBeatSender.shutdownNow();
193 heartBeatMessageHandler.shutdownNow();
194
tom2d7c65f2014-09-23 01:09:35 -0700195 log.info("Stopped");
196 }
197
sangyun-hanf98df542016-03-24 20:28:03 +0900198 @Modified
199 public void modified(ComponentContext context) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530200 readComponentConfiguration();
sangyun-hanf98df542016-03-24 20:28:03 +0900201 }
202
tom2d7c65f2014-09-23 01:09:35 -0700203 @Override
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800204 public void setDelegate(ClusterStoreDelegate delegate) {
205 checkNotNull(delegate, "Delegate cannot be null");
206 this.delegate = delegate;
207 }
208
209 @Override
210 public void unsetDelegate(ClusterStoreDelegate delegate) {
211 this.delegate = null;
212 }
213
214 @Override
215 public boolean hasDelegate() {
216 return this.delegate != null;
217 }
218
219 @Override
tom2d7c65f2014-09-23 01:09:35 -0700220 public ControllerNode getLocalNode() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800221 return localNode;
tom2d7c65f2014-09-23 01:09:35 -0700222 }
223
224 @Override
225 public Set<ControllerNode> getNodes() {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800226 return ImmutableSet.copyOf(allNodes.values());
tom2d7c65f2014-09-23 01:09:35 -0700227 }
228
229 @Override
Jordan Halterman00e92da2018-05-22 23:05:52 -0700230 public Set<Node> getStorageNodes() {
231 return ImmutableSet.of();
232 }
233
234 @Override
tom2d7c65f2014-09-23 01:09:35 -0700235 public ControllerNode getNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800236 checkNotNull(nodeId, INSTANCE_ID_NULL);
237 return allNodes.get(nodeId);
tom2d7c65f2014-09-23 01:09:35 -0700238 }
239
240 @Override
tomb41d1ac2014-09-24 01:51:24 -0700241 public State getState(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800242 checkNotNull(nodeId, INSTANCE_ID_NULL);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700243 return firstNonNull(nodeStates.get(nodeId), State.INACTIVE);
244 }
245
246 @Override
247 public Version getVersion(NodeId nodeId) {
248 checkNotNull(nodeId, INSTANCE_ID_NULL);
249 return nodeVersions.get(nodeId);
tomb41d1ac2014-09-24 01:51:24 -0700250 }
251
252 @Override
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800253 public void markFullyStarted(boolean started) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700254 updateNode(localNode.id(), started ? State.READY : State.ACTIVE, null);
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800255 }
256
257 @Override
Pavlin Radoslavov444b5192014-10-28 10:45:19 -0700258 public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
sangyun-hanf98df542016-03-24 20:28:03 +0900259 checkNotNull(nodeId, INSTANCE_ID_NULL);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800260 ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700261 addNode(node);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800262 return node;
tomee49c372014-09-26 15:14:50 -0700263 }
264
265 @Override
tomb41d1ac2014-09-24 01:51:24 -0700266 public void removeNode(NodeId nodeId) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800267 checkNotNull(nodeId, INSTANCE_ID_NULL);
268 ControllerNode node = allNodes.remove(nodeId);
269 if (node != null) {
270 nodeStates.remove(nodeId);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700271 nodeVersions.remove(nodeId);
Jonathan Hartf1141262015-04-23 11:27:07 -0700272 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
tomb41d1ac2014-09-24 01:51:24 -0700273 }
274 }
275
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700276 private void addNode(ControllerNode node) {
277 allNodes.put(node.id(), node);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700278 updateNode(node.id(), node.equals(localNode) ? State.ACTIVE : State.INACTIVE, null);
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700279 notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700280 }
281
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700282 private void updateNode(NodeId nodeId, State newState, Version newVersion) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700283 State currentState = nodeStates.get(nodeId);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700284 Version currentVersion = nodeVersions.get(nodeId);
Jordan Haltermaneabae962017-08-01 17:29:36 -0700285 if (!Objects.equals(currentState, newState)
286 || (newVersion != null && !Objects.equals(currentVersion, newVersion))) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700287 nodeStates.put(nodeId, newState);
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700288 if (newVersion != null) {
289 nodeVersions.put(nodeId, newVersion);
290 }
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700291 nodeLastUpdatedTimes.put(nodeId, Instant.now());
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700292 notifyChange(nodeId, currentState, newState, currentVersion, newVersion);
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700293 }
Madan Jampani7d2fab22015-03-18 17:21:57 -0700294 }
295
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800296 private void heartbeat() {
297 try {
298 Set<ControllerNode> peers = allNodes.values()
299 .stream()
300 .filter(node -> !(node.id().equals(localNode.id())))
301 .collect(Collectors.toSet());
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800302 State state = nodeStates.get(localNode.id());
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700303 byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, state, localVersion));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800304 peers.forEach((node) -> {
305 heartbeatToPeer(hbMessagePayload, node);
306 State currentState = nodeStates.get(node.id());
307 double phi = failureDetector.phi(node.id());
sangyun-hanf98df542016-03-24 20:28:03 +0900308 if (phi >= phiFailureThreshold) {
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800309 if (currentState.isActive()) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700310 updateNode(node.id(), State.INACTIVE, null);
Jordan Haltermane7062532018-01-22 11:22:46 -0800311 failureDetector.reset(node.id());
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800312 }
313 } else {
314 if (currentState == State.INACTIVE) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700315 updateNode(node.id(), State.ACTIVE, null);
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800316 }
317 }
318 });
319 } catch (Exception e) {
320 log.debug("Failed to send heartbeat", e);
321 }
tomb41d1ac2014-09-24 01:51:24 -0700322 }
323
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700324 private void notifyChange(NodeId nodeId, State oldState, State newState, Version oldVersion, Version newVersion) {
325 if (oldState != newState || !Objects.equals(oldVersion, newVersion)) {
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700326 ControllerNode node = allNodes.get(nodeId);
Jon Hall1f13d642016-05-13 17:53:01 -0700327 // Either this node or that node is no longer part of the same cluster
328 if (node == null) {
Jon Hall66870baa2016-06-20 12:12:10 -0700329 log.debug("Could not find node {} in the cluster, ignoring state change", nodeId);
Jon Hall1f13d642016-05-13 17:53:01 -0700330 return;
331 }
Thomas Vachuskafba7f3d2016-03-23 15:46:25 -0700332 ClusterEvent.Type type = newState == State.READY ? INSTANCE_READY :
333 newState == State.ACTIVE ? INSTANCE_ACTIVATED :
334 INSTANCE_DEACTIVATED;
335 notifyDelegate(new ClusterEvent(type, node));
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800336 }
tomb41d1ac2014-09-24 01:51:24 -0700337 }
338
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800339 private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
Madan Jampaniafeebbd2015-05-19 15:26:01 -0700340 Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
Madan Jampani175e8fd2015-05-20 14:10:45 -0700341 messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
342 if (error != null) {
343 log.trace("Sending heartbeat to {} failed", remoteEp, error);
344 }
345 });
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800346 }
347
Madan Jampanid36def02016-01-13 11:21:56 -0800348 private class HeartbeatMessageHandler implements BiConsumer<Endpoint, byte[]> {
tomb41d1ac2014-09-24 01:51:24 -0700349 @Override
Madan Jampanid36def02016-01-13 11:21:56 -0800350 public void accept(Endpoint sender, byte[] message) {
Madan Jampanic26eede2015-04-16 11:42:16 -0700351 HeartbeatMessage hb = SERIALIZER.decode(message);
Madan Jampanie3375062016-04-19 16:32:10 -0700352 if (clusterMetadataService.getClusterMetadata().getNodes().contains(hb.source())) {
Jordan Haltermane7062532018-01-22 11:22:46 -0800353 // Avoid reporting heartbeats that have been enqueued by setting a minimum interval.
354 long heartbeatTime = System.currentTimeMillis();
355 long lastHeartbeatTime = failureDetector.getLastHeartbeatTime(hb.source().id());
356 if (heartbeatTime - lastHeartbeatTime > heartbeatInterval / 2) {
357 failureDetector.report(hb.source().id(), heartbeatTime);
Jordan Halterman1315d3e2018-01-16 19:41:31 -0800358 }
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700359 updateNode(hb.source().id(), hb.state, hb.version);
Madan Jampanie3375062016-04-19 16:32:10 -0700360 }
tomb41d1ac2014-09-24 01:51:24 -0700361 }
tom2d7c65f2014-09-23 01:09:35 -0700362 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800363
364 private static class HeartbeatMessage {
365 private ControllerNode source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800366 private State state;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700367 private Version version;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800368
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700369 public HeartbeatMessage(ControllerNode source, State state, Version version) {
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800370 this.source = source;
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800371 this.state = state != null ? state : State.ACTIVE;
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700372 this.version = version;
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800373 }
374
375 public ControllerNode source() {
376 return source;
377 }
Ayaka Koshibedd91b842015-03-02 14:48:47 -0800378 }
379
Madan Jampani7d2fab22015-03-18 17:21:57 -0700380 @Override
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700381 public Instant getLastUpdatedInstant(NodeId nodeId) {
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700382 return nodeLastUpdatedTimes.get(nodeId);
Madan Jampani7d2fab22015-03-18 17:21:57 -0700383 }
Thomas Vachuskade563cf2015-04-01 00:28:50 -0700384
sangyun-hanf98df542016-03-24 20:28:03 +0900385 /**
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530386 * Extracts properties from the component configuration.
sangyun-hanf98df542016-03-24 20:28:03 +0900387 *
sangyun-hanf98df542016-03-24 20:28:03 +0900388 */
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530389 private void readComponentConfiguration() {
390 Set<ConfigProperty> configProperties = cfgService.getProperties(getClass().getName());
391 for (ConfigProperty property : configProperties) {
Jon Halla3fcf672017-03-28 16:53:22 -0700392 if ("heartbeatInterval".equals(property.name())) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530393 String s = property.value();
394 if (s == null) {
Ray Milkeyb5646e62018-10-16 11:42:18 -0700395 setHeartbeatInterval(HEARTBEAT_INTERVAL_DEFAULT);
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530396 log.info("Heartbeat interval time is not configured, default value is {}",
Ray Milkeyb5646e62018-10-16 11:42:18 -0700397 HEARTBEAT_INTERVAL_DEFAULT);
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530398 } else {
Ray Milkeyb5646e62018-10-16 11:42:18 -0700399 int newHeartbeatInterval = isNullOrEmpty(s) ? HEARTBEAT_INTERVAL_DEFAULT
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700400 : Integer.parseInt(s.trim());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530401 if (newHeartbeatInterval > 0 && heartbeatInterval != newHeartbeatInterval) {
402 heartbeatInterval = newHeartbeatInterval;
403 restartHeartbeatSender();
404 }
405 log.info("Configured. Heartbeat interval time is configured to {}",
406 heartbeatInterval);
407 }
408 }
Jon Halla3fcf672017-03-28 16:53:22 -0700409 if ("phiFailureThreshold".equals(property.name())) {
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530410 String s = property.value();
411 if (s == null) {
Ray Milkeyb5646e62018-10-16 11:42:18 -0700412 setPhiFailureThreshold(PHI_FAILURE_THRESHOLD_DEFAULT);
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530413 log.info("Phi failure threshold is not configured, default value is {}",
Ray Milkeyb5646e62018-10-16 11:42:18 -0700414 PHI_FAILURE_THRESHOLD_DEFAULT);
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530415 } else {
Ray Milkeyb5646e62018-10-16 11:42:18 -0700416 int newPhiFailureThreshold = isNullOrEmpty(s) ? HEARTBEAT_INTERVAL_DEFAULT
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700417 : Integer.parseInt(s.trim());
Deepa Vaddireddy6690f592017-03-12 03:31:48 +0530418 setPhiFailureThreshold(newPhiFailureThreshold);
419 log.info("Configured. Phi failure threshold is configured to {}",
420 phiFailureThreshold);
421 }
422 }
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700423 if ("minStandardDeviationMillis".equals(property.name())) {
424 String s = property.value();
425 if (s == null) {
Ray Milkeyb5646e62018-10-16 11:42:18 -0700426 setMinStandardDeviationMillis(MIN_STANDARD_DEVIATION_MILLIS_DEFAULT);
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700427 log.info("Minimum standard deviation is not configured, default value is {}",
Ray Milkeyb5646e62018-10-16 11:42:18 -0700428 MIN_STANDARD_DEVIATION_MILLIS_DEFAULT);
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700429 } else {
430 long newMinStandardDeviationMillis = isNullOrEmpty(s)
Ray Milkeyb5646e62018-10-16 11:42:18 -0700431 ? MIN_STANDARD_DEVIATION_MILLIS_DEFAULT
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700432 : Long.parseLong(s.trim());
433 setMinStandardDeviationMillis(newMinStandardDeviationMillis);
434 log.info("Configured. Minimum standard deviation is configured to {}",
435 newMinStandardDeviationMillis);
436 }
437 }
sangyun-hanf98df542016-03-24 20:28:03 +0900438 }
439 }
440
441 /**
442 * Sets heartbeat interval between the termination of one execution of heartbeat
443 * and the commencement of the next.
444 *
445 * @param interval term between each heartbeat
446 */
447 private void setHeartbeatInterval(int interval) {
448 try {
449 checkArgument(interval > 0, "Interval must be greater than zero");
450 heartbeatInterval = interval;
451 } catch (IllegalArgumentException e) {
452 log.warn(e.getMessage());
Ray Milkeyb5646e62018-10-16 11:42:18 -0700453 heartbeatInterval = HEARTBEAT_INTERVAL_DEFAULT;
sangyun-hanf98df542016-03-24 20:28:03 +0900454 }
455 }
456
457 /**
458 * Sets Phi failure threshold.
459 * Phi is based on a paper titled: "The φ Accrual Failure Detector" by Hayashibara, et al.
460 *
461 * @param threshold
462 */
463 private void setPhiFailureThreshold(int threshold) {
464 phiFailureThreshold = threshold;
465 }
466
467 /**
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700468 * Sets the minimum standard deviation milliseconds.
469 *
470 * @param minStandardDeviationMillis the updated minimum standard deviation
471 */
472 private void setMinStandardDeviationMillis(long minStandardDeviationMillis) {
473 this.minStandardDeviationMillis = minStandardDeviationMillis;
474 try {
475 failureDetector = new PhiAccrualFailureDetector(minStandardDeviationMillis);
476 } catch (IllegalArgumentException e) {
477 log.warn(e.getMessage());
Ray Milkeyb5646e62018-10-16 11:42:18 -0700478 this.minStandardDeviationMillis = MIN_STANDARD_DEVIATION_MILLIS_DEFAULT;
Jordan Halterman4a082ec2018-05-14 15:00:42 -0700479 failureDetector = new PhiAccrualFailureDetector(this.minStandardDeviationMillis);
480 }
481 }
482
483 /**
sangyun-hanf98df542016-03-24 20:28:03 +0900484 * Restarts heartbeatSender executor.
sangyun-hanf98df542016-03-24 20:28:03 +0900485 */
486 private void restartHeartbeatSender() {
487 try {
488 ScheduledExecutorService prevSender = heartBeatSender;
489 heartBeatSender = Executors.newSingleThreadScheduledExecutor(
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700490 groupedThreads("onos/cluster/membership", "heartbeat-sender-%d", log));
sangyun-hanf98df542016-03-24 20:28:03 +0900491 heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700492 heartbeatInterval, TimeUnit.MILLISECONDS);
sangyun-hanf98df542016-03-24 20:28:03 +0900493 prevSender.shutdown();
494 } catch (Exception e) {
495 log.warn(e.getMessage());
496 }
497 }
Jordan Haltermanf70bf462017-07-29 13:12:00 -0700498}