Remove all mastership roles held by a node when it leaves the cluster
- Fix for ONOS-1189
Change-Id: I695ccd6bf2ff12da3702d1a982e377b7082c9341
diff --git a/core/api/src/main/java/org/onosproject/mastership/MastershipStore.java b/core/api/src/main/java/org/onosproject/mastership/MastershipStore.java
index 2112029..16d6d39 100644
--- a/core/api/src/main/java/org/onosproject/mastership/MastershipStore.java
+++ b/core/api/src/main/java/org/onosproject/mastership/MastershipStore.java
@@ -113,4 +113,12 @@
*/
MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId);
+ /**
+ * Removes every role held by the specified controller instance.
+ * Wherever that role was MASTER, another controller instance will be
+ * selected as a candidate master.
+ *
+ * @param nodeId identifier of the controller instance whose roles are removed
+ */
+ void relinquishAllRole(NodeId nodeId);
}
diff --git a/core/net/src/main/java/org/onosproject/cluster/impl/MastershipManager.java b/core/net/src/main/java/org/onosproject/cluster/impl/MastershipManager.java
index 17647ac..6f76ad0 100644
--- a/core/net/src/main/java/org/onosproject/cluster/impl/MastershipManager.java
+++ b/core/net/src/main/java/org/onosproject/cluster/impl/MastershipManager.java
@@ -304,38 +304,18 @@
case INSTANCE_REMOVED:
case INSTANCE_DEACTIVATED:
ControllerNode node = event.subject();
+ log.info("instance {} removed/deactivated", node);
+ store.relinquishAllRole(node.id());
- if (node.equals(clusterService.getLocalNode())) {
- //If we are in smaller cluster, relinquish and return
- for (DeviceId device : getDevicesOf(node.id())) {
- if (!isInMajority()) {
- //own DeviceManager should catch event and tell switch
- store.relinquishRole(node.id(), device);
- }
- }
- log.info("broke off from cluster, relinquished devices");
- break;
- }
-
- // if we are the larger one and the removed node(s) are brain dead,
- // force relinquish on behalf of disabled node.
- // check network channel to do this?
- for (DeviceId device : getDevicesOf(node.id())) {
- //some things to check:
- // 1. we didn't break off as well while we're at it
- // 2. others don't pile in and try too - maybe a lock
- if (isInMajority()) {
- store.relinquishRole(node.id(), device);
- }
- }
clusterSize.decrementAndGet();
- log.info("instance {} removed/deactivated", event.subject());
break;
default:
log.warn("unknown cluster event {}", event);
}
}
+ // Can be removed if we go with naive split-brain handling: only majority
+ // assigns mastership
private boolean isInMajority() {
if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
return true;
diff --git a/core/store/dist/src/main/java/org/onosproject/store/mastership/impl/DistributedMastershipStore.java b/core/store/dist/src/main/java/org/onosproject/store/mastership/impl/DistributedMastershipStore.java
index 5f98d79..db7edc5 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/mastership/impl/DistributedMastershipStore.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/mastership/impl/DistributedMastershipStore.java
@@ -19,8 +19,11 @@
import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED;
import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent;
+import java.util.ArrayList;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import org.apache.felix.scr.annotations.Activate;
@@ -360,6 +363,26 @@
}
}
+ @Override
+ public void relinquishAllRole(NodeId nodeId) {
+ // Relinquish each device where nodeId is MASTER or STANDBY; notify the delegate once at the end.
+ List<MastershipEvent> events = new ArrayList<>();
+ for (Entry<DeviceId, RoleValue> entry : roleMap.entrySet()) {
+ final DeviceId deviceId = entry.getKey();
+ final RoleValue roleValue = entry.getValue();
+ // NOTE(review): relinquishRole presumably updates roleMap; confirm entrySet() tolerates that mid-iteration.
+ if (roleValue.contains(MASTER, nodeId) ||
+ roleValue.contains(STANDBY, nodeId)) {
+
+ MastershipEvent event = relinquishRole(nodeId, deviceId);
+ if (event != null) {
+ events.add(event);
+ }
+ }
+ }
+ notifyDelegate(events);
+ }
+
// TODO: Consider moving this to RoleValue method
//helper to fetch a new master candidate for a given device.
private NodeId reelect(
diff --git a/core/store/trivial/src/main/java/org/onosproject/store/trivial/impl/SimpleMastershipStore.java b/core/store/trivial/src/main/java/org/onosproject/store/trivial/impl/SimpleMastershipStore.java
index 7236de5..f9e4273 100644
--- a/core/store/trivial/src/main/java/org/onosproject/store/trivial/impl/SimpleMastershipStore.java
+++ b/core/store/trivial/src/main/java/org/onosproject/store/trivial/impl/SimpleMastershipStore.java
@@ -352,4 +352,27 @@
}
return null;
}
+
+ @Override
+ public synchronized void relinquishAllRole(NodeId nodeId) {
+ List<MastershipEvent> events = new ArrayList<>();
+ Set<DeviceId> toRelinquish = new HashSet<>();
+ // Gather devices whose masterMap value equals nodeId.
+ masterMap.entrySet().stream()
+ .filter(entry -> nodeId.equals(entry.getValue()))
+ .forEach(entry -> toRelinquish.add(entry.getKey()));
+ // Gather devices whose backups value contains nodeId.
+ backups.entrySet().stream()
+ .filter(entry -> entry.getValue().contains(nodeId))
+ .forEach(entry -> toRelinquish.add(entry.getKey()));
+ // Relinquish only after both scans finish; presumably so relinquishRole never runs while a map is being streamed.
+ toRelinquish.forEach(deviceId -> {
+ MastershipEvent event = relinquishRole(nodeId, deviceId);
+ if (event != null) {
+ events.add(event);
+ }
+ });
+ // Pass every non-null event to the delegate in a single call.
+ notifyDelegate(events);
+ }
}