Fix race conditions in mastership handoff
Change-Id: Ifed733df1bdc3b144b6a341a9322838ea2aacd72
diff --git a/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/DeviceClockManager.java b/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/DeviceClockManager.java
index 48355cf..73ba735 100644
--- a/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/DeviceClockManager.java
+++ b/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/DeviceClockManager.java
@@ -44,6 +44,8 @@
@Override
public Timestamp getTimestamp(DeviceId deviceId) {
MastershipTerm term = deviceMastershipTerms.get(deviceId);
+ log.info("term info for {} is: {}", deviceId, term);
+
if (term == null) {
throw new IllegalStateException("Requesting timestamp for a deviceId without mastership");
}
@@ -52,6 +54,7 @@
@Override
public void setMastershipTerm(DeviceId deviceId, MastershipTerm term) {
+ log.info("adding term info {} {}", deviceId, term.master());
deviceMastershipTerms.put(deviceId, term);
}
}
diff --git a/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/GossipDeviceStore.java b/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/GossipDeviceStore.java
index 2603da1..31a86a5 100644
--- a/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/GossipDeviceStore.java
+++ b/core/store/dist/src/main/java/org/onlab/onos/store/device/impl/GossipDeviceStore.java
@@ -390,6 +390,7 @@
List<PortDescription> portDescriptions) {
final Timestamp newTimestamp = deviceClockService.getTimestamp(deviceId);
+ log.info("timestamp for {} {}", deviceId, newTimestamp);
final Timestamped<List<PortDescription>> timestampedInput
= new Timestamped<>(portDescriptions, newTimestamp);
diff --git a/core/store/dist/src/main/java/org/onlab/onos/store/link/impl/GossipLinkStore.java b/core/store/dist/src/main/java/org/onlab/onos/store/link/impl/GossipLinkStore.java
index 6e8a367..8cf78e5 100644
--- a/core/store/dist/src/main/java/org/onlab/onos/store/link/impl/GossipLinkStore.java
+++ b/core/store/dist/src/main/java/org/onlab/onos/store/link/impl/GossipLinkStore.java
@@ -360,7 +360,14 @@
final LinkKey key = linkKey(src, dst);
DeviceId dstDeviceId = dst.deviceId();
- Timestamp timestamp = deviceClockService.getTimestamp(dstDeviceId);
+ Timestamp timestamp = null;
+ try {
+ timestamp = deviceClockService.getTimestamp(dstDeviceId);
+ } catch (IllegalStateException e) {
+ // There are times when this is called before the mastership
+ // handoff has completed, so no timestamp can be issued yet.
+ return null;
+ }
LinkEvent event = removeLinkInternal(key, timestamp);
diff --git a/core/store/hz/cluster/src/main/java/org/onlab/onos/store/mastership/impl/DistributedMastershipStore.java b/core/store/hz/cluster/src/main/java/org/onlab/onos/store/mastership/impl/DistributedMastershipStore.java
index d0eae2d..b310b48 100644
--- a/core/store/hz/cluster/src/main/java/org/onlab/onos/store/mastership/impl/DistributedMastershipStore.java
+++ b/core/store/hz/cluster/src/main/java/org/onlab/onos/store/mastership/impl/DistributedMastershipStore.java
@@ -29,7 +29,10 @@
import org.onlab.util.KryoPool;
import com.google.common.collect.ImmutableSet;
+import com.hazelcast.core.EntryEvent;
+import com.hazelcast.core.EntryListener;
import com.hazelcast.core.IAtomicLong;
+import com.hazelcast.core.MapEvent;
import static org.onlab.onos.net.MastershipRole.*;
@@ -78,7 +81,7 @@
roleMap = new SMap(theInstance.getMap("nodeRoles"), this.serializer);
terms = new SMap(theInstance.getMap("terms"), this.serializer);
clusterSize = theInstance.getAtomicLong("clustersize");
- // roleMap.addEntryListener(new RemoteMasterShipEventHandler(), true);
+ roleMap.addEntryListener((new RemoteMasterShipEventHandler()), true);
log.info("Started");
}
@@ -207,6 +210,7 @@
rv.reassign(local, NONE, STANDBY);
roleMap.put(deviceId, rv);
terms.putIfAbsent(deviceId, INIT);
+
break;
case NONE:
//claim mastership
@@ -289,7 +293,8 @@
}
//helper to fetch a new master candidate for a given device.
- private MastershipEvent reelect(NodeId current, DeviceId deviceId, RoleValue rv) {
+ private MastershipEvent reelect(
+ NodeId current, DeviceId deviceId, RoleValue rv) {
//if this is an queue it'd be neater.
NodeId backup = null;
@@ -301,17 +306,18 @@
}
if (backup == null) {
+ log.info("{} giving up and going to NONE for {}", current, deviceId);
rv.remove(MASTER, current);
roleMap.put(deviceId, rv);
return null;
} else {
+ log.info("{} trying to pass mastership for {} to {}", current, deviceId, backup);
rv.replace(current, backup, MASTER);
rv.reassign(backup, STANDBY, NONE);
roleMap.put(deviceId, rv);
Integer term = terms.get(deviceId);
terms.put(deviceId, ++term);
- return new MastershipEvent(
- MASTER_CHANGED, deviceId, backup);
+ return new MastershipEvent(MASTER_CHANGED, deviceId, backup);
}
}
@@ -346,30 +352,51 @@
//adds or updates term information.
private void updateTerm(DeviceId deviceId) {
- Integer term = terms.get(deviceId);
- if (term == null) {
- terms.put(deviceId, INIT);
- } else {
- terms.put(deviceId, ++term);
+ terms.lock(deviceId);
+ try {
+ Integer term = terms.get(deviceId);
+ if (term == null) {
+ terms.put(deviceId, INIT);
+ } else {
+ terms.put(deviceId, ++term);
+ }
+ } finally {
+ terms.unlock(deviceId);
}
}
- private class RemoteMasterShipEventHandler extends RemoteEventHandler<DeviceId, NodeId> {
+ private class RemoteMasterShipEventHandler implements EntryListener<DeviceId, RoleValue> {
@Override
- protected void onAdd(DeviceId deviceId, NodeId nodeId) {
- notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
+ public void entryAdded(EntryEvent<DeviceId, RoleValue> event) {
}
@Override
- protected void onRemove(DeviceId deviceId, NodeId nodeId) {
- //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
+ public void entryRemoved(EntryEvent<DeviceId, RoleValue> event) {
}
@Override
- protected void onUpdate(DeviceId deviceId, NodeId oldNodeId, NodeId nodeId) {
- //only addition indicates a change in mastership
- //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
+ public void entryUpdated(EntryEvent<DeviceId, RoleValue> event) {
+ NodeId myId = clusterService.getLocalNode().id();
+ NodeId node = event.getValue().get(MASTER);
+ if (myId.equals(node)) {
+ // XXX: local node is the master, so skip notifying; or should we just let the event be sent and catch it ourselves?
+ return;
+ }
+ notifyDelegate(new MastershipEvent(
+ MASTER_CHANGED, event.getKey(), event.getValue().get(MASTER)));
+ }
+
+ @Override
+ public void entryEvicted(EntryEvent<DeviceId, RoleValue> event) {
+ }
+
+ @Override
+ public void mapEvicted(MapEvent event) {
+ }
+
+ @Override
+ public void mapCleared(MapEvent event) {
}
}
diff --git a/core/store/serializers/src/main/java/org/onlab/onos/store/serializers/KryoPoolUtil.java b/core/store/serializers/src/main/java/org/onlab/onos/store/serializers/KryoPoolUtil.java
index 38c4dfd..fe0d82a 100644
--- a/core/store/serializers/src/main/java/org/onlab/onos/store/serializers/KryoPoolUtil.java
+++ b/core/store/serializers/src/main/java/org/onlab/onos/store/serializers/KryoPoolUtil.java
@@ -94,7 +94,6 @@
.register(ConnectPoint.class, new ConnectPointSerializer())
.register(DefaultLink.class, new DefaultLinkSerializer())
.register(MastershipTerm.class, new MastershipTermSerializer())
- .register(MastershipRole.class, new MastershipRoleSerializer())
.register(HostLocation.class, new HostLocationSerializer())
.build();