revisiting DeviceManager role handling
Change-Id: If7765b38e2eda99ca210316429a8e65d482c2791
diff --git a/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java b/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
index 32ab007..258f57c 100644
--- a/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
+++ b/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
@@ -17,6 +17,7 @@
import static com.google.common.base.Preconditions.checkNotNull;
import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED;
+import static org.onlab.onos.net.MastershipRole.*;
import static org.slf4j.LoggerFactory.getLogger;
import java.util.List;
@@ -29,7 +30,6 @@
import org.apache.felix.scr.annotations.Service;
import org.onlab.onos.cluster.ClusterService;
import org.onlab.onos.cluster.NodeId;
-import org.onlab.onos.cluster.RoleInfo;
import org.onlab.onos.event.AbstractListenerRegistry;
import org.onlab.onos.event.EventDeliveryService;
import org.onlab.onos.mastership.MastershipEvent;
@@ -59,8 +59,6 @@
import org.onlab.onos.net.provider.AbstractProviderService;
import org.slf4j.Logger;
-import com.google.common.collect.HashMultimap;
-
/**
* Provides implementation of the device SB & NB APIs.
*/
@@ -160,31 +158,6 @@
return store.isAvailable(deviceId);
}
- // Applies the specified role to the device; ignores NONE
- private void applyRole(DeviceId deviceId, MastershipRole newRole) {
- if (newRole.equals(MastershipRole.NONE)) {
- return;
- }
-
- Device device = store.getDevice(deviceId);
- // FIXME: Device might not be there yet. (eventual consistent)
- if (device == null) {
- return;
- }
-
- DeviceProvider provider = getProvider(device.providerId());
- if (provider != null) {
- provider.roleChanged(device, newRole);
- // only trigger event when request was sent to provider
- // TODO: consider removing this from Device event type?
- post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
-
- if (newRole.equals(MastershipRole.MASTER)) {
- provider.triggerProbe(device);
- }
- }
- }
-
// Check a device for control channel connectivity.
private boolean isReachable(Device device) {
// FIXME: Device might not be there yet. (eventual consistent)
@@ -234,6 +207,38 @@
super(provider);
}
+ /**
+ * Apply role in reaction to provider event.
+ *
+ * @param deviceId device identifier
+ * @param newRole new role to apply to the device
+ * @return true if the request was sent to provider
+ */
+ private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
+
+ if (newRole.equals(MastershipRole.NONE)) {
+ //no-op
+ return true;
+ }
+
+ DeviceProvider provider = provider();
+ if (provider == null) {
+ log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+ // FIXME roleChanged should take DeviceId instead of Device
+ Device device = store.getDevice(deviceId);
+ if (device == null) {
+ log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+ provider.roleChanged(device, newRole);
+
+ // not triggering prove when triggered by provider service
+ return true;
+ }
+
+
@Override
public void deviceConnected(DeviceId deviceId,
DeviceDescription deviceDescription) {
@@ -242,27 +247,16 @@
checkValidity();
log.info("Device {} connected", deviceId);
- // check my Role
- MastershipRole role = mastershipService.requestRoleFor(deviceId);
- if (role != MastershipRole.MASTER) {
- // TODO: Do we need to explicitly tell the Provider that
- // this instance is no longer the MASTER? probably not
-// Device device = getDevice(deviceId);
-// if (device != null) {
-// // FIXME roleChanged should take DeviceId instead of Device
-// provider().roleChanged(device, role);
-// }
- return;
- }
- MastershipTerm term = termService.getMastershipTerm(deviceId);
-
final NodeId myNodeId = clusterService.getLocalNode().id();
+
+ // check my Role
+ mastershipService.requestRoleFor(deviceId);
+ final MastershipTerm term = termService.getMastershipTerm(deviceId);
if (!myNodeId.equals(term.master())) {
- // lost mastership after requestRole told this instance was MASTER.
log.info("Role of this node is STANDBY for {}", deviceId);
// TODO: Do we need to explicitly tell the Provider that
- // this instance is no longer the MASTER?
- //applyRole(deviceId, MastershipRole.STANDBY);
+ // this instance is not the MASTER
+ applyRole(deviceId, MastershipRole.STANDBY);
return;
}
log.info("Role of this node is MASTER for {}", deviceId);
@@ -273,24 +267,15 @@
DeviceEvent event = store.createOrUpdateDevice(provider().id(),
deviceId, deviceDescription);
+ applyRole(deviceId, MastershipRole.MASTER);
+
// If there was a change of any kind, tell the provider
// that this instance is the master.
- // Note: event can be null, if mastership was lost between
- // roleRequest and store update calls.
if (event != null) {
- // TODO: Check switch reconnected case. Is it assured that
- // event will never be null?
- // Could there be a situation MastershipService told this
- // instance is the new Master, but
- // event returned from the store is null?
-
- // FIXME: 1st argument should be deviceId, to allow setting
- // certain roles even if the store returned null.
- log.info("event: {} {}", event.type(), event);
- provider().roleChanged(event.subject(), role);
+ log.trace("event: {} {}", event.type(), event);
post(event);
} else {
- log.info("No event to publish");
+ post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, store.getDevice(deviceId)));
}
}
@@ -441,118 +426,133 @@
// Intercepts mastership events
private class InternalMastershipListener implements MastershipListener {
- // random cache size
- private final int cacheSize = 5;
- // temporarily stores term number + events to check for duplicates. A hack.
- private HashMultimap<Integer, RoleInfo> eventCache =
- HashMultimap.create();
+ // Applies the specified role to the device; ignores NONE
+ /**
+ * Apply role in reaction to mastership event.
+ *
+ * @param deviceId device identifier
+ * @param newRole new role to apply to the device
+ * @return true if the request was sent to provider
+ */
+ private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
+ if (newRole.equals(MastershipRole.NONE)) {
+ //no-op
+ return true;
+ }
+
+ Device device = store.getDevice(deviceId);
+ // FIXME: Device might not be there yet. (eventual consistent)
+ if (device == null) {
+ log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+
+ DeviceProvider provider = getProvider(device.providerId());
+ if (provider == null) {
+ log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+ // FIXME roleChanged should take DeviceId instead of Device
+ provider.roleChanged(device, newRole);
+
+ if (newRole.equals(MastershipRole.MASTER)) {
+ // only trigger event when request was sent to provider
+ // TODO: consider removing this from Device event type?
+ post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
+
+ provider.triggerProbe(device);
+ }
+ return true;
+ }
@Override
public void event(MastershipEvent event) {
+
+ if (event.type() != MastershipEvent.Type.MASTER_CHANGED) {
+ // Don't care if backup list changed.
+ return;
+ }
+
final DeviceId did = event.subject();
final NodeId myNodeId = clusterService.getLocalNode().id();
+ // myRole suggested by MastershipService
+ MastershipRole myNextRole;
if (myNodeId.equals(event.roleInfo().master())) {
+ // confirm latest info
MastershipTerm term = termService.getMastershipTerm(did);
-
- // TODO duplicate suppression should probably occur in the MastershipManager
- // itself, so listeners that can't deal with duplicates don't have to
- // so this check themselves.
-// if (checkDuplicate(event.roleInfo(), term.termNumber())) {
-// return;
-// }
-
- if (!myNodeId.equals(term.master())) {
- // something went wrong in consistency, let go
- log.warn("Mastership has changed after this event."
- + "Term Service suggests {} for {}", term, did);
- // FIXME: Is it possible to let go of MASTER role
- // but remain on STANDBY list?
- mastershipService.relinquishMastership(did);
- applyRole(did, MastershipRole.STANDBY);
- return;
- }
-
- // only set the new term if I am the master
- deviceClockProviderService.setMastershipTerm(did, term);
-
- // if the device is null here, we are the first master to claim the
- // device. No worries, the DeviceManager will create one soon.
- Device device = getDevice(did);
- if ((device != null) && !isAvailable(did)) {
- if (!isReachable(device)) {
- log.warn("Device {} has disconnected after this event", did);
- mastershipService.relinquishMastership(did);
- return;
- }
- //flag the device as online. Is there a better way to do this?
- DeviceEvent devEvent =
- store.createOrUpdateDevice(device.providerId(), did,
- new DefaultDeviceDescription(
- did.uri(), device.type(), device.manufacturer(),
- device.hwVersion(), device.swVersion(),
- device.serialNumber(), device.chassisId()));
- post(devEvent);
- }
- applyRole(did, MastershipRole.MASTER);
- } else if (event.roleInfo().backups().contains(myNodeId)) {
- if (!isReachable(getDevice(did))) {
- log.warn("Device {} has disconnected after this event", did);
- mastershipService.relinquishMastership(did);
- return;
- }
- applyRole(did, MastershipRole.STANDBY);
- } else {
- // Event suggests that this Node has no connection to this Device
- // confirm.
- final Device device = getDevice(did);
- if (!isReachable(device)) {
- // not connection to device, as expected
- return;
- }
- // connection seems to exist
- log.info("Detected mastership info mismatch, requesting Role");
- mastershipService.requestRoleFor(did);
- final MastershipTerm term = termService.getMastershipTerm(did);
- if (myNodeId.equals(term.master())) {
- // became MASTER
- // TODO: consider slicing out method for applying MASTER role
+ final boolean iHaveControl = myNodeId.equals(term.master());
+ if (iHaveControl) {
deviceClockProviderService.setMastershipTerm(did, term);
-
- //flag the device as online. Is there a better way to do this?
- DeviceEvent devEvent =
- store.createOrUpdateDevice(device.providerId(), did,
- new DefaultDeviceDescription(
- did.uri(), device.type(), device.manufacturer(),
- device.hwVersion(), device.swVersion(),
- device.serialNumber(), device.chassisId()));
- applyRole(did, MastershipRole.MASTER);
- post(devEvent);
+ myNextRole = MASTER;
} else {
- applyRole(did, MastershipRole.STANDBY);
+ myNextRole = STANDBY;
+ }
+ } else if (event.roleInfo().backups().contains(myNodeId)) {
+ myNextRole = STANDBY;
+ } else {
+ myNextRole = NONE;
+ }
+
+
+ final Device device = getDevice(did);
+ final boolean isReachable = isReachable(device);
+ if (!isReachable) {
+ // device is not connected to this node
+ if (myNextRole != NONE) {
+ log.warn("Node was instructed to be {} role for {}, "
+ + "but this node cannot reach the device. "
+ + "Relinquishing role. ",
+ myNextRole, did);
+ mastershipService.relinquishMastership(did);
+ }
+ return;
+ }
+
+ // device is connected to this node:
+
+ if (myNextRole == NONE) {
+ mastershipService.requestRoleFor(did);
+ MastershipTerm term = termService.getMastershipTerm(did);
+ if (myNodeId.equals(term.master())) {
+ myNextRole = MASTER;
+ } else {
+ myNextRole = STANDBY;
}
}
- }
- // checks for duplicate event, returning true if one is found.
- private boolean checkDuplicate(RoleInfo roleInfo, int term) {
- // turning off duplicate check
- return false;
-// synchronized (eventCache) {
-// if (eventCache.get(term).contains(roleInfo)) {
-// log.info("duplicate event detected; ignoring");
-// return true;
-// } else {
-// eventCache.put(term, roleInfo);
-// // purge by-term oldest entries to keep the cache size under limit
-// if (eventCache.size() > cacheSize) {
-// eventCache.removeAll(term - cacheSize);
-// }
-// return false;
-// }
-// }
+ switch (myNextRole) {
+ case MASTER:
+ if ((device != null) && !isAvailable(did)) {
+ //flag the device as online. Is there a better way to do this?
+ DefaultDeviceDescription deviceDescription
+ = new DefaultDeviceDescription(did.uri(),
+ device.type(),
+ device.manufacturer(),
+ device.hwVersion(),
+ device.swVersion(),
+ device.serialNumber(),
+ device.chassisId());
+ DeviceEvent devEvent =
+ store.createOrUpdateDevice(device.providerId(), did,
+ deviceDescription);
+ post(devEvent);
+ }
+ // TODO: should apply role only if there is mismatch
+ log.info("Applying role {} to {}", myNextRole, did);
+ applyRole(did, MASTER);
+ break;
+ case STANDBY:
+ log.info("Applying role {} to {}", myNextRole, did);
+ applyRole(did, STANDBY);
+ break;
+ case NONE:
+ default:
+ // should never reach here
+ log.error("You didn't see anything. I did not exist.");
+ break;
+ }
}
-
}
// Store delegate to re-post events emitted from the store.