DeviceManager: background process checking mastership
Change-Id: I215a2f6b585077847821b9e36953c53e43fde6c3
diff --git a/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java b/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
index 68665e4..b15872b 100644
--- a/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
+++ b/core/net/src/main/java/org/onlab/onos/net/device/impl/DeviceManager.java
@@ -18,9 +18,13 @@
import static com.google.common.base.Preconditions.checkNotNull;
import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED;
import static org.onlab.onos.net.MastershipRole.*;
+import static org.onlab.util.Tools.namedThreads;
import static org.slf4j.LoggerFactory.getLogger;
import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -83,6 +87,8 @@
private final MastershipListener mastershipListener = new InternalMastershipListener();
+ private ScheduledExecutorService backgroundService;
+
@Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
protected DeviceStore store;
@@ -102,15 +108,31 @@
@Activate
public void activate() {
+ backgroundService = Executors.newSingleThreadScheduledExecutor(namedThreads("device-manager-background"));
+
store.setDelegate(delegate);
eventDispatcher.addSink(DeviceEvent.class, listenerRegistry);
mastershipService.addListener(mastershipListener);
termService = mastershipService.requestTermService();
+
+ backgroundService.scheduleWithFixedDelay(new Runnable() {
+
+ @Override
+ public void run() {
+ try {
+ mastershipCheck();
+ } catch (Exception e) {
+ log.error("Exception thrown during integrity check", e);
+ }
+ }
+ }, 1, 1, TimeUnit.MINUTES);
log.info("Started");
}
@Deactivate
public void deactivate() {
+ backgroundService.shutdown();
+
store.unsetDelegate(delegate);
mastershipService.removeListener(mastershipListener);
eventDispatcher.removeSink(DeviceEvent.class);
@@ -172,10 +194,6 @@
@Override
public void removeDevice(DeviceId deviceId) {
checkNotNull(deviceId, DEVICE_ID_NULL);
- // XXX is this intended to apply to the full global topology?
- // if so, we probably don't want the fact that we aren't
- // MASTER to get in the way, as it would do now.
- // FIXME: forward or broadcast and let the Master handler the event.
DeviceEvent event = store.removeDevice(deviceId);
if (event != null) {
log.info("Device {} administratively removed", deviceId);
@@ -199,6 +217,31 @@
return new InternalDeviceProviderService(provider);
}
+ /**
+ * Checks if all the reachable devices have a valid mastership role.
+ */
+ private void mastershipCheck() {
+ log.debug("Checking mastership");
+ for (Device device : getDevices()) {
+ final DeviceId deviceId = device.id();
+ log.debug("Checking device {}", deviceId);
+
+ if (!isReachable(deviceId)) {
+ continue;
+ }
+
+ if (mastershipService.getLocalRole(deviceId) != NONE) {
+ continue;
+ }
+
+ log.info("{} is reachable but did not have a valid role, reasserting", deviceId);
+
+ // isReachable but was not MASTER or STANDBY, get a role and apply
+ // Note: NONE triggers request to MastershipService
+ reassertRole(deviceId, NONE);
+ }
+ }
+
// Personalized device provider service issued to the supplied provider.
private class InternalDeviceProviderService
extends AbstractProviderService<DeviceProvider>
@@ -418,48 +461,112 @@
}
}
- // Intercepts mastership events
- private class InternalMastershipListener implements MastershipListener {
-
- // Applies the specified role to the device; ignores NONE
- /**
- * Apply role in reaction to mastership event.
- *
- * @param deviceId device identifier
- * @param newRole new role to apply to the device
- * @return true if the request was sent to provider
- */
- private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
- if (newRole.equals(MastershipRole.NONE)) {
- //no-op
- return true;
- }
-
- Device device = store.getDevice(deviceId);
- // FIXME: Device might not be there yet. (eventual consistent)
- // FIXME relinquish role
- if (device == null) {
- log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
- return false;
- }
-
- DeviceProvider provider = getProvider(device.providerId());
- if (provider == null) {
- log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
- return false;
- }
- provider.roleChanged(deviceId, newRole);
-
- if (newRole.equals(MastershipRole.MASTER)) {
- // only trigger event when request was sent to provider
- // TODO: consider removing this from Device event type?
- post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
-
- provider.triggerProbe(device);
- }
+ // Applies the specified role to the device; ignores NONE
+ /**
+ * Apply role to device and send probe if MASTER.
+ *
+ * @param deviceId device identifier
+ * @param newRole new role to apply to the device
+ * @return true if the request was sent to provider
+ */
+ private boolean applyRoleAndProbe(DeviceId deviceId, MastershipRole newRole) {
+ if (newRole.equals(MastershipRole.NONE)) {
+ //no-op
return true;
}
+ Device device = store.getDevice(deviceId);
+ // FIXME: Device might not be there yet. (eventual consistent)
+ // FIXME relinquish role
+ if (device == null) {
+ log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+
+ DeviceProvider provider = getProvider(device.providerId());
+ if (provider == null) {
+ log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
+ return false;
+ }
+ provider.roleChanged(deviceId, newRole);
+
+ if (newRole.equals(MastershipRole.MASTER)) {
+ // only trigger event when request was sent to provider
+ // TODO: consider removing this from Device event type?
+ post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
+
+ provider.triggerProbe(device);
+ }
+ return true;
+ }
+
+ /**
+ * Reaasert role for specified device connected to this node.
+ *
+ * @param did device identifier
+ * @param nextRole role to apply. If NONE is specified,
+ * it will ask mastership service for a role and apply it.
+ */
+ private void reassertRole(final DeviceId did,
+ final MastershipRole nextRole) {
+
+ final NodeId myNodeId = clusterService.getLocalNode().id();
+ MastershipRole myNextRole = nextRole;
+ if (myNextRole == NONE) {
+ mastershipService.requestRoleFor(did);
+ MastershipTerm term = termService.getMastershipTerm(did);
+ if (myNodeId.equals(term.master())) {
+ myNextRole = MASTER;
+ } else {
+ myNextRole = STANDBY;
+ }
+ }
+
+ switch (myNextRole) {
+ case MASTER:
+ final Device device = getDevice(did);
+ if ((device != null) && !isAvailable(did)) {
+ //flag the device as online. Is there a better way to do this?
+ DefaultDeviceDescription deviceDescription
+ = new DefaultDeviceDescription(did.uri(),
+ device.type(),
+ device.manufacturer(),
+ device.hwVersion(),
+ device.swVersion(),
+ device.serialNumber(),
+ device.chassisId());
+ DeviceEvent devEvent =
+ store.createOrUpdateDevice(device.providerId(), did,
+ deviceDescription);
+ post(devEvent);
+ }
+ // TODO: should apply role only if there is mismatch
+ log.info("Applying role {} to {}", myNextRole, did);
+ if (!applyRoleAndProbe(did, MASTER)) {
+ // immediately failed to apply role
+ mastershipService.relinquishMastership(did);
+ // FIXME disconnect?
+ }
+ break;
+ case STANDBY:
+ log.info("Applying role {} to {}", myNextRole, did);
+ if (!applyRoleAndProbe(did, STANDBY)) {
+ // immediately failed to apply role
+ mastershipService.relinquishMastership(did);
+ // FIXME disconnect?
+ }
+ break;
+ case NONE:
+ default:
+ // should never reach here
+ log.error("You didn't see anything. I did not exist.");
+ break;
+ }
+ }
+
+ // Intercepts mastership events
+ private class InternalMastershipListener implements MastershipListener {
+
@Override
public void event(MastershipEvent event) {
@@ -499,55 +606,12 @@
+ "Relinquishing role. ",
myNextRole, did);
mastershipService.relinquishMastership(did);
- // FIXME disconnect?
}
return;
}
// device is connected to this node:
-
- if (myNextRole == NONE) {
- mastershipService.requestRoleFor(did);
- MastershipTerm term = termService.getMastershipTerm(did);
- if (myNodeId.equals(term.master())) {
- myNextRole = MASTER;
- } else {
- myNextRole = STANDBY;
- }
- }
-
- switch (myNextRole) {
- case MASTER:
- final Device device = getDevice(did);
- if ((device != null) && !isAvailable(did)) {
- //flag the device as online. Is there a better way to do this?
- DefaultDeviceDescription deviceDescription
- = new DefaultDeviceDescription(did.uri(),
- device.type(),
- device.manufacturer(),
- device.hwVersion(),
- device.swVersion(),
- device.serialNumber(),
- device.chassisId());
- DeviceEvent devEvent =
- store.createOrUpdateDevice(device.providerId(), did,
- deviceDescription);
- post(devEvent);
- }
- // TODO: should apply role only if there is mismatch
- log.info("Applying role {} to {}", myNextRole, did);
- applyRole(did, MASTER);
- break;
- case STANDBY:
- log.info("Applying role {} to {}", myNextRole, did);
- applyRole(did, STANDBY);
- break;
- case NONE:
- default:
- // should never reach here
- log.error("You didn't see anything. I did not exist.");
- break;
- }
+ reassertRole(did, myNextRole);
}
}