Fixed #229: gracefully handle race conditions caused by rapid switch flapping
diff --git a/src/main/java/net/floodlightcontroller/core/internal/Controller.java b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
index 4057bc9..60dbc69 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/Controller.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
@@ -797,6 +797,7 @@
}
protected void checkSwitchReady() {
+ Boolean controlRequested = Boolean.TRUE;
if (state.hsState == HandshakeState.FEATURES_REPLY &&
state.hasDescription && state.hasGetConfigReply) {
@@ -825,6 +826,7 @@
new RoleChangeCallback());
} catch (RegistryException e) {
log.debug("Registry error: {}", e.getMessage());
+ controlRequested = Boolean.FALSE;
}
@@ -858,6 +860,16 @@
state.firstRoleReplyReceived = true;
}
}
+ if (!controlRequested) {
+ // yield to allow other thread(s) to release control
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ // Ignore interruptions
+ }
+ // safer to bounce the switch to reconnect here than proceeding further
+ sw.channel.close();
+ }
}
}
diff --git a/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java b/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
index c6fe108..9cb1c4f 100644
--- a/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
+++ b/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
@@ -48,7 +48,7 @@
protected static final String CleanupEnabled = "EnableCleanup";
protected IThreadPoolService threadPool;
- protected final int CLEANUP_TASK_INTERVAL = 999; // 999 ms
+ protected final int CLEANUP_TASK_INTERVAL = 10; // 10 sec
protected SingletonTask cleanupTask;
/**
@@ -65,7 +65,7 @@
log.error("Error in cleanup thread", e);
} finally {
cleanupTask.reschedule(CLEANUP_TASK_INTERVAL,
- TimeUnit.MILLISECONDS);
+ TimeUnit.SECONDS);
}
}
@@ -74,7 +74,7 @@
// TODO Auto-generated method stub
if (hasControl) {
- log.debug("got control to set inactive sw {}", dpid);
+ log.debug("got control to set inactive sw {}", HexString.toHexString(dpid));
swStore.update(HexString.toHexString(dpid),SwitchState.INACTIVE, DM_OPERATION.UPDATE);
registryService.releaseControl(dpid);
}
@@ -94,10 +94,10 @@
long dpid = HexString.toLong(sw.getDPID());
String controller = registryService.getControllerForSwitch(dpid);
if (controller == null) {
- log.debug("request Control to set inactive sw {}", dpid);
+ log.debug("request Control to set inactive sw {}", HexString.toHexString(dpid));
registryService.requestControl(dpid, new SwitchCleanup());
} else {
- log.debug("sw {} is controlled by controller: {}",dpid,controller);
+ log.debug("sw {} is controlled by controller: {}",HexString.toHexString(dpid),controller);
}
} catch (NumberFormatException e) {
// TODO Auto-generated catch block
@@ -223,10 +223,10 @@
deviceService.addListener(this);
// Setup the Cleanup task.
- if (cleanupNeeded != null &&cleanupNeeded.equals("True")) {
+ if (cleanupNeeded == null || !cleanupNeeded.equals("False")) {
ScheduledExecutorService ses = threadPool.getScheduledExecutor();
cleanupTask = new SingletonTask(ses, new SwitchCleanup());
- cleanupTask.reschedule(CLEANUP_TASK_INTERVAL, TimeUnit.MILLISECONDS);
+ cleanupTask.reschedule(CLEANUP_TASK_INTERVAL, TimeUnit.SECONDS);
}
}
diff --git a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
index fe758cd..7e39fdb 100644
--- a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
+++ b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
@@ -157,7 +157,7 @@
if (switches.get(dpidStr) != null){
log.debug("Already contesting {}, returning", HexString.toHexString(dpid));
- return;
+ throw new RegistryException("Already requested control for " + dpidStr);
}
LeaderLatch latch = new LeaderLatch(client, latchPath, controllerId);
diff --git a/src/main/resources/floodlightdefault.properties b/src/main/resources/floodlightdefault.properties
index 498fce5..5e1c13f 100644
--- a/src/main/resources/floodlightdefault.properties
+++ b/src/main/resources/floodlightdefault.properties
@@ -17,3 +17,4 @@
net.floodlightcontroller.forwarding.Forwarding.idletimeout = 5
net.floodlightcontroller.forwarding.Forwarding.hardtimeout = 0
net.floodlightcontroller.onoslistener.OnosPublisher.dbconf = /tmp/cassandra.titan
+net.floodlightcontroller.onoslistener.OnosPublisher.EnableCleanup = True