Merge pull request #277 from pgreyson/master
First look
diff --git a/clean-cassandra.sh b/drop-keyspace.sh
similarity index 100%
rename from clean-cassandra.sh
rename to drop-keyspace.sh
diff --git a/src/main/java/net/floodlightcontroller/core/internal/Controller.java b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
index 4057bc9..24eeab5 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/Controller.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
@@ -560,7 +560,9 @@
removeSwitch(sw);
}
synchronized(roleChanger) {
- registryService.releaseControl(sw.getId());
+ if (controlRequested) {
+ registryService.releaseControl(sw.getId());
+ }
connectedSwitches.remove(sw);
}
sw.setConnected(false);
@@ -795,8 +797,10 @@
dfuture);
}
-
+
+ volatile Boolean controlRequested = Boolean.FALSE;
protected void checkSwitchReady() {
+
if (state.hsState == HandshakeState.FEATURES_REPLY &&
state.hasDescription && state.hasGetConfigReply) {
@@ -821,10 +825,12 @@
//Request control of the switch from the global registry
try {
+ controlRequested = Boolean.TRUE;
registryService.requestControl(sw.getId(),
new RoleChangeCallback());
} catch (RegistryException e) {
log.debug("Registry error: {}", e.getMessage());
+ controlRequested = Boolean.FALSE;
}
@@ -858,6 +864,16 @@
state.firstRoleReplyReceived = true;
}
}
+ if (!controlRequested) {
+ // yield to allow other thread(s) to release control
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ // Ignore interruptions
+ }
+ // safer to bounce the switch to reconnect here than proceeding further
+ sw.channel.close();
+ }
}
}
@@ -1084,6 +1100,7 @@
shouldHandleMessage = handleVendorMessage((OFVendor)m);
break;
case ERROR:
+ log.debug("Recieved ERROR message from switch {}: {}", sw, m);
// TODO: we need better error handling. Especially for
// request/reply style message (stats, roles) we should have
// a unified way to lookup the xid in the error message.
@@ -1107,6 +1124,7 @@
// is not a spurious error
shouldLogError = !isBadVendorError;
if (isBadVendorError) {
+ log.debug("Handling bad vendor error for {}", sw);
if (state.firstRoleReplyReceived && (role != null)) {
log.warn("Received ERROR from sw {} that "
+"indicates roles are not supported "
@@ -1114,16 +1132,21 @@
+"role reply earlier", sw);
}
state.firstRoleReplyReceived = true;
- sw.deliverRoleRequestNotSupported(error.getXid());
+ Role requestedRole =
+ sw.deliverRoleRequestNotSupported(error.getXid());
synchronized(roleChanger) {
if (sw.role == null && Controller.this.role==Role.SLAVE) {
+ //This will now never happen. The Controller's role
+ //is now never SLAVE, always MASTER.
// the switch doesn't understand role request
// messages and the current controller role is
// slave. We need to disconnect the switch.
// @see RoleChanger for rationale
sw.getChannel().close();
}
- else if (sw.role == null) {
+ else if (sw.role == null && requestedRole == Role.MASTER) {
+ log.debug("Adding switch {} because we got an error" +
+ " returned from a MASTER role request", sw);
// Controller's role is master: add to
// active
// TODO: check if clearing flow table is
diff --git a/src/main/java/net/floodlightcontroller/core/internal/OFSwitchImpl.java b/src/main/java/net/floodlightcontroller/core/internal/OFSwitchImpl.java
index 5ffd9bc..7313619 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/OFSwitchImpl.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/OFSwitchImpl.java
@@ -787,15 +787,17 @@
* Otherwise we ignore it.
* @param xid
*/
- protected void deliverRoleRequestNotSupported(int xid) {
+ protected Role deliverRoleRequestNotSupported(int xid) {
synchronized(pendingRoleRequests) {
PendingRoleRequestEntry head = pendingRoleRequests.poll();
this.role = null;
if (head!=null && head.xid == xid) {
setAttribute(SWITCH_SUPPORTS_NX_ROLE, false);
+ return head.role;
}
else {
this.channel.close();
+ return null;
}
}
}
diff --git a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
index 6378136..b2de649 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
@@ -275,10 +275,23 @@
sw.getAttribute(IOFSwitch.SWITCH_SUPPORTS_NX_ROLE);
if ((supportsNxRole == null) || supportsNxRole) {
// Handle cases #1 and #2
+ log.debug("Sending NxRoleRequest to {}", sw);
sw.sendNxRoleRequest(role, cookie);
- } else {
- // Handle case #3
- if (role == Role.SLAVE) {
+ } else {
+ if (role == Role.MASTER) {
+ log.debug("Switch {} doesn't support NxRoleRequests, but sending " +
+ "{} request anyway", sw, role);
+ //Send the role request anyway, even though we know the switch
+ //doesn't support it. The switch will give an error and in our
+ //error handling code we will add the switch.
+ //NOTE we *could* just add the switch right away rather than
+ //going through the overhead of sending a role request - however
+ //we then have to deal with concurrency issues resulting from
+ //calling addSwitch outside of a netty handler.
+ sw.sendNxRoleRequest(role, cookie);
+ }
+ // Handle case #3
+ else if (role == Role.SLAVE) {
log.debug("Disconnecting switch {} that doesn't support " +
"role request messages from a controller that went to SLAVE mode");
// Closing the channel should result in a call to
diff --git a/src/main/java/net/floodlightcontroller/flowcache/FlowManager.java b/src/main/java/net/floodlightcontroller/flowcache/FlowManager.java
index d55979e..d64476d 100644
--- a/src/main/java/net/floodlightcontroller/flowcache/FlowManager.java
+++ b/src/main/java/net/floodlightcontroller/flowcache/FlowManager.java
@@ -101,12 +101,13 @@
for (IFlowEntry flowEntryObj : allFlowEntries) {
FlowEntryId flowEntryId =
new FlowEntryId(flowEntryObj.getFlowEntryId());
- String userState = "User State: " + flowEntryObj.getUserState();
- String switchState = "Switch State: " + flowEntryObj.getSwitchState();
+ String userState = flowEntryObj.getUserState();
+ String switchState = flowEntryObj.getSwitchState();
log.debug("Found Flow Entry {}: {}",
flowEntryId.toString(),
- userState + " " + switchState);
+ "User State: " + userState +
+ " Switch State: " + switchState);
if (! switchState.equals("FE_SWITCH_NOT_UPDATED")) {
// Ignore the entry: nothing to do
@@ -160,6 +161,8 @@
flowModCommand = OFFlowMod.OFPFC_DELETE_STRICT;
} else {
// Unknown user state. Ignore the entry
+ log.debug("Flow Entry ignored (FlowEntryId = {}): unknown user state {}",
+ flowEntryId.toString(), userState);
continue;
}
diff --git a/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java b/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
index c6fe108..36cde95 100644
--- a/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
+++ b/src/main/java/net/floodlightcontroller/onoslistener/OnosPublisher.java
@@ -48,7 +48,7 @@
protected static final String CleanupEnabled = "EnableCleanup";
protected IThreadPoolService threadPool;
- protected final int CLEANUP_TASK_INTERVAL = 999; // 999 ms
+ protected final int CLEANUP_TASK_INTERVAL = 10; // 10 sec
protected SingletonTask cleanupTask;
/**
@@ -65,7 +65,7 @@
log.error("Error in cleanup thread", e);
} finally {
cleanupTask.reschedule(CLEANUP_TASK_INTERVAL,
- TimeUnit.MILLISECONDS);
+ TimeUnit.SECONDS);
}
}
@@ -74,30 +74,29 @@
// TODO Auto-generated method stub
if (hasControl) {
- log.debug("got control to set inactive sw {}", dpid);
+ log.debug("got control to set inactive sw {}", HexString.toHexString(dpid));
swStore.update(HexString.toHexString(dpid),SwitchState.INACTIVE, DM_OPERATION.UPDATE);
registryService.releaseControl(dpid);
}
}
}
-
-
protected void switchCleanup() {
-
TopoSwitchServiceImpl impl = new TopoSwitchServiceImpl();
Iterable<ISwitchObject> switches = impl.getActiveSwitches();
+
+ log.debug("Checking for inactive switches");
// For each switch check if a controller exists in controller registry
for (ISwitchObject sw: switches) {
- log.debug("checking if switch is inactive: {}", sw.getDPID());
+ //log.debug("checking if switch is inactive: {}", sw.getDPID());
try {
long dpid = HexString.toLong(sw.getDPID());
String controller = registryService.getControllerForSwitch(dpid);
if (controller == null) {
- log.debug("request Control to set inactive sw {}", dpid);
+ log.debug("request Control to set inactive sw {}", HexString.toHexString(dpid));
registryService.requestControl(dpid, new SwitchCleanup());
} else {
- log.debug("sw {} is controlled by controller: {}",dpid,controller);
+ log.debug("sw {} is controlled by controller: {}",HexString.toHexString(dpid),controller);
}
} catch (NumberFormatException e) {
// TODO Auto-generated catch block
@@ -223,10 +222,10 @@
deviceService.addListener(this);
// Setup the Cleanup task.
- if (cleanupNeeded != null &&cleanupNeeded.equals("True")) {
+ if (cleanupNeeded == null || !cleanupNeeded.equals("False")) {
ScheduledExecutorService ses = threadPool.getScheduledExecutor();
cleanupTask = new SingletonTask(ses, new SwitchCleanup());
- cleanupTask.reschedule(CLEANUP_TASK_INTERVAL, TimeUnit.MILLISECONDS);
+ cleanupTask.reschedule(CLEANUP_TASK_INTERVAL, TimeUnit.SECONDS);
}
}
diff --git a/src/main/java/net/onrc/onos/registry/controller/SwitchRegistryResource.java b/src/main/java/net/onrc/onos/registry/controller/SwitchRegistryResource.java
index d6f3dee..0a7ac5d 100644
--- a/src/main/java/net/onrc/onos/registry/controller/SwitchRegistryResource.java
+++ b/src/main/java/net/onrc/onos/registry/controller/SwitchRegistryResource.java
@@ -26,11 +26,11 @@
switches = new HashMap<String, List<ControllerRegistryEntry>>();
}
- for (List<ControllerRegistryEntry> list: switches.values()){
+ /*for (List<ControllerRegistryEntry> list: switches.values()){
for (ControllerRegistryEntry en : list) {
log.debug("Controller id {}", en.getControllerId());
}
- }
+ }*/
return switches;
}
diff --git a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
index 5f6ef78..b666db7 100644
--- a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
+++ b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
@@ -111,7 +111,7 @@
@Override
public void childEvent(CuratorFramework client,
PathChildrenCacheEvent event) throws Exception {
- log.debug("Root switch path cache got {} event", event.getType());
+ //log.debug("Root switch path cache got {} event", event.getType());
String strSwitch = null;
if (event.getData() != null){
@@ -158,7 +158,7 @@
if (switches.get(dpidStr) != null){
log.debug("Already contesting {}, returning", HexString.toHexString(dpid));
- return;
+ throw new RegistryException("Already contesting control for " + dpidStr);
}
LeaderLatch latch = new LeaderLatch(client, latchPath, controllerId);
@@ -355,7 +355,8 @@
new ArrayList<ControllerRegistryEntry>();
if (entry.getValue().getCurrentData().size() < 1){
- log.info("Switch entry with no leader elections: {}", entry.getKey());
+ //TODO prevent even having the PathChildrenCache in this case
+ //log.info("Switch entry with no leader elections: {}", entry.getKey());
continue;
}
diff --git a/src/main/resources/floodlightdefault.properties b/src/main/resources/floodlightdefault.properties
index 498fce5..5e1c13f 100644
--- a/src/main/resources/floodlightdefault.properties
+++ b/src/main/resources/floodlightdefault.properties
@@ -17,3 +17,4 @@
net.floodlightcontroller.forwarding.Forwarding.idletimeout = 5
net.floodlightcontroller.forwarding.Forwarding.hardtimeout = 0
net.floodlightcontroller.onoslistener.OnosPublisher.dbconf = /tmp/cassandra.titan
+net.floodlightcontroller.onoslistener.OnosPublisher.EnableCleanup = True
diff --git a/start-onos.sh b/start-onos.sh
index 166ce74..3c7ce1e 100755
--- a/start-onos.sh
+++ b/start-onos.sh
@@ -21,32 +21,6 @@
# Set classpath to include titan libs
CLASSPATH=`echo ${FL_HOME}/lib/*.jar ${FL_HOME}/lib/titan/*.jar | sed 's/ /:/g'`
-# Create a logback file if required
-cat <<EOF_LOGBACK >${FL_LOGBACK}
-<configuration scan="true" debug="true">
-<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
-<encoder>
-<pattern>%level [%logger:%thread] %msg%n</pattern>
-</encoder>
-</appender>
-
-<appender name="FILE" class="ch.qos.logback.core.FileAppender">
-<file>${FL_LOG}</file>
-<encoder>
-<pattern>%date %level [%thread] %logger{10} [%file:%line] %msg%n</pattern>
-</encoder>
-</appender>
-
-<logger name="org" level="WARN"/>
-<logger name="LogService" level="WARN"/> <!-- Restlet access logging -->
-<logger name="net.floodlightcontroller.logging" level="WARN"/>
-
-<root level="DEBUG">
-<appender-ref ref="FILE" />
-</root>
-</configuration>
-EOF_LOGBACK
-
#<logger name="net.floodlightcontroller.linkdiscovery.internal" level="TRACE"/>
#<appender-ref ref="STDOUT" />
@@ -75,6 +49,32 @@
fi
done
+# Create a logback file if required
+ cat <<EOF_LOGBACK >${FL_LOGBACK}
+<configuration scan="true" debug="true">
+<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+<encoder>
+<pattern>%level [%logger:%thread] %msg%n</pattern>
+</encoder>
+</appender>
+
+<appender name="FILE" class="ch.qos.logback.core.FileAppender">
+<file>${FL_LOG}</file>
+<encoder>
+<pattern>%date %level [%thread] %logger{10} [%file:%line] %msg%n</pattern>
+</encoder>
+</appender>
+
+<logger name="org" level="WARN"/>
+<logger name="LogService" level="WARN"/> <!-- Restlet access logging -->
+<logger name="net.floodlightcontroller.logging" level="WARN"/>
+
+<root level="DEBUG">
+<appender-ref ref="FILE" />
+</root>
+</configuration>
+EOF_LOGBACK
+
# Run floodlight
echo "Starting ONOS controller ..."
echo