Role management for the case where the controller does not hear back from
the registry service, causing the handshake to time out. Previously we
disconnected the switch; with this patch, we move to state Equal/Slave instead.
This behavior should be revisited once the underlying registry replies
to every mastership request (currently it replies only to the controller that
wins mastership).

Also changed the timeouts:
 - role-reply not received timeout to 3 secs
 - handshake not completed timeout to 10 secs

Previously the timeouts were 10 and 60 secs respectively, which is too long
to wait before detecting that something has gone wrong.

Change-Id: I58bf56f511992ce874407f5205c42a38cd6403ae
diff --git a/src/main/java/net/floodlightcontroller/core/internal/OFChannelHandler.java b/src/main/java/net/floodlightcontroller/core/internal/OFChannelHandler.java
index a69e933..d2e4156 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/OFChannelHandler.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/OFChannelHandler.java
@@ -84,7 +84,7 @@
 
     private static final Logger log = LoggerFactory.getLogger(OFChannelHandler.class);
 
-    private static final long DEFAULT_ROLE_TIMEOUT_MS = 10 * 1000; // 10 sec
+    private static final long DEFAULT_ROLE_TIMEOUT_MS = 3 * 1000; // 3 sec
     private final Controller controller;
     private final Counters counters;
     private IOFSwitch sw;
@@ -1033,6 +1033,15 @@
             }
 
             @Override
+            public void handleTimedOutHandshake(OFChannelHandler h,
+                    ChannelHandlerContext ctx) throws IOException {
+                log.info("Handshake timed out waiting to hear back from registry "
+                        + "service. Moving to Role EQUAL for switch {}",
+                        h.getSwitchInfoString());
+                setRoleAndStartDriverHandshake(h, Role.EQUAL);
+            }
+
+            @Override
             void processOFFeaturesReply(OFChannelHandler h, OFFeaturesReply m)
                     throws IOException, SwitchStateException {
                 illegalMessageReceived(h, m);
@@ -1893,6 +1902,41 @@
                     h.getSwitchInfoString(), this.toString(), pendingRole);
             throw new SwitchStateException(msg);
         }
+
+        /**
+         * Handles switch handshake timeout.
+         * <p>
+         * If the handshake times out while the switch is in any state other
+         * than WAIT_INITIAL_ROLE, then the switch is disconnected.
+         * <p>
+         * If the switch is in WAIT_INITIAL_ROLE state, and a pending role reply
+         * is not received, it would trigger the role reply timeout, which would
+         * be handled by handleTimedOutRoleReply (which would disconnect the
+         * switch).
+         * <p>
+         * If the switch is in WAIT_INITIAL_ROLE state, when the handshake
+         * timeout is triggered, then it's because we have not heard back from
+         * the registry service regarding switch mastership. In this case, we
+         * move to EQUAL (or SLAVE) state. See override for this method in
+         * WAIT_INITIAL_ROLE state.
+         * <p>
+         * XXX: This is required today as the registry service does not reply
+         * with role.slave to a mastership request, i.e., it only replies to the
+         * controller that wins mastership. Once the registry API changes to
+         * reply to every request, we would not need to wait for a timeout to
+         * move to Role.EQUAL (or SLAVE).
+         * 
+         * @param h the channel handler for this switch
+         * @param ctx the netty channel handler context for the channel 'h'
+         * @throws IOException
+         */
+        public void handleTimedOutHandshake(OFChannelHandler h,
+                ChannelHandlerContext ctx) throws IOException {
+            log.error("Disconnecting switch {}: failed to complete handshake",
+                    h.getSwitchInfoString());
+            h.counters.switchDisconnectHandshakeTimeout.updateCounterWithFlush();
+            ctx.getChannel().close();
+        }
     }
 
     // *************************
@@ -1996,10 +2040,9 @@
             counters.switchDisconnectReadTimeout.updateCounterWithFlush();
             ctx.getChannel().close();
         } else if (e.getCause() instanceof HandshakeTimeoutException) {
-            log.error("Disconnecting switch {}: failed to complete handshake",
-                    getSwitchInfoString());
-            counters.switchDisconnectHandshakeTimeout.updateCounterWithFlush();
-            ctx.getChannel().close();
+            // handle timeout within state-machine - different actions taken
+            // depending on current state
+            state.handleTimedOutHandshake(this, ctx);
         } else if (e.getCause() instanceof ClosedChannelException) {
             log.debug("Channel for sw {} already closed", getSwitchInfoString());
         } else if (e.getCause() instanceof IOException) {