tom | 0eb04ca | 2014-08-25 14:34:51 -0700 | [diff] [blame^] | 1 | /** |
| 2 | * Copyright 2011, Big Switch Networks, Inc. |
| 3 | * Originally created by David Erickson, Stanford University |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 6 | * not use this file except in compliance with the License. You may obtain |
| 7 | * a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | * License for the specific language governing permissions and limitations |
| 15 | * under the License. |
| 16 | **/ |
| 17 | |
| 18 | package net.onrc.onos.of.ctl.internal; |
| 19 | |
| 20 | import java.lang.management.ManagementFactory; |
| 21 | import java.lang.management.RuntimeMXBean; |
| 22 | import java.net.InetSocketAddress; |
| 23 | import java.net.UnknownHostException; |
| 24 | import java.util.Collections; |
| 25 | import java.util.HashMap; |
| 26 | import java.util.HashSet; |
| 27 | import java.util.Map; |
| 28 | import java.util.Set; |
| 29 | import java.util.concurrent.ConcurrentHashMap; |
| 30 | import java.util.concurrent.Executors; |
| 31 | |
| 32 | import net.onrc.onos.of.ctl.IOFSwitchManager; |
| 33 | import net.onrc.onos.of.ctl.Role; |
| 34 | import net.onrc.onos.of.ctl.annotations.LogMessageDoc; |
| 35 | import net.onrc.onos.of.ctl.annotations.LogMessageDocs; |
| 36 | import net.onrc.onos.of.ctl.debugcounter.DebugCounter; |
| 37 | import net.onrc.onos.of.ctl.debugcounter.IDebugCounter; |
| 38 | import net.onrc.onos.of.ctl.debugcounter.IDebugCounterService; |
| 39 | import net.onrc.onos.of.ctl.debugcounter.IDebugCounterService.CounterException; |
| 40 | import net.onrc.onos.of.ctl.debugcounter.IDebugCounterService.CounterType; |
| 41 | import net.onrc.onos.of.ctl.internal.OFChannelHandler.RoleRecvStatus; |
| 42 | import net.onrc.onos.of.ctl.registry.IControllerRegistry; |
| 43 | import net.onrc.onos.of.ctl.registry.RegistryException; |
| 44 | import net.onrc.onos.of.ctl.registry.IControllerRegistry.ControlChangeCallback; |
| 45 | import net.onrc.onos.of.ctl.util.Dpid; |
| 46 | import net.onrc.onos.of.ctl.util.DummySwitchForTesting; |
| 47 | import net.onrc.onos.of.ctl.util.InstanceId; |
| 48 | import net.onrc.onos.of.ctl.IOFSwitch; |
| 49 | import net.onrc.onos.of.ctl.IOFSwitch.PortChangeType; |
| 50 | |
| 51 | import org.apache.felix.scr.annotations.Activate; |
| 52 | import org.apache.felix.scr.annotations.Component; |
| 53 | import org.jboss.netty.bootstrap.ServerBootstrap; |
| 54 | import org.jboss.netty.channel.ChannelPipelineFactory; |
| 55 | import org.jboss.netty.channel.group.ChannelGroup; |
| 56 | import org.jboss.netty.channel.group.DefaultChannelGroup; |
| 57 | import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; |
| 58 | import org.projectfloodlight.openflow.protocol.OFDescStatsReply; |
| 59 | import org.projectfloodlight.openflow.protocol.OFFactories; |
| 60 | import org.projectfloodlight.openflow.protocol.OFFactory; |
| 61 | import org.projectfloodlight.openflow.protocol.OFPortDesc; |
| 62 | import org.projectfloodlight.openflow.protocol.OFVersion; |
| 63 | import org.projectfloodlight.openflow.util.HexString; |
| 64 | import org.slf4j.Logger; |
| 65 | import org.slf4j.LoggerFactory; |
| 66 | |
| 67 | |
| 68 | /** |
| 69 | * The main controller class. Handles all setup and network listeners |
| 70 | * - Distributed ownership control of switch through IControllerRegistryService |
| 71 | */ |
| 72 | @Component(immediate = true) |
| 73 | public class Controller { |
| 74 | |
| 75 | protected static final Logger log = LoggerFactory.getLogger(Controller.class); |
| 76 | static final String ERROR_DATABASE = |
| 77 | "The controller could not communicate with the system database."; |
| 78 | protected static final OFFactory FACTORY13 = OFFactories.getFactory(OFVersion.OF_13); |
| 79 | protected static final OFFactory FACTORY10 = OFFactories.getFactory(OFVersion.OF_10); |
| 80 | |
| 81 | // connectedSwitches cache contains all connected switch's channelHandlers |
| 82 | // including ones where this controller is a master/equal/slave controller |
| 83 | // as well as ones that have not been activated yet |
| 84 | protected ConcurrentHashMap<Long, OFChannelHandler> connectedSwitches; |
| 85 | // These caches contains only those switches that are active |
| 86 | protected ConcurrentHashMap<Long, IOFSwitch> activeMasterSwitches; |
| 87 | protected ConcurrentHashMap<Long, IOFSwitch> activeEqualSwitches; |
| 88 | // lock to synchronize on, when manipulating multiple caches above |
| 89 | private Object multiCacheLock; |
| 90 | |
| 91 | // The controllerNodeIPsCache maps Controller IDs to their IP address. |
| 92 | // It's only used by handleControllerNodeIPsChanged |
| 93 | protected HashMap<String, String> controllerNodeIPsCache; |
| 94 | |
| 95 | // Module dependencies |
| 96 | |
| 97 | protected IControllerRegistry registryService; |
| 98 | protected IDebugCounterService debugCounters; |
| 99 | |
| 100 | |
| 101 | private IOFSwitchManager switchManager; |
| 102 | |
| 103 | // Configuration options |
| 104 | protected int openFlowPort = 6633; |
| 105 | protected int workerThreads = 0; |
| 106 | |
| 107 | // defined counters |
| 108 | private Counters counters; |
| 109 | |
| 110 | // Start time of the controller |
| 111 | protected long systemStartTime; |
| 112 | |
| 113 | // Flag to always flush flow table on switch reconnect (HA or otherwise) |
| 114 | protected boolean alwaysClearFlowsOnSwAdd = false; |
| 115 | private InstanceId instanceId; |
| 116 | |
| 117 | // Perf. related configuration |
| 118 | protected static final int SEND_BUFFER_SIZE = 4 * 1024 * 1024; |
| 119 | protected static final int BATCH_MAX_SIZE = 100; |
| 120 | protected static final boolean ALWAYS_DECODE_ETH = true; |
| 121 | |
| 122 | protected boolean addConnectedSwitch(long dpid, OFChannelHandler h) { |
| 123 | if (connectedSwitches.get(dpid) != null) { |
| 124 | log.error("Trying to add connectedSwitch but found a previous " |
| 125 | + "value for dpid: {}", dpid); |
| 126 | return false; |
| 127 | } else { |
| 128 | log.error("Added switch {}", dpid); |
| 129 | connectedSwitches.put(dpid, h); |
| 130 | return true; |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | private boolean validActivation(long dpid) { |
| 135 | if (connectedSwitches.get(dpid) == null) { |
| 136 | log.error("Trying to activate switch but is not in " |
| 137 | + "connected switches: dpid {}. Aborting ..", |
| 138 | HexString.toHexString(dpid)); |
| 139 | return false; |
| 140 | } |
| 141 | if (activeMasterSwitches.get(dpid) != null || |
| 142 | activeEqualSwitches.get(dpid) != null) { |
| 143 | log.error("Trying to activate switch but it is already " |
| 144 | + "activated: dpid {}. Found in activeMaster: {} " |
| 145 | + "Found in activeEqual: {}. Aborting ..", new Object[] { |
| 146 | HexString.toHexString(dpid), |
| 147 | (activeMasterSwitches.get(dpid) == null) ? 'N' : 'Y', |
| 148 | (activeEqualSwitches.get(dpid) == null) ? 'N' : 'Y'}); |
| 149 | counters.switchWithSameDpidActivated.updateCounterWithFlush(); |
| 150 | return false; |
| 151 | } |
| 152 | return true; |
| 153 | } |
| 154 | |
| 155 | /** |
| 156 | * Called when a switch is activated, with this controller's role as MASTER. |
| 157 | */ |
| 158 | protected boolean addActivatedMasterSwitch(long dpid, IOFSwitch sw) { |
| 159 | synchronized (multiCacheLock) { |
| 160 | if (!validActivation(dpid)) { |
| 161 | return false; |
| 162 | } |
| 163 | activeMasterSwitches.put(dpid, sw); |
| 164 | } |
| 165 | //update counters and events |
| 166 | counters.switchActivated.updateCounterWithFlush(); |
| 167 | |
| 168 | return true; |
| 169 | } |
| 170 | |
| 171 | /** |
| 172 | * Called when a switch is activated, with this controller's role as EQUAL. |
| 173 | */ |
| 174 | protected boolean addActivatedEqualSwitch(long dpid, IOFSwitch sw) { |
| 175 | synchronized (multiCacheLock) { |
| 176 | if (!validActivation(dpid)) { |
| 177 | return false; |
| 178 | } |
| 179 | activeEqualSwitches.put(dpid, sw); |
| 180 | } |
| 181 | //update counters and events |
| 182 | counters.switchActivated.updateCounterWithFlush(); |
| 183 | return true; |
| 184 | } |
| 185 | |
| 186 | /** |
| 187 | * Called when this controller's role for a switch transitions from equal |
| 188 | * to master. For 1.0 switches, we internally refer to the role 'slave' as |
| 189 | * 'equal' - so this transition is equivalent to 'addActivatedMasterSwitch'. |
| 190 | */ |
| 191 | protected void transitionToMasterSwitch(long dpid) { |
| 192 | synchronized (multiCacheLock) { |
| 193 | IOFSwitch sw = activeEqualSwitches.remove(dpid); |
| 194 | if (sw == null) { |
| 195 | log.error("Transition to master called on sw {}, but switch " |
| 196 | + "was not found in controller-cache", dpid); |
| 197 | return; |
| 198 | } |
| 199 | activeMasterSwitches.put(dpid, sw); |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | |
| 204 | /** |
| 205 | * Called when this controller's role for a switch transitions to equal. |
| 206 | * For 1.0 switches, we internally refer to the role 'slave' as |
| 207 | * 'equal'. |
| 208 | */ |
| 209 | protected void transitionToEqualSwitch(long dpid) { |
| 210 | synchronized (multiCacheLock) { |
| 211 | IOFSwitch sw = activeMasterSwitches.remove(dpid); |
| 212 | if (sw == null) { |
| 213 | log.error("Transition to equal called on sw {}, but switch " |
| 214 | + "was not found in controller-cache", dpid); |
| 215 | return; |
| 216 | } |
| 217 | activeEqualSwitches.put(dpid, sw); |
| 218 | } |
| 219 | |
| 220 | } |
| 221 | |
| 222 | /** |
| 223 | * Clear all state in controller switch maps for a switch that has |
| 224 | * disconnected from the local controller. Also release control for |
| 225 | * that switch from the global repository. Notify switch listeners. |
| 226 | */ |
| 227 | protected void removeConnectedSwitch(long dpid) { |
| 228 | releaseRegistryControl(dpid); |
| 229 | connectedSwitches.remove(dpid); |
| 230 | IOFSwitch sw = activeMasterSwitches.remove(dpid); |
| 231 | if (sw == null) { |
| 232 | sw = activeEqualSwitches.remove(dpid); |
| 233 | } |
| 234 | if (sw != null) { |
| 235 | sw.cancelAllStatisticsReplies(); |
| 236 | sw.setConnected(false); // do we need this? |
| 237 | } |
| 238 | counters.switchDisconnected.updateCounterWithFlush(); |
| 239 | |
| 240 | } |
| 241 | |
| 242 | /** |
| 243 | * Indicates that ports on the given switch have changed. Enqueue a |
| 244 | * switch update. |
| 245 | * @param sw |
| 246 | */ |
| 247 | protected void notifyPortChanged(long dpid, OFPortDesc port, |
| 248 | PortChangeType changeType) { |
| 249 | if (port == null || changeType == null) { |
| 250 | String msg = String.format("Switch port or changetType must not " |
| 251 | + "be null in port change notification"); |
| 252 | throw new NullPointerException(msg); |
| 253 | } |
| 254 | if (connectedSwitches.get(dpid) == null || getSwitch(dpid) == null) { |
| 255 | log.warn("Port change update on switch {} not connected or activated " |
| 256 | + "... Aborting.", HexString.toHexString(dpid)); |
| 257 | return; |
| 258 | } |
| 259 | |
| 260 | } |
| 261 | |
| 262 | // *************** |
| 263 | // Getters/Setters |
| 264 | // *************** |
| 265 | |
| 266 | |
| 267 | public synchronized void setIOFSwitchManager(IOFSwitchManager swManager) { |
| 268 | this.switchManager = swManager; |
| 269 | this.registryService = swManager.getRegistry(); |
| 270 | } |
| 271 | |
| 272 | |
| 273 | public void setDebugCounter(IDebugCounterService dcs) { |
| 274 | this.debugCounters = dcs; |
| 275 | } |
| 276 | |
| 277 | IDebugCounterService getDebugCounter() { |
| 278 | return this.debugCounters; |
| 279 | } |
| 280 | |
| 281 | // ********************** |
| 282 | // Role Handling |
| 283 | // ********************** |
| 284 | |
| 285 | /** |
| 286 | * created by ONOS - works with registry service. |
| 287 | */ |
| 288 | protected class RoleChangeCallback implements ControlChangeCallback { |
| 289 | @Override |
| 290 | public void controlChanged(long dpidLong, boolean hasControl) { |
| 291 | Dpid dpid = new Dpid(dpidLong); |
| 292 | log.info("Role change callback for switch {}, hasControl {}", |
| 293 | dpid, hasControl); |
| 294 | |
| 295 | Role role = null; |
| 296 | |
| 297 | /* |
| 298 | * issue #229 |
| 299 | * Cannot rely on sw.getRole() as it can be behind due to pending |
| 300 | * role changes in the queue. Just submit it and late the |
| 301 | * RoleChanger handle duplicates. |
| 302 | */ |
| 303 | |
| 304 | if (hasControl) { |
| 305 | role = Role.MASTER; |
| 306 | } else { |
| 307 | role = Role.EQUAL; // treat the same as Role.SLAVE |
| 308 | } |
| 309 | |
| 310 | OFChannelHandler swCh = connectedSwitches.get(dpid.value()); |
| 311 | if (swCh == null) { |
| 312 | log.warn("Switch {} not found in connected switches", dpid); |
| 313 | return; |
| 314 | } |
| 315 | |
| 316 | log.debug("Sending role request {} msg to {}", role, dpid); |
| 317 | swCh.sendRoleRequest(role, RoleRecvStatus.MATCHED_SET_ROLE); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | /** |
| 322 | * Submit request to the registry service for mastership of the |
| 323 | * switch. |
| 324 | * @param dpid this datapath to get role for |
| 325 | */ |
| 326 | public synchronized void submitRegistryRequest(long dpid) { |
| 327 | if (registryService == null) { |
| 328 | /* |
| 329 | * If we have no registry then simply assign |
| 330 | * mastership to this controller. |
| 331 | */ |
| 332 | new RoleChangeCallback().controlChanged(dpid, true); |
| 333 | return; |
| 334 | } |
| 335 | OFChannelHandler h = connectedSwitches.get(dpid); |
| 336 | if (h == null) { |
| 337 | log.error("Trying to request registry control for switch {} " |
| 338 | + "not in connected switches. Aborting.. ", |
| 339 | HexString.toHexString(dpid)); |
| 340 | connectedSwitches.get(dpid).disconnectSwitch(); |
| 341 | return; |
| 342 | } |
| 343 | //Request control of the switch from the global registry |
| 344 | try { |
| 345 | h.controlRequested = Boolean.TRUE; |
| 346 | registryService.requestControl(dpid, new RoleChangeCallback()); |
| 347 | } catch (RegistryException e) { |
| 348 | log.debug("Registry error: {}", e.getMessage()); |
| 349 | h.controlRequested = Boolean.FALSE; |
| 350 | } |
| 351 | if (!h.controlRequested) { // XXX what is being attempted here? |
| 352 | // yield to allow other thread(s) to release control |
| 353 | // TODO AAS: this is awful and needs to be fixed |
| 354 | Thread.yield(); |
| 355 | // safer to bounce the switch to reconnect here than proceeding further |
| 356 | // XXX S why? can't we just try again a little later? |
| 357 | log.debug("Closing sw:{} because we weren't able to request control " + |
| 358 | "successfully" + dpid); |
| 359 | connectedSwitches.get(dpid).disconnectSwitch(); |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | /** |
| 364 | * Relinquish role for the switch. |
| 365 | * @param dpidLong the controlled datapath |
| 366 | */ |
| 367 | public synchronized void releaseRegistryControl(long dpidLong) { |
| 368 | OFChannelHandler h = connectedSwitches.get(dpidLong); |
| 369 | if (h == null) { |
| 370 | log.error("Trying to release registry control for switch {} " |
| 371 | + "not in connected switches. Aborting.. ", |
| 372 | HexString.toHexString(dpidLong)); |
| 373 | return; |
| 374 | } |
| 375 | if (registryService != null && h.controlRequested) { |
| 376 | //TODO the above is not good for testing need to change controlrequest to method call. |
| 377 | registryService.releaseControl(dpidLong); |
| 378 | } |
| 379 | } |
| 380 | |
| 381 | |
| 382 | // FIXME: remove this method |
| 383 | public Map<Long, IOFSwitch> getSwitches() { |
| 384 | return getMasterSwitches(); |
| 385 | } |
| 386 | |
| 387 | // FIXME: remove this method |
| 388 | public Map<Long, IOFSwitch> getMasterSwitches() { |
| 389 | return Collections.unmodifiableMap(activeMasterSwitches); |
| 390 | } |
| 391 | |
| 392 | |
| 393 | |
| 394 | public Set<Long> getAllSwitchDpids() { |
| 395 | Set<Long> dpids = new HashSet<Long>(); |
| 396 | dpids.addAll(activeMasterSwitches.keySet()); |
| 397 | dpids.addAll(activeEqualSwitches.keySet()); |
| 398 | return dpids; |
| 399 | } |
| 400 | |
| 401 | |
| 402 | public Set<Long> getAllMasterSwitchDpids() { |
| 403 | Set<Long> dpids = new HashSet<Long>(); |
| 404 | dpids.addAll(activeMasterSwitches.keySet()); |
| 405 | return dpids; |
| 406 | } |
| 407 | |
| 408 | |
| 409 | public Set<Long> getAllEqualSwitchDpids() { |
| 410 | Set<Long> dpids = new HashSet<Long>(); |
| 411 | dpids.addAll(activeEqualSwitches.keySet()); |
| 412 | return dpids; |
| 413 | } |
| 414 | |
| 415 | |
| 416 | public IOFSwitch getSwitch(long dpid) { |
| 417 | IOFSwitch sw = null; |
| 418 | sw = activeMasterSwitches.get(dpid); |
| 419 | if (sw != null) { |
| 420 | return sw; |
| 421 | } |
| 422 | sw = activeEqualSwitches.get(dpid); |
| 423 | if (sw != null) { |
| 424 | return sw; |
| 425 | } |
| 426 | return sw; |
| 427 | } |
| 428 | |
| 429 | |
| 430 | public IOFSwitch getMasterSwitch(long dpid) { |
| 431 | return activeMasterSwitches.get(dpid); |
| 432 | } |
| 433 | |
| 434 | |
| 435 | public IOFSwitch getEqualSwitch(long dpid) { |
| 436 | return activeEqualSwitches.get(dpid); |
| 437 | } |
| 438 | |
| 439 | |
| 440 | |
| 441 | |
| 442 | |
| 443 | public OFFactory getOFMessageFactory10() { |
| 444 | return FACTORY10; |
| 445 | } |
| 446 | |
| 447 | |
| 448 | public OFFactory getOFMessageFactory13() { |
| 449 | return FACTORY13; |
| 450 | } |
| 451 | |
| 452 | |
| 453 | |
| 454 | public Map<String, String> getControllerNodeIPs() { |
| 455 | // We return a copy of the mapping so we can guarantee that |
| 456 | // the mapping return is the same as one that will be (or was) |
| 457 | // dispatched to IHAListeners |
| 458 | HashMap<String, String> retval = new HashMap<String, String>(); |
| 459 | synchronized (controllerNodeIPsCache) { |
| 460 | retval.putAll(controllerNodeIPsCache); |
| 461 | } |
| 462 | return retval; |
| 463 | } |
| 464 | |
| 465 | |
| 466 | public long getSystemStartTime() { |
| 467 | return (this.systemStartTime); |
| 468 | } |
| 469 | |
| 470 | |
| 471 | public InstanceId getInstanceId() { |
| 472 | return instanceId; |
| 473 | } |
| 474 | |
| 475 | |
| 476 | // ************** |
| 477 | // Initialization |
| 478 | // ************** |
| 479 | |
| 480 | /** |
| 481 | * Tell controller that we're ready to accept switches loop. |
| 482 | * |
| 483 | * @throws IOException |
| 484 | */ |
| 485 | @LogMessageDocs({ |
| 486 | @LogMessageDoc(message = "Listening for switch connections on {address}", |
| 487 | explanation = "The controller is ready and listening for new" + |
| 488 | " switch connections"), |
| 489 | @LogMessageDoc(message = "Storage exception in controller " + |
| 490 | "updates loop; terminating process", |
| 491 | explanation = ERROR_DATABASE, |
| 492 | recommendation = LogMessageDoc.CHECK_CONTROLLER), |
| 493 | @LogMessageDoc(level = "ERROR", |
| 494 | message = "Exception in controller updates loop", |
| 495 | explanation = "Failed to dispatch controller event", |
| 496 | recommendation = LogMessageDoc.GENERIC_ACTION) |
| 497 | }) |
| 498 | public void run() { |
| 499 | |
| 500 | try { |
| 501 | final ServerBootstrap bootstrap = createServerBootStrap(); |
| 502 | |
| 503 | bootstrap.setOption("reuseAddr", true); |
| 504 | bootstrap.setOption("child.keepAlive", true); |
| 505 | bootstrap.setOption("child.tcpNoDelay", true); |
| 506 | bootstrap.setOption("child.sendBufferSize", Controller.SEND_BUFFER_SIZE); |
| 507 | |
| 508 | ChannelPipelineFactory pfact = |
| 509 | new OpenflowPipelineFactory(this, null); |
| 510 | bootstrap.setPipelineFactory(pfact); |
| 511 | InetSocketAddress sa = new InetSocketAddress(openFlowPort); |
| 512 | final ChannelGroup cg = new DefaultChannelGroup(); |
| 513 | cg.add(bootstrap.bind(sa)); |
| 514 | |
| 515 | log.info("Listening for switch connections on {}", sa); |
| 516 | } catch (Exception e) { |
| 517 | throw new RuntimeException(e); |
| 518 | } |
| 519 | |
| 520 | } |
| 521 | |
| 522 | private ServerBootstrap createServerBootStrap() { |
| 523 | if (workerThreads == 0) { |
| 524 | return new ServerBootstrap( |
| 525 | new NioServerSocketChannelFactory( |
| 526 | Executors.newCachedThreadPool(), |
| 527 | Executors.newCachedThreadPool())); |
| 528 | } else { |
| 529 | return new ServerBootstrap( |
| 530 | new NioServerSocketChannelFactory( |
| 531 | Executors.newCachedThreadPool(), |
| 532 | Executors.newCachedThreadPool(), workerThreads)); |
| 533 | } |
| 534 | } |
| 535 | |
| 536 | public void setConfigParams(Map<String, String> configParams) { |
| 537 | String ofPort = configParams.get("openflowport"); |
| 538 | if (ofPort != null) { |
| 539 | this.openFlowPort = Integer.parseInt(ofPort); |
| 540 | } |
| 541 | log.debug("OpenFlow port set to {}", this.openFlowPort); |
| 542 | String threads = configParams.get("workerthreads"); |
| 543 | if (threads != null) { |
| 544 | this.workerThreads = Integer.parseInt(threads); |
| 545 | } |
| 546 | log.debug("Number of worker threads set to {}", this.workerThreads); |
| 547 | String controllerId = configParams.get("controllerid"); |
| 548 | if (controllerId != null) { |
| 549 | this.instanceId = new InstanceId(controllerId); |
| 550 | } else { |
| 551 | //Try to get the hostname of the machine and use that for controller ID |
| 552 | try { |
| 553 | String hostname = java.net.InetAddress.getLocalHost().getHostName(); |
| 554 | this.instanceId = new InstanceId(hostname); |
| 555 | } catch (UnknownHostException e) { |
| 556 | log.warn("Can't get hostname, using the default"); |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | log.debug("ControllerId set to {}", this.instanceId); |
| 561 | } |
| 562 | |
| 563 | |
| 564 | /** |
| 565 | * Initialize internal data structures. |
| 566 | */ |
| 567 | public void init(Map<String, String> configParams) { |
| 568 | // These data structures are initialized here because other |
| 569 | // module's startUp() might be called before ours |
| 570 | this.activeMasterSwitches = new ConcurrentHashMap<Long, IOFSwitch>(); |
| 571 | this.activeEqualSwitches = new ConcurrentHashMap<Long, IOFSwitch>(); |
| 572 | this.connectedSwitches = new ConcurrentHashMap<Long, OFChannelHandler>(); |
| 573 | this.controllerNodeIPsCache = new HashMap<String, String>(); |
| 574 | |
| 575 | setConfigParams(configParams); |
| 576 | this.systemStartTime = System.currentTimeMillis(); |
| 577 | this.setDebugCounter(new DebugCounter()); |
| 578 | this.counters = new Counters(); |
| 579 | this.multiCacheLock = new Object(); |
| 580 | |
| 581 | } |
| 582 | |
| 583 | /** |
| 584 | * Startup all of the controller's components. |
| 585 | */ |
| 586 | @LogMessageDoc(message = "Waiting for storage source", |
| 587 | explanation = "The system database is not yet ready", |
| 588 | recommendation = "If this message persists, this indicates " + |
| 589 | "that the system database has failed to start. " + |
| 590 | LogMessageDoc.CHECK_CONTROLLER) |
| 591 | public synchronized void startupComponents() { |
| 592 | try { |
| 593 | if (registryService != null) { |
| 594 | registryService.registerController(instanceId.toString()); |
| 595 | } |
| 596 | } catch (RegistryException e) { |
| 597 | log.warn("Registry service error: {}", e.getMessage()); |
| 598 | } |
| 599 | |
| 600 | // register counters and events |
| 601 | try { |
| 602 | this.counters.createCounters(debugCounters); |
| 603 | } catch (CounterException e) { |
| 604 | log.warn("Counters unavailable: {}", e.getMessage()); |
| 605 | } |
| 606 | } |
| 607 | |
| 608 | // ************** |
| 609 | // debugCounter registrations |
| 610 | // ************** |
| 611 | |
| 612 | public static class Counters { |
| 613 | public static final String PREFIX = "controller"; |
| 614 | public IDebugCounter switchActivated; |
| 615 | public IDebugCounter switchWithSameDpidActivated; // warn |
| 616 | public IDebugCounter switchDisconnected; |
| 617 | public IDebugCounter messageReceived; |
| 618 | public IDebugCounter switchDisconnectReadTimeout; |
| 619 | public IDebugCounter switchDisconnectHandshakeTimeout; |
| 620 | public IDebugCounter switchDisconnectIOError; |
| 621 | public IDebugCounter switchDisconnectParseError; |
| 622 | public IDebugCounter switchDisconnectSwitchStateException; |
| 623 | public IDebugCounter rejectedExecutionException; |
| 624 | public IDebugCounter switchDisconnectOtherException; |
| 625 | public IDebugCounter switchConnected; |
| 626 | public IDebugCounter unhandledMessage; |
| 627 | public IDebugCounter packetInWhileSwitchIsSlave; |
| 628 | public IDebugCounter epermErrorWhileSwitchIsMaster; |
| 629 | public IDebugCounter roleReplyTimeout; |
| 630 | public IDebugCounter roleReplyReceived; // expected RoleReply received |
| 631 | public IDebugCounter roleReplyErrorUnsupported; |
| 632 | public IDebugCounter switchCounterRegistrationFailed; |
| 633 | |
| 634 | void createCounters(IDebugCounterService debugCounters) throws CounterException { |
| 635 | |
| 636 | switchActivated = |
| 637 | debugCounters.registerCounter( |
| 638 | PREFIX, "switch-activated", |
| 639 | "A switch connected to this controller is now " + |
| 640 | "in MASTER role", |
| 641 | CounterType.ALWAYS_COUNT); |
| 642 | |
| 643 | switchWithSameDpidActivated = // warn |
| 644 | debugCounters.registerCounter( |
| 645 | PREFIX, "switch-with-same-dpid-activated", |
| 646 | "A switch with the same DPID as another switch " + |
| 647 | "connected to the controller. This can be " + |
| 648 | "caused by multiple switches configured with " + |
| 649 | "the same DPID or by a switch reconnecting very " + |
| 650 | "quickly.", |
| 651 | CounterType.COUNT_ON_DEMAND, |
| 652 | IDebugCounterService.CTR_MDATA_WARN); |
| 653 | |
| 654 | switchDisconnected = |
| 655 | debugCounters.registerCounter( |
| 656 | PREFIX, "switch-disconnected", |
| 657 | "FIXME: switch has disconnected", |
| 658 | CounterType.ALWAYS_COUNT); |
| 659 | |
| 660 | //------------------------ |
| 661 | // channel handler counters. Factor them out ?? |
| 662 | messageReceived = |
| 663 | debugCounters.registerCounter( |
| 664 | PREFIX, "message-received", |
| 665 | "Number of OpenFlow messages received. Some of " + |
| 666 | "these might be throttled", |
| 667 | CounterType.ALWAYS_COUNT); |
| 668 | |
| 669 | switchDisconnectReadTimeout = |
| 670 | debugCounters.registerCounter( |
| 671 | PREFIX, "switch-disconnect-read-timeout", |
| 672 | "Number of times a switch was disconnected due " + |
| 673 | "due the switch failing to send OpenFlow " + |
| 674 | "messages or responding to OpenFlow ECHOs", |
| 675 | CounterType.ALWAYS_COUNT, |
| 676 | IDebugCounterService.CTR_MDATA_ERROR); |
| 677 | switchDisconnectHandshakeTimeout = |
| 678 | debugCounters.registerCounter( |
| 679 | PREFIX, "switch-disconnect-handshake-timeout", |
| 680 | "Number of times a switch was disconnected " + |
| 681 | "because it failed to complete the handshake " + |
| 682 | "in time.", |
| 683 | CounterType.ALWAYS_COUNT, |
| 684 | IDebugCounterService.CTR_MDATA_ERROR); |
| 685 | switchDisconnectIOError = |
| 686 | debugCounters.registerCounter( |
| 687 | PREFIX, "switch-disconnect-io-error", |
| 688 | "Number of times a switch was disconnected " + |
| 689 | "due to IO errors on the switch connection.", |
| 690 | CounterType.ALWAYS_COUNT, |
| 691 | IDebugCounterService.CTR_MDATA_ERROR); |
| 692 | switchDisconnectParseError = |
| 693 | debugCounters.registerCounter( |
| 694 | PREFIX, "switch-disconnect-parse-error", |
| 695 | "Number of times a switch was disconnected " + |
| 696 | "because it sent an invalid packet that could " + |
| 697 | "not be parsed", |
| 698 | CounterType.ALWAYS_COUNT, |
| 699 | IDebugCounterService.CTR_MDATA_ERROR); |
| 700 | |
| 701 | switchDisconnectSwitchStateException = |
| 702 | debugCounters.registerCounter( |
| 703 | PREFIX, "switch-disconnect-switch-state-exception", |
| 704 | "Number of times a switch was disconnected " + |
| 705 | "because it sent messages that were invalid " + |
| 706 | "given the switch connection's state.", |
| 707 | CounterType.ALWAYS_COUNT, |
| 708 | IDebugCounterService.CTR_MDATA_ERROR); |
| 709 | rejectedExecutionException = |
| 710 | debugCounters.registerCounter( |
| 711 | PREFIX, "rejected-execution-exception", |
| 712 | "TODO", |
| 713 | CounterType.ALWAYS_COUNT, |
| 714 | IDebugCounterService.CTR_MDATA_ERROR); |
| 715 | |
| 716 | switchDisconnectOtherException = |
| 717 | debugCounters.registerCounter( |
| 718 | PREFIX, "switch-disconnect-other-exception", |
| 719 | "Number of times a switch was disconnected " + |
| 720 | "due to an exceptional situation not covered " + |
| 721 | "by other counters", |
| 722 | CounterType.ALWAYS_COUNT, |
| 723 | IDebugCounterService.CTR_MDATA_ERROR); |
| 724 | |
| 725 | switchConnected = |
| 726 | debugCounters.registerCounter( |
| 727 | PREFIX, "switch-connected", |
| 728 | "Number of times a new switch connection was " + |
| 729 | "established", |
| 730 | CounterType.ALWAYS_COUNT); |
| 731 | |
| 732 | unhandledMessage = |
| 733 | debugCounters.registerCounter( |
| 734 | PREFIX, "unhandled-message", |
| 735 | "Number of times an OpenFlow message was " + |
| 736 | "received that the controller ignored because " + |
| 737 | "it was inapproriate given the switch " + |
| 738 | "connection's state.", |
| 739 | CounterType.ALWAYS_COUNT, |
| 740 | IDebugCounterService.CTR_MDATA_WARN); |
| 741 | // might be less than warning |
| 742 | |
| 743 | packetInWhileSwitchIsSlave = |
| 744 | debugCounters.registerCounter( |
| 745 | PREFIX, "packet-in-while-switch-is-slave", |
| 746 | "Number of times a packet in was received " + |
| 747 | "from a switch that was in SLAVE role. " + |
| 748 | "Possibly inidicates inconsistent roles.", |
| 749 | CounterType.ALWAYS_COUNT); |
| 750 | epermErrorWhileSwitchIsMaster = |
| 751 | debugCounters.registerCounter( |
| 752 | PREFIX, "eperm-error-while-switch-is-master", |
| 753 | "Number of times a permission error was " + |
| 754 | "received while the switch was in MASTER role. " + |
| 755 | "Possibly inidicates inconsistent roles.", |
| 756 | CounterType.ALWAYS_COUNT, |
| 757 | IDebugCounterService.CTR_MDATA_WARN); |
| 758 | |
| 759 | roleReplyTimeout = |
| 760 | debugCounters.registerCounter( |
| 761 | PREFIX, "role-reply-timeout", |
| 762 | "Number of times a role request message did not " + |
| 763 | "receive the expected reply from a switch", |
| 764 | CounterType.ALWAYS_COUNT, |
| 765 | IDebugCounterService.CTR_MDATA_WARN); |
| 766 | |
| 767 | roleReplyReceived = // expected RoleReply received |
| 768 | debugCounters.registerCounter( |
| 769 | PREFIX, "role-reply-received", |
| 770 | "Number of times the controller received the " + |
| 771 | "expected role reply message from a switch", |
| 772 | CounterType.ALWAYS_COUNT); |
| 773 | |
| 774 | roleReplyErrorUnsupported = |
| 775 | debugCounters.registerCounter( |
| 776 | PREFIX, "role-reply-error-unsupported", |
| 777 | "Number of times the controller received an " + |
| 778 | "error from a switch in response to a role " + |
| 779 | "request indicating that the switch does not " + |
| 780 | "support roles.", |
| 781 | CounterType.ALWAYS_COUNT); |
| 782 | |
| 783 | switchCounterRegistrationFailed = |
| 784 | debugCounters.registerCounter(PREFIX, |
| 785 | "switch-counter-registration-failed", |
| 786 | "Number of times the controller failed to " + |
| 787 | "register per-switch debug counters", |
| 788 | CounterType.ALWAYS_COUNT, |
| 789 | IDebugCounterService.CTR_MDATA_WARN); |
| 790 | |
| 791 | |
| 792 | } |
| 793 | } |
| 794 | |
| 795 | public Counters getCounters() { |
| 796 | return this.counters; |
| 797 | } |
| 798 | |
| 799 | |
| 800 | // ************** |
| 801 | // Utility methods |
| 802 | // ************** |
| 803 | |
| 804 | public Map<String, Long> getMemory() { |
| 805 | Map<String, Long> m = new HashMap<String, Long>(); |
| 806 | Runtime runtime = Runtime.getRuntime(); |
| 807 | m.put("total", runtime.totalMemory()); |
| 808 | m.put("free", runtime.freeMemory()); |
| 809 | return m; |
| 810 | } |
| 811 | |
| 812 | |
| 813 | public Long getUptime() { |
| 814 | RuntimeMXBean rb = ManagementFactory.getRuntimeMXBean(); |
| 815 | return rb.getUptime(); |
| 816 | } |
| 817 | |
| 818 | /** |
| 819 | * Forward to the driver-manager to get an IOFSwitch instance. |
| 820 | * @param desc |
| 821 | * @return |
| 822 | */ |
| 823 | protected IOFSwitch getOFSwitchInstance(OFDescStatsReply desc, OFVersion ofv) { |
| 824 | if (switchManager == null) { |
| 825 | return new DummySwitchForTesting(); |
| 826 | } |
| 827 | return switchManager.getSwitchImpl(desc.getMfrDesc(), desc.getHwDesc(), |
| 828 | desc.getSwDesc(), ofv); |
| 829 | } |
| 830 | |
| 831 | @Activate |
| 832 | public void activate() { |
| 833 | log.info("Initialising OpenFlow Lib and IO"); |
| 834 | this.init(new HashMap<String, String>()); |
| 835 | this.startupComponents(); |
| 836 | this.run(); |
| 837 | } |
| 838 | |
| 839 | } |