#!/usr/bin/env python

"""
NOTES

To change ONOS log levels before startup, add something similar to the
following to onos.py's ONOSNode.start method before the onos service is
started:

# Change log levels
self.ucmd( 'echo "log4j.logger.io.atomix= DEBUG" >> $ONOS_HOME/apache-karaf-*/etc/org.ops4j.pax.logging.cfg' )

"""
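
# Example invocations (illustrative; the exact file name and environment vary,
# and Mininet normally needs root):
#   sudo -E python <this script> -i partition -p 1,3 2
#   sudo -E python <this script> -i -n 3 scaling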

import argparse
from mininet.log import output, info, warn, error, debug, setLogLevel
from mininet.cli import CLI as origCLI
from mininet.net import Mininet
from mininet.topo import SingleSwitchTopo, Topo
from mininet.node import Host
from os.path import join
from glob import glob
import re
import json
from collections import deque
import hashlib
import onos  # onos.py

# Utility functions

def pause( net, msg, hint=False ):
    """Reenter the CLI. Note that we use the mn base CLI class to allow
       extensibility and combination of custom files"""

    info( msg )

    if hint:
        help_msg = "Currently in the root Mininet net namespace...\n"
        help_msg += "To access control net functions use:\n"
        help_msg += "\tpx cnet=net.controllers[0].net\n"
        help_msg += "\tpy cnet.METHOD\n"
        help_msg += "To send commands to each onos node, use: onos_all CMD\n"
        help_msg += "\nBy default, ONOS nodes are running on the 192.168.123.X network\n"
        info( "%s\n" % help_msg )
    # NOTE: If we use onos.py as a custom file and as an imported module,
    #       we get two different sets of ONOS* classes. They don't play
    #       together and things don't work properly. Specifically the
    #       isinstance calls fail. This is due to import vs. exec calls
    onos.ONOSCLI( net )

def cprint( msg, color="default" ):
    color = color.lower()
    colors = { 'cyan': '\033[96m', 'purple': '\033[95m',
               'blue': '\033[94m', 'green': '\033[92m',
               'yellow': '\033[93m', 'red': '\033[91m',
               'end': '\033[0m' }
    pre = colors.get( color, '' )
    text = pre + msg + colors[ 'end' ]
    print( text )

def getNode( net, nodeId=0 ):
    "Helper function: return ONOS node, defaults to the first node"
    return net.controllers[ 0 ].nodes()[ nodeId ]

def onos_cli( net, line, nodeId=0 ):
    "Send command to ONOS CLI"
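    # For example, onos_cli( net, "nodes" ) runs 'client -h <onos1 IP> "nodes"'
    # on the first ONOS node and logs whatever the ONOS CLI returns.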
    c0 = net.controllers[ 0 ]
    # FIXME add this back after import onos.py works
    if isinstance( c0, onos.ONOSCluster ):
        # cmdLoop strips off command name 'onos'
        if line.startswith( ':' ):
            line = 'onos' + line
        node = getNode( net, nodeId )
        if line:
            line = '"%s"' % line
        cmd = 'client -h %s %s' % ( node.IP(), line )
        # node.cmdPrint( cmd )
        result = node.cmd( cmd )
        info( line )
        # Remove verbose spam from output
        m = re.search( "unverified \{\} key: \{\}", result )
        if m:
            info( result[ m.end(): ] )
        else:
            info( result )

def onos_all( net, line ):
    onosNodes = [ n for cluster in net.controllers for n in cluster.nodes() ]
    for i, node in enumerate( onosNodes ):
        cprint( "*" * 53, "red" )
        cprint( "onos%s: %s" % ( str( i + 1 ), repr( node ) ),
                "red" )
        cprint( "*" * 53, "red" )
        onos_cli( net, line, i )

# FIXME This needs a better name
def do_onos_all( self, line ):
    onos_all( self.mn, line )

# Add custom cli commands
# NOTE: This is so we can keep ONOSCLI and also add commands to it!
origCLI.do_onos_all = do_onos_all
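# Attaching do_onos_all to the base CLI class gives the CLI an "onos_all"
# command, e.g. "onos_all nodes" runs the ONOS 'nodes' command on every node.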

# Test cases

def Partition( net ):
    # Controller net instance
    cnet = net.controllers[ 0 ].net

    info( "ONOS control network partition test\n" )
    net.pingAll()
    if args.interactive:
        pause( net, "~~~ Dropping into cli... Exit cli to continue test\n", True )

    onos_all( net, "nodes;partitions;partitions -c" )
    info( "~~~ Right before the cluster is partitioned\n" )
    if args.interactive:
        pause( net, "Dropping into cli... Exit cli to continue test\n" )

    cs1 = cnet.switches[ 0 ]
    cs2 = cnet.switches[ 1 ]

    # PARTITION sub-clusters

    # we need to use names here
    cnet.configLinkStatus( cs1.name, cs2.name, "down" )
    onos_all( net, "nodes;partitions;partitions -c" )
    info( "~~~ Right after the cluster is partitioned. Next step is to heal the partition\n" )
    if args.interactive:
        pause( net, "Dropping into cli... Exit cli to continue test\n" )

    cnet.configLinkStatus( cs1.name, cs2.name, "up" )
    onos_all( net, "nodes;partitions;partitions -c" )
    info( "~~~ Right after the partition is healed\n" )
    if args.interactive:
        pause( net, "Test is finished! Exit cli to exit test.\n" )

def Scaling( net ):

    def startNodes( net, nodes ):
        "start multiple ONOS nodes"
        cluster = net.controllers[ 0 ]
        cluster.activeNodes.extend( nodes )
        cluster.activeNodes = sorted( set( cluster.activeNodes ) )
        for node in nodes:
            node.shouldStart = True
            node.start( cluster.env, cluster.activeNodes )
        for node in nodes:
            node.waitStarted()

    # control net objects
    cluster = net.controllers[ 0 ]
    cnet = cluster.net
    cs1 = cnet.switches[ 0 ]

    info( "ONOS dynamic clustering scaling test\n" )
    # Start the first node
    cluster.activeNodes.append( cnet.hosts[ 0 ] )
    cluster.activeNodes = sorted( set( cluster.activeNodes ) )
    startNodes( net, cluster.activeNodes )

    onos_all( net, "nodes;partitions;partitions -c" )
    if args.interactive:
        pause( net, "Dropping into cli... Exit cli to continue test\n" )

    # Scale up by two
    while True:
        new = [ n for c in net.controllers for n in c.net.hosts
                if isinstance( n, DynamicONOSNode ) and not n.started ][ :2 ]
        if not new:
            break
        startNodes( net, new )
        onos_all( net, "nodes;partitions;partitions -c" )
        if args.interactive:
            pause( net, "Dropping into cli... Exit cli to continue test\n" )

    # Scale down
    for i in range( len( cluster.activeNodes ) - 1 ):
        node = cluster.activeNodes.pop()
        node.genPartitions( cluster.activeNodes, node.metadata )
        onos_all( net, "nodes;partitions;partitions -c" )
        if args.interactive:
            pause( net, "Dropping into cli... Exit cli to continue test\n" )
    if args.interactive:
        pause( net, "Test is finished! Exit cli to exit test.\n" )


# Mininet object subclasses

class HTTP( Host ):
    def __init__( self, *args, **kwargs ):
        super( HTTP, self ).__init__( *args, **kwargs )
        self.dir = '/tmp/%s' % self.name
        self.cmd( 'rm -rf', self.dir )
        self.cmd( 'mkdir', self.dir )
        self.cmd( 'cd', self.dir )

    def start( self ):
        output( "(starting HTTP Server)" )
        # start python web server as a bg process
        self.cmd( 'python -m SimpleHTTPServer &> web.log &' )
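        # SimpleHTTPServer serves self.dir (its working directory) on its
        # default port, 8000, which is where the ONOS nodes fetch cluster.json
        # from (see 'remote' in runTest).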

    def stop( self ):
        # XXX is this ever called?
        print "Stopping HTTP Server..."
        print self.cmd( 'fg' )
        print self.cmd( '\x03' )  # ctrl-c


class DynamicONOSNode( onos.ONOSNode ):
    def __init__( self, *args, **kwargs ):
        self.shouldStart = False
        self.started = False
        self.metadata = '/tmp/cluster.json'
        # remote may be filled in later (e.g. by runTest) to point at an HTTP
        # copy of the cluster metadata; default to None so start() can check it
        self.remote = None
        super( DynamicONOSNode, self ).__init__( *args, **kwargs )
        # XXX HACK, need to get this passed in correctly
        self.alertAction = 'warn'

    def start( self, env, nodes=() ):
        if not self.shouldStart:
            return
        elif self.started:
            return
        else:
            ##### Modified from base class
            env = dict( env )
            env.update( ONOS_HOME=self.ONOS_HOME )
            if self.remote:
                # Point onos to the remote cluster metadata file
                ip = self.remote.get( 'ip', '127.0.0.1' )
                port = self.remote.get( 'port', '8000' )
                filename = self.remote.get( 'filename', 'cluster.json' )
                remote = 'http://%s:%s/%s' % ( ip, port, filename )
                uri = '-Donos.cluster.metadata.uri=%s' % remote
                prev = env.get( 'JAVA_OPTS', False )
                if prev:
                    # JAVA_OPTS entries are space separated, not ':' separated
                    jarg = ' '.join( [ prev, uri ] )
                else:
                    jarg = uri
                env.update( JAVA_OPTS=jarg )
            self.updateEnv( env )
            karafbin = glob( '%s/apache*/bin' % self.ONOS_HOME )[ 0 ]
            onosbin = join( self.ONOS_ROOT, 'tools/test/bin' )
            self.cmd( 'export PATH=%s:%s:$PATH' % ( onosbin, karafbin ) )
            self.cmd( 'cd', self.ONOS_HOME )
            self.ucmd( 'mkdir -p config' )
            self.genPartitions( nodes, self.metadata )
            info( '(starting %s)' % self )
            service = join( self.ONOS_HOME, 'bin/onos-service' )
            self.ucmd( service, 'server 1>../onos.log 2>../onos.log'
                       ' & echo $! > onos.pid; ln -s `pwd`/onos.pid ..' )
            self.onosPid = int( self.cmd( 'cat onos.pid' ).strip() )
            self.warningCount = 0
            ####
            self.started = True

    def sanityCheck( self, lowMem=100000 ):
        if self.started:
            super( DynamicONOSNode, self ).sanityCheck( lowMem )

    def waitStarted( self ):
        if self.started:
            super( DynamicONOSNode, self ).waitStarted()

    def genPartitions( self, nodes, location='/tmp/cluster.json' ):
        """
        Generate a cluster metadata file for dynamic clustering.
        Note: the cluster name and the number of partitions should stay the
              same across different versions of the file.
        """
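        # The generated file has (roughly) this shape:
        #   { "name": <int>,
        #     "nodes": [ { "id": <ip>, "ip": <ip>, "port": 9876 }, ... ],
        #     "partitions": [ { "id": 1, "members": [ <ip>, ... ] }, ... ] }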
        def genParts( nodes, k, parts=3 ):
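            # Each partition takes k members from a rotating deque of nodes;
            # e.g. genParts( [ a, b, c ], 3 ) gives members [ a, b, c ],
            # [ b, c, a ] and [ c, a, b ] for partitions 1, 2 and 3.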
            l = deque( nodes )
            perms = []
            for i in range( 1, parts + 1 ):
                part = {
                    'id': i,
                    'members': list( l )[ :k ]
                }
                perms.append( part )
                l.rotate( -1 )
            return perms

        print "Generating %s with %s" % ( location, str( nodes ) )
        port = 9876
        ips = [ node.IP() for node in nodes ]
        node = lambda k: { 'id': k, 'ip': k, 'port': port }
        m = hashlib.sha256( "Mininet based ONOS test" )
        name = int( m.hexdigest()[ :8 ], base=16 )
        partitions = genParts( ips, 3 )
        data = {
            'name': name,
            'nodes': [ node( v ) for v in ips ],
            'partitions': partitions
        }
        text = json.dumps( data, indent=4 )
        with open( location, 'w' ) as f:
            f.write( text )
        cprint( text, "yellow" )


class DynamicONOSCluster( onos.ONOSCluster ):
    def __init__( self, *args, **kwargs ):
        self.activeNodes = []
        # TODO: can we get super to use super's nodes()?
        super( DynamicONOSCluster, self ).__init__( *args, **kwargs )
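        # Temporarily treat every ONOS host as active so updateNodeIPs() sees
        # them all, then clear the list; the tests add nodes back as they are
        # started.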
        self.activeNodes = [ h for h in self.net.hosts if onos.isONOSNode( h ) ]
        onos.updateNodeIPs( self.env, self.nodes() )
        self.activeNodes = []

    def start( self ):
        "Start up ONOS control network"
        info( '*** ONOS_APPS = %s\n' % onos.ONOS_APPS )
        self.net.start()
        for node in self.net.hosts:
            if onos.isONOSNode( node ):
                node.start( self.env, self.nodes() )
            else:
                try:
                    node.start()
                except AttributeError:
                    # NAT doesn't have start?
                    pass
        info( '\n' )
        self.configPortForwarding( ports=self.forward, action='A' )
        self.waitStarted()
        return

    def nodes( self ):
        "Return list of ONOS nodes that should be running"
        return self.activeNodes

class HATopo( Topo ):
    def build( self, partitions=None, serverCount=1, dynamic=False, **kwargs ):
        """
        partitions = a list of strings specifying the assignment of onos nodes
                     to regions. ['1', '2,3'] designates two regions, with
                     ONOS 1 in the first and ONOS 2 and 3 in the second.
        serverCount = If partitions is not given, then the number of ONOS
                      nodes to create
        dynamic = A boolean indicating dynamic ONOS clustering
        """
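        # For example, partitions=[ '1', '2,3' ] builds cs1--onos1 and
        # cs2--onos2,onos3, links cs1 to cs2, and attaches an HTTP 'server'
        # host to every control switch.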
        self.switchNum = 1
        if dynamic:
            cls = DynamicONOSNode
        else:
            cls = onos.ONOSNode
        if partitions:
            prev = None
            for partition in partitions:
                # Create a region of ONOS nodes connected to a switch
                # FIXME Check for nodes that are not assigned to a partition?
                cur = self.addRegion( partition, cls )

                # Connect switch to previous switch
                if prev:
                    self.addLink( prev, cur )
                prev = cur
        else:
            partition = ','.join( [ str( x ) for x in range( 1, serverCount + 1 ) ] )
            cs1 = self.addRegion( partition, cls )
            if dynamic:
                # TODO Pass these in
                scale = 2
                new = ','.join( [ str( x + 1 ) for x in range( serverCount, serverCount + scale ) ] )
                cs2 = self.addRegion( new, cls )
                self.addLink( cs1, cs2 )
        server = self.addHost( "server", cls=HTTP )
        for switch in self.switches():
            self.addLink( server, switch )

    def addRegion( self, partition, cls=onos.ONOSNode ):
        switch = self.addSwitch( 'cs%s' % self.switchNum )
        self.switchNum += 1
        for n in partition.split( ',' ):
            node = self.addHost( "onos" + str( n ), cls=cls )
            self.addLink( switch, node )
        return switch


CLI = onos.ONOSCLI

# The main runner
def runTest( args ):
    test = None
    if args.test == "partition":
        test = Partition
        serverCount = args.nodes
        # NOTE we are ignoring serverCount for this test, using partition assignment instead.
        topo = HATopo( partitions=args.partition )
        # FIXME Configurable dataplane topology
        net = Mininet( topo=SingleSwitchTopo( 3 ),
                       controller=[ onos.ONOSCluster( 'c0', topo=topo, alertAction='warn' ) ],
                       switch=onos.ONOSOVSSwitch )
    elif args.test == "scaling":
        test = Scaling
        serverCount = args.nodes
        topo = HATopo( serverCount=serverCount, dynamic=True )
        net = Mininet( topo=SingleSwitchTopo( 3 ),
                       controller=[ DynamicONOSCluster( 'c0', topo=topo, alertAction='warn' ) ],
                       switch=onos.ONOSOVSSwitch )
        cluster = net.controllers[ 0 ]
        cnet = cluster.net
        server = cnet.get( 'server' )
        remote = { 'ip': server.IP(),
                   'port': '8000',
                   'filename': 'cluster.json' }
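        # Each DynamicONOSNode writes the shared metadata file into the HTTP
        # server's directory, and ONOS fetches it over HTTP via the URI that
        # DynamicONOSNode.start builds from 'remote'.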
        for node in cnet.hosts:
            if isinstance( node, DynamicONOSNode ):
                node.metadata = '%s/cluster.json' % server.dir
                node.remote = remote
        ips = []
        cluster.activeNodes = [ cnet.get( "onos%s" % ( i + 1 ) ) for i in range( serverCount ) ]
        for node in cluster.activeNodes:
            node.shouldStart = True
    else:
        print "Incorrect test"
        return
    net.start()
    if args.interactive:
        CLI( net )
    test( net )
    CLI( net )
    net.stop()


if __name__ == '__main__':
    setLogLevel( 'info' )
    # Base parser
    parser = argparse.ArgumentParser(
        description='Mininet based HA tests for ONOS. For more detailed help on a test, include the test option' )
    parser.add_argument(
        '-n', '--nodes', metavar="NODES", type=int, default=1,
        help="Number of nodes in the ONOS cluster" )
    parser.add_argument(
        '-i', '--interactive',  # type=bool,
        default=False, action="store_true",
        help="Pause the test in between steps" )
    test_parsers = parser.add_subparsers( title="Tests", help="Types of HA tests", dest="test" )

    # Partition test parser
    partition_help = ( 'Network partition test. Each set of ONOS nodes is connected to its own '
                       'switch in the control network. Partitions are introduced by removing '
                       'links between control network switches.' )
    partition_parser = test_parsers.add_parser(
        "partition", description=partition_help )
    partition_parser.add_argument(
        '-p', '--partition', metavar='Partition', required=True,
        type=str, nargs=2,
        help='Specify the membership for two partitions by node id. Nodes are comma separated '
             'and node numbering begins at 1. E.g. "1,3 2" will create a network with 3 ONOS '
             'nodes and two connected switches. Switch 1 will be connected to ONOS1 and ONOS3 '
             'while switch 2 will be connected to ONOS2. A partition will be created by '
             'disconnecting the two switches. All ONOS nodes will still be connected to the '
             'dataplane.' )

    # Dynamic scaling test parser
    # FIXME Replace with real values
    scaling_parser = test_parsers.add_parser( "scaling" )

    args = parser.parse_args()
    runTest( args )