#!/usr/bin/env python
# Source blob: 18f12226da5025f3ee4a9f40d7b927cddd526f13
# Last change: Jon Hall (2548559), 2016-08-19 13:56:14 -0700

"""
NOTES

To change the ONOS log levels before startup, add something like the
following to the ONOSNode.start method in onos.py, before the ONOS service
is started:
    # Change log levels
    self.ucmd( 'echo "log4j.logger.io.atomix= DEBUG" >> $ONOS_HOME/apache-karaf-*/etc/org.ops4j.pax.logging.cfg' )

"""
12
13import argparse
14from mininet.log import output, info, warn, error, debug, setLogLevel
15from mininet.cli import CLI as origCLI
16from mininet.net import Mininet
17from mininet.topo import SingleSwitchTopo, Topo
18from mininet.node import Host
19from os.path import join
20from glob import glob
21import re
22import json
23from collections import deque
24import hashlib
25import onos # onos.py
26
27# Utility functions
28
def pause( net, msg, hint=False ):
    """Log msg, then hand control to an interactive ONOS CLI.

       net: Mininet network passed to the CLI
       msg: text logged at info level before the CLI starts
       hint: when true, also log a short cheat sheet describing how to
             reach the control network and the ONOS nodes from the CLI"""
    info( msg )

    if hint:
        help_msg = ( "Currently in the root Mininet net namespace...\n"
                     "To access control net functions use:\n"
                     "\tpx cnet=net.controllers[0].net\n"
                     "\tpy cnet.METHOD\n"
                     "To send commands to each onos node, use: onos_all CMD\n"
                     "\nBy default, ONOS nodes are running on the 192.168.123.X network\n" )
        info( help_msg + "\n" )

    # NOTE: If we use onos.py as a custom file and as an imported module,
    #       we get two different sets of ONOS* classes. They don't play
    #       together and things don't work properly. Specifically the
    #       isinstance calls fail. This is due to import vs. exec calls
    onos.ONOSCLI( net )
48
def cprint( msg, color="default" ):
    """Print msg to stdout wrapped in ANSI color escape codes.

       msg: string to print
       color: case-insensitive color name; a name that is not in the
              table adds no color prefix, but the reset code is still
              appended after msg"""
    reset = '\033[0m'
    palette = {
        'cyan': '\033[96m',
        'purple': '\033[95m',
        'blue': '\033[94m',
        'green': '\033[92m',
        'yellow': '\033[93m',
        'red': '\033[91m',
        'end': reset,
    }
    prefix = palette.get( color.lower(), '' )
    print( prefix + msg + reset )
58
def getNode( net, nodeId=0 ):
    """Return one ONOS node of the first controller in net.

       net: network whose controllers[ 0 ] provides a nodes() method
       nodeId: index into the list returned by nodes(); defaults to the
               first node"""
    cluster = net.controllers[ 0 ]
    members = cluster.nodes()
    return members[ nodeId ]
62
def onos_cli( net, line, nodeId=0 ):
    """Send a command to the ONOS CLI of one node and log the reply.

       net: Mininet network; its first controller must be an
            onos.ONOSCluster, otherwise nothing is done
       line: ONOS CLI command; a leading ':' is expanded to 'onos:'
       nodeId: index of the node (see getNode) that runs the client

       The command is run through 'client -h <node ip>' on the chosen
       node and the (quoted) command and its output are logged at info
       level."""
    c0 = net.controllers[ 0 ]
    # FIXME add this back after import onos.py works
    if not isinstance( c0, onos.ONOSCluster ):
        return
    # cmdLoop strips off command name 'onos'
    if line.startswith( ':' ):
        line = 'onos' + line
    node = getNode( net, nodeId )
    if line:
        line = '"%s"' % line
    cmd = 'client -h %s %s' % ( node.IP(), line )
    # Named 'reply' so the mininet.log 'output' function is not shadowed
    reply = node.cmd( cmd )
    info( line )
    # Remove verbose spam from output: everything up to and including the
    # literal text "unverified {} key: {}" is dropped. The pattern is a raw
    # string so that '\{' is not treated as an (invalid) string escape.
    m = re.search( r"unverified \{\} key: \{\}", reply )
    if m:
        info( reply[ m.end(): ] )
    else:
        info( reply )
84
def onos_all( net, line ):
    """Run an ONOS CLI command on every node, with a red banner per node.

       net: Mininet network; nodes are gathered from nodes() of every
            entry in net.controllers
       line: command passed on to onos_cli for each node"""
    banner = "*" * 53
    onosNodes = []
    for cluster in net.controllers:
        onosNodes.extend( cluster.nodes() )
    for index, node in enumerate( onosNodes ):
        cprint( banner, "red" )
        cprint( "onos%s: %s" % ( str( index + 1 ), repr( node ) ), "red" )
        cprint( banner, "red" )
        # onos_cli looks the node up again by its position in the list
        onos_cli( net, line, index )
93
94# FIXME This needs a better name
def do_onos_all( self, line ):
    # CLI handler for the 'onos_all' command: forwards line to onos_all
    # using the network held by the CLI instance. (Kept as a comment
    # rather than a docstring so the CLI help text is unchanged.)
    net = self.mn
    onos_all( net, line )

# Add custom cli commands
# NOTE: This is so we can keep ONOSCLI and also add commands to it!
origCLI.do_onos_all = do_onos_all
101
102# Test cases
103
def Partition( net ):
    """Control network partition test.

       Takes the link between the first two control network switches
       down and brings it back up, printing the ONOS cluster state
       (nodes and partitions) before the split, after the split and
       after the heal. When the module-level args.interactive is set,
       the test drops into the CLI between steps.

       net: started Mininet network whose first controller exposes its
            control network as .net"""
    status = "nodes;partitions;partitions -c"
    resume = "Dropping into cli... Exit cli to continue test\n"

    def checkpoint( msg ):
        "Drop into the CLI, but only when running interactively"
        if args.interactive:
            pause( net, msg )

    # Controller net instance
    cnet = net.controllers[0].net

    info( "ONOS control network partition test\n" )
    net.pingAll()
    if args.interactive:
        pause( net, "~~~ Dropping into cli... Exit cli to continue test\n", True )

    onos_all( net, status )
    info( "~~~ Right before the partitioned\n" )
    checkpoint( resume )

    first, second = cnet.switches[0], cnet.switches[1]

    # PARTITION sub-clusters

    # we need to use names here
    cnet.configLinkStatus( first.name, second.name, "down" )
    onos_all( net, status )
    info( "~~~ Right after cluster is partitioned. Next step is to heal the partition\n" )
    checkpoint( resume )

    # HEAL the partition
    cnet.configLinkStatus( first.name, second.name, "up" )
    onos_all( net, status )
    info( "~~~ Right after the partition is healed \n" )
    checkpoint( "Test is finished! Exit cli to exit test.\n" )
135
def Scaling( net ):
    """Dynamic cluster scaling test.

       Starts the initially active ONOS nodes, then repeatedly starts up
       to two not-yet-started DynamicONOSNodes until none are left, and
       finally shrinks the active set one node at a time by regenerating
       the cluster metadata without the removed node. The cluster state
       is printed after every step; when the module-level
       args.interactive is set, the test drops into the CLI between
       steps.

       net: started Mininet network whose first controller is the
            dynamic ONOS cluster"""
    status = "nodes;partitions;partitions -c"
    resume = "Dropping into cli... Exit cli to continue test\n"

    def startNodes( net, nodes ):
        "start multiple ONOS nodes"
        cluster = net.controllers[0]
        # Work on a snapshot: callers may pass cluster.activeNodes itself,
        # and extending a list with itself would duplicate every entry and
        # make the loops below visit each node twice
        nodes = list( nodes )
        cluster.activeNodes.extend( nodes )
        cluster.activeNodes = sorted( set( cluster.activeNodes ) )
        for node in nodes:
            node.shouldStart = True
            node.start( cluster.env, cluster.activeNodes )
        for node in nodes:
            node.waitStarted()

    # control net objects
    cluster = net.controllers[0]
    cnet = cluster.net

    info( "ONOS dynamic clustering scaling test\n" )
    # Start the first node
    cluster.activeNodes.append( cnet.hosts[0] )
    cluster.activeNodes = sorted( set( cluster.activeNodes ) )
    startNodes( net, cluster.activeNodes )

    onos_all( net, status )
    if args.interactive:
        pause( net, resume )

    # Scale up by two
    while True:
        new = [ n for c in net.controllers for n in c.net.hosts
                if isinstance( n, DynamicONOSNode ) and not n.started ][ :2 ]
        if not new:
            break
        startNodes( net, new )
        onos_all( net, status )
        if args.interactive:
            pause( net, resume )

    # Scale down: drop nodes from the active set (all but one) and
    # rewrite the metadata file so it lists only the remaining nodes
    for _ in range( len( cluster.activeNodes ) - 1 ):
        node = cluster.activeNodes.pop()
        node.genPartitions( cluster.activeNodes, node.metadata )
        onos_all( net, status )
        if args.interactive:
            pause( net, resume )
    if args.interactive:
        pause( net, "Test is finished! Exit cli to exit test.\n" )
183
184
185
186# Mininet object subclasses
187
class HTTP( Host ):
    """Host that runs Python's SimpleHTTPServer from a private directory.

       The directory /tmp/<host name> is recreated empty when the host
       is constructed and becomes the host shell's working directory,
       which is where the web server is launched from."""

    def __init__( self, *args, **kwargs ):
        super( HTTP, self ).__init__( *args, **kwargs )
        self.dir = '/tmp/%s' % self.name
        # Start from an empty directory and make it the working directory
        self.cmd( 'rm -rf', self.dir )
        self.cmd( 'mkdir', self.dir )
        self.cmd( 'cd', self.dir )

    def start( self ):
        "Launch the web server in the background, logging to web.log"
        output( "(starting HTTP Server)" )
        # start python web server as a bg process
        self.cmd( 'python -m SimpleHTTPServer &> web.log &' )

    def stop( self ):
        "Bring the server job to the foreground and interrupt it"
        # XXX is this ever called?
        # print() with a single argument behaves the same on Python 2 and 3
        print( "Stopping HTTP Server..." )
        print( self.cmd( 'fg' ) )
        print( self.cmd( '\x03' ) )  # ctrl-c
205 print self.cmd( '\x03' ) # ctrl-c
206
207
class DynamicONOSNode( onos.ONOSNode ):
    """ONOS node that starts only on request and writes cluster metadata.

       Attributes set here:
       shouldStart: start() is a no-op until this is set to True
       started: set once start() has launched the ONOS service
       metadata: path of the cluster metadata file written on start
       remote (optional, set by the caller): dict with 'ip', 'port' and
           'filename' describing the URL ONOS should read the metadata
           from"""

    def __init__( self, *args, **kwargs ):
        self.shouldStart = False
        self.started = False
        self.metadata = '/tmp/cluster.json'
        super( DynamicONOSNode, self ).__init__( *args, **kwargs )
        # XXX HACK, need to get this passed in correctly
        self.alertAction = 'warn'

    def start( self, env, nodes=() ):
        """Start the ONOS service if requested and not already running.

           env: environment variables for the node (copied, not modified)
           nodes: nodes to list in the generated cluster metadata"""
        if not self.shouldStart or self.started:
            return
        ##### Modified from base class
        env = dict( env )
        env.update( ONOS_HOME=self.ONOS_HOME )
        # 'remote' is only assigned from outside the class, so do not
        # assume the attribute exists
        remoteCfg = getattr( self, 'remote', None )
        if remoteCfg:
            # Point onos to remote cluster metadata file
            ip = remoteCfg.get( 'ip', '127.0.0.1' )
            port = remoteCfg.get( 'port', '8000' )
            filename = remoteCfg.get( 'filename', 'cluster.json' )
            remote = 'http://%s:%s/%s' % ( ip, port, filename )
            uri = '-Donos.cluster.metadata.uri=%s' % remote
            prev = env.get( 'JAVA_OPTS', False )
            # JVM options are separated by whitespace; joining with ':'
            # would fuse the existing options and the new one into a
            # single token.
            # NOTE(review): assumes updateEnv quotes values containing
            # spaces - confirm against onos.py
            jarg = ' '.join( [ prev, uri ] ) if prev else uri
            env.update( JAVA_OPTS=jarg )
        self.updateEnv( env )
        karafbin = glob( '%s/apache*/bin' % self.ONOS_HOME )[ 0 ]
        onosbin = join( self.ONOS_ROOT, 'tools/test/bin' )
        self.cmd( 'export PATH=%s:%s:$PATH' % ( onosbin, karafbin ) )
        self.cmd( 'cd', self.ONOS_HOME )
        self.ucmd( 'mkdir -p config ' )
        self.genPartitions( nodes, self.metadata )
        info( '(starting %s)' % self )
        service = join( self.ONOS_HOME, 'bin/onos-service' )
        self.ucmd( service, 'server 1>../onos.log 2>../onos.log'
                   ' & echo $! > onos.pid; ln -s `pwd`/onos.pid ..' )
        self.onosPid = int( self.cmd( 'cat onos.pid' ).strip() )
        self.warningCount = 0
        ####
        self.started = True

    def sanityCheck( self, lowMem=100000 ):
        "Run the base class sanity check, but only once the node is started"
        if self.started:
            super( DynamicONOSNode, self ).sanityCheck( lowMem )

    def waitStarted( self ):
        "Wait for ONOS to come up, but only if this node was started"
        if self.started:
            super( DynamicONOSNode, self ).waitStarted()

    def genPartitions( self, nodes, location='/tmp/cluster.json' ):
        """
        Generate a cluster metadata file for dynamic clustering.
        Note: name should be the same in different versions of the file as
        well as the number of partitions.

        nodes: nodes to list as cluster members (their IP() is used as
               both id and ip)
        location: path of the JSON file to write
        """
        def genParts( nodes, k, parts=3 ):
            "Return 'parts' partitions of up to k members, rotating by one"
            ring = deque( nodes )
            perms = []
            for i in range( 1, parts + 1 ):
                perms.append( { 'id': i, 'members': list( ring )[ :k ] } )
                ring.rotate( -1 )
            return perms

        # print() with a single argument behaves the same on Python 2 and 3
        print( "Generating %s with %s" % ( location, str( nodes ) ) )
        port = 9876
        ips = [ node.IP() for node in nodes ]
        # The cluster name is derived from a fixed string so that every
        # generated file carries the same name. A bytes literal is used
        # because Python 3's hashlib rejects str input.
        m = hashlib.sha256( b"Mininet based ONOS test" )
        name = int( m.hexdigest()[ :8 ], base=16 )
        data = {
            'name': name,
            'nodes': [ { 'id': ip, 'ip': ip, 'port': port } for ip in ips ],
            'partitions': genParts( ips, 3 )
        }
        text = json.dumps( data, indent=4 )
        with open( location, 'w' ) as f:
            f.write( text )
        cprint( text, "yellow" )
297
298
class DynamicONOSCluster( onos.ONOSCluster ):
    """ONOS cluster whose membership is the mutable list activeNodes.

       nodes() returns activeNodes, so callers decide which ONOS hosts
       count as part of the cluster by editing that list."""

    def __init__( self, *args, **kwargs ):
        self.activeNodes = []
        # TODO: can we get super to use super's nodes()?
        super( DynamicONOSCluster, self ).__init__( *args, **kwargs )
        # Temporarily expose every ONOS host through nodes() so that
        # updateNodeIPs sees all of them, then go back to an empty set
        self.activeNodes = list( filter( onos.isONOSNode, self.net.hosts ) )
        onos.updateNodeIPs( self.env, self.nodes() )
        self.activeNodes = []

    def start( self ):
        "Start up ONOS control network"
        info( '*** ONOS_APPS = %s\n' % onos.ONOS_APPS )
        self.net.start()
        for host in self.net.hosts:
            if onos.isONOSNode( host ):
                host.start( self.env, self.nodes() )
                continue
            try:
                host.start()
            except AttributeError:
                # NAT doesn't have start?
                pass
        info( '\n' )
        self.configPortForwarding( ports=self.forward, action='A' )
        self.waitStarted()

    def nodes( self ):
        "Return list of ONOS nodes that should be running"
        return self.activeNodes
329
class HATopo( Topo ):
    "Control network topology: regions of ONOS nodes, one switch per region"

    def build( self, partitions=None, serverCount=1, dynamic=False, **kwargs ):
        """
        partitions = a list of strings specifing the assignment of onos nodes
                     to regions. ['1', '2,3'] designates two regions, with
                     ONOS 1 in the first and ONOS 2 and 3 in the second.
                     None or an empty list means "not given".
        serverCount = If partitions is not given, then the number of ONOS
                      nodes to create
        dynamic = A boolean indicating dynamic ONOS clustering
        """
        # partitions defaults to None rather than a shared mutable list
        self.switchNum = 1
        cls = DynamicONOSNode if dynamic else onos.ONOSNode
        if partitions:
            prev = None
            for partition in partitions:
                # Create a region of ONOS nodes connected to a switch
                # FIXME Check for nodes that are not assigned to a partition?
                cur = self.addRegion( partition, cls )

                # Connect switch to previous switch
                if prev:
                    self.addLink( prev, cur )
                prev = cur
        else:
            partition = ','.join( str( x ) for x in range( 1, serverCount + 1 ) )
            cs1 = self.addRegion( partition, cls )
            if dynamic:
                # TODO Pass these in
                scale = 2
                # Ids of the extra nodes continue after the initial ones
                new = ','.join( str( x + 1 )
                                for x in range( serverCount, serverCount + scale ) )
                cs2 = self.addRegion( new, cls )
                self.addLink( cs1, cs2 )
                # HTTP host that is linked to every control switch
                server = self.addHost( "server", cls=HTTP )
                for switch in self.switches():
                    self.addLink( server, switch )

    def addRegion( self, partition, cls=onos.ONOSNode ):
        """Add a switch plus one host per id in partition; return the switch.

           partition: comma separated node ids, e.g. '2,3' creates hosts
                      onos2 and onos3
           cls: node class used for the created hosts"""
        switch = self.addSwitch( 'cs%s' % self.switchNum )
        self.switchNum += 1
        for n in partition.split( ',' ):
            node = self.addHost( "onos" + str( n ), cls=cls )
            self.addLink( switch, node )
        return switch
376
377
CLI = onos.ONOSCLI

# The main runner
def runTest( args ):
    """Build the network for the selected test, run it, and clean up.

       args: parsed command line with the attributes
             test ("partition" or "scaling"), interactive, nodes and,
             for the partition test, partition

       Prints "Incorrect test" and returns if args.test is not known.
       The network is always stopped once it has been started, even if
       the test raises."""
    if args.test == "partition":
        test = Partition
        # NOTE we are ignoring serverCount for this test, using partition assignment instead.
        topo = HATopo( partitions=args.partition )
        # FIXME Configurable dataplane topology
        net = Mininet( topo=SingleSwitchTopo( 3 ),
                       controller=[ onos.ONOSCluster( 'c0', topo=topo, alertAction='warn' ) ],
                       switch=onos.ONOSOVSSwitch )
    elif args.test == "scaling":
        test = Scaling
        serverCount = args.nodes
        topo = HATopo( serverCount=serverCount, dynamic=True )
        net = Mininet( topo=SingleSwitchTopo( 3 ),
                       controller=[ DynamicONOSCluster( 'c0', topo=topo, alertAction='warn' ) ],
                       switch=onos.ONOSOVSSwitch )
        cluster = net.controllers[0]
        cnet = cluster.net
        server = cnet.get( 'server' )
        # Where the ONOS nodes fetch the cluster metadata from
        remote = { 'ip': server.IP(),
                   'port': '8000',
                   'filename': 'cluster.json' }
        for node in cnet.hosts:
            if isinstance( node, DynamicONOSNode ):
                # Write the metadata into the directory of the HTTP host
                node.metadata = '%s/cluster.json' % server.dir
                node.remote = remote
        # Only the first serverCount nodes are started with the cluster
        cluster.activeNodes = [ cnet.get( "onos%s" % ( i + 1 ) )
                                for i in range( serverCount ) ]
        for node in cluster.activeNodes:
            node.shouldStart = True
    else:
        # print() with a single argument behaves the same on Python 2 and 3
        print( "Incorrect test" )
        return
    net.start()
    try:
        if args.interactive:
            CLI( net )
        test( net )
        CLI( net )
    finally:
        # Do not leave the emulated network behind if the test fails
        net.stop()
422
423
if __name__ == '__main__':
    setLogLevel( 'info' )
    # Base parser
    parser = argparse.ArgumentParser(
        description='Mininet based HA tests for ONOS. For more detailed help on a test include the test option' )
    parser.add_argument(
        '-n', '--nodes', metavar="NODES", type=int, default=1,
        help="Number of nodes in the ONOS cluster" )
    parser.add_argument(
        '-i', '--interactive',
        default=False, action="store_true",
        help="Pause the test in between steps" )
    test_parsers = parser.add_subparsers( title="Tests", help="Types of HA tests", dest="test" )
    # Python 3's argparse treats the sub-command as optional by default;
    # require it so a missing test name produces a usage error. Setting
    # the attribute (rather than passing required=True) also works on
    # argparse versions whose add_subparsers lacks that keyword.
    test_parsers.required = True

    # Partition test parser
    partition_help = 'Network partition test. Each set of ONOS nodes is connected to their own switch in the control network. Partitions are introduced by removing links between control network switches.'
    partition_parser = test_parsers.add_parser(
        "partition", description=partition_help )
    partition_parser.add_argument(
        '-p', '--partition', metavar='Partition', required=True,
        type=str, nargs=2,
        help='Specify the membership for two partitions by node id. Nodes are comma separated and node count begins at 1. E.g. "1,3 2" will create a network with 3 ONOS nodes and two connected switches. Switch 1 will be connected to ONOS1 and ONOS3 while switch 2 will be connected to ONOS2. A partition will be created by disconnecting the two switches. All ONOS nodes will still be connected to the dataplane.' )

    # Dynamic scaling test parser
    # FIXME Replace with real values
    scaling_parser = test_parsers.add_parser( "scaling" )

    # NOTE: 'args' must stay a module-level name; Partition and Scaling
    #       read args.interactive from it
    args = parser.parse_args()
    runTest( args )