Use netstat to wait for Zookeeper, Cassandra startup
diff --git a/vm-utils/onos.py b/vm-utils/onos.py
index c6b96dd..1cff74c 100755
--- a/vm-utils/onos.py
+++ b/vm-utils/onos.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-onos.py: A simple ONOS Controller() subclass for Mininet
+onos.py: A basic (?) ONOS Controller() subclass for Mininet
 
 We implement the following classes:
 
@@ -27,14 +27,15 @@
 from mininet.node import Controller, OVSSwitch
 from mininet.net import Mininet
 from mininet.cli import CLI
-from mininet.topo import SingleSwitchTopo
-from mininet.log import setLogLevel, info
+from mininet.topo import LinearTopo
+from mininet.log import setLogLevel, info, warn
 from mininet.util import quietRun
 
 from shutil import copyfile
 from os import environ
 from functools import partial
-
+from time import time, sleep
+from sys import argv
 
 class ONOS( Controller ):
     "Custom controller class for ONOS"
@@ -48,38 +49,75 @@
     cassDir = dirBase + '/onos-%s.cassandra'
     configFile = dirBase + '/onos-%s.properties'
     logbackFile = dirBase + '/onos-%s.logback'
-    jmxbase = 7189
-    restbase = 8080
+
+    # Base ONOS modules
+    baseModules = (
+        'net.floodlightcontroller.core.FloodlightProvider',
+        'net.floodlightcontroller.threadpool.ThreadPool',
+        'net.onrc.onos.ofcontroller.floodlightlistener.NetworkGraphPublisher',
+        'net.floodlightcontroller.ui.web.StaticWebRoutable',
+        'net.onrc.onos.datagrid.HazelcastDatagrid',
+        'net.onrc.onos.ofcontroller.flowmanager.FlowManager',
+        'net.onrc.onos.ofcontroller.flowprogrammer.FlowProgrammer',
+        'net.onrc.onos.ofcontroller.topology.TopologyManager',
+        'net.onrc.onos.registry.controller.ZookeeperRegistry'
+    )
+
+    # Additions for reactive forwarding
+    reactiveModules = (
+            'net.onrc.onos.ofcontroller.proxyarp.ProxyArpManager',
+            'net.onrc.onos.ofcontroller.core.config.DefaultConfiguration',
+            'net.onrc.onos.ofcontroller.forwarding.Forwarding'
+    )
+
+    # Module parameters
     ofbase = 6633
+    restbase = 8080
+    jmxbase = 7189
 
-    # Per-instance property template
     fc = 'net.floodlightcontroller.'
-    proctag = 'mn-onos-id'
-    jvmopts = (
-        # We match on this to shut down our instances
-        ( proctag, 0 ),
-        ( fc + 'restserver.RestApiServer.port', restbase ),
-        ( fc + 'core.FloodlightProvider.openflowport', ofbase ),
-        ( fc + 'core.FloodlightProvider.controllerid', 0 ) )
 
+    perNodeConfigBase = {
+        fc + 'core.FloodlightProvider.openflowport': ofbase,
+        fc + 'restserver.RestApiServer.port': restbase,
+        fc + 'core.FloodlightProvider.controllerid': 0
+    }
+
+    staticConfig = {
+        'net.onrc.onos.ofcontroller.floodlightlistener.NetworkGraphPublisher.dbconf':
+            '/tmp/cassandra.titan',
+        'net.onrc.onos.datagrid.HazelcastDatagrid.datagridConfig':
+            onosDir + '/conf/hazelcast.xml',
+        'net.floodlightcontroller.core.FloodlightProvider.workerthreads': 16,
+        'net.floodlightcontroller.forwarding.Forwarding.idletimeout': 5,
+        'net.floodlightcontroller.forwarding.Forwarding.hardtimeout': 0
+    }
+
+    proctag = 'mn-onos-id'
 
     # For maven debugging
     # mvn = 'mvn -o -e -X'
 
-    def __init__( self, name, n=1, drop=True, **params):
+    def __init__( self, name, n=1, reactive=True, runAsRoot=False, **params):
         """n: number of ONOS instances to run (1)
-           drop: drop root privileges (True)"""
+           reactive: run in reactive mode (True)
+           runAsRoot: run ONOS as root (False)"""
         self.check()
-        self.drop = drop
         self.count = n
+        self.reactive = reactive
+        self.runAsRoot = runAsRoot
         self.ids = range( 0, self.count )
         Controller.__init__( self, name, **params )
         # We don't need to run as root, and it can interfere
         # with starting Zookeeper manually
-        if self.drop:
-            self.user = quietRun( 'who am i' ).split()[ 0 ]
-            self.sendCmd( 'su', self.user )
-            self.waiting = False
+        self.user = None
+        if not self.runAsRoot:
+            try:
+                self.user = quietRun( 'who am i' ).split()[ 0 ]
+                self.sendCmd( 'su', self.user )
+                self.waiting = False
+            except:
+                warn( '__init__: failed to drop privileges\n' )
         # Need to run commands from ONOS dir
         self.cmd( 'cd', self.onosDir )
         self.cmd( 'export PATH=$PATH:%s' % self.onosDir )
@@ -87,7 +125,7 @@
             self.cmd( 'export MVN="%s"' % self.mvn )
 
     def check( self ):
-        "Check for prerequisites"
+        "Check for ONOS prerequisites"
         if not quietRun( 'which java' ):
                 raise Exception( 'java not found -'
                                  ' make sure it is installed and in $PATH' )
@@ -95,11 +133,33 @@
                 raise Exception( 'Maven (mvn) not found -'
                                 ' make sure it is installed and in $PATH' )
 
+
+    def waitNetstat( self, pid ):
+        """Poll netstat once a second until pid owns a TCP socket, then
+           return the matching lines. Blocks forever if it never appears."""
+        while True:
+            # Leading space anchors the PID column so 12 can't match 112/java
+            output = self.cmd( 'sudo netstat -natp | grep " %s/"' % pid )
+            if output:
+                return output
+            info( '.' )
+            sleep( 1 )
+
+    def waitStart( self, procname, pattern ):
+        "Wait for first 'pgrep -f pattern' match to appear in netstat, or raise"
+        info( '* Waiting for %s startup' % procname )
+        pids = self.cmd( 'pgrep -f %s' % pattern ).split()
+        if not pids: raise Exception( '%s process not found' % procname )
+        output = self.waitNetstat( int( pids[ 0 ] ) )
+        info( '\n* %s process %s is listening\n' % ( procname, pids[ 0 ] ) )
+        info( output )
+
     def startCassandra( self ):
         "Start Cassandra"
         self.cmd( 'start-cassandra.sh start' )
+        self.waitStart( 'Cassandra', 'apache-cassandra' )
         status = self.cmd( 'start-cassandra.sh status' )
-        if 'Error' in status:
+        if 'running' not in status:
             raise Exception( 'Cassandra startup failed: ' + status )
 
     def stopCassandra( self ):
@@ -114,6 +174,7 @@
             template = self.zookeeperDir + '/conf/zoo_sample.cfg'
             copyfile( template, cfg )
         self.cmd( 'start-zk.sh restart' )
+        self.waitStart( 'Zookeeper', 'zookeeper' )
         status = self.cmd( 'start-zk.sh status' )
         if 'Error' in status:
             raise Exception( 'Zookeeper startup failed: ' + status )
@@ -122,8 +183,29 @@
         "Stop Zookeeper"
         self.cmd( 'start-zk.sh stop' )
 
-    def setVars( self, id ):
-        "Set and return environment vars"
+    def genProperties( self, id, path='/tmp' ):
+        """Write the properties file for ONOS instance id into path
+           returns: name of the generated file"""
+        filename = path + '/onos-%s.properties' % id
+        with open( filename, 'w' ) as props:
+            # Reactive forwarding needs extra modules on top of the base set
+            modules = list( self.baseModules )
+            modules.extend( self.reactiveModules if self.reactive else () )
+            props.write( 'floodlight.modules = %s\n' %
+                         ',\\\n'.join( modules ) )
+            # Integer per-node settings are offset by the instance number
+            for key, base in self.perNodeConfigBase.iteritems():
+                value = base + id if type( base ) is int else base
+                props.write( '%s = %s\n' % ( key, value ) )
+            # Static settings are written out unchanged
+            for key, value in self.staticConfig.iteritems():
+                props.write( '%s = %s\n' % ( key, value ) )
+        return filename
+
+    def setVars( self, id, propsFile ):
+        """Set and return environment vars
+           id: ONOS instance number
+           propsFile: properties file name"""
         # ONOS directories and files
         logdir = self.logDir % id
         cassdir = self.cassDir % id
@@ -135,18 +217,21 @@
         self.cmd( 'export CASS_DIR="%s"' % cassdir )
         self.cmd( 'export ONOS_LOGBACK="%s"' % logback )
         self.cmd( 'export JMX_PORT=%s' % jmxport )
-        jvmopts = ('-agentlib:jdwp=transport=dt_socket,address=%s,server=y,suspend=n '
-            % ( 8000 + id ) )
-        jvmopts += ' '.join( '-D%s=%s '% ( opt, val + id )
-            for opt, val in self.jvmopts )
-        self.cmd( 'export JVM_OPTS="%s"' % jvmopts )
+        self.cmd( 'export JVM_OPTS="-D%s=%s"' % (
+            self.proctag, id ) )
+        self.cmd( 'export ONOS_PROPS="%s"' % propsFile )
 
     def startONOS( self, id ):
         """Start ONOS
-           id: identifier for new instance"""
-        # self.stopONOS( id )
-        self.setVars( id )
+           id: new instance number"""
+        start = time()
+        self.stopONOS( id )
+        propsFile = self.genProperties( id )
+        self.setVars( id, propsFile )
         self.cmdPrint( 'start-onos.sh startnokill' )
+        # start-onos.sh waits for ONOS startup
+        elapsed = time() - start
+        info( '* ONOS %s started in %.2f seconds\n' % ( id, elapsed ) )
 
     def stopONOS( self, id ):
         """Shut down ONOS
@@ -171,7 +256,7 @@
         for id in self.ids:
             info( '* Stopping ONOS %s\n' % id )
             self.stopONOS( id )
-        info( '* Stopping zookeeper\n' )
+        info( '* Stopping Zookeeper\n' )
         self.stopZookeeper()
         info( '* Stopping Cassandra\n' )
         self.stopCassandra()
@@ -201,16 +286,32 @@
                       'max_backoff=1000' )
 
 
+def waitConnected( switches ):
+    """Block until every switch reports connected(), polling once a second
+       and logging progress along with the total time taken"""
+    began = time()
+    info( '* Waiting for switches to connect...\n' )
+    for switch in switches:
+        info( switch )
+        while not switch.connected():
+            info( '.' )
+            sleep( 1 )
+        info( ' ' )
+    info( '\n* Connected in %.2f seconds\n' % ( time() - began ) )
+
+
 controllers = { 'onos': ONOS }
 switches = { 'ovso': OVSSwitchONOS }
 
 
 if __name__ == '__main__':
-    "Simple test of ONOSController"
+    # Simple test for ONOS() controller class
     setLogLevel( 'info' )
-    net = Mininet( topo=SingleSwitchTopo( 2 ),
+    size = 2 if len( argv ) != 2 else int( argv[ 1 ] )
+    net = Mininet( topo=LinearTopo( size ),
                    controller=partial( ONOS, n=2 ),
                    switch=OVSSwitchONOS )
     net.start()
+    waitConnected( net.switches )
     CLI( net )
     net.stop()