"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

TestON is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
( at your option ) any later version.

TestON is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
class SCPFmastershipFailoverLat:
    """
    Mastership failover latency test.

    CASE0 reads the test parameters, CASE1 starts Mininet against the active
    ONOS nodes, CASE2 repeatedly kills an ONOS node and records latencies
    derived from the ONOS 'events' output and a tshark capture, and CASE3
    appends the aggregated results to the database file.
    """

    def __init__( self ):
        self.default = ''

    def CASE0( self, main ):
        """
        - GIT
        - BUILDING ONOS
            Pull specific ONOS branch, then Build ONOS on ONOS Bench.
            This step is usually skipped. Because in a Jenkins driven automated
            test env. We want Jenkins jobs to pull&build for flexibility to handle
            different versions of ONOS.
        - Construct tests variables

        Reads every setting used by the later cases from main.params into
        attributes on main, then creates ( or truncates ) the database file.
        """
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        stepResult = main.FALSE
        try:
            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
            main.maxScale = int( main.params[ 'max' ] )
            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
            # Only the exact string "True" enables debug mode
            main.debug = main.params[ 'TEST' ][ 'debug' ] == "True"
            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )

            stepResult = main.testSetUp.envSetup()
            main.log.info( "Create Database file " + main.dbFileName )
            # Opening with "w+" creates the file or empties an existing one
            with open( main.dbFileName, "w+" ):
                pass

        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE(review): the 'evn' spelling is kept as-is; presumably it matches
        # the method name defined by ONOSSetup - confirm before renaming.
        main.testSetUp.evnSetupConclusion( stepResult )

    def CASE1( self, main ):
        """
        Clean up the test environment, set up ONOS and start Mininet with
        one remote controller entry per active ONOS node.
        """
        main.testSetUp.ONOSSetUp( main.Cluster, True,
                                  cellName=main.cellName, killRemoveMax=False )
        try:
            from tests.dependencies.utils import Utils
        except ImportError:
            main.log.error( "Utils not found exiting the test" )
            main.exit()
        # Reuse an existing Utils instance if an earlier run already made one
        try:
            main.Utils
        except ( NameError, AttributeError ):
            main.Utils = Utils()
        main.Utils.mininetCleanup( main.Mininet1 )

        main.step( "Starting up Mininet from command." )

        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
        for ctrl in main.Cluster.active():
            mnCmd += " --controller remote,ip=" + ctrl.ipAddress

        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )

        utilities.assert_equals( expect=main.TRUE,
                                 actual=stepResult,
                                 onpass="Mininet was set up correctly.",
                                 onfail="Mininet was NOT set up correctly." )

    def CASE2( self, main ):
        """
        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
        ( tshark time ), then bring the node back up.

        Runs main.warmUp + main.sampleSize iterations. All latencies are in
        milliseconds relative to a timestamp taken just before the node is
        killed. Only post-warm-up iterations that pass every check are stored
        in main.latencyData. The test is aborted when the share of failed
        iterations reaches main.failPercent or when a critical error
        ( kill/stop/restart failure ) occurs.
        """
        import time
        import datetime
        from tests.HA.dependencies.HA import HA

        main.HA = HA()

        main.latencyData = { 'kill_to_deactivation': [],
                             'deactivation_to_role_request': [] }

        main.failCounter = 0
        passingResult = True
        criticalError = False
        eventTimeFormat = "%Y-%m-%dT%H:%M:%S.%f"

        main.step( "Gathering data starting with "
                   + str( main.warmUp )
                   + " warm ups and a sample size of "
                   + str( main.sampleSize ) )

        for iteration in range( main.sampleSize + main.warmUp ):

            main.log.info( "==========================================" )
            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )

            # NOTE(review): the node that gets killed is the first active node,
            # while the log messages and the runningNodes bookkeeping below use
            # main.nodeNumToKill; presumably these only agree when nodeNumToKill
            # is 0 - confirm.
            ip_address = main.Cluster.active( 0 ).ipAddress
            strNodeNumToKill = str( main.nodeNumToKill )

            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )
            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address

            if not mastershipCheck:
                main.log.warn( "Mastership is NOT as expected." )

            # Empty the capture file so a stale result is never read back
            with open( main.tsharkResultPath, "w" ) as tshark:
                tshark.write( "" )
            main.log.info( "Starting tshark capture." )
            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
            # Reference point ( ms since the epoch ) for every latency below
            time1 = time.time() * 1000.0

            # Kill an ONOS node
            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
            killresult = main.ONOSbench.onosKill( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False

            # Stop an ONOS node
            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
            stopresult = main.ONOSbench.onosStop( ip_address )

            # Both operations must succeed; comparing the two results for
            # equality would also accept the case where both of them failed.
            killStopResult = bool( killresult and stopresult )

            if not killStopResult:
                main.log.error( "ONOS node was NOT successfully stopped and killed." )
                criticalError = True

            time.sleep( main.failoverSleep )

            # Stop tshark and get times
            main.log.info( "Stopping tshark." )
            main.ONOSbench.tsharkStop()

            masterChangedLats = []
            instanceDeactivatedLats = []

            main.log.info( "Obtaining latencies from 'events' output." )
            for CLInum in range( main.Cluster.numCtrls - 1 ):
                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
                # Walk the events newest-first and take at most one event of
                # each kind per node ( list length == CLInum means "not yet
                # recorded for this node" ).
                for line in reversed( eventOutput ):
                    if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
                        latencies = instanceDeactivatedLats
                    elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
                        latencies = masterChangedLats
                    else:
                        latencies = None
                    if latencies is not None:
                        # First 23 characters of the first field hold the
                        # timestamp with millisecond precision. It is converted
                        # to local-time epoch milliseconds with time.mktime
                        # rather than the platform-specific '%s' directive.
                        eventTime = datetime.datetime.strptime( line.split()[ 0 ][ : 23 ],
                                                                eventTimeFormat )
                        eventTimeMs = ( time.mktime( eventTime.timetuple() )
                                        + eventTime.microsecond / 1e6 ) * 1000.0
                        latencies.append( eventTimeMs - time1 )
                    if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
                        break

            # Use the earliest deactivation seen by any node; None marks
            # "no event found" instead of raising on an empty list.
            if instanceDeactivatedLats:
                instanceDeactivated = min( instanceDeactivatedLats )
            else:
                instanceDeactivated = None

            eventLatCheck = bool( masterChangedLats ) and instanceDeactivated is not None
            if not eventLatCheck:
                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )

            main.log.info( "Obtain latency from tshark output." )
            roleRequestLat = None
            with open( main.tsharkResultPath, "r" ) as resultFile:
                resultText = resultFile.readline()
            main.log.info( "Capture result: " + resultText )
            resultText = resultText.split()
            tsharkLatCheck = len( resultText ) > 1
            if tsharkLatCheck:
                # presumably the second field is the capture time in epoch
                # seconds - TODO confirm against the tshark output format
                roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
            else:
                main.log.error( "Tshark output file is NOT as expected." )
                main.log.warn( "Latency was NOT obtained from tshark successfully." )

            validDataCheck = False
            if tsharkLatCheck and instanceDeactivated is not None:
                deactivationToRequest = roleRequestLat - instanceDeactivated
                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
                main.log.info( "roleRequestLat - instanceDeactivated: " + str( deactivationToRequest ) )
                if iteration >= main.warmUp:
                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
                    validDataCheck = deactivationToRequest >= 0 and instanceDeactivated >= 0
                    if not validDataCheck:
                        main.log.warn( "Data are NOT valid." )

            if eventLatCheck and tsharkLatCheck and validDataCheck:
                main.log.info( "Saving data..." )
                main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
                main.latencyData[ 'deactivation_to_role_request' ].append(
                    roleRequestLat - instanceDeactivated )

            # Restart ONOS node
            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
            startResult = main.ONOSbench.onosStart( ip_address )

            if not startResult:
                main.log.error( "ONOS nodes NOT successfully started." )
                criticalError = True

            # Check if ONOS is up yet
            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
            upResult = main.ONOSbench.isup( ip_address )

            if not upResult:
                main.log.error( "ONOS did NOT successfully restart." )
                criticalError = True

            # Restart CLI
            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
            # NOTE(review): the CLI is looked up through active() while the
            # killed node is still flagged inactive - verify this resolves to
            # the intended node.
            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = True

            if not cliResult:
                main.log.error( "ONOS CLI did NOT successfully restart." )
                criticalError = True

            main.log.info( "Checking ONOS nodes." )
            nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                           False,
                                           sleep=1,
                                           attempts=3 )

            if not nodeResults:
                main.log.error( "Nodes check NOT successful." )
                criticalError = True

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )

            # Warm-up iterations never count as failures
            iterationPassed = mastershipCheck and eventLatCheck and tsharkLatCheck and validDataCheck
            if not iterationPassed and iteration >= main.warmUp:
                main.failCounter += 1
                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
                main.log.error( str( main.failPercent * 100 )
                                + "% or more of data is invalid, or a critical error has occurred." )
                passingResult = False
                break

        utilities.assert_equals( expect=True, actual=passingResult,
                                 onpass="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering was successful.",
                                 onfail="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering FAILED. Stopping test." )
        if not passingResult:
            main.cleanAndExit()

    def CASE3( self, main ):
        """
        Write results to database file.
        Omit this case if you don't want to write to database.

        Appends one comma-separated line holding the controller count, the
        literal 'baremetal1', the second whitespace-separated token of
        main.commit, and the average and standard deviation of every series
        in main.latencyData.
        """
        import numpy
        result = { 'avg': {}, 'stddev': {} }

        for i in main.latencyData:
            result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
            result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )

        main.log.info( "result: " + str( result ) )

        fields = [ str( main.Cluster.numCtrls ),
                   "'baremetal1'",
                   "'" + main.commit.split()[ 1 ] + "'" ]
        for i in result:
            for j in result[ i ]:
                fields.append( str( result[ i ][ j ] ) )

        with open( main.dbFileName, "a" ) as dbFile:
            dbFile.write( ",".join( fields ) + "\n" )