"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

TestON is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
( at your option ) any later version.

TestON is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
class SCPFmastershipFailoverLat:
    """
    Test cases measuring ONOS mastership failover latency.

    CASE0 reads the test parameters, CASE1 sets up ONOS and Mininet,
    CASE2 repeatedly kills an ONOS node and collects latency samples, and
    CASE3 appends the aggregated results to the database file.
    """

    def __init__( self ):
        self.default = ''

    def CASE0( self, main ):
        """
        - GIT
        - BUILDING ONOS
            Pull specific ONOS branch, then build ONOS on ONOS Bench.
            This step is usually skipped, because in a Jenkins driven automated
            test env we want Jenkins jobs to pull&build for flexibility to handle
            different versions of ONOS.
        - Construct tests variables

        Copies every setting the later cases use from main.params onto main,
        runs the environment setup and creates an empty database file.
        Any exception raised while doing so is handed to
        main.testSetUp.envSetupException; the outcome is always reported
        through main.testSetUp.envSetupConclusion.
        """
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        stepResult = main.FALSE
        try:
            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
            main.tsharkInterface = main.params[ 'TSHARK' ][ 'tsharkInterface' ]
            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
            main.maxScale = int( main.params[ 'max' ] )
            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
            main.debug = main.params[ 'TEST' ][ 'debug' ]
            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )

            # The parameter is text; only the exact string "True" enables debug
            main.debug = main.debug == "True"

            stepResult = main.testSetUp.envSetup()
            main.log.info( "Create Database file " + main.dbFileName )
            # Create ( or truncate ) the results file; CASE3 appends to it later
            with open( main.dbFileName, "w+" ):
                pass

        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.envSetupConclusion( stepResult )

    def CASE1( self, main ):
        """
        Clean up test environment and set up.

        Runs the ONOS set up for the current cluster, cleans up Mininet and
        starts a new Mininet topology of type main.topologyType with
        main.topologySwitchCount switches, pointing it at every active
        controller. Asserts that Mininet started.
        """
        main.testSetUp.ONOSSetUp( main.Cluster, True,
                                  cellName=main.cellName, killRemoveMax=False )
        try:
            from tests.dependencies.utils import Utils
        except ImportError:
            main.log.error( "Utils not found exiting the test" )
            main.exit()
        # Only create a Utils object if main does not carry one already
        try:
            main.Utils
        except ( NameError, AttributeError ):
            main.Utils = Utils()
        main.Utils.mininetCleanup( main.Mininet1 )

        main.step( "Starting up Mininet from command." )

        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
        for ctrl in main.Cluster.active():
            mnCmd += " --controller remote,ip=" + ctrl.ipAddress

        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )

        utilities.assert_equals( expect=main.TRUE,
                                 actual=stepResult,
                                 onpass="Mininet was set up correctly.",
                                 onfail="Mininet was NOT set up correctly." )

    def CASE2( self, main ):
        """
        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
        ( tshark time ), then bring the node back up.

        Runs main.warmUp + main.sampleSize iterations. In each iteration the
        switch mastership is assigned, a tshark capture is started, the node
        is killed and stopped, and the latencies ( in milliseconds relative
        to the moment just before the kill ) are read from the 'events'
        output of the remaining nodes and from the tshark result file.
        Valid samples taken after the warm-up are stored in main.latencyData.
        The loop stops early, and the test exits, once the share of failed
        iterations reaches main.failPercent or a critical error occurs.
        """
        import time
        import datetime
        from tests.HA.dependencies.HA import HA

        main.HA = HA()

        main.latencyData = { 'kill_to_deactivation': [],
                             'deactivation_to_role_request': [] }

        main.failCounter = 0
        passingResult = True
        criticalError = False

        main.step( "Gathering data starting with "
                   + str( main.warmUp )
                   + " warm ups and a sample size of "
                   + str( main.sampleSize ) )

        for iteration in range( 0, main.sampleSize + main.warmUp ):

            main.log.info( "==========================================" )
            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )

            # NOTE(review): the node killed below is always the first active
            # node, while the log messages and the runningNodes bookkeeping use
            # main.nodeNumToKill - confirm both always refer to the same node.
            ip_address = main.Cluster.active( 0 ).ipAddress
            strNodeNumToKill = str( main.nodeNumToKill )

            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )
            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address

            if not mastershipCheck:
                main.log.warn( "Mastership is NOT as expected." )

            # Empty the capture file so a stale result is never read back
            with open( main.tsharkResultPath, "w" ) as tshark:
                tshark.write( "" )
            main.log.info( "Starting tshark capture." )
            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath, interface=main.tsharkInterface )
            # Reference time in milliseconds; all latencies are relative to it
            time1 = time.time() * 1000.0

            # Kill an ONOS node
            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
            killresult = main.ONOSbench.onosKill( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False

            # Stop an ONOS node
            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
            stopresult = main.ONOSbench.onosStop( ip_address )

            # Both commands must succeed; two equal failure results do not count
            killStopResult = bool( killresult and stopresult )

            if not killStopResult:
                main.log.error( "ONOS node was NOT successfully stopped and killed." )
                criticalError = True

            time.sleep( main.failoverSleep )

            # Stop tshark and get times
            main.log.info( "Stopping tshark." )
            main.ONOSbench.tsharkStop()

            masterChangedLats = []
            instanceDeactivatedLats = []

            main.log.info( "Obtaining latencies from 'events' output." )
            # One node is marked inactive above, so one fewer CLI is queried
            for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
                # Walk from the newest line back until an event older than the kill
                for line in reversed( eventOutput ):
                    # Blank or truncated lines cannot hold a timestamp
                    if len( line ) < 20:
                        continue
                    # A '-' at index 19 means the timestamp has no fractional
                    # part, so a zero fraction is appended for parsing
                    timestamp = line[ :23 ] if line[ 19 ] != '-' else line[ :19 ] + '.000'
                    try:
                        parsedTime = datetime.datetime.strptime( timestamp, "%Y-%m-%dT%H:%M:%S.%f" )
                    except ValueError:
                        # The line does not start with a timestamp; ignore it
                        continue
                    # Local time to epoch milliseconds, without relying on the
                    # platform-specific '%s' strftime directive
                    timestamp = ( time.mktime( parsedTime.timetuple() )
                                  + parsedTime.microsecond / 1e6 ) * 1000.0
                    if timestamp - time1 >= 0:
                        if "INSTANCE_DEACTIVATED" in line:
                            instanceDeactivatedLats.append( timestamp - time1 )
                        elif "MASTER_CHANGED" in line:
                            masterChangedLats.append( timestamp - time1 )
                    else:
                        break

            if instanceDeactivatedLats and masterChangedLats:
                # The earliest report across all nodes is the latency of record
                instanceDeactivated = min( instanceDeactivatedLats )
                masterChanged = min( masterChangedLats )
                eventLatCheck = True
            else:
                eventLatCheck = False
                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )

            main.log.info( "Obtain latency from tshark output." )
            tsharkLatCheck = True
            with open( main.tsharkResultPath, "r" ) as resultFile:
                resultText = resultFile.readline()
            main.log.info( "Capture result: " + resultText )
            resultText = resultText.split()
            try:
                # The second field is read as a time in seconds
                roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
            except ( IndexError, ValueError ):
                main.log.error( "Tshark output file is NOT as expected." )
                tsharkLatCheck = False
            if not tsharkLatCheck:
                main.log.warn( "Latency was NOT obtained from tshark successfully." )

            validDataCheck = False
            if eventLatCheck and tsharkLatCheck:
                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
                main.log.info( "masterChanged: " + str( masterChanged ) )
                main.log.info( "roleRequestLat: " + str( roleRequestLat ) )
                if iteration >= main.warmUp:
                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
                    validDataCheck = roleRequestLat >= 0 and \
                                     instanceDeactivated >= 0 and \
                                     masterChanged >= 0
                    if not validDataCheck:
                        main.log.warn( "Data are NOT valid." )

            if eventLatCheck and tsharkLatCheck and validDataCheck:
                main.log.info( "Saving data..." )
                if roleRequestLat >= instanceDeactivated:
                    main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
                    main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )
                else:
                    # Role request seen before the deactivation event: record
                    # it as the first interval and a zero second interval
                    main.latencyData[ 'kill_to_deactivation' ].append( roleRequestLat )
                    main.latencyData[ 'deactivation_to_role_request' ].append( 0 )
                main.log.info( "kill_to_deactivation: " + str( main.latencyData[ 'kill_to_deactivation' ][ -1 ] ) )
                main.log.info( "deactivation_to_role_request: " + str( main.latencyData[ 'deactivation_to_role_request' ][ -1 ] ) )

            # Restart ONOS node
            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
            startResult = main.ONOSbench.onosStart( ip_address )

            if not startResult:
                main.log.error( "ONOS nodes NOT successfully started." )
                criticalError = True

            # Check if ONOS is up yet
            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
            upResult = main.ONOSbench.isup( ip_address )

            if not upResult:
                main.log.error( "ONOS did NOT successfully restart." )
                criticalError = True

            # Restart CLI
            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
            # NOTE(review): the node is still flagged inactive at this point, so
            # active( nodeNumToKill ) may resolve to a different node than the
            # one just restarted - verify against Cluster.active().
            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = True

            if not cliResult:
                main.log.error( "ONOS CLI did NOT successfully restart." )
                criticalError = True

            main.log.info( "Checking ONOS nodes." )
            nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                           False,
                                           sleep=5,
                                           attempts=50 )

            if not nodeResults:
                main.log.error( "Nodes check NOT successful." )
                criticalError = True

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )

            # Warm-up iterations never count as failures
            if not ( mastershipCheck and
                     eventLatCheck and
                     tsharkLatCheck and
                     validDataCheck ) and \
                     iteration >= main.warmUp:
                main.failCounter += 1
                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
                main.log.error( str( main.failPercent * 100 )
                                + "% or more of data is invalid, or a critical error has occurred." )
                passingResult = False
                break

        utilities.assert_equals( expect=True, actual=passingResult,
                                 onpass="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering was successful.",
                                 onfail="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering FAILED. Stopping test." )
        if not passingResult:
            main.cleanAndExit()

    def CASE3( self, main ):
        """
        Write results to database file.
        Omit this case if you don't want to write to database.

        Appends one comma separated line to main.dbFileName holding the
        controller count, the literal 'baremetal1', the second word of
        main.commit, and then the average and standard deviation of every
        series in main.latencyData.
        """
        import numpy
        result = { 'avg': {}, 'stddev': {} }

        for i in main.latencyData:
            result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
            result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )

        main.log.info( "result: " + str( result ) )
        with open( main.dbFileName, "a" ) as dbFile:
            strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
            strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
            # NOTE(review): the column order follows dict iteration order -
            # confirm the consumer of this file expects exactly that order
            for i in result:
                for j in result[ i ]:
                    strToWrite += "," + str( result[ i ][ j ] )
            strToWrite += "\n"
            dbFile.write( strToWrite )