"""
Copyright 2017 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""

class SCPFmastershipFailoverLat:
    """
    Mastership failover latency test.

    Case 0 reads the test parameters and prepares the environment, case 1
    sets up ONOS and starts Mininet, case 2 repeatedly kills an ONOS node and
    records the failover latencies, and case 3 appends the aggregated results
    to the database file.
    """

    def __init__( self ):
        """
        Initialise the placeholder `default` attribute to an empty string.
        """
        self.default = ''

    def CASE0( self, main ):
        """
        - GIT
        - BUILDING ONOS
            Pull specific ONOS branch, then Build ONOS on ONOS Bench.
            This step is usually skipped. Because in a Jenkins driven automated
            test env. We want Jenkins jobs to pull&build for flexibility to handle
            different versions of ONOS.
        - Construct tests variables

        Every value read from main.params is stored as an attribute of main so
        that the later cases can use it. An empty database file is created
        (or truncated) at main.dbFileName.
        """
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        stepResult = main.FALSE
        try:
            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
            main.maxScale = int( main.params[ 'max' ] )
            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
            # Only the exact string "True" enables debug mode.
            main.debug = main.params[ 'TEST' ][ 'debug' ] == "True"
            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )

            stepResult = main.testSetUp.envSetup()
            main.log.info( "Create Database file " + main.dbFileName )
            # Opening in "w+" mode is enough to create/truncate the file.
            with open( main.dbFileName, "w+" ):
                pass

        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

    def CASE1( self, main ):
        """
        Clean up test environment and set up.

        Runs the ONOS set up, cleans up Mininet, then starts Mininet with the
        configured topology and one remote controller entry per active
        cluster node. The outcome of starting Mininet is asserted.
        """
        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
                                  cellName=main.cellName, killRemoveMax=False )
        try:
            from tests.dependencies.utils import Utils
        except ImportError:
            main.log.error( "Utils not found exiting the test" )
            main.exit()
        # Create the shared Utils instance only if an earlier case has not
        # already attached one to main.
        try:
            main.Utils
        except ( NameError, AttributeError ):
            main.Utils = Utils()
        main.Utils.mininetCleanup( main.Mininet1 )

        main.step( "Starting up Mininet from command." )

        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
        for ctrl in main.Cluster.active():
            mnCmd += " --controller remote,ip=" + ctrl.ipAddress

        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )

        utilities.assert_equals( expect=main.TRUE,
                                 actual=stepResult,
                                 onpass="Mininet was set up correctly.",
                                 onfail="Mininet was NOT set up correctly." )

    def CASE2( self, main ):
        """
        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
        (tshark time), then bring the node back up.

        The measurement is repeated main.warmUp + main.sampleSize times; data
        from warm-up iterations is not recorded. Valid samples are stored in
        main.latencyData. The test is cleaned up and exited when too many
        iterations fail or a critical error occurs.
        """
        import time
        import datetime
        from tests.HA.dependencies.HA import HA

        main.HA = HA()

        main.latencyData = { 'kill_to_deactivation': [],
                             'deactivation_to_role_request': [] }

        main.failCounter = 0
        passingResult = True
        criticalError = False

        main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )

        for iteration in range( 0, main.sampleSize + main.warmUp ):

            main.log.info( "==========================================" )
            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )

            # NOTE(review): the node that gets killed is the first active
            # node, while the log messages and the active-flag bookkeeping
            # below use main.nodeNumToKill. These agree only if that index
            # refers to the same node - confirm against the params file.
            ip_address = main.Cluster.active( 0 ).ipAddress
            strNodeNumToKill = str( main.nodeNumToKill )

            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )
            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address

            if not mastershipCheck:
                main.log.warn( "Mastership is NOT as expected." )

            # Empty the capture file so only this iteration's packets are read.
            with open( main.tsharkResultPath, "w" ) as tshark:
                tshark.write( "" )
            main.log.info( "Starting tshark capture." )
            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
            # Reference time in epoch milliseconds; all latencies are relative to it.
            time1 = time.time() * 1000.0

            # Kill an ONOS node
            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
            killresult = main.ONOSbench.onosKill( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False

            # Stop an ONOS node
            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
            stopresult = main.ONOSbench.onosStop( ip_address )

            # NOTE(review): this only checks that both calls returned the same
            # value, so two identical failure results also pass - confirm
            # whether both results should instead be compared to main.TRUE.
            killStopResult = stopresult == killresult

            if not killStopResult:
                main.log.error( "ONOS node was NOT successfully stopped and killed." )
                criticalError = True

            time.sleep( main.failoverSleep )

            # Stop tshark and get times
            main.log.info( "Stopping tshark." )
            main.ONOSbench.tsharkStop()

            masterChangedLats = []
            instanceDeactivatedLats = []

            main.log.info( "Obtaining latencies from 'events' output." )
            for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
                # Walk the output backwards so the most recent matching events
                # are found first; at most one latency of each kind is taken
                # per CLI.
                for line in reversed( eventOutput ):
                    target = None
                    if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
                        target = instanceDeactivatedLats
                    elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
                        target = masterChangedLats
                    if target is not None:
                        # The first field of the line is the event timestamp.
                        # It is converted to epoch milliseconds as local time
                        # via time.mktime rather than the platform-specific
                        # '%s' strftime directive.
                        eventTime = datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" )
                        eventMs = ( time.mktime( eventTime.timetuple() ) + eventTime.microsecond / 1e6 ) * 1000.0
                        target.append( eventMs - time1 )
                    if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
                        break

            # Use the earliest deactivation latency. The list is empty when no
            # INSTANCE_DEACTIVATED event was found (e.g. a single-node
            # cluster); treat that as a failed iteration instead of crashing.
            instanceDeactivated = min( instanceDeactivatedLats ) if instanceDeactivatedLats else None

            eventLatCheck = bool( masterChangedLats and instanceDeactivated )
            if not eventLatCheck:
                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )

            main.log.info( "Obtain latency from tshark output." )
            tsharkLatCheck = True
            roleRequestLat = None
            with open( main.tsharkResultPath, "r" ) as resultFile:
                resultText = resultFile.readline()
                main.log.info( "Capture result: " + resultText )
                fields = resultText.split()
                if len( fields ) > 1:
                    # The second field is read as a timestamp in seconds.
                    roleRequestLat = int( float( fields[ 1 ] ) * 1000.0 ) - time1
                else:
                    main.log.error( "Tshark output file is NOT as expected." )
                    tsharkLatCheck = False
            if not tsharkLatCheck:
                main.log.warn( "Latency was NOT obtained from tshark successfully." )

            validDataCheck = False
            if tsharkLatCheck and instanceDeactivated is not None:
                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
                main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
                if iteration >= main.warmUp:
                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
                    validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
                                     instanceDeactivated >= 0
                    if not validDataCheck:
                        main.log.warn( "Data are NOT valid." )

            if eventLatCheck and tsharkLatCheck and validDataCheck:
                main.log.info( "Saving data..." )
                main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
                main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )

            # Restart ONOS node
            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
            startResult = main.ONOSbench.onosStart( ip_address )

            if not startResult:
                main.log.error( "ONOS nodes NOT successfully started." )
                criticalError = True

            # Check if ONOS is up yet
            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
            upResult = main.ONOSbench.isup( ip_address )

            if not upResult:
                main.log.error( "ONOS did NOT successfully restart." )
                criticalError = True

            # Restart CLI
            # NOTE(review): the node is still flagged inactive when its CLI is
            # looked up through Cluster.active(); presumably that accessor
            # skips inactive nodes, in which case a different node's CLI is
            # restarted here - verify against the Cluster implementation.
            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = True

            if not cliResult:
                main.log.error( "ONOS CLI did NOT successfully restart." )
                criticalError = True

            main.log.info( "Checking ONOS nodes." )
            nodeResults = utilities.retry( main.HA.nodesCheck,
                                           False,
                                           args=[ main.Cluster.active() ],
                                           sleep=1,
                                           attempts=3 )

            if not nodeResults:
                main.log.error( "Nodes check NOT successful." )
                criticalError = True

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )

            # Warm-up iterations never count as failures.
            iterationPassed = ( mastershipCheck and
                                eventLatCheck and
                                tsharkLatCheck and
                                validDataCheck )
            if not iterationPassed and iteration >= main.warmUp:
                main.failCounter += 1
                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
                main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
                passingResult = False
                break

        utilities.assert_equals( expect=True, actual=passingResult,
                                 onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
                                 onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test." )
        if not passingResult:
            main.cleanAndExit()

    def CASE3( self, main ):
        """
        Write results to database file.
        Omit this case if you don't want to write to database.

        One comma-separated row is appended to main.dbFileName: the number of
        controllers, the literal 'baremetal1', the second whitespace-separated
        word of main.commit, then the average and standard deviation of every
        latency series in main.latencyData.
        """
        import numpy
        result = { 'avg': {}, 'stddev': {} }

        for metric in main.latencyData:
            result[ 'avg' ][ metric ] = numpy.average( main.latencyData[ metric ] )
            result[ 'stddev' ][ metric ] = numpy.std( main.latencyData[ metric ] )

        main.log.info( "result: " + str( result ) )

        strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
        strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
        # NOTE(review): the column order follows dict iteration order, as in
        # the original code - confirm it matches what the database expects.
        for stat in result:
            for metric in result[ stat ]:
                strToWrite += "," + str( result[ stat ][ metric ] )
        strToWrite += "\n"

        # The context manager closes the file; no explicit close is needed.
        with open( main.dbFileName, "a" ) as dbFile:
            dbFile.write( strToWrite )