blob: ecbf5cda67ca158cbb91fa47238ab2199abba632 [file] [log] [blame]
"""
Copyright 2017 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
class SCPFmastershipFailoverLat:
    """
    SCPF mastership failover latency test.

    Kills the ONOS node that is master of a switch and measures, from the
    moment of the kill: the INSTANCE_DEACTIVATED latency, the MASTER_CHANGED
    latency (both from the ONOS 'events' CLI output), and the OpenFlow role
    request latency (captured with tshark). Results are written to a
    database file in CASE3.
    """

    def __init__( self ):
        self.default = ''

    def CASE0( self, main ):
        """
        - GIT
        - BUILDING ONOS
        Pull specific ONOS branch, then Build ONOS on ONOS Bench.
        This step is usually skipped. Because in a Jenkins driven automated
        test env. We want Jenkins jobs to pull&build for flexibility to handle
        different versions of ONOS.
        - Construct tests variables
        """
        import os
        import imp
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        stepResult = main.FALSE
        try:
            # Read all test variables from the .params file; int()/float()
            # conversions raise here (and are reported) if a value is missing.
            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
            main.maxScale = int( main.params[ 'max' ] )
            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
            main.debug = main.params[ 'TEST' ][ 'debug' ]
            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )

            # .params values are strings; convert the debug flag to a bool
            main.debug = main.debug == "True"

            stepResult = main.testSetUp.envSetup()
            main.log.info( "Create Database file " + main.dbFileName )
            # Truncate (or create) the results database file for this run
            resultsDB = open( main.dbFileName, "w+" )
            resultsDB.close()

        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evnSetup" is the spelling of the TestON ONOSSetup API method
        main.testSetUp.evnSetupConclusion( stepResult )

    def CASE1( self, main ):
        """
        Clean up the test environment, set up the ONOS cluster, and start a
        Mininet topology whose switches point at every active controller.
        """
        import time
        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
                                  cellName=main.cellName, killRemoveMax=False )
        try:
            from tests.dependencies.utils import Utils
        except ImportError:
            main.log.error( "Utils not found exiting the test" )
            main.exit()
        try:
            main.Utils
        except ( NameError, AttributeError ):
            # Only construct a Utils instance the first time this case runs
            main.Utils = Utils()
        main.Utils.mininetCleanup( main.Mininet1 )

        main.step( "Starting up Mininet from command." )

        # Build the mn command line: topology plus one --controller per node
        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
        for ctrl in main.Cluster.active():
            mnCmd += " --controller remote,ip=" + ctrl.ipAddress

        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )

        utilities.assert_equals( expect=main.TRUE,
                                 actual=stepResult,
                                 onpass="Mininet was set up correctly.",
                                 onfail="Mininet was NOT set up correctly." )

    def CASE2( self, main ):
        """
        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
        (tshark time), then bring the node back up.
        """
        import time
        import datetime
        import numpy
        from tests.HA.dependencies.HA import HA

        main.HA = HA()

        # Latencies (ms) collected across iterations; written to DB in CASE3
        main.latencyData = { 'kill_to_deactivation' : [],
                             'deactivation_to_role_request' : [] }

        main.failCounter = 0
        passingResult = True
        criticalError = False

        main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )

        for iteration in range( 0, main.sampleSize + main.warmUp ):

            main.log.info( "==========================================" )
            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )

            # NOTE(review): assumes active( 0 ) is node nodeNumToKill — confirm
            ip_address = main.Cluster.active( 0 ).ipAddress
            strNodeNumToKill = str( main.nodeNumToKill )

            # Make sure the node we are about to kill is master of the switch
            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )
            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address

            if not mastershipCheck:
                main.log.warn( "Mastership is NOT as expected." )

            # Truncate the capture file, then sniff for the OFP role request
            with open( main.tsharkResultPath, "w" ) as tshark:
                tshark.write( "" )
            main.log.info( "Starting tshark capture." )
            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
            time1 = time.time() * 1000.0  # kill timestamp, epoch ms

            # Kill an ONOS node
            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
            killresult = main.ONOSbench.onosKill( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False

            # Stop an ONOS node
            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
            stopresult = main.ONOSbench.onosStop( ip_address )

            # Both operations must agree; the trailing "and True" no-op removed
            killStopResult = stopresult == killresult

            if not killStopResult:
                main.log.error( "ONOS node was NOT successfully stopped and killed." )
                criticalError = True

            time.sleep( main.failoverSleep )

            # Stop tshark and get times
            main.log.info( "Stopping tshark." )
            main.ONOSbench.tsharkStop()

            masterChangedLats = []
            instanceDeactivatedLats = []

            main.log.info( "Obtaining latencies from 'events' output." )
            # Query every remaining node (one was killed, hence numCtrls - 1)
            for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
                # Walk the event log backwards, recording at most one latency
                # of each type per node (the len( ... ) == CLInum checks).
                for line in reversed( eventOutput ):
                    if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
                        # NOTE: strftime( '%s' ) is platform dependent (POSIX only)
                        deactivateTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
                        instanceDeactivatedLats.append( deactivateTime - time1 )
                    elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
                        changedTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
                        masterChangedLats.append( changedTime - time1 )
                    if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
                        break

            # Use the earliest deactivation time seen across nodes.
            # Guard against an empty list: previously this raised IndexError
            # when no INSTANCE_DEACTIVATED event was captured on any node.
            instanceDeactivatedLats.sort()
            instanceDeactivated = instanceDeactivatedLats[ 0 ] if instanceDeactivatedLats else 0

            eventLatCheck = bool( masterChangedLats and instanceDeactivated )
            if not eventLatCheck:
                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )

            main.log.info( "Obtain latency from tshark output." )
            tsharkLatCheck = True
            with open( main.tsharkResultPath, "r" ) as resultFile:
                resultText = resultFile.readline()
                main.log.info( "Capture result: " + resultText )
                resultText = resultText.split()
                if len( resultText ) > 1:
                    # Second field is the packet timestamp in seconds
                    roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
                else:
                    main.log.error( "Tshark output file is NOT as expected." )
                    tsharkLatCheck = False
            if not tsharkLatCheck:
                main.log.warn( "Latency was NOT obtained from tshark successfully." )

            validDataCheck = False
            if tsharkLatCheck:
                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
                main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
                if iteration >= main.warmUp:
                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
                    validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
                                     instanceDeactivated >= 0
                    if not validDataCheck:
                        main.log.warn( "Data are NOT valid." )

            if eventLatCheck and tsharkLatCheck and validDataCheck:
                main.log.info( "Saving data..." )
                main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
                main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )

            # Restart ONOS node
            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
            startResult = main.ONOSbench.onosStart( ip_address )

            if not startResult:
                main.log.error( "ONOS nodes NOT successfully started." )
                criticalError = True

            # Check if ONOS is up yet
            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
            upResult = main.ONOSbench.isup( ip_address )

            if not upResult:
                main.log.error( "ONOS did NOT successfully restart." )
                criticalError = True

            # Restart CLI
            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = True

            if not cliResult:
                main.log.error( "ONOS CLI did NOT successfully restart." )
                criticalError = True

            main.log.info( "Checking ONOS nodes." )
            nodeResults = utilities.retry( main.HA.nodesCheck,
                                           False,
                                           args=[ main.Cluster.active() ],
                                           sleep=1,
                                           attempts=3 )

            if not nodeResults:
                main.log.error( "Nodes check NOT successful." )
                criticalError = True

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )

            # Count a failed iteration (warm-up iterations are not counted)
            if not ( mastershipCheck and
                     eventLatCheck and
                     tsharkLatCheck and
                     validDataCheck ) and \
                    iteration >= main.warmUp:
                main.failCounter += 1
                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
            # Abort early on a critical error or too high a failure rate
            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
                main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
                passingResult = False
                break

        utilities.assert_equals( expect=True, actual=passingResult,
                                 onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
                                 onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test.")
        if not passingResult:
            main.cleanAndExit()

    def CASE3( self, main ):
        """
        Write results to database file.
        Omit this case if you don't want to write to database.
        """
        import numpy
        result = { 'avg' : {}, 'stddev' : {} }

        # Aggregate each latency series gathered in CASE2
        for i in main.latencyData:
            result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
            result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )

        main.log.info( "result: " + str( result ) )
        # CSV row: numCtrls, 'baremetal1', '<commit hash>', then one value per
        # stat/series pair. The with-block closes the file; no close() needed.
        with open( main.dbFileName, "a" ) as dbFile:
            strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
            # NOTE(review): main.commit is not set in this file — presumably
            # assigned by the ONOSSetup env setup; verify before relying on it.
            strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
            for i in result:
                for j in result[ i ]:
                    strToWrite += "," + str( result[ i ][ j ] )
            strToWrite += "\n"
            dbFile.write( strToWrite )