"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    HA test suite that repeatedly stops one ONOS node at a time.

    Every CASE method receives the framework's `main` object. Most cases
    delegate to the shared helper stored in `main.HA` (created in CASE1);
    CASE61 and CASE7 carry the logic specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below always has a value,
        # even when the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            # Convert before comparing: maxNodes is passed through int()
            # anyway, and comparing a string against an int never clamps.
            maxNodes = main.ONOSbench.maxNodes
            if maxNodes and int( maxNodes ) < main.numCtrls:
                main.numCtrls = int( maxNodes )
            main.maxNodes = main.numCtrls
            stepResult = main.testSetUp.envSetup( hasNode=True )
        except Exception as e:
            # NOTE(review): presumably envSetupException aborts the run;
            # if it returns, cellName may be unset below - confirm.
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAcontinuousStopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.customizeOnosGenPartitions,
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main, True, False )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Advances main.nodeIndex to the next node (wrapping around), stops
        that ONOS node, removes it from main.activeNodes and verifies the
        remaining nodes. Increments main.killCount on success; cleans up
        and exits the test if the node check fails.
        """
        # Check main itself before dereferencing any of its attributes
        assert main, "main not defined"
        assert main.numCtrls, "main.numCtrls not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: the counters have not been created yet
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = 0
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for node in main.nodes:
            main.log.debug( "Checking logs for errors on " + node.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( node.ip_address ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.nodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        main.kill = [ main.nodeIndex ]  # ONOS node to kill, listed by index in main.nodes

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for i in main.kill:
            # Always attempt the stop: the node is dropped from activeNodes
            # regardless, so an earlier failure must not skip later nodes.
            stopped = main.ONOSbench.onosStop( main.nodes[ i ].ip_address )
            killResults = killResults and stopped
            main.activeNodes.remove( i )
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.activeNodes ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every remaining node, then abort
            for i in main.activeNodes:
                cli = main.CLIs[ i ]
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    cli.name,
                    cli.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared post-failure state check, then verifies that every
        active node reports the same election-app leader and that the
        leader is not one of the nodes listed in main.kill.
        """
        # main.kill only exists once CASE61 has run
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterONOS( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        restarted = [ main.nodes[ i ].ip_address for i in main.kill ]
        leaderResult = main.TRUE

        for i in main.activeNodes:
            cli = main.CLIs[ i ]
            leaderN = cli.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( cli.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( cli.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Record what each active node reported to ease debugging
            main.log.debug( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # Record the restart time for the csv plot before tearing down
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )