blob: b011fcee46246243ec959e6833242f92cd044ab1 [file] [log] [blame]
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAkillNodes:
    """
    Test cases that kill a minority of the ONOS nodes and then check
    the state of the remaining cluster.

    Every CASE method takes the test driver object `main` supplied by
    the caller. Most cases simply delegate to the shared helper object
    stored in main.HA, which is created in CASE1.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Bind the result up front: if anything in the try block raises
        # before envSetup() returns, the conclusion call below must still
        # receive a value instead of failing with a NameError.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            # Never use more controllers than the bench reports available
            if main.ONOSbench.maxNodes and\
                    main.ONOSbench.maxNodes < main.numCtrls:
                main.numCtrls = int( main.ONOSbench.maxNodes )
            main.maxNodes = main.numCtrls
            stepResult = main.testSetUp.envSetup( hasNode=True )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAkillNodes" )

        main.step( "Make sure ONOS service doesn't automatically respawn" )
        handle = main.ONOSbench.handle
        # Raw strings: "\$" is a regex for a literal dollar sign
        handle.sendline( "sed -i -e 's/^respawn$/#respawn/g' tools/package/init/onos.conf" )
        handle.expect( r"\$" )  # $ from the command
        handle.sendline( "sed -i -e 's/^Restart=always/Restart=no/g' tools/package/init/onos.service" )
        handle.expect( r"\$" )  # $ from the command
        handle.expect( r"\$" )  # $ from the prompt

        main.testSetUp.ONOSSetUp( main.Mininet1, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.customizeOnosGenPartitions,
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main, True, False )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Kills the nodes listed in main.kill (indexes into main.nodes),
        removes them from main.activeNodes, then verifies the remaining
        nodes. If the node check fails, the test is cleaned up and exited.
        """
        # Check main itself first so a missing driver object reports the
        # intended assertion message.
        assert main, "main not defined"
        assert main.numCtrls, "main.numCtrls not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for node in main.nodes:
            main.log.debug( "Checking logs for errors on " + node.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( node.ip_address ) )

        n = len( main.nodes )  # Number of nodes
        # Floor division keeps p an int on both Python 2 and Python 3;
        # it is used below to build a list index.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.kill = [ 0 ]  # ONOS node to kill, listed by index in main.nodes
        if n > 3:
            main.kill.append( p - 1 )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Kill " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for i in main.kill:
            # Issue the kill before combining results so that one failed
            # kill does not short-circuit the kills of the remaining nodes.
            killed = main.ONOSbench.onosKill( main.nodes[ i ].ip_address )
            killResults = killResults and killed
            main.activeNodes.remove( i )
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.activeNodes ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every surviving node
            # before giving up on the test run.
            for i in main.activeNodes:
                cli = main.CLIs[ i ]
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    cli.name,
                    cli.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE62( self, main ):
        """
        Bring up the stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the
        election-app leader and fails if any node reports an error, no
        leader, a leader that was one of the killed nodes, or if the
        nodes disagree about who the leader is.
        """
        # main.kill is only set by CASE61; default to "nothing killed"
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterONOS( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were killed
        restarted = [ main.nodes[ i ].ip_address for i in main.kill ]
        leaderResult = main.TRUE

        for i in main.activeNodes:
            cli = main.CLIs[ i ]
            leaderN = cli.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( cli.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( cli.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node reported to ease debugging
            main.log.debug( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # NOTE(review): main.restartTime is not assigned anywhere in this
        # class; presumably set by main.HA.bringUpStoppedNode - confirm.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )