blob: b2ae8c5ec7ed4c43fdbb641b5457e72627839d65 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
21
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAkillNodes:
    """
    TestON test class that kills a minority of ONOS nodes and then checks
    the state of the cluster.

    Each CASE<n> method is one test case and receives the TestON `main`
    object. Most cases delegate directly to the shared helper that CASE1
    stores in main.HA; CASE1, CASE61, CASE7 and CASE13 carry logic that is
    specific to this test.

    NOTE(review): `utilities` is referenced as a global but is not defined
    in this file - presumably injected by the TestON framework; confirm.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so that an exception raised before envSetup()
        # returns is reported through evnSetupConclusion() instead of
        # surfacing as a NameError on an unbound stepResult.
        stepResult = main.FALSE
        try:
            from dependencies.Cluster import Cluster
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            main.Cluster = Cluster( main.ONOScell.nodes )
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( main.Cluster, hasNode=True )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAkillNodes" )

        main.step( "Make sure ONOS service doesn't automatically respawn" )
        handle = main.Cluster.controllers[ 0 ].Bench.handle
        # Raw strings keep the regex escape "\$" intact without relying on
        # Python passing an unknown string escape through unchanged.
        handle.sendline( "sed -i -e 's/^respawn$/#respawn/g' tools/package/init/onos.conf" )
        handle.expect( r"\$" )  # $ from the command
        handle.sendline( "sed -i -e 's/^Restart=always/Restart=no/g' tools/package/init/onos.service" )
        handle.expect( r"\$" )  # $ from the command
        handle.expect( r"\$" )  # $ from the prompt

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.customizeOnosGenPartitions,
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Logs the result of checkLogs() for every active controller, selects
        the nodes to kill (stored in main.kill), kills them and marks them
        inactive, then retries main.HA.nodesCheck on the remaining active
        nodes. If that check never succeeds the test is cleaned up and
        exited.
        """
        # Check `main` itself before dereferencing any of its attributes.
        assert main, "main not defined"
        assert main.numCtrls, "main.numCtrls not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.controllers )  # Number of nodes
        # Floor division keeps p an int so that p - 1 is a valid list index
        # (n=5 -> index 3, n=7 -> index 4).
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        # Controller objects (not indexes) of the ONOS nodes to kill
        main.kill = [ main.Cluster.controllers[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.controllers[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Killing nodes: " + str( main.kill ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Always attempt the kill: folding the call into the `and`
            # expression would skip it after an earlier failure while the
            # node was still being marked inactive below.
            killed = ctrl.onosKill( ctrl.ipAddress )
            killResults = killResults and killed
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every remaining node before
            # aborting, to help diagnose the failed check.
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterONOS, then asks every active node for
        the election-app leader. The step fails if any node reports an
        error or no leader, if the reported leader is one of the nodes in
        main.kill, or if the nodes do not all report the same leader.
        """
        # main.kill is set by CASE61; fall back to "nothing was killed" when
        # this case runs without it.
        main.kill = getattr( main, "kill", [] )

        main.HA.checkStateAfterONOS( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node reported to ease debugging
            main.log.debug( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Appends the "Restart" label and main.restartTime to the csv data
        lists created in CASE1, then delegates to main.HA.cleanUp.
        """
        # NOTE(review): main.restartTime is not set anywhere in this file -
        # presumably by the recovery case via main.HA; confirm.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )