blob: 4a952066c2f7c0e38e360ff8b31922aa4ffc9d56 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
21
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAkillNodes:
    """
    TestON test class that kills a minority of the ONOS nodes and checks
    that the cluster keeps working.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared HA helper stored on main.HA ( created in CASE1 );
    CASE61 and CASE7 hold the logic specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            # NOTE( review ): stepResult and cellName are only bound if the
            # try block succeeded; this presumably relies on
            # envSetupException ending the test - confirm
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAkillNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions,
                                               main.HA.copyBackupConfig,
                                               main.ONOSbench.preventAutoRespawn ],
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp( serviceClean=True )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Logs the result of checkLogs for every active node, picks the nodes
        to kill ( the first running node, plus a second one when the cluster
        has more than 3 nodes ), kills them, marks them inactive and stores
        them in main.kill. Then retries main.HA.nodesCheck on the remaining
        active nodes and stops the test if that check fails.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer on both Python 2 and Python 3,
        # so it can be used to index runningNodes below
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        # ONOS nodes to kill, as node objects from main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Call onosKill first: with `killResults and onosKill( ... )` an
            # earlier failure would short-circuit and leave later nodes
            # running while still marking them inactive
            killResults = ctrl.onosKill( ctrl.ipAddress ) and killResults
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of each surviving node before
            # giving up, to help debugging
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterEvent, then asks every active node for
        the election-app leader. The step fails if any node returns
        main.FALSE or None, reports one of the nodes in main.kill as leader,
        or if the nodes do not all report the same leader.
        """
        # main.kill is set by CASE61; default to an empty list so this case
        # can also run without it
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # The leader is compared against the ipAddress of each killed node
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each node reported to make the mismatch debuggable
            main.log.debug( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Adds the "Restart" label and main.restartTime to the csv plotting
        data before calling main.HA.cleanUp.
        """
        # NOTE( review ): main.restartTime is not set anywhere in this file;
        # presumably an earlier case sets it through main.HA - confirm
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )