blob: 843878022f25a23962a3f829b7d209a06af994e4 [file] [log] [blame]
"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    HA test that repeatedly stops one ONOS node at a time and then checks
    the state of the cluster.

    Every CASE method receives the test framework's `main` object.
    CASE1 stores the shared HA helper on `main.HA`; most other cases
    simply delegate to a method of that helper.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Bind the result up front so the conclusion step below reports a
        # failure, instead of raising NameError, if the setup raises before
        # envSetup() returns.
        # NOTE(review): envSetupException may never return - confirm.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Stops the next ONOS node in round-robin order ( tracked in
        main.nodeIndex ), marks it inactive, and then verifies the
        cluster with main.Cluster.nodesCheck. main.killCount counts
        the iterations of this case. The stopped node is recorded in
        main.kill.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        # Use an explicit check rather than assert + except: asserts are
        # removed when python runs with -O, which would skip the defaulting.
        # A value of None is treated the same as a missing attribute.
        if ( getattr( main, "nodeIndex", None ) is None or
             getattr( main, "killCount", None ) is None ):
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            # -1 so that the increment below selects index 0
            main.nodeIndex = -1
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS node to kill, selected by index in main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Issue the stop before combining results, so an earlier failure
            # cannot short-circuit the stop of a node that is then marked
            # inactive anyway.
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )
        main.Cluster.reset()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the components that are not ACTIVE on every remaining
            # node before giving up on the test run
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        Bring up the stopped nodes
        """
        main.HA.bringUpStoppedNodes( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared post-event state check and then verifies that
        every active node reports the same, valid election-app leader.
        """
        # main.kill is set by CASE61; default to "nothing was killed"
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
        # All active nodes must agree on exactly one leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( "Leaders reported by the active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # NOTE(review): main.restartTime is not set anywhere in this class;
        # presumably an earlier case ( e.g. the HA helper ) records it - confirm.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )