blob: 026be2d4ed91b186a02df96dde1a0ddd3ddda7c1 [file] [log] [blame]
"""
Copyright 2017 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""

"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    TestON test suite that repeatedly stops one ONOS node at a time and
    checks the cluster afterwards.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored in `main.HA` (created in CASE1);
    CASE61 and CASE7 carry the logic that is specific to this test.

    NOTE: `utilities` is not imported in this file; it is expected to be
    made available by the TestON framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence, as documented by the original authors (the steps
        themselves are carried out by the ONOSSetup / HA helpers):
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump

        Side effects on `main`: sets HAlabels, HAdata, testSetUp, HA, apps
        and numCtrls.
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()

        # Start from a failing result so that, if anything below raises before
        # envSetup() returns and the exception handler comes back, the
        # conclusion step reports a failure instead of hitting a NameError.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evn" is the spelling of the helper's method name as used by
        #       this file; it must match the ONOSSetup API, so it is kept.
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAcontinuousStopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the next running node in round-robin order, stops it, marks it
        inactive and then verifies the remaining active nodes. If the node
        check still fails after the retries, the test is aborted through
        main.cleanAndExit().

        State kept on `main` between iterations:
            nodeIndex - index (into main.Cluster.runningNodes) of the node
                        stopped last
            killCount - iteration counter, used in the case title
            kill      - list of the node objects stopped in this iteration
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: the counters have not been created yet
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = 0
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS node(s) to kill, taken from main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Issue the stop before folding it into the aggregate: chaining
            # the call directly behind `and` would skip the stop for every
            # node after the first failure while still marking it inactive.
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump whatever is not ACTIVE on each remaining node to help debugging
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared state check and then verifies, on every active node,
        that the election test app reports a leader, that this leader is not
        one of the nodes listed in main.kill, and that all active nodes agree
        on the same leader.
        """
        # main.kill only exists once CASE61 has run
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node answered, in iteration order
            main.log.debug( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Records main.restartTime under the "Restart" label for the csv
        plotting data before delegating to the shared clean up.
        NOTE( review ): main.restartTime is not set anywhere in this file;
        presumably the CASE62 helper provides it - confirm.
        """
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )