blob: 1f9f32df3ded4cd23e2cf3f78f9dbf7c86d8e2f2 [file] [log] [blame]
"""
Copyright 2017 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
21
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    TestON test cases that check whether ONOS can handle a minority of its
    nodes being stopped, one node per iteration.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored in `main.HA` (created in CASE1);
    CASE1, CASE61, CASE7 and CASE13 contain logic specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Bind stepResult before the try block: if anything below raises and
        # envSetupException() returns instead of aborting the run, the
        # conclusion call must see a failure value rather than an unbound
        # local variable.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAcontinuousStopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Advances main.nodeIndex to the next running node (wrapping around),
        stops ONOS on that node, marks it inactive, and then verifies the
        remaining active nodes with main.HA.nodesCheck. If that check keeps
        failing, the test is cleaned up and exited. main.killCount is
        incremented at the end of each successful iteration.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: the counters have not been created yet
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = 0
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS nodes to kill, as a list of controller objects taken from
        # main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Call onosStop() unconditionally; folding it into the "and"
            # expression would skip the stop once an earlier one failed,
            # while the node would still be marked inactive below.
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the components that are not ACTIVE on each remaining node
            # before giving up on the test
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared post-event state check and then verifies that every
        active node reports the same election-app leader, and that this
        leader is not one of the nodes listed in main.kill.
        """
        # main.kill only exists once CASE61 has run
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Record what each node reported to make the mismatch debuggable
            main.log.debug( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Appends the "Restart" label and main.restartTime to the csv plotting
        lists before delegating to the shared clean up.
        """
        # NOTE(review): main.restartTime is not set anywhere in this class;
        #               presumably main.HA.bringUpStoppedNode sets it - confirm
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )