"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    TestON test cases that stop one ONOS node per iteration and then check
    the state of the cluster.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored in `main.HA` ( created in CASE1 );
    CASE61 and CASE7 contain the logic specific to this test.

    NOTE: `utilities` is not imported in this file; it is expected to be
    provided by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Start from a failing result so the conclusion step below still has a
        # value to report if the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Advances main.nodeIndex to the next entry of main.Cluster.runningNodes
        ( wrapping around ), stops that node, marks it inactive, runs the
        cluster nodes check and finally increments main.killCount. If
        main.nodeIndex / main.killCount do not exist yet they are initialised
        to 0 and 1 respectively.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: the counters have not been created yet
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = 0
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS node to kill, taken by index from main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Call onosStop before combining results so that an earlier failure
            # cannot short-circuit the stop of a node we then mark inactive.
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every active node, then abort
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        Bring up the stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared state check and then verifies that every active node
        reports the same election-app leader, and that this leader is not one
        of the nodes listed in main.kill.
        """
        try:
            main.kill
        except AttributeError:
            # No failure case has run yet, so nothing was stopped
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the nodes that were stopped; none of them may be leader
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        # Every active node must report exactly one, identical, leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # TODO: print the list
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Records main.restartTime under the "Restart" label in the csv
        plotting lists before delegating to the shared clean up.
        """
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )