"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    TestON cases for the "continuously stop ONOS nodes" HA test.

    Most cases delegate to the shared helper object stored in main.HA
    ( created in CASE1 ). CASE61 and CASE7 hold the logic specific to this
    test: stopping one running node per iteration and checking the
    election-app leader afterwards.

    `utilities` is used as a free name in CASE61 and CASE7; it is not
    imported in this file and is presumably provided by the TestON
    framework - confirm against the framework.
    NOTE( review ): the case bodies are kept flat ( no early returns, no
    use of self, main.step() calls at the top level of the body ) because
    the framework is understood to be sensitive to that layout - confirm
    before restructuring.
    """

    def __init__( self ):
        """
        Set the placeholder attribute; no other state is kept on the class.
        """
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump

        Creates main.testSetUp and main.HA, reads cellName, appString and
        num_controllers from main.params, then runs the ONOS set up with
        the HA partition hooks. If the environment set up raises, the
        failure is reported and the test is stopped through
        main.cleanAndExit().
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion below never reads an unbound
        # name when the set up raises before envSetup() returns
        stepResult = main.FALSE
        setupFailed = False
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            setupFailed = True
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        if setupFailed:
            # cellName and/or main.HA may be missing at this point, so stop
            # here the same way the ONOSSetup import failure does above
            main.log.error( "Environment setup failed. exiting the test" )
            main.cleanAndExit()
        main.HA.generateGraph( "HAcontinuousStopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        ( delegates to main.HA.assignDevices )
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        ( delegates to main.HA.assignMastership )
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        ( delegates to main.HA.assignIntents )
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        ( delegates to main.HA.pingAcrossHostIntent )
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        ( delegates to main.HA.readingState )
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Advances main.nodeIndex to the next entry of
        main.Cluster.runningNodes ( wrapping around ), stops that node with
        main.ONOSbench.onosStop, marks it inactive, stores it in main.kill
        and then retries main.Cluster.nodesCheck. When the check fails the
        non ACTIVE components of every active node are logged and the test
        is stopped. main.killCount is incremented at the end.

        main.nodeIndex and main.killCount are created ( 0 and 1 ) the first
        time this case runs.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: the counters do not exist on main yet
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = 0
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS node( s ) to kill, taken by index from main.Cluster.runningNodes
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Always issue the stop, even if an earlier stop failed, so a
            # node is never flagged inactive without a stop attempt
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        ( delegates to main.HA.bringUpStoppedNode )
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterEvent and then asks every active node
        for the election-app leader. The step fails when a node returns
        main.FALSE or None, when a node reports one of the nodes listed in
        main.kill as leader, or when the active nodes do not all report
        the same leader. main.kill is created as an empty list when no
        node has been stopped yet.
        """
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the stopped nodes; none of them may be reported as leader
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            # Include the collected answers so the disagreement can be
            # diagnosed from the log alone
            main.log.error(
                "Inconsistent view of leader for the election test app: " +
                str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        ( delegates to main.HA.compareTopo )
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        ( delegates to main.HA.linkDown )
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        ( delegates to main.HA.linkUp )
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        ( delegates to main.HA.switchDown )
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        ( delegates to main.HA.switchUp )
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Appends the "Restart" label and str( main.restartTime ) to the csv
        plotting lists created in CASE1, then calls main.HA.cleanUp.
        main.restartTime is not set anywhere in this file; presumably it is
        set while bringing the stopped node back up - confirm.
        """
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        ( delegates to main.HA.startElectionApp )
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election

        The steps above are carried out by main.HA.isElectionFunctional.
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        ( delegates to main.HA.installDistributedPrimitiveApp )
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        ( delegates to main.HA.checkDistPrimitivesFunc )
        """
        main.HA.checkDistPrimitivesFunc( main )