blob: 560b7857118091cd7494f8fe2283cb29e8f04455 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAkillNodes:
    """
    TestON test cases that kill a minority of the ONOS cluster nodes and
    check the cluster afterwards.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored on `main.HA`, which CASE1 creates.
    The `utilities` name used by several cases is not defined in this file;
    it is expected to be provided by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below always has a value,
        # even if the setup raises before envSetup() returns and the
        # exception handler returns control here.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        # Extra functions run during ONOS setup; applyArgs holds the matching
        # argument entry for each function ( None for all of them here ).
        applyFuncs = [ main.HA.customizeOnosGenPartitions,
                       main.HA.copyBackupConfig,
                       main.ONOSbench.preventAutoRespawn ]
        applyArgs = [ None, None, None ]
        # Mininet is started here unless the params name a topology file; a
        # missing 'topology' / 'topoFile' entry is treated like an empty one.
        try:
            topoFile = main.params[ 'topology' ][ 'topoFile' ]
        except ( KeyError, IndexError ):
            topoFile = None
        if topoFile:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        loggingResult = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            # e.g. 'ONOS_Logging' is present but is not a mapping
            loggingResult = False
        utilities.assert_equals( expect=True, actual=loggingResult,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts

        Runs a Mininet pingall and asserts that it returned main.TRUE.
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case: kill a minority of the ONOS nodes.

        Stores the killed controller objects in main.kill, marks each of
        them inactive, then checks the remaining nodes. The test is stopped
        through main.cleanAndExit() if that node check fails.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Number of partitions. Floor division keeps p an integer so that it
        # can be used as a list index on Python 3 as well as Python 2.
        p = ( ( n + 1 ) // 2 ) + 1
        # ONOS nodes to kill, stored as controller objects
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Attempt the kill before combining results: folding the call
            # into the `and` expression would skip it after an earlier
            # failure while the node is still marked inactive below.
            killed = ctrl.onosKill( ctrl.ipAddress )
            killResults = killResults and killed
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "onos:scr-list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNodes( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the
        election-app leader. The step fails if any node reports an error,
        reports no leader, reports a leader whose address belongs to a node
        in main.kill, or if the nodes do not all report the same leader.
        """
        # main.kill is only set by CASE61; default to "nothing was killed"
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were killed
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )