# blob: f04e8781075ecee6c3a59e86a083916284898c4e [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link down ( endpoints read from the 'kill' params )
CASE10: Link up ( endpoints read from the 'kill' params )
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: Start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    ONOS HA test: partition the ONOS cluster into two sub-clusters with
    firewall rules, heal the partition and check the cluster state.

    Every CASE<n> method receives the test framework's `main` object. Most
    cases simply delegate to the helper object stored in main.HA by CASE1;
    CASE61, CASE62 and CASE7 contain the partition specific logic.

    NOTE: `utilities` is used by several cases but is not imported in this
    file; presumably it is injected by the test framework - confirm.
    """

    def __init__( self ):
        self.default = ''

    @staticmethod
    def _partitionIndexes( n ):
        """
        Return the list of node indexes ( positions in
        main.Cluster.runningNodes ) that CASE61 isolates from the rest of
        an n node cluster.

        Node 0 is always isolated; when n > 3 a second node is isolated too.
        Floor division is used so the indexes stay ints on both Python 2 and
        Python 3 ( true division would yield a float that cannot be used as
        a list index ).
        NOTE: This only works for cluster sizes of 3,5, or 7.
        """
        p = ( ( n + 1 ) // 2 ) + 1  # p - 1 is the index of the second isolated node
        partition = [ 0 ]
        if n > 3:
            partition.append( p - 1 )
        return partition

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            # NOTE(review): stepResult and cellName are unbound below if this
            # handler returns; presumably envSetupException() aborts the
            # test - confirm.
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        # Extra functions ( and their arguments ) handed to ONOSSetUp below
        applyFuncs = [ main.HA.customizeOnosGenPartitions ]
        applyArgs = [ None ]
        try:
            if main.params[ 'topology' ][ 'topoFile' ]:
                main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
            else:
                applyFuncs.append( main.HA.startingMininet )
                applyArgs.append( None )
        except ( KeyError, IndexError ):
            # No topology file configured in the params: start Mininet here
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp()

        main.step( 'Set logging levels' )
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Installs iptables DROP rules on every running node so that the nodes
        listed in main.partition and the remaining nodes drop each other's
        traffic, marks the nodes in main.partition as inactive and then
        sleeps for 60 seconds.
        """
        import pexpect
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # ONOS nodes to partition, listed by index in main.Cluster.runningNodes
        main.partition = HAfullNetPartition._partitionIndexes( n )

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        partitionResults = main.TRUE
        for i in range( 0, n ):
            iCtrl = main.Cluster.runningNodes[ i ]
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            # A node drops traffic coming from every node on the other side
            if isolated:
                foes = [ j for j in range( 0, n ) if j not in main.partition ]
            else:
                foes = main.partition
            for j in foes:
                foe = main.Cluster.runningNodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( iCtrl.ipAddress,
                                                                                foe.ipAddress ) )
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT",
                                                                           iCtrl.ipAddress,
                                                                           foe.ipAddress )
                try:
                    this.sendline( cmdStr )
                    # Raw string: "\$" is an invalid escape in a normal literal
                    this.expect( r"\$" )
                    main.log.debug( this.before )
                except pexpect.EOF:
                    main.log.error( iCtrl.name + ": EOF exception found" )
                    main.log.error( iCtrl.name + ": " + this.before )
                    main.cleanAndExit()
                except Exception:
                    main.log.exception( iCtrl.name + ": Uncaught exception!" )
                    main.cleanAndExit()
            if isolated:
                # CASE62 sets this back to True for the nodes in main.partition
                main.Cluster.runningNodes[ i ].active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partition nodes still work when partitioned
        # NOTE(review): partitionResults is never changed after initialization,
        #       failures above end the test through main.cleanAndExit() instead
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every running node, marks the nodes in
        main.partition as active again and retries the cluster nodes check;
        the test is stopped when that check does not succeed.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # NOTE(review): healResults is never changed after initialization
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            # Raw string: "\$" is an invalid escape in a normal literal
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=50 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, every active node is asked for the
        election-app leader; the step fails when a node reports an error, no
        leader, a partitioned node as leader, or when the nodes disagree.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were cut off in CASE61
        partitioned = [ main.Cluster.runningNodes[ i ].ipAddress
                        for i in main.partition ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down
        """
        # Link endpoints come from the 'kill' section of the params file
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up
        """
        # Link endpoints come from the 'kill' section of the params file
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )