blob: 886c6725d40525c65a80e3e267e655e234f1e010 [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link down ( endpoints are read from the params file )
CASE10: Link up ( endpoints are read from the params file )
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: Start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    TestON test cases that partition an ONOS cluster with iptables rules
    ( CASE61 ), heal the partition ( CASE62 ) and check the cluster around
    those events. Most cases simply delegate to the shared main.HA object.

    NOTE(review): the case bodies use a `utilities` name that is never
    imported here, so the framework presumably injects it and may execute
    case bodies in a special context. For that reason the cases are kept
    free of helper methods and early returns - confirm how TestON runs a
    CASE before refactoring further.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion below always has a value to
        # report, even if envSetupException() returns instead of exiting.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: 'evnSetupConclusion' is the spelling this file has always
        #       called; it is kept as-is because the method lives outside
        #       this file.
        main.testSetUp.evnSetupConclusion( stepResult )

        applyFuncs = [ main.HA.customizeOnosGenPartitions ]
        applyArgs = [ None ]
        # Mininet is only started here when the params file does not name a
        # topology file; a missing entry is treated like an empty one.
        try:
            topoFile = main.params[ 'topology' ][ 'topoFile' ]
        except ( KeyError, IndexError ):
            topoFile = None
        if topoFile:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp()

        main.step( 'Set logging levels' )
        # Named so that it does not shadow the stdlib 'logging' module.
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers ( delegates to main.HA.assignDevices )
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        ( delegates to main.HA.startTopology )
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        ( delegates to main.HA.assignMastership )
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents ( delegates to main.HA.assignIntents )
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        ( delegates to main.HA.pingAcrossHostIntent )
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts: run main.Mininet1.pingall() and assert that it
        returned main.TRUE
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS ( delegates to main.HA.readingState )
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the nodes to isolate ( stored as indexes into
        main.Cluster.runningNodes in main.partition ), then on every node
        appends iptables INPUT DROP rules for traffic coming from the nodes
        on the other side of the partition. Isolated nodes are marked
        inactive, the cluster is reset and the case sleeps for 60 seconds.
        """
        import pexpect
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an int on both Python 2 and Python 3;
        # p - 1 is used as a list index through main.partition.
        p = ( ( n + 1 ) // 2 ) + 1
        main.partition = [ 0 ]  # ONOS node to partition, listed by index in main.nodes
        if n > 3:
            main.partition.append( p - 1 )
        # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        # NOTE: nothing below ever sets this to main.FALSE; failures leave
        #       through main.cleanAndExit() instead.
        partitionResults = main.TRUE
        for i in range( 0, n ):
            iCtrl = main.Cluster.runningNodes[ i ]
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            # A node drops traffic from every node on the other side:
            # isolated nodes block the rest of the cluster, all other nodes
            # block the isolated ones.
            if isolated:
                foes = [ j for j in range( 0, n ) if j not in main.partition ]
            else:
                foes = main.partition
            for j in foes:
                foe = main.Cluster.runningNodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( iCtrl.ipAddress,
                                                                                 foe.ipAddress ) )
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT",
                                                                           iCtrl.ipAddress,
                                                                           foe.ipAddress )
                try:
                    this.sendline( cmdStr )
                    # Raw string: the pattern is a literal '$'
                    this.expect( r"\$" )
                    main.log.debug( this.before )
                except pexpect.EOF:
                    main.log.error( iCtrl.name + ": EOF exception found" )
                    main.log.error( iCtrl.name + ": " + this.before )
                    main.cleanAndExit()
                except Exception:
                    main.log.exception( iCtrl.name + ": Uncaught exception!" )
                    main.cleanAndExit()
            if isolated:
                # CASE62 sets this flag back to True for the same nodes
                main.Cluster.runningNodes[ i ].active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partition nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )
        main.Cluster.reset()

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every running node, marks the nodes
        listed in main.partition active again, resets the cluster and then
        retries main.Cluster.nodesCheck. The test is stopped through
        main.cleanAndExit() when the nodes check does not succeed.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        # getattr so that a missing attribute produces the assertion message
        # below instead of an AttributeError
        assert getattr( main, "partition", None ), "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # NOTE: nothing below ever sets this to main.FALSE
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            # Raw string: the pattern is a literal '$'
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True
        main.Cluster.reset()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=50 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterEvent, then asks every active node for
        the election-app leader. The step fails when a node returns
        main.FALSE or None, names a partitioned node as leader, or when the
        active nodes do not all report the same leader.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes isolated by CASE61
        partitioned = []
        for i in main.partition:
            partitioned.append( main.Cluster.runningNodes[ i ].ipAddress )
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        # Every active node must report the same leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo ( delegates to main.HA.compareTopo )
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from the 'kill' section of the params
        ( 'linkSrc' and 'linkDst' ) and passed to main.HA.linkDown.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from the 'kill' section of the params
        ( 'linkSrc' and 'linkDst' ) and passed to main.HA.linkUp.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down ( delegates to main.HA.switchDown )
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up ( delegates to main.HA.switchUp )
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up ( delegates to main.HA.cleanUp )
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        ( delegates to main.HA.startElectionApp )
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election

        The steps themselves are implemented by main.HA.isElectionFunctional.
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        ( delegates to main.HA.installDistributedPrimitiveApp )
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        ( delegates to main.HA.checkDistPrimitivesFunc )
        """
        main.HA.checkDistPrimitivesFunc( main )