"""
Copyright 2016 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    HA test that splits the running ONOS cluster into two groups with
    iptables DROP rules ( CASE61 ), removes the rules again ( CASE62 ) and
    checks cluster state around those events.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper object stored in main.HA by CASE1.
    `utilities` is not defined in this file; it is expected to be provided
    by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Pre-set the result so the conclusion call below never reads an
        # unbound name when the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evnSetupConclusion" is the method name used by this file;
        #       it has to match the ONOSSetup API, so the spelling is kept.
        main.testSetUp.evnSetupConclusion( stepResult )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the node indexes to isolate ( stored in main.partition ), then
        logs into every running node and appends iptables INPUT DROP rules
        for traffic coming from each node on the other side of the split.
        Isolated nodes are flagged as not active. Any error while sending a
        rule is logged and followed by main.cleanAndExit().
        """
        import pexpect
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division: identical result on Python 2, and keeps p an int on
        # Python 3 so that p - 1 stays usable as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1
        main.partition = [ 0 ]  # ONOS node to partition, listed by index in main.nodes
        if n > 3:
            main.partition.append( p - 1 )
        # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        # Failures below end in main.cleanAndExit(); nothing in this step
        # ever lowers partitionResults.
        partitionResults = main.TRUE
        for i in range( 0, n ):
            iCtrl = main.Cluster.runningNodes[ i ]
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            # A node only needs rules against the nodes of the other group
            if isolated:
                foes = [ j for j in range( 0, n ) if j not in main.partition ]
            else:
                foes = main.partition
            for j in foes:
                foe = main.Cluster.runningNodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( iCtrl.ipAddress,
                                                                                foe.ipAddress ) )
                # Drop packets addressed to this node that originate from foe
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT",
                                                                           iCtrl.ipAddress,
                                                                           foe.ipAddress )
                try:
                    this.sendline( cmdStr )
                    # Raw string: the pattern is a literal "$" prompt character
                    this.expect( r"\$" )
                    main.log.debug( this.before )
                except pexpect.EOF:
                    main.log.error( iCtrl.name + ": EOF exception found" )
                    main.log.error( iCtrl.name + ": " + this.before )
                    main.cleanAndExit()
                except Exception:
                    main.log.exception( iCtrl.name + ": Uncaught exception!" )
                    main.cleanAndExit()
            if isolated:
                main.Cluster.runningNodes[ i ].active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partion nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every running node, marks the nodes
        listed in main.partition as active again and then waits for
        main.Cluster.nodesCheck to succeed. If it never does, the inactive
        components are logged and the test is stopped.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # Nothing in this step ever lowers healResults; an error while
        # talking to a node propagates as an exception instead.
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            # Raw string: the pattern is a literal "$" prompt character
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True

        main.step( "Checking ONOS nodes" )
        # NOTE(review): presumably retries while nodesCheck returns False,
        #               up to 50 attempts 15 seconds apart - confirm against
        #               utilities.retry
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=50 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the
        election-app leader. The step fails if any node reports an error,
        reports no leader, reports a partitioned node as leader, or if the
        nodes do not all report the same leader.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were isolated in CASE61
        partitioned = []
        for i in main.partition:
            partitioned.append( main.Cluster.runningNodes[ i ].ipAddress )
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node answered to ease debugging
            main.log.debug( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )