# blob: c3a20381a8476a716d946762cf13bd4c47ed5fff [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""

"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    TestON test class that partitions an ONOS cluster into two sub-clusters
    with iptables rules, heals the partition, and checks cluster state.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored on `main.HA` by CASE1. The name
    `utilities` is not defined in this class; it is expected to be supplied
    by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump

        Side effects on `main`: sets HAlabels, HAdata, testSetUp, HA and apps.
        """
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below always receives a
        # defined value, even when the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAfullNetPartition" )

        # NOTE(review): cellName is only bound when the try block above got
        # past the params lookup; presumably the exception/conclusion hooks
        # abort the test before this point on failure - confirm.
        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers (delegates to main.HA.assignDevices)
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers (delegates to main.HA.assignMastership)
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents (delegates to main.HA.assignIntents)
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents (delegates to main.HA.pingAcrossHostIntent)
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS (delegates to main.HA.readingState)
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the node indexes to isolate ( stored in main.partition ), then
        on every running node appends iptables INPUT DROP rules for traffic
        coming from the nodes on the other side of the partition. Nodes listed
        in main.partition are marked inactive. Finishes by sleeping 60 seconds.

        Any error while sending a rule is logged and the test is aborted
        through main.cleanAndExit().
        """
        # Local imports: this case needs both modules and nothing visible in
        # this file provides them at module scope.
        import time
        import pexpect
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        nodes = main.Cluster.runningNodes
        n = len( nodes )  # Number of nodes
        # Floor division keeps p an int ( and thus a valid list index ) on
        # interpreters where "/" is true division.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.partition = [ 0 ]  # ONOS node to partition, listed by index in main.nodes
        if n > 3:
            main.partition.append( p - 1 )
        # NOTE: This only works for cluster sizes of 3,5, or 7.

        def dropTrafficFrom( session, local, foe ):
            """
            Over the ssh session to `local`, append an INPUT rule that drops
            packets sent by `foe` to `local`.
            """
            main.log.warn( "Setting IP Tables rule from {} to {}. ".format( local.ipAddress, foe.ipAddress ) )
            cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT", local.ipAddress, foe.ipAddress )
            try:
                session.sendline( cmdStr )
                session.expect( r"\$" )
                main.log.debug( session.before )
            except pexpect.EOF:
                main.log.error( local.name + ": EOF exception found" )
                main.log.error( local.name + ": " + str( session.before ) )
                main.cleanAndExit()
            except Exception:
                main.log.exception( local.name + ": Uncaught exception!" )
                main.cleanAndExit()

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        # Never set to FALSE below: a failing rule aborts the test instead.
        partitionResults = main.TRUE
        for i, iCtrl in enumerate( nodes ):
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            if isolated:
                # A partitioned node blocks every node outside the partition
                foes = [ node for j, node in enumerate( nodes )
                         if j not in main.partition ]
            else:
                # Everyone else blocks only the partitioned nodes
                foes = [ nodes[ j ] for j in main.partition ]
            for foe in foes:
                dropTrafficFrom( this, iCtrl, foe )
            if isolated:
                iCtrl.active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partion nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes iptables on every running node, marks the nodes listed in
        main.partition as active again, then retries main.HA.nodesCheck on
        the active nodes. Aborts the test if the nodes check keeps failing.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # Never set to FALSE below, so this assertion currently always passes.
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True

        # NOTE : Not sure if this can be removed
        # main.activeNodes.sort()
        # try:
        #     assert list( set( main.activeNodes ) ) == main.activeNodes,\
        #         "List of active nodes has duplicates, this likely indicates something was run out of order"
        # except AssertionError:
        #     main.log.exception( "" )
        #     main.cleanAndExit()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterEvent, then asks every active node for
        the election-app leader. The step fails if any node reports an error
        ( main.FALSE ), reports no leader ( None ), reports a leader whose
        address belongs to a node in main.partition, or if the active nodes
        do not all report the same leader.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were cut off from the cluster
        partitioned = [ main.Cluster.runningNodes[ i ].ipAddress
                        for i in main.partition ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # TODO: print the list
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo (delegates to main.HA.compareTopo)
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down (delegates to main.HA.linkDown)
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up (delegates to main.HA.linkUp)
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down (delegates to main.HA.switchDown)
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up (delegates to main.HA.switchUp)
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up (delegates to main.HA.cleanUp)
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes (delegates to main.HA.startElectionApp)
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election

        The steps above are carried out by main.HA.isElectionFunctional.
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app (delegates to main.HA.installDistributedPrimitiveApp)
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives (delegates to main.HA.checkDistPrimitivesFunc)
        """
        main.HA.checkDistPrimitivesFunc( main )