blob: 475759f8a154ee6ccdb104184bd94d8557808ff6 [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
21
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    Test cases that split the ONOS cluster into two sub-clusters with
    iptables rules, heal the split, and check cluster state afterwards.

    Every case receives the framework's `main` object. Most cases simply
    forward to the shared helper stored on `main.HA` by CASE1; the
    partition-specific logic lives in CASE61, CASE62 and CASE7.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        # NOTE(review): none of these modules is referenced in this case body;
        #               presumably later cases rely on them having been
        #               imported here - confirm before removing any of them.
        import imp
        import pexpect
        import time
        import json
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below reports the problem
        # instead of hitting an unbound name when the setup raises early.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAfullNetPartition" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the nodes to isolate ( index 0, plus one more when the cluster
        has more than three nodes ), then logs into every running node and
        appends iptables INPUT DROP rules so that isolated and non-isolated
        nodes stop accepting each other's traffic. Isolated nodes are flagged
        inactive on main.Cluster; the chosen indexes are kept in
        main.partition for CASE62 and CASE7.
        """
        # NOTE(review): math is not used below; kept in case other cases
        #               depend on this import - confirm.
        import math
        import time
        import pexpect
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an int on Python 3 as well; it is used
        # ( minus one ) as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.partition = [ 0 ]  # ONOS node to partition, listed by index in main.nodes
        if n > 3:
            main.partition.append( p - 1 )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        partitionResults = main.TRUE
        for i in range( 0, n ):
            iCtrl = main.Cluster.runningNodes[ i ]
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            if isolated:
                # An isolated node drops traffic from every non-isolated node
                peers = [ j for j in range( 0, n ) if j not in main.partition ]
            else:
                # Everyone else drops traffic from the isolated nodes
                peers = list( main.partition )
            for j in peers:
                foe = main.Cluster.runningNodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( iCtrl.ipAddress, foe.ipAddress ) )
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT", iCtrl.ipAddress, foe.ipAddress )
                try:
                    this.sendline( cmdStr )
                    this.expect( r"\$" )
                    main.log.debug( this.before )
                except pexpect.EOF:
                    # Report against the node and ssh session actually in use;
                    # this class has no name/handle attributes of its own.
                    main.log.error( iCtrl.name + ": EOF exception found" )
                    main.log.error( iCtrl.name + ": " + str( this.before ) )
                    main.cleanup()
                    main.exit()
                except Exception:
                    main.log.exception( iCtrl.name + ": Uncaught exception!" )
                    main.cleanup()
                    main.exit()
            if isolated:
                # CASE62 flips this back to True for the same indexes
                main.Cluster.runningNodes[ i ].active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partition nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every running node, marks the nodes
        listed in main.partition as active again, and then retries the node
        check until it passes or the attempts run out; the test is stopped
        when the nodes never come back.
        """
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True

        # NOTE : Not sure if this can be removed ( disabled legacy check )
        # main.activeNodes.sort()
        # try:
        #     assert list( set( main.activeNodes ) ) == main.activeNodes,\
        #            "List of active nodes has duplicates, this likely indicates something was run out of order"
        # except AssertionError:
        #     main.log.exception( "" )
        #     main.cleanup()
        #     main.exit()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node who leads the
        election test app. The step fails when a node returns an error or no
        leader, names a node listed in main.partition, or when the active
        nodes do not all report the same leader.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the nodes picked for isolation in CASE61
        partitioned = [ main.Cluster.runningNodes[ i ].ipAddress
                        for i in main.partition ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node answered to make the mismatch visible
            main.log.error( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )