blob: c72cafa7c01dbe8a414f87c1de0b7d79b7b25a7b [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    Test cases that partition an ONOS cluster and then heal it.

    Every CASE<n> method takes the test framework's `main` object. Most
    cases simply delegate to `main.HA`; CASE61, CASE62 and CASE7 implement
    the partition, the heal, and the post-failure leadership check here.

    NOTE(review): `utilities` is used by CASE61, CASE62 and CASE7 but is
    neither imported nor defined in this file; presumably the test
    framework provides it at runtime - confirm.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below reports a failed
        # setup instead of hitting an unbound name if the try block raises
        # before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAfullNetPartition" )

        # NOTE(review): cellName is only bound if the try block above got far
        # enough; this relies on envSetupException() aborting the test - confirm.
        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )
        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Selects the node indices to isolate ( stored in main.partition ),
        then on every running node appends iptables INPUT DROP rules for
        traffic coming from the other side of the partition. Isolated nodes
        are flagged inactive. Finally sleeps 60 seconds.
        """
        import pexpect
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        main.log.debug( main.Cluster.next().CLI.roles( jsonFormat=False ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an int so that p - 1 stays a valid list index
        p = ( ( n + 1 ) // 2 ) + 1
        # ONOS nodes to partition, listed by index in main.Cluster.runningNodes
        main.partition = [ 0 ]
        if n > 3:
            main.partition.append( p - 1 )
        # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        # NOTE(review): partitionResults is never updated below; failures exit
        #               through main.cleanAndExit() instead.
        partitionResults = main.TRUE
        for i in range( 0, n ):
            iCtrl = main.Cluster.runningNodes[ i ]
            this = iCtrl.Bench.sshToNode( iCtrl.ipAddress )
            isolated = i in main.partition
            if isolated:
                # An isolated node drops traffic from every non-isolated node
                foes = [ j for j in range( 0, n ) if j not in main.partition ]
            else:
                # A non-isolated node drops traffic from every isolated node
                foes = list( main.partition )
            for j in foes:
                foe = main.Cluster.runningNodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( iCtrl.ipAddress, foe.ipAddress ) )
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT", iCtrl.ipAddress, foe.ipAddress )
                try:
                    this.sendline( cmdStr )
                    this.expect( r"\$" )
                    main.log.debug( this.before )
                except pexpect.EOF:
                    main.log.error( iCtrl.name + ": EOF exception found" )
                    main.log.error( iCtrl.name + ": " + str( this.before ) )
                    main.cleanAndExit()
                except Exception:
                    main.log.exception( iCtrl.name + ": Uncaught exception!" )
                    main.cleanAndExit()
            if isolated:
                main.Cluster.runningNodes[ i ].active = False
            iCtrl.Bench.exitFromSsh( this, iCtrl.ipAddress )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partition nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every running node, marks the nodes in
        main.partition active again, and retries the cluster node check. The
        test is stopped if the node check does not succeed.
        """
        import time
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # NOTE(review): healResults is never updated below, so the assertion
        #               after the loop cannot fail.
        healResults = main.TRUE
        for ctrl in main.Cluster.runningNodes:
            cmdStr = "sudo iptables -F"
            handle = ctrl.Bench.sshToNode( ctrl.ipAddress )
            handle.sendline( cmdStr )
            handle.expect( r"\$" )
            main.log.debug( handle.before )
            ctrl.Bench.exitFromSsh( handle, ctrl.ipAddress )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        for node in main.partition:
            main.Cluster.runningNodes[ node ].active = True

        # Disabled check, kept for reference.
        # NOTE : Not sure if this can be removed
        # main.activeNodes.sort()
        # try:
        #     assert list( set( main.activeNodes ) ) == main.activeNodes,\
        #            "List of active nodes has duplicates, this likely indicates something was run out of order"
        # except AssertionError:
        #     main.log.exception( "" )
        #     main.cleanAndExit()
        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the
        election-app leader and fails if any node reports an error, reports
        no leader, reports a partitioned node as leader, or if the nodes do
        not all report the same leader.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        partitioned = []
        for i in main.partition:
            partitioned.append( main.Cluster.runningNodes[ i ].ipAddress )
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node reported to make the mismatch debuggable
            main.log.debug( "Leaders reported by active nodes: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )