blob: b5c1e866034c1f6bcfb39dcfc1a10ef7aa416cb4 [file] [log] [blame]
"""
Description: This test is to determine if ONOS can handle
    a full network partition

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAfullNetPartition:
    """
    HA test that splits the ONOS cluster into two groups of nodes with
    iptables rules ( CASE61 ), removes the rules again ( CASE62 ) and checks
    the cluster state around both events.

    Every CASE method receives the test harness object as `main`. Most cases
    only delegate to the shared helper object that CASE1 stores in main.HA.

    NOTE(review): `utilities` is used in this class but never imported in
    this file; presumably the test harness makes it available - confirm.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        # NOTE(review): none of these modules is referenced in this case.
        # They are kept untouched in case the harness or later cases rely
        # on them having been imported - confirm before removing.
        import imp
        import pexpect
        import time
        import json
        main.log.info( "ONOS HA test: Partition ONOS nodes into two sub-clusters - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so that the conclusion step below receives a
        # defined value even when the setup block raises before assigning it.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            # Never use more controllers than the bench reports as available
            if main.ONOSbench.maxNodes and\
                    main.ONOSbench.maxNodes < main.numCtrls:
                main.numCtrls = int( main.ONOSbench.maxNodes )
            main.maxNodes = main.numCtrls
            stepResult = main.testSetUp.envSetup( hasNode=True )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAfullNetPartition" )

        main.testSetUp.ONOSSetUp( main.Mininet1, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.customizeOnosGenPartitions,
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main, True, True )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the nodes to isolate ( stored in main.partition as indexes
        into main.nodes ), then adds iptables INPUT DROP rules on every node
        for traffic coming from the nodes on the other side of the split.
        The isolated nodes are removed from main.activeNodes.
        """
        import math
        # time.sleep() is called at the end of this case, so make sure the
        # module is bound here instead of relying on an earlier case.
        import time
        assert main.numCtrls, "main.numCtrls not defined"
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        main.case( "Partition ONOS nodes into two distinct partitions" )

        main.step( "Checking ONOS Logs for errors" )
        for node in main.nodes:
            main.log.debug( "Checking logs for errors on " + node.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( node.ip_address ) )

        main.log.debug( main.CLIs[ 0 ].roles( jsonFormat=False ) )

        n = len( main.nodes )  # Number of nodes
        # Floor division keeps p an integer; p - 1 is used as a list index
        p = ( ( n + 1 ) // 2 ) + 1
        main.partition = [ 0 ]  # ONOS node to partition, listed by index in main.nodes
        if n > 3:
            main.partition.append( p - 1 )
        # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Partitioning ONOS nodes" )
        nodeList = [ str( i + 1 ) for i in main.partition ]
        main.log.info( "Nodes to be partitioned: " + str( nodeList ) )
        # NOTE(review): nothing below ever changes partitionResults, so the
        # assertion after the loop cannot report a failure.
        partitionResults = main.TRUE
        # Indexes of the nodes that stay on the other side of the split
        remaining = [ j for j in range( 0, n ) if j not in main.partition ]
        for i in range( 0, n ):
            this = main.nodes[ i ]
            isolated = i in main.partition
            # Each node drops traffic from every node on the opposite side
            foes = remaining if isolated else main.partition
            for j in foes:
                foe = main.nodes[ j ]
                main.log.warn( "Setting IP Tables rule from {} to {}. ".format( this.ip_address, foe.ip_address ) )
                cmdStr = "sudo iptables -A {} -d {} -s {} -j DROP".format( "INPUT", this.ip_address, foe.ip_address )
                this.handle.sendline( cmdStr )
                this.handle.expect( r"\$" )
                main.log.debug( this.handle.before )
            if isolated:
                main.activeNodes.remove( i )
        # NOTE: When dynamic clustering is finished, we need to start checking
        #       main.partition nodes still work when partitioned
        utilities.assert_equals( expect=main.TRUE, actual=partitionResults,
                                 onpass="Firewall rules set successfully",
                                 onfail="Error setting firewall rules" )

        main.step( "Sleeping 60 seconds" )
        time.sleep( 60 )

    def CASE62( self, main ):
        """
        Healing Partition

        Flushes the iptables rules on every node, puts the nodes listed in
        main.partition back into main.activeNodes and waits for the nodes
        check to pass. The test is stopped if the active node list contains
        duplicates or if the nodes check keeps failing.
        """
        import time
        assert main.numCtrls, "main.numCtrls not defined"
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        assert main.partition, "main.partition not defined"
        main.case( "Healing Partition" )

        main.step( "Deleteing firewall rules" )
        # NOTE(review): nothing below ever changes healResults, so the
        # assertion after the loop cannot report a failure.
        healResults = main.TRUE
        for node in main.nodes:
            cmdStr = "sudo iptables -F"
            node.handle.sendline( cmdStr )
            node.handle.expect( r"\$" )
            main.log.debug( node.handle.before )
        utilities.assert_equals( expect=main.TRUE, actual=healResults,
                                 onpass="Firewall rules removed",
                                 onfail="Error removing firewall rules" )

        main.activeNodes.extend( main.partition )
        main.activeNodes.sort()
        try:
            # Compare sizes rather than list( set( ... ) ), whose element
            # order is not guaranteed to match the sorted list.
            assert len( set( main.activeNodes ) ) == len( main.activeNodes ),\
                   "List of active nodes has duplicates, this likely indicates something was run out of order"
        except AssertionError:
            main.log.exception( "" )
            main.cleanup()
            main.exit()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.activeNodes ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the components that are not ACTIVE before giving up
            for i in main.activeNodes:
                cli = main.CLIs[ i ]
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    cli.name,
                    cli.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the leader
        of the election test app. The step fails if a node returns an error
        or no leader, if the reported leader is one of the partitioned
        nodes, or if the active nodes do not all report the same leader.
        """

        main.HA.checkStateAfterONOS( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were isolated by CASE61
        partitioned = []
        for i in main.partition:
            partitioned.append( main.nodes[ i ].ip_address )
        leaderResult = main.TRUE

        for i in main.activeNodes:
            cli = main.CLIs[ i ]
            leaderN = cli.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( cli.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in partitioned:
                main.log.error( cli.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was partitioned" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node reported to make the mismatch visible
            main.log.error( "Leaders reported by active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )
Devin Lim58046fa2017-07-05 16:55:00 -0700355 main.HA.checkDistPrimitivesFunc( main )