blob: f826e91263b64c2c177ab2b145522a46a45f7ee9 [file] [log] [blame]
Jon Hall5cf14d52015-07-16 12:15:19 -07001"""
Jeremy Ronquillo23fb2162017-09-15 14:59:57 -07002Copyright 2015 Open Networking Foundation ( ONF )
Jeremy Ronquillob27ce4c2017-07-17 12:41:28 -07003
4Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
5the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
6or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
7
8 TestON is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 2 of the License, or
Jeremy Ronquillo23fb2162017-09-15 14:59:57 -070011 ( at your option ) any later version.
Jeremy Ronquillob27ce4c2017-07-17 12:41:28 -070012
13 TestON is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with TestON. If not, see <http://www.gnu.org/licenses/>.
20"""
Jeremy Ronquillob27ce4c2017-07-17 12:41:28 -070021"""
Jon Hall5cf14d52015-07-16 12:15:19 -070022Description: This test is to determine if ONOS can handle
23 a minority of its nodes restarting
24
25List of test cases:
26CASE1: Compile ONOS and push it to the test machines
27CASE2: Assign devices to controllers
28CASE21: Assign mastership to controllers
29CASE3: Assign intents
30CASE4: Ping across added host intents
31CASE5: Reading state of ONOS
Jon Hallb3ed8ed2015-10-28 16:43:55 -070032CASE61: The Failure inducing case.
33CASE62: The Failure recovery case.
Jon Hall5cf14d52015-07-16 12:15:19 -070034CASE7: Check state after control plane failure
35CASE8: Compare topo
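36CASE9: Link down ( endpoints read from params[ 'kill' ][ 'linkSrc' / 'linkDst' ] )
37CASE10: Link up ( endpoints read from params[ 'kill' ][ 'linkSrc' / 'linkDst' ] )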
38CASE11: Switch down
39CASE12: Switch up
40CASE13: Clean up
41CASE14: start election app on all onos nodes
42CASE15: Check that Leadership Election is still functional
43CASE16: Install Distributed Primitives app
44CASE17: Check for basic functionality with distributed primitives
45"""
class HAkillNodes:
    """
    TestON test cases that kill a minority of the ONOS cluster nodes and
    then check the state of the cluster.

    Most cases simply delegate to the shared HA helper object that CASE1
    stores on main.HA. CASE61 ( kill the nodes ) and CASE7 ( election
    check after the failure ) hold the logic specific to this test.

    NOTE: `utilities` is not defined in this file; it is expected to be
    provided as a global by the test framework.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence ( as historically documented; the individual
        steps are carried out by main.testSetUp and main.HA ):
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump

        Side effects on main: sets HAlabels, HAdata, testSetUp, HA and
        apps.
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            # NOTE( review ): presumably envSetupException aborts the test;
            # otherwise stepResult / cellName below would be unbound.
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        # Extra functions run during ONOS set up, with one argument entry
        # ( None here ) per function.
        applyFuncs = [ main.HA.removeKarafConsoleLogging,
                       main.HA.customizeOnosGenPartitions,
                       main.HA.copyBackupConfig,
                       main.ONOSbench.preventAutoRespawn ]
        applyArgs = [ None, None, None, None ]

        # Mininet is only started here when no topology file is configured;
        # a missing 'topology' / 'topoFile' entry counts as "not configured".
        try:
            topoFileGiven = bool( main.params[ 'topology' ][ 'topoFile' ] )
        except ( KeyError, IndexError ):
            topoFileGiven = False
        if topoFileGiven:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        # Named logLevelsSet rather than `logging` to avoid shadowing the
        # standard library module name.
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts

        Runs a Mininet pingall and asserts that it returns main.TRUE.
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Kills the first running node and, for clusters larger than three
        nodes, one additional node. The killed node objects are stored in
        main.kill and each is marked inactive.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer ( and so a valid list index )
        # regardless of whether `/` means integer or true division.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        # ONOS nodes to kill, stored as node objects
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Always attempt the kill before folding it into the overall
            # result; `killResults and ctrl.onosKill( ... )` would skip the
            # kill after an earlier failure while still marking the node
            # inactive below.
            killed = ctrl.onosKill( ctrl.ipAddress )
            killResults = killResults and killed
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.testSetUp.checkOnosNodes( main.Cluster )

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNodes( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the generic state check, asks every active node for the
        election-app leader and fails if any node reports an error, no
        leader, a leader that is one of the nodes in main.kill, or if the
        nodes disagree on the leader.
        """
        # main.kill is only set by CASE61; default to "nothing was killed"
        # when this case is run without it.
        if not hasattr( main, 'kill' ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the killed nodes; none of them may be the leader
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        # Every active node must report the same leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # NOTE( review ): assumed to belong to this branch - confirm
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from main.params[ 'kill' ].
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from main.params[ 'kill' ].
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that the new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )