blob: 815ab52adb10e562a9c226bb7341f90d073fc9f7 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAstopNodes:
    """
    HA test that stops a minority of the ONOS nodes and checks the cluster.

    Each CASE<n> method receives the test framework's `main` object.
    Most cases delegate to the shared helper object stored on `main.HA`
    by CASE1; CASE61 ( stop nodes ) and CASE7 ( post-failure leadership
    check ) carry the logic specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def _selectNodesToStop( self, runningNodes ):
        """
        Pick the minority of controllers that CASE61 will stop.

        The first running node is always selected. When there are more
        than three nodes, the node at index ( ( n + 1 ) // 2 ) is added.

        NOTE: This only works for cluster sizes of 3, 5, or 7.

        Arguments:
            runningNodes - indexable sequence of controller objects
        Returns:
            list of the selected controller objects
        Raises:
            IndexError if runningNodes is empty
        """
        n = len( runningNodes )  # Number of nodes
        # Floor division keeps p an int on Python 3 as well as Python 2;
        # true division would yield a float that cannot index a list.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        selected = [ runningNodes[ 0 ] ]
        if n > 3:
            selected.append( runningNodes[ p - 1 ] )
        return selected

    def _stopNodes( self, main, nodes ):
        """
        Stop ONOS on every node in `nodes` and mark each one inactive.

        Every node's onosStop() is invoked even when an earlier stop
        failed; the individual results are then and-ed together, so one
        failure no longer skips the remaining nodes.

        Arguments:
            main - test framework object; main.TRUE seeds the result
            nodes - controller objects to stop
        Returns:
            main.TRUE-like value if all stops succeeded, otherwise the
            falsy result of a failed stop
        """
        result = main.TRUE
        for ctrl in nodes:
            # Call first, combine second: `result and ctrl.onosStop()`
            # would short-circuit and never stop this node.
            stopped = ctrl.onosStop( ctrl.ipAddress )
            result = result and stopped
            ctrl.active = False
        return result

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below reports a failed
        # setup instead of raising NameError when the try body raises
        # before envSetup() has returned.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            # NOTE(review): presumably envSetupException aborts the run;
            # if it returns, cellName may be unbound below - confirm.
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions,
                                               main.HA.copyBackupConfig ],
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp( serviceClean=True )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Logs each active controller's log check, stops a minority of the
        running nodes ( stored on main.kill as controller objects ), then
        retries main.Cluster.nodesCheck and exits the test if the check
        does not pass.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Stop minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        # ONOS nodes to stop, held as controller objects ( not indices )
        main.kill = self._selectNodesToStop( main.Cluster.runningNodes )

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Stopping nodes: " + str( nodeNames ) )
        killResults = self._stopNodes( main, main.kill )
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNodes( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active controller for
        the election-app leader and fails if any reports an error, no
        leader, a leader that is one of the nodes in main.kill, or if
        the controllers disagree on the leader.
        """
        # main.kill only exists if CASE61 ran; default to "nothing stopped"
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the stopped nodes; none of them may be leader
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # TODO: print the list
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Records main.restartTime under the "Restart" label for the csv
        plot data before delegating to the shared clean up.
        """
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )