blob: 8e6bd6f5b119e200e1a98e97d5cc9a0fc5085d43 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
21
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAstopNodes:
    """
    TestON test class: stop a minority of the ONOS cluster nodes and check
    that the remaining cluster keeps working.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored in main.HA (created in CASE1);
    CASE61 and CASE7 contain the logic specific to this test.

    NOTE: `utilities` is not imported in this file; it is expected to be
    made available by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so that, should envSetupException() return
        # instead of aborting the test, the conclusion below reports a
        # failed setup rather than raising NameError on an unbound name.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evnSetupConclusion" is the name used by the setup helper;
        #       it is defined outside this file, so it is left as is.
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAstopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions ],
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case: stop a minority of the ONOS nodes.

        Stores the stopped node objects in main.kill, marks each of them
        inactive, then verifies the remaining active nodes with
        main.HA.nodesCheck. The test is cleaned up and exited if that
        check keeps failing.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Stop minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer on Python 3 as well, so it can
        # be used to index runningNodes below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]  # ONOS node objects to stop
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Stopping nodes: " + str( main.kill ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Always issue the stop before combining the results: a plain
            # "killResults and ctrl.onosStop( ... )" would short-circuit
            # after the first failure and leave later nodes running while
            # still marking them inactive.
            stopResult = ctrl.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE62( self, main ):
        """
        Bring the stopped nodes back up
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the common post-event state check, then asks every active
        node for the election-app leader. The step fails if any node
        reports an error or no leader, reports one of the nodes listed in
        main.kill as leader, or if the nodes disagree with each other.
        """
        # main.kill is only set by CASE61; default to "nothing was stopped"
        # when this case is run without it.
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # The leader is compared against these addresses below.
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each node answered to help debugging
            main.log.debug( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # Record the restart time for the csv plots set up in CASE1.
        # NOTE(review): main.restartTime is not assigned anywhere in this
        #               file; presumably it is set while bringing the nodes
        #               back up (CASE62) - confirm before running CASE13
        #               without CASE62.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that the new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )