"""
Copyright 2015 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAkillNodes:
    """
    HA test suite that kills a minority of the ONOS cluster nodes and
    then checks cluster state.

    Each CASE method receives the framework's `main` object. Most cases
    simply delegate to the shared helper stored in main.HA ( created in
    CASE1 ); CASE61 and CASE7 hold the logic specific to this test.
    NOTE: `utilities` is not imported here; it is expected to be provided
    as a global by the test framework.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Pre-set the result so that the conclusion step below reports a
        # failure instead of raising NameError if the setup raised before
        # envSetup() returned.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE( review ): the "evn" spelling is the callee's name as used
        #                 by this file; do not "fix" it here without
        #                 checking ONOSSetup.
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAkillNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions,
                                               main.HA.copyBackupConfig,
                                               main.ONOSbench.preventAutoRespawn ],
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp( serviceClean=True )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Logs the result of checkLogs for every active node, selects the
        nodes to kill ( stored in main.kill ), kills them, marks them as
        inactive and then verifies the remaining active nodes with
        main.HA.nodesCheck. The test is aborted through
        main.cleanAndExit() if that check fails.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer on Python 3 as well; it is
        # used as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        # ONOS nodes to kill; the list holds the node objects themselves
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formating
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Call onosKill before combining the results: `and` would
            # short-circuit after the first failure and silently skip the
            # remaining nodes, which are still marked inactive below.
            killed = ctrl.onosKill( ctrl.ipAddress )
            killResults = killResults and killed
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        Bring up the stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs main.HA.checkStateAfterEvent and then asks every active node
        for the election-app leader. The step fails if a node returns
        main.FALSE or None, if the reported leader is one of the nodes in
        main.kill, or if the nodes do not all report the same leader.
        """
        # CASE61 may not have been run; default to "no node was killed"
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Reported leaders are compared against the killed nodes' addresses
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Records the restart time for the csv plots before delegating to
        main.HA.cleanUp.
        """
        # NOTE( review ): main.restartTime is not set anywhere in this
        #                 file; presumably a recovery step ( CASE62 ) sets
        #                 it - confirm.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )