blob: 7b57730105aac33c8584971a4ad92970d51eb472 [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAstopNodes:
    """
    Test cases checking that ONOS keeps working when a minority of its
    nodes is stopped and later brought back.

    Every CASE method receives the framework's `main` object. Most cases
    simply delegate to the helper stored on main.HA ( created in CASE1 );
    only CASE1, CASE61 and CASE7 carry logic specific to this test.

    NOTE: `utilities` is not imported in this file; it is used as a global
          name and is presumably injected by the test framework.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below still has a value
        # to report if the setup raises and the exception handler returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup()
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # "evnSetupConclusion" ( sic ) - keep the spelling, it is the name
        # this test has always called on the setup helper.
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAstopNodes" )

        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=[ main.HA.startingMininet,
                                               main.HA.customizeOnosGenPartitions,
                                               main.HA.copyBackupConfig ],
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp( serviceClean=True )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Stops the first running node and, for clusters of more than three
        nodes, one additional node. The stopped nodes are stored in
        main.kill and marked inactive, then the remaining active nodes are
        checked; the test exits if that check keeps failing.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Stop minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer on both Python 2 and Python 3;
        # it is used as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]  # ONOS node objects to stop
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Stopping nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Stop the node before combining results: folding the call into
            # the `and` expression would skip it after an earlier failure
            # while the node is still marked inactive below.
            stopResult = ctrl.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.Cluster.active() ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the components that are not ACTIVE on each remaining
            # node before giving up, to help debugging.
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        Bring up the stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared state check, then asks every active node for the
        election-app leader. The step fails if a node reports an error, no
        leader, a leader that is one of the nodes in main.kill, or if the
        active nodes disagree on the leader.
        """
        # main.kill is set by CASE61; default to "nothing stopped" so this
        # case can also run without it.
        if not hasattr( main, "kill" ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Leaders are compared against these addresses, so the leader is
        # presumably reported by IP address - TODO confirm.
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( "Leaders reported: " + str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # main.restartTime is not set anywhere in this class; presumably an
        # earlier case ( e.g. the node bring-up in CASE62 ) sets it - verify.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )