Add HA test for restoring from an offline backup
- Add driver function for backing up onos data
- Add driver function for restoring from onos backup
- Add HA test HAbackupRecover
Change-Id: I127e2781044122ce12c6f25ecd8451316b204428
diff --git a/TestON/drivers/common/cli/onosdriver.py b/TestON/drivers/common/cli/onosdriver.py
index f8c77e9..5b12d0c 100755
--- a/TestON/drivers/common/cli/onosdriver.py
+++ b/TestON/drivers/common/cli/onosdriver.py
@@ -2476,3 +2476,67 @@
except Exception:
main.log.exception( self.name + ": Uncaught exception!" )
main.cleanAndExit()
+
+ def backupData( self, location ):
+ """
+ Backs up ONOS data and logs to a given location. Returns main.FALSE
+ if there is an error executing the command, and main.TRUE otherwise.
+ required arguments:
+ location - The file path to save the backup to
+ """
+ try:
+ cmd = "/opt/onos/bin/onos-backup " + str( location )
+ self.handle.sendline( cmd )
+ self.handle.expect( self.prompt )
+ handle = self.handle.before  # output received before the prompt matched
+ main.log.debug( handle )
+ assert handle is not None, "Error in sendline"
+ assert "Command not found:" not in handle, handle
+ assert "Error" not in handle, handle
+ assert "Exception:" not in handle, handle
+ return main.TRUE
+ except AssertionError:  # one of the output checks above failed
+ main.log.exception( "{} Error in onos-backup output:".format( self.name ) )
+ return main.FALSE
+ except TypeError:
+ main.log.exception( self.name + ": Object not as expected" )
+ return main.FALSE
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ main.cleanAndExit()
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ main.cleanAndExit()
+
+ def restoreData( self, location ):
+ """
+ Restores ONOS data and logs from a given location. Returns main.FALSE
+ if there is an error executing the command, and main.TRUE otherwise.
+ required arguments:
+ location - The file path of a backup file
+ """
+ try:
+ cmd = "/opt/onos/bin/onos-restore " + str( location )
+ self.handle.sendline( cmd )
+ self.handle.expect( self.prompt )
+ handle = self.handle.before  # output received before the prompt matched
+ main.log.debug( handle )
+ assert handle is not None, "Error in sendline"
+ assert "Command not found:" not in handle, handle
+ assert "Error" not in handle, handle
+ assert "Exception:" not in handle, handle
+ return main.TRUE
+ except AssertionError:  # one of the output checks above failed
+ main.log.exception( "{} Error in onos-restore output:".format( self.name ) )
+ return main.FALSE
+ except TypeError:
+ main.log.exception( self.name + ": Object not as expected" )
+ return main.FALSE
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ main.cleanAndExit()
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ main.cleanAndExit()
diff --git a/TestON/tests/HA/HAbackupRecover/HAbackupRecover.params b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.params
new file mode 100644
index 0000000..7091a1d
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.params
@@ -0,0 +1,96 @@
+<PARAMS>
+ #CASE1: Compile ONOS and push it to the test machines
+ #CASE2: Assign devices to controllers
+ #CASE21: Assign mastership to controllers
+ #CASE3: Assign intents
+ #CASE4: Ping across added host intents
+ #CASE5: Reading state of ONOS
+ #CASE6: The Failure case.
+ #CASE7: Check state after control plane failure
+ #CASE8: Compare topo
+ #CASE9: Link s3-s28 down
+ #CASE10: Link s3-s28 up
+ #CASE11: Switch down
+ #CASE12: Switch up
+ #CASE13: Clean up
+ #CASE14: start election app on all onos nodes
+ #CASE15: Check that Leadership Election is still functional
+ #CASE16: Install Distributed Primitives app
+ #CASE17: Check for basic functionality with distributed primitives
+ <testcases>1,2,8,[21,3,8,4,5,14,16,17]*1,[6],8,[3,7,4,15,17,9,8,4,10,8,4,11,8,4,12,8,4]*1,13</testcases>
+
+ <GRAPH>
+ <nodeCluster>VM</nodeCluster>
+ <builds>20</builds>
+ </GRAPH>
+
+ <apps></apps>
+ <ONOS_Configuration>
+ <org.onosproject.net.intent.impl.compiler.IntentConfigurableRegistrator>
+ <useFlowObjectives>false</useFlowObjectives>
+ <defaultFlowObjectiveCompiler>org.onosproject.net.intent.impl.compiler.LinkCollectionIntentObjectiveCompiler</defaultFlowObjectiveCompiler>
+ </org.onosproject.net.intent.impl.compiler.IntentConfigurableRegistrator>
+ </ONOS_Configuration>
+ <ENV>
+ <cellName>HA</cellName>
+ <appString>drivers,openflow,proxyarp,mobility,events</appString>
+ </ENV>
+ <GIT>
+ <pull>False</pull>
+ <branch>master</branch>
+ </GIT>
+ <num_controllers> 7 </num_controllers>
+ <tcpdump> False </tcpdump>
+
+ <CTRL>
+ <port1>6653</port1>
+ <port2>6653</port2>
+ <port3>6653</port3>
+ <port4>6653</port4>
+ <port5>6653</port5>
+ <port6>6653</port6>
+ <port7>6653</port7>
+ </CTRL>
+ <BACKUP>
+ <ENABLED> False </ENABLED>
+ <TESTONUSER>sdn</TESTONUSER>
+ <TESTONIP>10.128.30.9</TESTONIP>
+ </BACKUP>
+ <PING>
+ <source1>h8</source1>
+ <source2>h9</source2>
+ <source3>h10</source3>
+ <source4>h11</source4>
+ <source5>h12</source5>
+ <source6>h13</source6>
+ <source7>h14</source7>
+ <source8>h15</source8>
+ <source9>h16</source9>
+ <source10>h17</source10>
+ <target1>10.0.0.18</target1>
+ <target2>10.0.0.19</target2>
+ <target3>10.0.0.20</target3>
+ <target4>10.0.0.21</target4>
+ <target5>10.0.0.22</target5>
+ <target6>10.0.0.23</target6>
+ <target7>10.0.0.24</target7>
+ <target8>10.0.0.25</target8>
+ <target9>10.0.0.26</target9>
+ <target10>10.0.0.27</target10>
+ </PING>
+ <timers>
+ <LinkDiscovery>12</LinkDiscovery>
+ <SwitchDiscovery>12</SwitchDiscovery>
+ <gossip>5</gossip>
+ </timers>
+ <kill>
+ <switch> s5 </switch>
+ <dpid> 0000000000005000 </dpid>
+ <links> h5 s2 s1 s6 </links>
+ </kill>
+ <MNtcpdump>
+ <intf>eth0</intf>
+ <port> </port>
+ <folder>~/packet_captures/</folder>
+ </MNtcpdump>
+</PARAMS>
diff --git a/TestON/tests/HA/HAbackupRecover/HAbackupRecover.py b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.py
new file mode 100644
index 0000000..5e9d2bc
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.py
@@ -0,0 +1,323 @@
+"""
+Copyright 2018 Open Networking Foundation ( ONF )
+
+Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
+the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
+or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
+
+ TestON is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ ( at your option ) any later version.
+
+ TestON is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with TestON. If not, see <http://www.gnu.org/licenses/>.
+"""
+"""
+Description: This test is to determine if ONOS can handle
+ all of its nodes restarting after being restored from a backup
+
+List of test cases:
+CASE1: Compile ONOS and push it to the test machines
+CASE2: Assign devices to controllers
+CASE21: Assign mastership to controllers
+CASE3: Assign intents
+CASE4: Ping across added host intents
+CASE5: Reading state of ONOS
+CASE6: The Failure case.
+CASE7: Check state after control plane failure
+CASE8: Compare topo
+CASE9: Link s3-s28 down
+CASE10: Link s3-s28 up
+CASE11: Switch down
+CASE12: Switch up
+CASE13: Clean up
+CASE14: start election app on all onos nodes
+CASE15: Check that Leadership Election is still functional
+CASE16: Install Distributed Primitives app
+CASE17: Check for basic functionality with distributed primitives
+"""
+class HAbackupRecover:
+
+ def __init__( self ):
+ self.default = ''
+
+ def CASE1( self, main ):
+ """
+ CASE1 is to compile ONOS and push it to the test machines
+
+ Startup sequence:
+ cell <name>
+ onos-verify-cell
+ NOTE: temporary - onos-remove-raft-logs
+ onos-uninstall
+ start mininet
+ git pull
+ mvn clean install
+ onos-package
+ onos-install -f
+ onos-wait-for-start
+ start cli sessions
+ start tcpdump
+ """
+ main.log.info( "ONOS HA test: Restart all ONOS nodes - " +
+ "initialization" )
+ # These are for csv plotting in jenkins
+ main.HAlabels = []
+ main.HAdata = []
+ try:
+ from tests.dependencies.ONOSSetup import ONOSSetup
+ main.testSetUp = ONOSSetup()
+ except ImportError:
+ main.log.error( "ONOSSetup not found exiting the test" )
+ main.cleanAndExit()
+ main.testSetUp.envSetupDescription()
+ try:
+ from tests.HA.dependencies.HA import HA
+ main.HA = HA()
+ # load some variables from the params file
+ cellName = main.params[ 'ENV' ][ 'cellName' ]
+ main.apps = main.params[ 'ENV' ][ 'appString' ]
+ stepResult = main.testSetUp.envSetup()
+ except Exception as e:
+ main.testSetUp.envSetupException( e )
+ main.testSetUp.evnSetupConclusion( stepResult )  # NOTE(review): stepResult is unbound if the try block raised early - confirm envSetupException exits
+
+ main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
+ extraApply=main.HA.startingMininet )
+
+ main.HA.initialSetUp()
+
+ def CASE2( self, main ):
+ """
+ Assign devices to controllers
+ """
+ main.HA.assignDevices( main )
+
+ def CASE21( self, main ):
+ """
+ Assign mastership to controllers
+ """
+ main.HA.assignMastership( main )
+
+ def CASE3( self, main ):
+ """
+ Assign intents
+ """
+ main.HA.assignIntents( main )
+
+ def CASE4( self, main ):
+ """
+ Ping across added host intents
+ """
+ main.HA.pingAcrossHostIntent( main )
+
+ def CASE5( self, main ):
+ """
+ Reading state of ONOS
+ """
+ main.HA.readingState( main )
+
+ def CASE6( self, main ):
+ """
+ The Failure case: back up ONOS data, reinstall ONOS, restore the data and restart all nodes.
+ """
+ import time
+ assert main, "main not defined"
+ assert utilities.assert_equals, "utilities.assert_equals not defined"
+ try:
+ main.HAlabels
+ except ( NameError, AttributeError ):
+ main.log.error( "main.HAlabels not defined, setting to []" )
+ main.HAlabels = []
+ try:
+ main.HAdata
+ except ( NameError, AttributeError ):
+ main.log.error( "main.HAdata not defined, setting to []" )
+ main.HAdata = []
+
+ main.case( "Restart entire ONOS cluster with backed up state" )
+
+ main.step( "Backup ONOS data" )
+ location = "/tmp/" + main.TEST + ".tar.gz"
+ backupResult = main.HA.backupData( main, location )
+ utilities.assert_equals( expect=True, actual=backupResult,
+ onpass="ONOS backup succeded",
+ onfail="ONOS backup failed" )
+
+ main.step( "Checking ONOS Logs for errors" )
+ for ctrl in main.Cluster.active():
+ main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
+ main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )
+
+ killTime = time.time()  # start of the interval reported as main.restartTime below
+ main.testSetUp.uninstallOnos( main.Cluster, uninstallMax=True )
+
+ clusterSize = len( main.Cluster.active() )
+ main.Cluster.setRunningNode( 0 ) # So we can install without starting ONOS
+ main.testSetUp.installOnos( main.Cluster, installMax=True )
+ main.Cluster.setRunningNode( clusterSize )
+
+ main.step( "Restore ONOS data" )
+ restoreResult = main.HA.restoreData( main, location )
+ utilities.assert_equals( expect=True, actual=restoreResult,
+ onpass="ONOS restore succeded",
+ onfail="ONOS restore failed" )
+
+ main.step( "Restart ONOS nodes" )
+ started = main.Cluster.command( "onosStart",
+ args=[ "ipAddress" ],
+ getFrom=0,
+ funcFromCtrl=True )  # NOTE(review): 'started' is never checked afterwards
+ for ctrl in main.Cluster.controllers:
+ ctrl.active = True  # flag every controller as active again before reconnecting
+ main.log.debug( repr( ctrl ) )
+
+ main.testSetUp.setupSsh( main.Cluster )
+ main.testSetUp.checkOnosService( main.Cluster )
+ main.testSetUp.startOnosClis( main.Cluster )
+
+ ready = utilities.retry( main.Cluster.command,
+ False,
+ kwargs={ "function": "summary", "contentCheck": True },
+ sleep=30,
+ attempts=10 )
+ utilities.assert_equals( expect=True, actual=ready,
+ onpass="ONOS summary command succeded",
+ onfail="ONOS summary command failed" )
+ if not ready:
+ main.cleanAndExit()
+
+ # Grab the time of restart so we can check how long the gossip
+ # protocol has had time to work
+ main.restartTime = time.time() - killTime
+ main.log.debug( "Restart time: " + str( main.restartTime ) )
+ main.HAlabels.append( "Restart" )
+ main.HAdata.append( str( main.restartTime ) )
+
+ # Rerun for election on restarted nodes
+ runResults = main.Cluster.command( "electionTestRun", returnBool=True )
+ utilities.assert_equals( expect=True, actual=runResults,
+ onpass="Reran for election",
+ onfail="Failed to rerun for election" )
+
+ main.HA.commonChecks()
+
+ def CASE7( self, main ):
+ """
+ Check state after ONOS failure
+ """
+ # NOTE: Store has no durability, so intents are lost across system
+ # restarts
+ main.HA.checkStateAfterEvent( main, afterWhich=0, isRestart=True )
+
+ main.step( "Leadership Election is still functional" )
+ # Test of LeadershipElection
+ leaderList = []
+ leaderResult = main.TRUE
+
+ for ctrl in main.Cluster.active():
+ ctrl.CLI.electionTestLeader()  # NOTE(review): result discarded and queried again on the next line - confirm intentional
+ leaderN = ctrl.CLI.electionTestLeader()
+ leaderList.append( leaderN )
+ if leaderN == main.FALSE:
+ # error in response
+ main.log.error( "Something is wrong with " +
+ "electionTestLeader function, check the" +
+ " error logs" )
+ leaderResult = main.FALSE
+ elif leaderN is None:
+ main.log.error( ctrl.name +
+ " shows no leader for the election-app." )
+ leaderResult = main.FALSE
+ if len( set( leaderList ) ) != 1:
+ leaderResult = main.FALSE
+ main.log.error(
+ "Inconsistent view of leader for the election test app" )
+ # TODO: print the list
+ utilities.assert_equals(
+ expect=main.TRUE,
+ actual=leaderResult,
+ onpass="Leadership election passed",
+ onfail="Something went wrong with Leadership election" )
+
+ def CASE8( self, main ):
+ """
+ Compare topo
+ """
+ main.HA.compareTopo( main )
+
+ def CASE9( self, main ):
+ """
+ Link s3-s28 down
+ """
+ main.HA.linkDown( main )
+
+ def CASE10( self, main ):
+ """
+ Link s3-s28 up
+ """
+ main.HA.linkUp( main )
+
+ def CASE11( self, main ):
+ """
+ Switch Down
+ """
+ # NOTE: You should probably run a topology check after this
+ main.HA.switchDown( main )
+
+ def CASE12( self, main ):
+ """
+ Switch Up
+ """
+ # NOTE: You should probably run a topology check after this
+ main.HA.switchUp( main )
+
+ def CASE13( self, main ):
+ """
+ Clean up
+ """
+ main.HA.cleanUp( main )
+
+ def CASE14( self, main ):
+ """
+ start election app on all onos nodes
+ """
+ try:
+ main.HA.startElectionApp( main )
+ except Exception as e:
+ main.log.error( e )
+
+ def CASE15( self, main ):
+ """
+ Check that Leadership Election is still functional
+ 15.1 Run election on each node
+ 15.2 Check that each node has the same leaders and candidates
+ 15.3 Find current leader and withdraw
+ 15.4 Check that a new node was elected leader
+ 15.5 Check that the new leader was the candidate of old leader
+ 15.6 Run for election on old leader
+ 15.7 Check that oldLeader is a candidate, and leader if only 1 node
+ 15.8 Make sure that the old leader was added to the candidate list
+
+ old and new variable prefixes refer to data from before vs after
+ withdrawal and later before withdrawal vs after re-election
+ """
+ main.HA.isElectionFunctional( main )
+
+ def CASE16( self, main ):
+ """
+ Install Distributed Primitives app
+ """
+ main.HA.installDistributedPrimitiveApp( main )
+
+ def CASE17( self, main ):
+ """
+ Check for basic functionality with distributed primitives
+ """
+ main.HA.checkDistPrimitivesFunc( main )
diff --git a/TestON/tests/HA/HAbackupRecover/HAbackupRecover.topo b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.topo
new file mode 100644
index 0000000..4bf4bd4
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/HAbackupRecover.topo
@@ -0,0 +1,53 @@
+<TOPOLOGY>
+ <COMPONENT>
+
+ <ONOScell>
+ <host>localhost</host> # ONOS "bench" machine
+ <user>sdn</user>
+ <password>rocks</password>
+ <type>OnosClusterDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ <cluster_name></cluster_name> # Used as a prefix for cluster components. Defaults to 'ONOS'
+ <diff_clihost></diff_clihost> # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+ <karaf_username></karaf_username>
+ <karaf_password></karaf_password>
+ <web_user></web_user>
+ <web_pass></web_pass>
+ <rest_port></rest_port>
+ <prompt></prompt> # TODO: we technically need a few of these, one per component
+ <onos_home></onos_home> # defines where onos home is
+ <nodes> 7 </nodes> # number of nodes in the cluster
+ </COMPONENTS>
+ </ONOScell>
+
+ <Mininet1>
+ <host>OCN</host>
+ <user>sdn</user>
+ <password>rocks</password>
+ <type>MininetCliDriver</type>
+ <connect_order>2</connect_order>
+ <COMPONENTS>
+ #Specify the Option for mininet
+ <arg1> --custom ~/mininet/custom/obelisk.py </arg1>
+ <arg2> --topo obelisk </arg2>
+ <arg3> --switch ovs,protocols=OpenFlow13 </arg3>
+ <controller> none </controller>
+ <home>~/mininet/custom/</home>
+ <prompt></prompt>
+ </COMPONENTS>
+ </Mininet1>
+
+ <Mininet2>
+ <host>OCN</host>
+ <user>sdn</user>
+ <password>rocks</password>
+ <type>RemoteMininetDriver</type>
+ <connect_order>3</connect_order>
+ <COMPONENTS>
+ <prompt></prompt>
+ </COMPONENTS>
+ </Mininet2>
+
+ </COMPONENT>
+</TOPOLOGY>
diff --git a/TestON/tests/HA/HAbackupRecover/README b/TestON/tests/HA/HAbackupRecover/README
new file mode 100644
index 0000000..b5bce27
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/README
@@ -0,0 +1,23 @@
+This test is designed to verify that an ONOS cluster behaves correctly when
+restoring an ONOS cluster from backups. We will take a backup of ONOS data
+for each node in the cluster. Then stop and reinstall ONOS on each node.
+Then copy the backup data to the correct locations, restart ONOS, and verify
+correct behavior as the cluster restarts.
+
+The general structure for the test:
+- Startup
+- Assign switches
+- Verify ONOS state and functionality
+ - Device mastership
+ - Intents
+ - Leadership election
+ - Distributed Primitives
+- Take backup of ONOS data
+- Stop ONOS nodes
+- Reinstall ONOS nodes
+- Restore data from backups
+- Restart ONOS nodes
+- Verify ONOS state and functionality
+- Dataplane failures
+ - link down and up
+ - switch down and up
diff --git a/TestON/tests/HA/HAbackupRecover/__init__.py b/TestON/tests/HA/HAbackupRecover/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/__init__.py
diff --git a/TestON/tests/HA/HAbackupRecover/dependencies/__init__.py b/TestON/tests/HA/HAbackupRecover/dependencies/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/HA/HAbackupRecover/dependencies/__init__.py
diff --git a/TestON/tests/HA/dependencies/HA.py b/TestON/tests/HA/dependencies/HA.py
index c61cd4f..262dbaf 100644
--- a/TestON/tests/HA/dependencies/HA.py
+++ b/TestON/tests/HA/dependencies/HA.py
@@ -3795,3 +3795,30 @@
utilities.assert_equals( expect=True, actual=nodeResults,
onpass="Nodes check successful",
onfail="Nodes check NOT successful" )
+
+    def backupData( self, main, location ):
+        """
+        Backs up ONOS data and logs to `location` on each active node in a cluster; returns True only if every node's backup succeeded.
+        """
+        result = True
+        for ctrl in main.Cluster.active():
+            try:
+                ctrl.server.handle.sendline( "rm " + location )  # drop any stale backup left at the same path
+                ctrl.server.handle.expect( ctrl.server.prompt )
+                main.log.debug( ctrl.server.handle.before + ctrl.server.handle.after )
+            except pexpect.ExceptionPexpect as e:
+                main.log.error( e )
+                main.cleanAndExit()
+            ctrl.CLI.log( "'Starting backup of onos data'", level="INFO" )
+            result = ( ctrl.server.backupData( location ) == main.TRUE ) and result  # call first: an earlier failure must not skip this node
+            ctrl.CLI.log( "'End of backup of onos data'", level="INFO" )
+        return result
+
+    def restoreData( self, main, location ):
+        """
+        Restores ONOS data and logs from `location` on each node in a cluster; returns True only if every node's restore succeeded.
+        """
+        result = True
+        for ctrl in main.Cluster.controllers:
+            result = ( ctrl.server.restoreData( location ) == main.TRUE ) and result  # call first: an earlier failure must not skip this node
+        return result