Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017-present Open Networking Foundation |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | package org.onosproject.upgrade.impl; |
| 17 | |
| 18 | import java.util.Objects; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 19 | import java.util.Set; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 20 | import java.util.concurrent.atomic.AtomicReference; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 21 | import java.util.stream.Collectors; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 22 | |
| 23 | import org.apache.felix.scr.annotations.Activate; |
| 24 | import org.apache.felix.scr.annotations.Component; |
| 25 | import org.apache.felix.scr.annotations.Deactivate; |
| 26 | import org.apache.felix.scr.annotations.Reference; |
| 27 | import org.apache.felix.scr.annotations.ReferenceCardinality; |
| 28 | import org.apache.felix.scr.annotations.Service; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 29 | import org.onosproject.cluster.ClusterEvent; |
| 30 | import org.onosproject.cluster.ClusterEventListener; |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 31 | import org.onosproject.cluster.ClusterService; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 32 | import org.onosproject.cluster.ControllerNode; |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 33 | import org.onosproject.cluster.MembershipService; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 34 | import org.onosproject.cluster.NodeId; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 35 | import org.onosproject.core.Version; |
| 36 | import org.onosproject.core.VersionService; |
| 37 | import org.onosproject.event.AbstractListenerManager; |
| 38 | import org.onosproject.store.serializers.KryoNamespaces; |
| 39 | import org.onosproject.store.service.AtomicValue; |
| 40 | import org.onosproject.store.service.AtomicValueEvent; |
| 41 | import org.onosproject.store.service.AtomicValueEventListener; |
| 42 | import org.onosproject.store.service.CoordinationService; |
| 43 | import org.onosproject.store.service.Serializer; |
| 44 | import org.onosproject.upgrade.Upgrade; |
| 45 | import org.onosproject.upgrade.UpgradeAdminService; |
| 46 | import org.onosproject.upgrade.UpgradeEvent; |
| 47 | import org.onosproject.upgrade.UpgradeEventListener; |
| 48 | import org.onosproject.upgrade.UpgradeService; |
| 49 | import org.slf4j.Logger; |
| 50 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 51 | import static org.onosproject.security.AppGuard.checkPermission; |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 52 | import static org.onosproject.security.AppPermission.Type.CLUSTER_EVENT; |
| 53 | import static org.onosproject.security.AppPermission.Type.UPGRADE_EVENT; |
| 54 | import static org.onosproject.security.AppPermission.Type.UPGRADE_READ; |
| 55 | import static org.onosproject.security.AppPermission.Type.UPGRADE_WRITE; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 56 | import static org.slf4j.LoggerFactory.getLogger; |
| 57 | |
| 58 | /** |
| 59 | * Upgrade service implementation. |
| 60 | * <p> |
| 61 | * This implementation uses the {@link CoordinationService} to store upgrade state in a version-agnostic primitive. |
| 62 | * Upgrade state can be seen by current and future version nodes. |
| 63 | */ |
| 64 | @Component(immediate = true) |
| 65 | @Service |
| 66 | public class UpgradeManager |
| 67 | extends AbstractListenerManager<UpgradeEvent, UpgradeEventListener> |
| 68 | implements UpgradeService, UpgradeAdminService { |
| 69 | |
| 70 | private final Logger log = getLogger(getClass()); |
| 71 | |
| 72 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 73 | protected VersionService versionService; |
| 74 | |
| 75 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 76 | protected CoordinationService coordinationService; |
| 77 | |
| 78 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 79 | protected ClusterService clusterService; |
| 80 | |
| 81 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 82 | protected MembershipService membershipService; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 83 | |
| 84 | private Version localVersion; |
| 85 | private AtomicValue<Upgrade> state; |
| 86 | private final AtomicReference<Upgrade> currentState = new AtomicReference<>(); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 87 | private final AtomicValueEventListener<Upgrade> stateListener = this::handleUpgradeEvent; |
| 88 | private final ClusterEventListener clusterListener = this::handleClusterEvent; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 89 | |
| 90 | @Activate |
| 91 | public void activate() { |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 92 | eventDispatcher.addSink(UpgradeEvent.class, listenerRegistry); |
| 93 | |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 94 | state = coordinationService.<Upgrade>atomicValueBuilder() |
| 95 | .withName("onos-upgrade-state") |
| 96 | .withSerializer(Serializer.using(KryoNamespaces.API)) |
| 97 | .build() |
| 98 | .asAtomicValue(); |
| 99 | localVersion = versionService.version(); |
| 100 | |
| 101 | currentState.set(state.get()); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 102 | if (getState() == null) { |
| 103 | initializeState(new Upgrade(localVersion, localVersion, Upgrade.Status.INACTIVE)); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 104 | } |
| 105 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 106 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 107 | |
| 108 | // If the upgrade state is not initialized, ensure this node matches the version of the cluster. |
| 109 | if (!upgrade.status().active() && !Objects.equals(upgrade.source(), localVersion)) { |
| 110 | log.error("Node version {} inconsistent with cluster version {}", localVersion, upgrade.source()); |
| 111 | throw new IllegalStateException("Node version " + localVersion + |
| 112 | " inconsistent with cluster version " + upgrade.source()); |
| 113 | } |
| 114 | |
| 115 | // If the upgrade state is initialized then check the node version. |
| 116 | if (upgrade.status() == Upgrade.Status.INITIALIZED) { |
| 117 | // If the source version equals the target version, attempt to update the target version. |
| 118 | if (Objects.equals(upgrade.source(), upgrade.target()) && !Objects.equals(upgrade.target(), localVersion)) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 119 | checkPermission(UPGRADE_WRITE); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 120 | upgrade = new Upgrade(upgrade.source(), localVersion, upgrade.status()); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 121 | initializeState(upgrade); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 122 | } |
| 123 | } |
| 124 | |
| 125 | // If the upgrade status is active, verify that the local version matches the upgrade version. |
| 126 | if (upgrade.status().active() && !Objects.equals(upgrade.source(), upgrade.target())) { |
| 127 | // If the upgrade source/target are not equal, validate that the node's version is consistent |
| 128 | // with versions in the upgrade. There are two possibilities: that a not-yet-upgraded node is being |
| 129 | // restarted, or that a node has been upgraded, so we need to check that this node is running either |
| 130 | // the source or target version. |
| 131 | if (!Objects.equals(localVersion, upgrade.source()) && !Objects.equals(localVersion, upgrade.target())) { |
| 132 | log.error("Cannot upgrade node to version {}; Upgrade to {} already in progress", |
| 133 | localVersion, upgrade.target()); |
| 134 | throw new IllegalStateException("Cannot upgrade node to version " + localVersion + "; Upgrade to " + |
| 135 | upgrade.target() + " already in progress"); |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | state.addListener(stateListener); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 140 | clusterService.addListener(clusterListener); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 141 | log.info("Started"); |
| 142 | } |
| 143 | |
| 144 | @Deactivate |
| 145 | public void deactivate() { |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 146 | eventDispatcher.removeSink(UpgradeEvent.class); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 147 | state.removeListener(stateListener); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 148 | clusterService.removeListener(clusterListener); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 149 | log.info("Stopped"); |
| 150 | } |
| 151 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 152 | /** |
| 153 | * Initializes the state when the cluster starts. |
| 154 | * <p> |
| 155 | * This method must be called when updating the state in order to check the permissions |
| 156 | * |
| 157 | * @param newState new state |
| 158 | */ |
| 159 | private void initializeState(Upgrade newState) { |
| 160 | checkPermission(UPGRADE_WRITE); |
| 161 | currentState.set(newState); |
| 162 | state.set(newState); |
| 163 | } |
| 164 | |
| 165 | /** |
| 166 | * Changes the current state to new one. |
| 167 | * <p> |
| 168 | * This method must be called when changing between states in order to check the permissions and |
| 169 | * to avoid concurrent state modifications |
| 170 | * |
| 171 | * @param oldState current upgrade state |
| 172 | * @param newState new upgrade state |
| 173 | * |
| 174 | * @throws IllegalStateException if an upgrade is already in progress |
| 175 | */ |
| 176 | private void changeState(Upgrade oldState, Upgrade newState) { |
| 177 | checkPermission(UPGRADE_WRITE); |
| 178 | if (!state.compareAndSet(oldState, newState)) { |
| 179 | throw new IllegalStateException("Concurrent upgrade modification"); |
| 180 | } else { |
| 181 | currentState.set(newState); |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | @Override |
| 186 | public Upgrade getState() { |
| 187 | checkPermission(UPGRADE_READ); |
| 188 | return currentState.get(); |
| 189 | } |
| 190 | |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 191 | @Override |
| 192 | public boolean isUpgrading() { |
| 193 | return getState().status().active(); |
| 194 | } |
| 195 | |
| 196 | @Override |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 197 | public Version getVersion() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 198 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 199 | return upgrade.status().upgraded() |
| 200 | ? upgrade.target() |
| 201 | : upgrade.source(); |
| 202 | } |
| 203 | |
| 204 | @Override |
| 205 | public boolean isLocalActive() { |
| 206 | return localVersion.equals(getVersion()); |
| 207 | } |
| 208 | |
| 209 | @Override |
| 210 | public boolean isLocalUpgraded() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 211 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 212 | return upgrade.status().active() |
| 213 | && !upgrade.source().equals(upgrade.target()) |
| 214 | && localVersion.equals(upgrade.target()); |
| 215 | } |
| 216 | |
| 217 | @Override |
| 218 | public void initialize() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 219 | Upgrade inactive = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 220 | |
| 221 | // If the current upgrade status is active, fail initialization. |
| 222 | if (inactive.status().active()) { |
| 223 | throw new IllegalStateException("Upgrade already active"); |
| 224 | } |
| 225 | |
| 226 | // Set the upgrade status to INITIALIZING. |
| 227 | Upgrade initializing = new Upgrade( |
| 228 | localVersion, |
| 229 | localVersion, |
| 230 | Upgrade.Status.INITIALIZING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 231 | changeState(inactive, initializing); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 232 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 233 | // Set the upgrade status to INITIALIZED. |
| 234 | Upgrade initialized = new Upgrade( |
| 235 | initializing.source(), |
| 236 | initializing.target(), |
| 237 | Upgrade.Status.INITIALIZED); |
| 238 | changeState(initializing, initialized); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 239 | } |
| 240 | |
| 241 | @Override |
| 242 | public void upgrade() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 243 | Upgrade initialized = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 244 | |
| 245 | // If the current upgrade status is not INITIALIZED, throw an exception. |
| 246 | if (initialized.status() != Upgrade.Status.INITIALIZED) { |
| 247 | throw new IllegalStateException("Upgrade not initialized"); |
| 248 | } |
| 249 | |
| 250 | // Set the upgrade status to UPGRADING. |
| 251 | Upgrade upgrading = new Upgrade( |
| 252 | initialized.source(), |
| 253 | initialized.target(), |
| 254 | Upgrade.Status.UPGRADING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 255 | changeState(initialized, upgrading); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 256 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 257 | // Set the upgrade status to UPGRADED. |
| 258 | Upgrade upgraded = new Upgrade( |
| 259 | upgrading.source(), |
| 260 | upgrading.target(), |
| 261 | Upgrade.Status.UPGRADED); |
| 262 | changeState(upgrading, upgraded); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 263 | } |
| 264 | |
| 265 | @Override |
| 266 | public void commit() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 267 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 268 | |
| 269 | // If the current upgrade status is not UPGRADED, throw an exception. |
| 270 | if (upgraded.status() != Upgrade.Status.UPGRADED) { |
| 271 | throw new IllegalStateException("Upgrade not performed"); |
| 272 | } |
| 273 | |
| 274 | // Determine whether any nodes have not been upgraded to the target version. |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 275 | boolean upgradeComplete = membershipService.getGroups().size() == 1 |
| 276 | && membershipService.getLocalGroup().version().equals(upgraded.target()); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 277 | |
| 278 | // If some nodes have not yet been upgraded, throw an exception. |
| 279 | if (!upgradeComplete) { |
| 280 | throw new IllegalStateException("Some nodes have not yet been upgraded to version " + upgraded.target()); |
| 281 | } |
| 282 | |
| 283 | // Set the upgrade status to COMMITTING. |
| 284 | Upgrade committing = new Upgrade( |
| 285 | upgraded.source(), |
| 286 | upgraded.target(), |
| 287 | Upgrade.Status.COMMITTING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 288 | changeState(upgraded, committing); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 289 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 290 | // Set the upgrade status to COMMITTED. |
| 291 | Upgrade committed = new Upgrade( |
| 292 | committing.source(), |
| 293 | committing.target(), |
| 294 | Upgrade.Status.COMMITTED); |
| 295 | changeState(committing, committed); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 296 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 297 | // Set the upgrade status to INACTIVE. |
| 298 | Upgrade inactive = new Upgrade( |
| 299 | localVersion, |
| 300 | localVersion, |
| 301 | Upgrade.Status.INACTIVE); |
| 302 | changeState(committed, inactive); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 303 | } |
| 304 | |
| 305 | @Override |
| 306 | public void rollback() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 307 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 308 | |
| 309 | // If the current upgrade status is not UPGRADED, throw an exception. |
| 310 | if (upgraded.status() != Upgrade.Status.UPGRADED) { |
| 311 | throw new IllegalStateException("Upgrade not performed"); |
| 312 | } |
| 313 | |
| 314 | // Set the upgrade status to ROLLING_BACK. |
| 315 | Upgrade rollingBack = new Upgrade( |
| 316 | upgraded.source(), |
| 317 | upgraded.target(), |
| 318 | Upgrade.Status.ROLLING_BACK); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 319 | changeState(upgraded, rollingBack); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 320 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 321 | // Set the upgrade status to ROLLED_BACK. |
| 322 | Upgrade rolledBack = new Upgrade( |
| 323 | rollingBack.source(), |
| 324 | rollingBack.target(), |
| 325 | Upgrade.Status.ROLLED_BACK); |
| 326 | changeState(rollingBack, rolledBack); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 327 | } |
| 328 | |
| 329 | @Override |
| 330 | public void reset() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 331 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 332 | |
| 333 | // If the current upgrade status is not INITIALIZED or ROLLED_BACK, throw an exception. |
| 334 | if (upgraded.status() != Upgrade.Status.INITIALIZED |
| 335 | && upgraded.status() != Upgrade.Status.ROLLED_BACK) { |
| 336 | throw new IllegalStateException("Upgrade not rolled back"); |
| 337 | } |
| 338 | |
| 339 | // Determine whether any nodes are still running the target version. |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 340 | boolean rollbackComplete = membershipService.getGroups().size() == 1 |
| 341 | && membershipService.getLocalGroup().version().equals(upgraded.source()); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 342 | |
| 343 | // If some nodes have not yet been downgraded, throw an exception. |
| 344 | if (!rollbackComplete) { |
| 345 | throw new IllegalStateException("Some nodes have not yet been downgraded to version " + upgraded.source()); |
| 346 | } |
| 347 | |
| 348 | // Set the upgrade status to RESETTING. |
| 349 | Upgrade resetting = new Upgrade( |
| 350 | upgraded.source(), |
| 351 | upgraded.target(), |
| 352 | Upgrade.Status.RESETTING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 353 | changeState(upgraded, resetting); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 354 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 355 | // Set the upgrade status to RESET. |
| 356 | Upgrade reset = new Upgrade( |
| 357 | resetting.source(), |
| 358 | resetting.target(), |
| 359 | Upgrade.Status.RESET); |
| 360 | changeState(resetting, reset); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 361 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 362 | // Set the upgrade status to INACTIVE. |
| 363 | Upgrade inactive = new Upgrade( |
| 364 | localVersion, |
| 365 | localVersion, |
| 366 | Upgrade.Status.INACTIVE); |
| 367 | changeState(reset, inactive); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 368 | } |
| 369 | |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 370 | /** |
| 371 | * Handles a cluster event. |
| 372 | * |
| 373 | * @param event the cluster event |
| 374 | */ |
| 375 | protected void handleClusterEvent(ClusterEvent event) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 376 | checkPermission(CLUSTER_EVENT); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 377 | // If an instance was deactivated, check whether we need to roll back the upgrade. |
| 378 | if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 379 | Upgrade upgrade = getState(); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 380 | if (upgrade.status().upgraded()) { |
| 381 | // Get the upgraded subset of the cluster and check whether the down node is a member |
| 382 | // of the upgraded subset. If so, roll back the upgrade to tolerate the failure. |
| 383 | Set<NodeId> upgradedNodes = clusterService.getNodes().stream() |
| 384 | .map(ControllerNode::id) |
| 385 | .filter(id -> clusterService.getVersion(id).equals(upgrade.target())) |
| 386 | .collect(Collectors.toSet()); |
| 387 | if (upgradedNodes.contains(event.subject().id())) { |
Jordan Halterman | 3c65d1b | 2018-01-09 13:01:03 -0800 | [diff] [blame] | 388 | log.warn("Upgrade failure detected: rolling back upgrade"); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 389 | rollback(); |
| 390 | } |
| 391 | } |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | /** |
| 396 | * Handles an upgrade state event. |
| 397 | * |
| 398 | * @param event the upgrade value event |
| 399 | */ |
| 400 | protected void handleUpgradeEvent(AtomicValueEvent<Upgrade> event) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 401 | checkPermission(UPGRADE_EVENT); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 402 | currentState.set(event.newValue()); |
| 403 | switch (event.newValue().status()) { |
| 404 | case INITIALIZED: |
| 405 | post(new UpgradeEvent(UpgradeEvent.Type.INITIALIZED, event.newValue())); |
| 406 | break; |
| 407 | case UPGRADED: |
| 408 | post(new UpgradeEvent(UpgradeEvent.Type.UPGRADED, event.newValue())); |
| 409 | break; |
| 410 | case COMMITTED: |
| 411 | post(new UpgradeEvent(UpgradeEvent.Type.COMMITTED, event.newValue())); |
| 412 | break; |
| 413 | case ROLLED_BACK: |
| 414 | post(new UpgradeEvent(UpgradeEvent.Type.ROLLED_BACK, event.newValue())); |
| 415 | break; |
| 416 | case RESET: |
| 417 | post(new UpgradeEvent(UpgradeEvent.Type.RESET, event.newValue())); |
| 418 | break; |
| 419 | default: |
| 420 | break; |
| 421 | } |
| 422 | } |
| 423 | } |