/*
 * Copyright 2017-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | package org.onosproject.upgrade.impl; |
| 17 | |
| 18 | import java.util.Objects; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 19 | import java.util.Set; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 20 | import java.util.concurrent.atomic.AtomicReference; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 21 | import java.util.stream.Collectors; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 22 | |
| 23 | import org.apache.felix.scr.annotations.Activate; |
| 24 | import org.apache.felix.scr.annotations.Component; |
| 25 | import org.apache.felix.scr.annotations.Deactivate; |
| 26 | import org.apache.felix.scr.annotations.Reference; |
| 27 | import org.apache.felix.scr.annotations.ReferenceCardinality; |
| 28 | import org.apache.felix.scr.annotations.Service; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 29 | import org.onosproject.cluster.ClusterEvent; |
| 30 | import org.onosproject.cluster.ClusterEventListener; |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 31 | import org.onosproject.cluster.ClusterService; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 32 | import org.onosproject.cluster.ControllerNode; |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 33 | import org.onosproject.cluster.MembershipService; |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 34 | import org.onosproject.cluster.NodeId; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 35 | import org.onosproject.core.Version; |
| 36 | import org.onosproject.core.VersionService; |
| 37 | import org.onosproject.event.AbstractListenerManager; |
| 38 | import org.onosproject.store.serializers.KryoNamespaces; |
| 39 | import org.onosproject.store.service.AtomicValue; |
| 40 | import org.onosproject.store.service.AtomicValueEvent; |
| 41 | import org.onosproject.store.service.AtomicValueEventListener; |
| 42 | import org.onosproject.store.service.CoordinationService; |
| 43 | import org.onosproject.store.service.Serializer; |
| 44 | import org.onosproject.upgrade.Upgrade; |
| 45 | import org.onosproject.upgrade.UpgradeAdminService; |
| 46 | import org.onosproject.upgrade.UpgradeEvent; |
| 47 | import org.onosproject.upgrade.UpgradeEventListener; |
| 48 | import org.onosproject.upgrade.UpgradeService; |
| 49 | import org.slf4j.Logger; |
| 50 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 51 | import static org.onosproject.security.AppGuard.checkPermission; |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 52 | import static org.onosproject.security.AppPermission.Type.CLUSTER_EVENT; |
| 53 | import static org.onosproject.security.AppPermission.Type.UPGRADE_EVENT; |
| 54 | import static org.onosproject.security.AppPermission.Type.UPGRADE_READ; |
| 55 | import static org.onosproject.security.AppPermission.Type.UPGRADE_WRITE; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 56 | import static org.slf4j.LoggerFactory.getLogger; |
| 57 | |
| 58 | /** |
| 59 | * Upgrade service implementation. |
| 60 | * <p> |
| 61 | * This implementation uses the {@link CoordinationService} to store upgrade state in a version-agnostic primitive. |
| 62 | * Upgrade state can be seen by current and future version nodes. |
| 63 | */ |
| 64 | @Component(immediate = true) |
| 65 | @Service |
| 66 | public class UpgradeManager |
| 67 | extends AbstractListenerManager<UpgradeEvent, UpgradeEventListener> |
| 68 | implements UpgradeService, UpgradeAdminService { |
| 69 | |
| 70 | private final Logger log = getLogger(getClass()); |
| 71 | |
| 72 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 73 | protected VersionService versionService; |
| 74 | |
| 75 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 76 | protected CoordinationService coordinationService; |
| 77 | |
| 78 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 79 | protected ClusterService clusterService; |
| 80 | |
| 81 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 82 | protected MembershipService membershipService; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 83 | |
| 84 | private Version localVersion; |
| 85 | private AtomicValue<Upgrade> state; |
| 86 | private final AtomicReference<Upgrade> currentState = new AtomicReference<>(); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 87 | private final AtomicValueEventListener<Upgrade> stateListener = this::handleUpgradeEvent; |
| 88 | private final ClusterEventListener clusterListener = this::handleClusterEvent; |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 89 | |
| 90 | @Activate |
| 91 | public void activate() { |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 92 | eventDispatcher.addSink(UpgradeEvent.class, listenerRegistry); |
| 93 | |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 94 | state = coordinationService.<Upgrade>atomicValueBuilder() |
| 95 | .withName("onos-upgrade-state") |
| 96 | .withSerializer(Serializer.using(KryoNamespaces.API)) |
| 97 | .build() |
| 98 | .asAtomicValue(); |
| 99 | localVersion = versionService.version(); |
| 100 | |
| 101 | currentState.set(state.get()); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 102 | if (getState() == null) { |
| 103 | initializeState(new Upgrade(localVersion, localVersion, Upgrade.Status.INACTIVE)); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 104 | } |
| 105 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 106 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 107 | |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 108 | // If the upgrade state is initialized then check the node version. |
| 109 | if (upgrade.status() == Upgrade.Status.INITIALIZED) { |
| 110 | // If the source version equals the target version, attempt to update the target version. |
| 111 | if (Objects.equals(upgrade.source(), upgrade.target()) && !Objects.equals(upgrade.target(), localVersion)) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 112 | checkPermission(UPGRADE_WRITE); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 113 | upgrade = new Upgrade(upgrade.source(), localVersion, upgrade.status()); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 114 | initializeState(upgrade); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 115 | } |
| 116 | } |
| 117 | |
| 118 | // If the upgrade status is active, verify that the local version matches the upgrade version. |
| 119 | if (upgrade.status().active() && !Objects.equals(upgrade.source(), upgrade.target())) { |
| 120 | // If the upgrade source/target are not equal, validate that the node's version is consistent |
| 121 | // with versions in the upgrade. There are two possibilities: that a not-yet-upgraded node is being |
| 122 | // restarted, or that a node has been upgraded, so we need to check that this node is running either |
| 123 | // the source or target version. |
| 124 | if (!Objects.equals(localVersion, upgrade.source()) && !Objects.equals(localVersion, upgrade.target())) { |
| 125 | log.error("Cannot upgrade node to version {}; Upgrade to {} already in progress", |
| 126 | localVersion, upgrade.target()); |
| 127 | throw new IllegalStateException("Cannot upgrade node to version " + localVersion + "; Upgrade to " + |
| 128 | upgrade.target() + " already in progress"); |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | state.addListener(stateListener); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 133 | clusterService.addListener(clusterListener); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 134 | log.info("Started"); |
| 135 | } |
| 136 | |
| 137 | @Deactivate |
| 138 | public void deactivate() { |
Jordan Halterman | 07f052b | 2017-10-08 14:22:41 -0700 | [diff] [blame] | 139 | eventDispatcher.removeSink(UpgradeEvent.class); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 140 | state.removeListener(stateListener); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 141 | clusterService.removeListener(clusterListener); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 142 | log.info("Stopped"); |
| 143 | } |
| 144 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 145 | /** |
| 146 | * Initializes the state when the cluster starts. |
| 147 | * <p> |
| 148 | * This method must be called when updating the state in order to check the permissions |
| 149 | * |
| 150 | * @param newState new state |
| 151 | */ |
| 152 | private void initializeState(Upgrade newState) { |
| 153 | checkPermission(UPGRADE_WRITE); |
| 154 | currentState.set(newState); |
| 155 | state.set(newState); |
| 156 | } |
| 157 | |
| 158 | /** |
| 159 | * Changes the current state to new one. |
| 160 | * <p> |
| 161 | * This method must be called when changing between states in order to check the permissions and |
| 162 | * to avoid concurrent state modifications |
| 163 | * |
| 164 | * @param oldState current upgrade state |
| 165 | * @param newState new upgrade state |
| 166 | * |
| 167 | * @throws IllegalStateException if an upgrade is already in progress |
| 168 | */ |
| 169 | private void changeState(Upgrade oldState, Upgrade newState) { |
| 170 | checkPermission(UPGRADE_WRITE); |
| 171 | if (!state.compareAndSet(oldState, newState)) { |
| 172 | throw new IllegalStateException("Concurrent upgrade modification"); |
| 173 | } else { |
| 174 | currentState.set(newState); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | @Override |
| 179 | public Upgrade getState() { |
| 180 | checkPermission(UPGRADE_READ); |
| 181 | return currentState.get(); |
| 182 | } |
| 183 | |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 184 | @Override |
| 185 | public boolean isUpgrading() { |
| 186 | return getState().status().active(); |
| 187 | } |
| 188 | |
| 189 | @Override |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 190 | public Version getVersion() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 191 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 192 | return upgrade.status().upgraded() |
| 193 | ? upgrade.target() |
| 194 | : upgrade.source(); |
| 195 | } |
| 196 | |
| 197 | @Override |
| 198 | public boolean isLocalActive() { |
| 199 | return localVersion.equals(getVersion()); |
| 200 | } |
| 201 | |
| 202 | @Override |
| 203 | public boolean isLocalUpgraded() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 204 | Upgrade upgrade = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 205 | return upgrade.status().active() |
| 206 | && !upgrade.source().equals(upgrade.target()) |
| 207 | && localVersion.equals(upgrade.target()); |
| 208 | } |
| 209 | |
| 210 | @Override |
| 211 | public void initialize() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 212 | Upgrade inactive = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 213 | |
| 214 | // If the current upgrade status is active, fail initialization. |
| 215 | if (inactive.status().active()) { |
| 216 | throw new IllegalStateException("Upgrade already active"); |
| 217 | } |
| 218 | |
| 219 | // Set the upgrade status to INITIALIZING. |
| 220 | Upgrade initializing = new Upgrade( |
| 221 | localVersion, |
| 222 | localVersion, |
| 223 | Upgrade.Status.INITIALIZING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 224 | changeState(inactive, initializing); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 225 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 226 | // Set the upgrade status to INITIALIZED. |
| 227 | Upgrade initialized = new Upgrade( |
| 228 | initializing.source(), |
| 229 | initializing.target(), |
| 230 | Upgrade.Status.INITIALIZED); |
| 231 | changeState(initializing, initialized); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 232 | } |
| 233 | |
| 234 | @Override |
| 235 | public void upgrade() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 236 | Upgrade initialized = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 237 | |
| 238 | // If the current upgrade status is not INITIALIZED, throw an exception. |
| 239 | if (initialized.status() != Upgrade.Status.INITIALIZED) { |
| 240 | throw new IllegalStateException("Upgrade not initialized"); |
| 241 | } |
| 242 | |
| 243 | // Set the upgrade status to UPGRADING. |
| 244 | Upgrade upgrading = new Upgrade( |
| 245 | initialized.source(), |
| 246 | initialized.target(), |
| 247 | Upgrade.Status.UPGRADING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 248 | changeState(initialized, upgrading); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 249 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 250 | // Set the upgrade status to UPGRADED. |
| 251 | Upgrade upgraded = new Upgrade( |
| 252 | upgrading.source(), |
| 253 | upgrading.target(), |
| 254 | Upgrade.Status.UPGRADED); |
| 255 | changeState(upgrading, upgraded); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 256 | } |
| 257 | |
| 258 | @Override |
| 259 | public void commit() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 260 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 261 | |
| 262 | // If the current upgrade status is not UPGRADED, throw an exception. |
| 263 | if (upgraded.status() != Upgrade.Status.UPGRADED) { |
| 264 | throw new IllegalStateException("Upgrade not performed"); |
| 265 | } |
| 266 | |
| 267 | // Determine whether any nodes have not been upgraded to the target version. |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 268 | boolean upgradeComplete = membershipService.getGroups().size() == 1 |
| 269 | && membershipService.getLocalGroup().version().equals(upgraded.target()); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 270 | |
| 271 | // If some nodes have not yet been upgraded, throw an exception. |
| 272 | if (!upgradeComplete) { |
| 273 | throw new IllegalStateException("Some nodes have not yet been upgraded to version " + upgraded.target()); |
| 274 | } |
| 275 | |
| 276 | // Set the upgrade status to COMMITTING. |
| 277 | Upgrade committing = new Upgrade( |
| 278 | upgraded.source(), |
| 279 | upgraded.target(), |
| 280 | Upgrade.Status.COMMITTING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 281 | changeState(upgraded, committing); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 282 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 283 | // Set the upgrade status to COMMITTED. |
| 284 | Upgrade committed = new Upgrade( |
| 285 | committing.source(), |
| 286 | committing.target(), |
| 287 | Upgrade.Status.COMMITTED); |
| 288 | changeState(committing, committed); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 289 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 290 | // Set the upgrade status to INACTIVE. |
| 291 | Upgrade inactive = new Upgrade( |
| 292 | localVersion, |
| 293 | localVersion, |
| 294 | Upgrade.Status.INACTIVE); |
| 295 | changeState(committed, inactive); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 296 | } |
| 297 | |
| 298 | @Override |
| 299 | public void rollback() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 300 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 301 | |
| 302 | // If the current upgrade status is not UPGRADED, throw an exception. |
| 303 | if (upgraded.status() != Upgrade.Status.UPGRADED) { |
| 304 | throw new IllegalStateException("Upgrade not performed"); |
| 305 | } |
| 306 | |
| 307 | // Set the upgrade status to ROLLING_BACK. |
| 308 | Upgrade rollingBack = new Upgrade( |
| 309 | upgraded.source(), |
| 310 | upgraded.target(), |
| 311 | Upgrade.Status.ROLLING_BACK); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 312 | changeState(upgraded, rollingBack); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 313 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 314 | // Set the upgrade status to ROLLED_BACK. |
| 315 | Upgrade rolledBack = new Upgrade( |
| 316 | rollingBack.source(), |
| 317 | rollingBack.target(), |
| 318 | Upgrade.Status.ROLLED_BACK); |
| 319 | changeState(rollingBack, rolledBack); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 320 | } |
| 321 | |
| 322 | @Override |
| 323 | public void reset() { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 324 | Upgrade upgraded = getState(); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 325 | |
| 326 | // If the current upgrade status is not INITIALIZED or ROLLED_BACK, throw an exception. |
| 327 | if (upgraded.status() != Upgrade.Status.INITIALIZED |
| 328 | && upgraded.status() != Upgrade.Status.ROLLED_BACK) { |
| 329 | throw new IllegalStateException("Upgrade not rolled back"); |
| 330 | } |
| 331 | |
| 332 | // Determine whether any nodes are still running the target version. |
Jordan Halterman | 28183ee | 2017-10-17 17:29:10 -0700 | [diff] [blame] | 333 | boolean rollbackComplete = membershipService.getGroups().size() == 1 |
| 334 | && membershipService.getLocalGroup().version().equals(upgraded.source()); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 335 | |
| 336 | // If some nodes have not yet been downgraded, throw an exception. |
| 337 | if (!rollbackComplete) { |
| 338 | throw new IllegalStateException("Some nodes have not yet been downgraded to version " + upgraded.source()); |
| 339 | } |
| 340 | |
| 341 | // Set the upgrade status to RESETTING. |
| 342 | Upgrade resetting = new Upgrade( |
| 343 | upgraded.source(), |
| 344 | upgraded.target(), |
| 345 | Upgrade.Status.RESETTING); |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 346 | changeState(upgraded, resetting); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 347 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 348 | // Set the upgrade status to RESET. |
| 349 | Upgrade reset = new Upgrade( |
| 350 | resetting.source(), |
| 351 | resetting.target(), |
| 352 | Upgrade.Status.RESET); |
| 353 | changeState(resetting, reset); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 354 | |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 355 | // Set the upgrade status to INACTIVE. |
| 356 | Upgrade inactive = new Upgrade( |
| 357 | localVersion, |
| 358 | localVersion, |
| 359 | Upgrade.Status.INACTIVE); |
| 360 | changeState(reset, inactive); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 361 | } |
| 362 | |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 363 | /** |
| 364 | * Handles a cluster event. |
| 365 | * |
| 366 | * @param event the cluster event |
| 367 | */ |
| 368 | protected void handleClusterEvent(ClusterEvent event) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 369 | checkPermission(CLUSTER_EVENT); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 370 | // If an instance was deactivated, check whether we need to roll back the upgrade. |
| 371 | if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 372 | Upgrade upgrade = getState(); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 373 | if (upgrade.status().upgraded()) { |
| 374 | // Get the upgraded subset of the cluster and check whether the down node is a member |
| 375 | // of the upgraded subset. If so, roll back the upgrade to tolerate the failure. |
| 376 | Set<NodeId> upgradedNodes = clusterService.getNodes().stream() |
| 377 | .map(ControllerNode::id) |
| 378 | .filter(id -> clusterService.getVersion(id).equals(upgrade.target())) |
| 379 | .collect(Collectors.toSet()); |
| 380 | if (upgradedNodes.contains(event.subject().id())) { |
Jordan Halterman | 3c65d1b | 2018-01-09 13:01:03 -0800 | [diff] [blame] | 381 | log.warn("Upgrade failure detected: rolling back upgrade"); |
Jordan Halterman | 5ca0793 | 2017-10-07 13:28:22 -0700 | [diff] [blame] | 382 | rollback(); |
| 383 | } |
| 384 | } |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | /** |
| 389 | * Handles an upgrade state event. |
| 390 | * |
| 391 | * @param event the upgrade value event |
| 392 | */ |
| 393 | protected void handleUpgradeEvent(AtomicValueEvent<Upgrade> event) { |
slowr | 0a44fde | 2017-10-09 14:48:53 -0700 | [diff] [blame] | 394 | checkPermission(UPGRADE_EVENT); |
Jordan Halterman | 980a8c1 | 2017-09-22 18:01:19 -0700 | [diff] [blame] | 395 | currentState.set(event.newValue()); |
| 396 | switch (event.newValue().status()) { |
| 397 | case INITIALIZED: |
| 398 | post(new UpgradeEvent(UpgradeEvent.Type.INITIALIZED, event.newValue())); |
| 399 | break; |
| 400 | case UPGRADED: |
| 401 | post(new UpgradeEvent(UpgradeEvent.Type.UPGRADED, event.newValue())); |
| 402 | break; |
| 403 | case COMMITTED: |
| 404 | post(new UpgradeEvent(UpgradeEvent.Type.COMMITTED, event.newValue())); |
| 405 | break; |
| 406 | case ROLLED_BACK: |
| 407 | post(new UpgradeEvent(UpgradeEvent.Type.ROLLED_BACK, event.newValue())); |
| 408 | break; |
| 409 | case RESET: |
| 410 | post(new UpgradeEvent(UpgradeEvent.Type.RESET, event.newValue())); |
| 411 | break; |
| 412 | default: |
| 413 | break; |
| 414 | } |
| 415 | } |
| 416 | } |