blob: 99f1345d53b82f266295b74e53245f9bdfa02707 [file] [log] [blame]
Jordan Halterman980a8c12017-09-22 18:01:19 -07001/*
2 * Copyright 2017-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.onosproject.upgrade.impl;
17
Jordan Halterman5ca07932017-10-07 13:28:22 -070018import org.onosproject.cluster.ClusterEvent;
19import org.onosproject.cluster.ClusterEventListener;
Jordan Halterman28183ee2017-10-17 17:29:10 -070020import org.onosproject.cluster.ClusterService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070021import org.onosproject.cluster.ControllerNode;
Jordan Halterman28183ee2017-10-17 17:29:10 -070022import org.onosproject.cluster.MembershipService;
Jordan Halterman5ca07932017-10-07 13:28:22 -070023import org.onosproject.cluster.NodeId;
Jordan Halterman980a8c12017-09-22 18:01:19 -070024import org.onosproject.core.Version;
25import org.onosproject.core.VersionService;
26import org.onosproject.event.AbstractListenerManager;
27import org.onosproject.store.serializers.KryoNamespaces;
28import org.onosproject.store.service.AtomicValue;
29import org.onosproject.store.service.AtomicValueEvent;
30import org.onosproject.store.service.AtomicValueEventListener;
31import org.onosproject.store.service.CoordinationService;
32import org.onosproject.store.service.Serializer;
33import org.onosproject.upgrade.Upgrade;
34import org.onosproject.upgrade.UpgradeAdminService;
35import org.onosproject.upgrade.UpgradeEvent;
36import org.onosproject.upgrade.UpgradeEventListener;
37import org.onosproject.upgrade.UpgradeService;
Ray Milkeyd84f89b2018-08-17 14:54:17 -070038import org.osgi.service.component.annotations.Activate;
39import org.osgi.service.component.annotations.Component;
40import org.osgi.service.component.annotations.Deactivate;
41import org.osgi.service.component.annotations.Reference;
42import org.osgi.service.component.annotations.ReferenceCardinality;
Jordan Halterman980a8c12017-09-22 18:01:19 -070043import org.slf4j.Logger;
44
Ray Milkeyd84f89b2018-08-17 14:54:17 -070045import java.util.Objects;
46import java.util.Set;
47import java.util.concurrent.atomic.AtomicReference;
48import java.util.stream.Collectors;
49
slowr0a44fde2017-10-09 14:48:53 -070050import static org.onosproject.security.AppGuard.checkPermission;
Jordan Halterman07f052b2017-10-08 14:22:41 -070051import static org.onosproject.security.AppPermission.Type.CLUSTER_EVENT;
52import static org.onosproject.security.AppPermission.Type.UPGRADE_EVENT;
53import static org.onosproject.security.AppPermission.Type.UPGRADE_READ;
54import static org.onosproject.security.AppPermission.Type.UPGRADE_WRITE;
Jordan Halterman980a8c12017-09-22 18:01:19 -070055import static org.slf4j.LoggerFactory.getLogger;
56
57/**
58 * Upgrade service implementation.
59 * <p>
60 * This implementation uses the {@link CoordinationService} to store upgrade state in a version-agnostic primitive.
61 * Upgrade state can be seen by current and future version nodes.
62 */
Ray Milkeyd84f89b2018-08-17 14:54:17 -070063@Component(immediate = true, service = { UpgradeService.class, UpgradeAdminService.class })
Jordan Halterman980a8c12017-09-22 18:01:19 -070064public class UpgradeManager
65 extends AbstractListenerManager<UpgradeEvent, UpgradeEventListener>
66 implements UpgradeService, UpgradeAdminService {
67
68 private final Logger log = getLogger(getClass());
69
Ray Milkeyd84f89b2018-08-17 14:54:17 -070070 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jordan Halterman980a8c12017-09-22 18:01:19 -070071 protected VersionService versionService;
72
Ray Milkeyd84f89b2018-08-17 14:54:17 -070073 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jordan Halterman980a8c12017-09-22 18:01:19 -070074 protected CoordinationService coordinationService;
75
Ray Milkeyd84f89b2018-08-17 14:54:17 -070076 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jordan Halterman28183ee2017-10-17 17:29:10 -070077 protected ClusterService clusterService;
78
Ray Milkeyd84f89b2018-08-17 14:54:17 -070079 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jordan Halterman28183ee2017-10-17 17:29:10 -070080 protected MembershipService membershipService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070081
82 private Version localVersion;
83 private AtomicValue<Upgrade> state;
84 private final AtomicReference<Upgrade> currentState = new AtomicReference<>();
slowr0a44fde2017-10-09 14:48:53 -070085 private final AtomicValueEventListener<Upgrade> stateListener = this::handleUpgradeEvent;
86 private final ClusterEventListener clusterListener = this::handleClusterEvent;
Jordan Halterman980a8c12017-09-22 18:01:19 -070087
88 @Activate
89 public void activate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -070090 eventDispatcher.addSink(UpgradeEvent.class, listenerRegistry);
91
Jordan Halterman980a8c12017-09-22 18:01:19 -070092 state = coordinationService.<Upgrade>atomicValueBuilder()
93 .withName("onos-upgrade-state")
94 .withSerializer(Serializer.using(KryoNamespaces.API))
95 .build()
96 .asAtomicValue();
97 localVersion = versionService.version();
98
99 currentState.set(state.get());
slowr0a44fde2017-10-09 14:48:53 -0700100 if (getState() == null) {
101 initializeState(new Upgrade(localVersion, localVersion, Upgrade.Status.INACTIVE));
Jordan Halterman980a8c12017-09-22 18:01:19 -0700102 }
103
slowr0a44fde2017-10-09 14:48:53 -0700104 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700105
Jordan Halterman980a8c12017-09-22 18:01:19 -0700106 // If the upgrade state is initialized then check the node version.
107 if (upgrade.status() == Upgrade.Status.INITIALIZED) {
108 // If the source version equals the target version, attempt to update the target version.
109 if (Objects.equals(upgrade.source(), upgrade.target()) && !Objects.equals(upgrade.target(), localVersion)) {
slowr0a44fde2017-10-09 14:48:53 -0700110 checkPermission(UPGRADE_WRITE);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700111 upgrade = new Upgrade(upgrade.source(), localVersion, upgrade.status());
slowr0a44fde2017-10-09 14:48:53 -0700112 initializeState(upgrade);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700113 }
114 }
115
116 // If the upgrade status is active, verify that the local version matches the upgrade version.
117 if (upgrade.status().active() && !Objects.equals(upgrade.source(), upgrade.target())) {
118 // If the upgrade source/target are not equal, validate that the node's version is consistent
119 // with versions in the upgrade. There are two possibilities: that a not-yet-upgraded node is being
120 // restarted, or that a node has been upgraded, so we need to check that this node is running either
121 // the source or target version.
122 if (!Objects.equals(localVersion, upgrade.source()) && !Objects.equals(localVersion, upgrade.target())) {
123 log.error("Cannot upgrade node to version {}; Upgrade to {} already in progress",
124 localVersion, upgrade.target());
125 throw new IllegalStateException("Cannot upgrade node to version " + localVersion + "; Upgrade to " +
126 upgrade.target() + " already in progress");
127 }
128 }
129
130 state.addListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700131 clusterService.addListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700132 log.info("Started");
133 }
134
135 @Deactivate
136 public void deactivate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700137 eventDispatcher.removeSink(UpgradeEvent.class);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700138 state.removeListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700139 clusterService.removeListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700140 log.info("Stopped");
141 }
142
slowr0a44fde2017-10-09 14:48:53 -0700143 /**
144 * Initializes the state when the cluster starts.
145 * <p>
146 * This method must be called when updating the state in order to check the permissions
147 *
148 * @param newState new state
149 */
150 private void initializeState(Upgrade newState) {
151 checkPermission(UPGRADE_WRITE);
152 currentState.set(newState);
153 state.set(newState);
154 }
155
156 /**
157 * Changes the current state to new one.
158 * <p>
159 * This method must be called when changing between states in order to check the permissions and
160 * to avoid concurrent state modifications
161 *
162 * @param oldState current upgrade state
163 * @param newState new upgrade state
164 *
165 * @throws IllegalStateException if an upgrade is already in progress
166 */
167 private void changeState(Upgrade oldState, Upgrade newState) {
168 checkPermission(UPGRADE_WRITE);
169 if (!state.compareAndSet(oldState, newState)) {
170 throw new IllegalStateException("Concurrent upgrade modification");
171 } else {
172 currentState.set(newState);
173 }
174 }
175
176 @Override
177 public Upgrade getState() {
178 checkPermission(UPGRADE_READ);
179 return currentState.get();
180 }
181
Jordan Halterman980a8c12017-09-22 18:01:19 -0700182 @Override
183 public boolean isUpgrading() {
184 return getState().status().active();
185 }
186
187 @Override
Jordan Halterman980a8c12017-09-22 18:01:19 -0700188 public Version getVersion() {
slowr0a44fde2017-10-09 14:48:53 -0700189 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700190 return upgrade.status().upgraded()
191 ? upgrade.target()
192 : upgrade.source();
193 }
194
195 @Override
196 public boolean isLocalActive() {
197 return localVersion.equals(getVersion());
198 }
199
200 @Override
201 public boolean isLocalUpgraded() {
slowr0a44fde2017-10-09 14:48:53 -0700202 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700203 return upgrade.status().active()
204 && !upgrade.source().equals(upgrade.target())
205 && localVersion.equals(upgrade.target());
206 }
207
208 @Override
209 public void initialize() {
slowr0a44fde2017-10-09 14:48:53 -0700210 Upgrade inactive = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700211
212 // If the current upgrade status is active, fail initialization.
213 if (inactive.status().active()) {
214 throw new IllegalStateException("Upgrade already active");
215 }
216
217 // Set the upgrade status to INITIALIZING.
218 Upgrade initializing = new Upgrade(
219 localVersion,
220 localVersion,
221 Upgrade.Status.INITIALIZING);
slowr0a44fde2017-10-09 14:48:53 -0700222 changeState(inactive, initializing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700223
slowr0a44fde2017-10-09 14:48:53 -0700224 // Set the upgrade status to INITIALIZED.
225 Upgrade initialized = new Upgrade(
226 initializing.source(),
227 initializing.target(),
228 Upgrade.Status.INITIALIZED);
229 changeState(initializing, initialized);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700230 }
231
232 @Override
233 public void upgrade() {
slowr0a44fde2017-10-09 14:48:53 -0700234 Upgrade initialized = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700235
236 // If the current upgrade status is not INITIALIZED, throw an exception.
237 if (initialized.status() != Upgrade.Status.INITIALIZED) {
238 throw new IllegalStateException("Upgrade not initialized");
239 }
240
241 // Set the upgrade status to UPGRADING.
242 Upgrade upgrading = new Upgrade(
243 initialized.source(),
244 initialized.target(),
245 Upgrade.Status.UPGRADING);
slowr0a44fde2017-10-09 14:48:53 -0700246 changeState(initialized, upgrading);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700247
slowr0a44fde2017-10-09 14:48:53 -0700248 // Set the upgrade status to UPGRADED.
249 Upgrade upgraded = new Upgrade(
250 upgrading.source(),
251 upgrading.target(),
252 Upgrade.Status.UPGRADED);
253 changeState(upgrading, upgraded);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700254 }
255
256 @Override
257 public void commit() {
slowr0a44fde2017-10-09 14:48:53 -0700258 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700259
260 // If the current upgrade status is not UPGRADED, throw an exception.
261 if (upgraded.status() != Upgrade.Status.UPGRADED) {
262 throw new IllegalStateException("Upgrade not performed");
263 }
264
265 // Determine whether any nodes have not been upgraded to the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700266 boolean upgradeComplete = membershipService.getGroups().size() == 1
267 && membershipService.getLocalGroup().version().equals(upgraded.target());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700268
269 // If some nodes have not yet been upgraded, throw an exception.
270 if (!upgradeComplete) {
271 throw new IllegalStateException("Some nodes have not yet been upgraded to version " + upgraded.target());
272 }
273
274 // Set the upgrade status to COMMITTING.
275 Upgrade committing = new Upgrade(
276 upgraded.source(),
277 upgraded.target(),
278 Upgrade.Status.COMMITTING);
slowr0a44fde2017-10-09 14:48:53 -0700279 changeState(upgraded, committing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700280
slowr0a44fde2017-10-09 14:48:53 -0700281 // Set the upgrade status to COMMITTED.
282 Upgrade committed = new Upgrade(
283 committing.source(),
284 committing.target(),
285 Upgrade.Status.COMMITTED);
286 changeState(committing, committed);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700287
slowr0a44fde2017-10-09 14:48:53 -0700288 // Set the upgrade status to INACTIVE.
289 Upgrade inactive = new Upgrade(
290 localVersion,
291 localVersion,
292 Upgrade.Status.INACTIVE);
293 changeState(committed, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700294 }
295
296 @Override
297 public void rollback() {
slowr0a44fde2017-10-09 14:48:53 -0700298 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700299
300 // If the current upgrade status is not UPGRADED, throw an exception.
301 if (upgraded.status() != Upgrade.Status.UPGRADED) {
302 throw new IllegalStateException("Upgrade not performed");
303 }
304
305 // Set the upgrade status to ROLLING_BACK.
306 Upgrade rollingBack = new Upgrade(
307 upgraded.source(),
308 upgraded.target(),
309 Upgrade.Status.ROLLING_BACK);
slowr0a44fde2017-10-09 14:48:53 -0700310 changeState(upgraded, rollingBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700311
slowr0a44fde2017-10-09 14:48:53 -0700312 // Set the upgrade status to ROLLED_BACK.
313 Upgrade rolledBack = new Upgrade(
314 rollingBack.source(),
315 rollingBack.target(),
316 Upgrade.Status.ROLLED_BACK);
317 changeState(rollingBack, rolledBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700318 }
319
320 @Override
321 public void reset() {
slowr0a44fde2017-10-09 14:48:53 -0700322 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700323
324 // If the current upgrade status is not INITIALIZED or ROLLED_BACK, throw an exception.
325 if (upgraded.status() != Upgrade.Status.INITIALIZED
326 && upgraded.status() != Upgrade.Status.ROLLED_BACK) {
327 throw new IllegalStateException("Upgrade not rolled back");
328 }
329
330 // Determine whether any nodes are still running the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700331 boolean rollbackComplete = membershipService.getGroups().size() == 1
332 && membershipService.getLocalGroup().version().equals(upgraded.source());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700333
334 // If some nodes have not yet been downgraded, throw an exception.
335 if (!rollbackComplete) {
336 throw new IllegalStateException("Some nodes have not yet been downgraded to version " + upgraded.source());
337 }
338
339 // Set the upgrade status to RESETTING.
340 Upgrade resetting = new Upgrade(
341 upgraded.source(),
342 upgraded.target(),
343 Upgrade.Status.RESETTING);
slowr0a44fde2017-10-09 14:48:53 -0700344 changeState(upgraded, resetting);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700345
slowr0a44fde2017-10-09 14:48:53 -0700346 // Set the upgrade status to RESET.
347 Upgrade reset = new Upgrade(
348 resetting.source(),
349 resetting.target(),
350 Upgrade.Status.RESET);
351 changeState(resetting, reset);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700352
slowr0a44fde2017-10-09 14:48:53 -0700353 // Set the upgrade status to INACTIVE.
354 Upgrade inactive = new Upgrade(
355 localVersion,
356 localVersion,
357 Upgrade.Status.INACTIVE);
358 changeState(reset, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700359 }
360
Jordan Halterman5ca07932017-10-07 13:28:22 -0700361 /**
362 * Handles a cluster event.
363 *
364 * @param event the cluster event
365 */
366 protected void handleClusterEvent(ClusterEvent event) {
slowr0a44fde2017-10-09 14:48:53 -0700367 checkPermission(CLUSTER_EVENT);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700368 // If an instance was deactivated, check whether we need to roll back the upgrade.
369 if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED) {
slowr0a44fde2017-10-09 14:48:53 -0700370 Upgrade upgrade = getState();
Jordan Halterman5ca07932017-10-07 13:28:22 -0700371 if (upgrade.status().upgraded()) {
372 // Get the upgraded subset of the cluster and check whether the down node is a member
373 // of the upgraded subset. If so, roll back the upgrade to tolerate the failure.
374 Set<NodeId> upgradedNodes = clusterService.getNodes().stream()
375 .map(ControllerNode::id)
376 .filter(id -> clusterService.getVersion(id).equals(upgrade.target()))
377 .collect(Collectors.toSet());
378 if (upgradedNodes.contains(event.subject().id())) {
Jordan Halterman3c65d1b2018-01-09 13:01:03 -0800379 log.warn("Upgrade failure detected: rolling back upgrade");
Jordan Halterman5ca07932017-10-07 13:28:22 -0700380 rollback();
381 }
382 }
383 }
384 }
385
386 /**
387 * Handles an upgrade state event.
388 *
389 * @param event the upgrade value event
390 */
391 protected void handleUpgradeEvent(AtomicValueEvent<Upgrade> event) {
slowr0a44fde2017-10-09 14:48:53 -0700392 checkPermission(UPGRADE_EVENT);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700393 currentState.set(event.newValue());
394 switch (event.newValue().status()) {
395 case INITIALIZED:
396 post(new UpgradeEvent(UpgradeEvent.Type.INITIALIZED, event.newValue()));
397 break;
398 case UPGRADED:
399 post(new UpgradeEvent(UpgradeEvent.Type.UPGRADED, event.newValue()));
400 break;
401 case COMMITTED:
402 post(new UpgradeEvent(UpgradeEvent.Type.COMMITTED, event.newValue()));
403 break;
404 case ROLLED_BACK:
405 post(new UpgradeEvent(UpgradeEvent.Type.ROLLED_BACK, event.newValue()));
406 break;
407 case RESET:
408 post(new UpgradeEvent(UpgradeEvent.Type.RESET, event.newValue()));
409 break;
410 default:
411 break;
412 }
413 }
414}