blob: 3910f4bd587639e8770e4c5854e026ab5a06612a [file] [log] [blame]
/*
 * Copyright 2017-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.upgrade.impl;
17
18import java.util.Objects;
Jordan Halterman5ca07932017-10-07 13:28:22 -070019import java.util.Set;
Jordan Halterman980a8c12017-09-22 18:01:19 -070020import java.util.concurrent.atomic.AtomicReference;
Jordan Halterman5ca07932017-10-07 13:28:22 -070021import java.util.stream.Collectors;
Jordan Halterman980a8c12017-09-22 18:01:19 -070022
23import org.apache.felix.scr.annotations.Activate;
24import org.apache.felix.scr.annotations.Component;
25import org.apache.felix.scr.annotations.Deactivate;
26import org.apache.felix.scr.annotations.Reference;
27import org.apache.felix.scr.annotations.ReferenceCardinality;
28import org.apache.felix.scr.annotations.Service;
Jordan Halterman5ca07932017-10-07 13:28:22 -070029import org.onosproject.cluster.ClusterEvent;
30import org.onosproject.cluster.ClusterEventListener;
Jordan Halterman28183ee2017-10-17 17:29:10 -070031import org.onosproject.cluster.ClusterService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070032import org.onosproject.cluster.ControllerNode;
Jordan Halterman28183ee2017-10-17 17:29:10 -070033import org.onosproject.cluster.MembershipService;
Jordan Halterman5ca07932017-10-07 13:28:22 -070034import org.onosproject.cluster.NodeId;
Jordan Halterman980a8c12017-09-22 18:01:19 -070035import org.onosproject.core.Version;
36import org.onosproject.core.VersionService;
37import org.onosproject.event.AbstractListenerManager;
38import org.onosproject.store.serializers.KryoNamespaces;
39import org.onosproject.store.service.AtomicValue;
40import org.onosproject.store.service.AtomicValueEvent;
41import org.onosproject.store.service.AtomicValueEventListener;
42import org.onosproject.store.service.CoordinationService;
43import org.onosproject.store.service.Serializer;
44import org.onosproject.upgrade.Upgrade;
45import org.onosproject.upgrade.UpgradeAdminService;
46import org.onosproject.upgrade.UpgradeEvent;
47import org.onosproject.upgrade.UpgradeEventListener;
48import org.onosproject.upgrade.UpgradeService;
49import org.slf4j.Logger;
50
slowr0a44fde2017-10-09 14:48:53 -070051import static org.onosproject.security.AppGuard.checkPermission;
Jordan Halterman07f052b2017-10-08 14:22:41 -070052import static org.onosproject.security.AppPermission.Type.CLUSTER_EVENT;
53import static org.onosproject.security.AppPermission.Type.UPGRADE_EVENT;
54import static org.onosproject.security.AppPermission.Type.UPGRADE_READ;
55import static org.onosproject.security.AppPermission.Type.UPGRADE_WRITE;
Jordan Halterman980a8c12017-09-22 18:01:19 -070056import static org.slf4j.LoggerFactory.getLogger;
57
/**
 * Upgrade service implementation.
 * <p>
 * This implementation uses the {@link CoordinationService} to store upgrade state in a version-agnostic primitive.
 * Upgrade state can be seen by current and future version nodes.
 */
64@Component(immediate = true)
65@Service
66public class UpgradeManager
67 extends AbstractListenerManager<UpgradeEvent, UpgradeEventListener>
68 implements UpgradeService, UpgradeAdminService {
69
70 private final Logger log = getLogger(getClass());
71
72 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
73 protected VersionService versionService;
74
75 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
76 protected CoordinationService coordinationService;
77
78 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Jordan Halterman28183ee2017-10-17 17:29:10 -070079 protected ClusterService clusterService;
80
81 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
82 protected MembershipService membershipService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070083
84 private Version localVersion;
85 private AtomicValue<Upgrade> state;
86 private final AtomicReference<Upgrade> currentState = new AtomicReference<>();
slowr0a44fde2017-10-09 14:48:53 -070087 private final AtomicValueEventListener<Upgrade> stateListener = this::handleUpgradeEvent;
88 private final ClusterEventListener clusterListener = this::handleClusterEvent;
Jordan Halterman980a8c12017-09-22 18:01:19 -070089
90 @Activate
91 public void activate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -070092 eventDispatcher.addSink(UpgradeEvent.class, listenerRegistry);
93
Jordan Halterman980a8c12017-09-22 18:01:19 -070094 state = coordinationService.<Upgrade>atomicValueBuilder()
95 .withName("onos-upgrade-state")
96 .withSerializer(Serializer.using(KryoNamespaces.API))
97 .build()
98 .asAtomicValue();
99 localVersion = versionService.version();
100
101 currentState.set(state.get());
slowr0a44fde2017-10-09 14:48:53 -0700102 if (getState() == null) {
103 initializeState(new Upgrade(localVersion, localVersion, Upgrade.Status.INACTIVE));
Jordan Halterman980a8c12017-09-22 18:01:19 -0700104 }
105
slowr0a44fde2017-10-09 14:48:53 -0700106 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700107
Jordan Halterman980a8c12017-09-22 18:01:19 -0700108 // If the upgrade state is initialized then check the node version.
109 if (upgrade.status() == Upgrade.Status.INITIALIZED) {
110 // If the source version equals the target version, attempt to update the target version.
111 if (Objects.equals(upgrade.source(), upgrade.target()) && !Objects.equals(upgrade.target(), localVersion)) {
slowr0a44fde2017-10-09 14:48:53 -0700112 checkPermission(UPGRADE_WRITE);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700113 upgrade = new Upgrade(upgrade.source(), localVersion, upgrade.status());
slowr0a44fde2017-10-09 14:48:53 -0700114 initializeState(upgrade);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700115 }
116 }
117
118 // If the upgrade status is active, verify that the local version matches the upgrade version.
119 if (upgrade.status().active() && !Objects.equals(upgrade.source(), upgrade.target())) {
120 // If the upgrade source/target are not equal, validate that the node's version is consistent
121 // with versions in the upgrade. There are two possibilities: that a not-yet-upgraded node is being
122 // restarted, or that a node has been upgraded, so we need to check that this node is running either
123 // the source or target version.
124 if (!Objects.equals(localVersion, upgrade.source()) && !Objects.equals(localVersion, upgrade.target())) {
125 log.error("Cannot upgrade node to version {}; Upgrade to {} already in progress",
126 localVersion, upgrade.target());
127 throw new IllegalStateException("Cannot upgrade node to version " + localVersion + "; Upgrade to " +
128 upgrade.target() + " already in progress");
129 }
130 }
131
132 state.addListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700133 clusterService.addListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700134 log.info("Started");
135 }
136
137 @Deactivate
138 public void deactivate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700139 eventDispatcher.removeSink(UpgradeEvent.class);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700140 state.removeListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700141 clusterService.removeListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700142 log.info("Stopped");
143 }
144
slowr0a44fde2017-10-09 14:48:53 -0700145 /**
146 * Initializes the state when the cluster starts.
147 * <p>
148 * This method must be called when updating the state in order to check the permissions
149 *
150 * @param newState new state
151 */
152 private void initializeState(Upgrade newState) {
153 checkPermission(UPGRADE_WRITE);
154 currentState.set(newState);
155 state.set(newState);
156 }
157
158 /**
159 * Changes the current state to new one.
160 * <p>
161 * This method must be called when changing between states in order to check the permissions and
162 * to avoid concurrent state modifications
163 *
164 * @param oldState current upgrade state
165 * @param newState new upgrade state
166 *
167 * @throws IllegalStateException if an upgrade is already in progress
168 */
169 private void changeState(Upgrade oldState, Upgrade newState) {
170 checkPermission(UPGRADE_WRITE);
171 if (!state.compareAndSet(oldState, newState)) {
172 throw new IllegalStateException("Concurrent upgrade modification");
173 } else {
174 currentState.set(newState);
175 }
176 }
177
178 @Override
179 public Upgrade getState() {
180 checkPermission(UPGRADE_READ);
181 return currentState.get();
182 }
183
Jordan Halterman980a8c12017-09-22 18:01:19 -0700184 @Override
185 public boolean isUpgrading() {
186 return getState().status().active();
187 }
188
189 @Override
Jordan Halterman980a8c12017-09-22 18:01:19 -0700190 public Version getVersion() {
slowr0a44fde2017-10-09 14:48:53 -0700191 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700192 return upgrade.status().upgraded()
193 ? upgrade.target()
194 : upgrade.source();
195 }
196
197 @Override
198 public boolean isLocalActive() {
199 return localVersion.equals(getVersion());
200 }
201
202 @Override
203 public boolean isLocalUpgraded() {
slowr0a44fde2017-10-09 14:48:53 -0700204 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700205 return upgrade.status().active()
206 && !upgrade.source().equals(upgrade.target())
207 && localVersion.equals(upgrade.target());
208 }
209
210 @Override
211 public void initialize() {
slowr0a44fde2017-10-09 14:48:53 -0700212 Upgrade inactive = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700213
214 // If the current upgrade status is active, fail initialization.
215 if (inactive.status().active()) {
216 throw new IllegalStateException("Upgrade already active");
217 }
218
219 // Set the upgrade status to INITIALIZING.
220 Upgrade initializing = new Upgrade(
221 localVersion,
222 localVersion,
223 Upgrade.Status.INITIALIZING);
slowr0a44fde2017-10-09 14:48:53 -0700224 changeState(inactive, initializing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700225
slowr0a44fde2017-10-09 14:48:53 -0700226 // Set the upgrade status to INITIALIZED.
227 Upgrade initialized = new Upgrade(
228 initializing.source(),
229 initializing.target(),
230 Upgrade.Status.INITIALIZED);
231 changeState(initializing, initialized);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700232 }
233
234 @Override
235 public void upgrade() {
slowr0a44fde2017-10-09 14:48:53 -0700236 Upgrade initialized = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700237
238 // If the current upgrade status is not INITIALIZED, throw an exception.
239 if (initialized.status() != Upgrade.Status.INITIALIZED) {
240 throw new IllegalStateException("Upgrade not initialized");
241 }
242
243 // Set the upgrade status to UPGRADING.
244 Upgrade upgrading = new Upgrade(
245 initialized.source(),
246 initialized.target(),
247 Upgrade.Status.UPGRADING);
slowr0a44fde2017-10-09 14:48:53 -0700248 changeState(initialized, upgrading);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700249
slowr0a44fde2017-10-09 14:48:53 -0700250 // Set the upgrade status to UPGRADED.
251 Upgrade upgraded = new Upgrade(
252 upgrading.source(),
253 upgrading.target(),
254 Upgrade.Status.UPGRADED);
255 changeState(upgrading, upgraded);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700256 }
257
258 @Override
259 public void commit() {
slowr0a44fde2017-10-09 14:48:53 -0700260 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700261
262 // If the current upgrade status is not UPGRADED, throw an exception.
263 if (upgraded.status() != Upgrade.Status.UPGRADED) {
264 throw new IllegalStateException("Upgrade not performed");
265 }
266
267 // Determine whether any nodes have not been upgraded to the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700268 boolean upgradeComplete = membershipService.getGroups().size() == 1
269 && membershipService.getLocalGroup().version().equals(upgraded.target());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700270
271 // If some nodes have not yet been upgraded, throw an exception.
272 if (!upgradeComplete) {
273 throw new IllegalStateException("Some nodes have not yet been upgraded to version " + upgraded.target());
274 }
275
276 // Set the upgrade status to COMMITTING.
277 Upgrade committing = new Upgrade(
278 upgraded.source(),
279 upgraded.target(),
280 Upgrade.Status.COMMITTING);
slowr0a44fde2017-10-09 14:48:53 -0700281 changeState(upgraded, committing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700282
slowr0a44fde2017-10-09 14:48:53 -0700283 // Set the upgrade status to COMMITTED.
284 Upgrade committed = new Upgrade(
285 committing.source(),
286 committing.target(),
287 Upgrade.Status.COMMITTED);
288 changeState(committing, committed);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700289
slowr0a44fde2017-10-09 14:48:53 -0700290 // Set the upgrade status to INACTIVE.
291 Upgrade inactive = new Upgrade(
292 localVersion,
293 localVersion,
294 Upgrade.Status.INACTIVE);
295 changeState(committed, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700296 }
297
298 @Override
299 public void rollback() {
slowr0a44fde2017-10-09 14:48:53 -0700300 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700301
302 // If the current upgrade status is not UPGRADED, throw an exception.
303 if (upgraded.status() != Upgrade.Status.UPGRADED) {
304 throw new IllegalStateException("Upgrade not performed");
305 }
306
307 // Set the upgrade status to ROLLING_BACK.
308 Upgrade rollingBack = new Upgrade(
309 upgraded.source(),
310 upgraded.target(),
311 Upgrade.Status.ROLLING_BACK);
slowr0a44fde2017-10-09 14:48:53 -0700312 changeState(upgraded, rollingBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700313
slowr0a44fde2017-10-09 14:48:53 -0700314 // Set the upgrade status to ROLLED_BACK.
315 Upgrade rolledBack = new Upgrade(
316 rollingBack.source(),
317 rollingBack.target(),
318 Upgrade.Status.ROLLED_BACK);
319 changeState(rollingBack, rolledBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700320 }
321
322 @Override
323 public void reset() {
slowr0a44fde2017-10-09 14:48:53 -0700324 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700325
326 // If the current upgrade status is not INITIALIZED or ROLLED_BACK, throw an exception.
327 if (upgraded.status() != Upgrade.Status.INITIALIZED
328 && upgraded.status() != Upgrade.Status.ROLLED_BACK) {
329 throw new IllegalStateException("Upgrade not rolled back");
330 }
331
332 // Determine whether any nodes are still running the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700333 boolean rollbackComplete = membershipService.getGroups().size() == 1
334 && membershipService.getLocalGroup().version().equals(upgraded.source());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700335
336 // If some nodes have not yet been downgraded, throw an exception.
337 if (!rollbackComplete) {
338 throw new IllegalStateException("Some nodes have not yet been downgraded to version " + upgraded.source());
339 }
340
341 // Set the upgrade status to RESETTING.
342 Upgrade resetting = new Upgrade(
343 upgraded.source(),
344 upgraded.target(),
345 Upgrade.Status.RESETTING);
slowr0a44fde2017-10-09 14:48:53 -0700346 changeState(upgraded, resetting);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700347
slowr0a44fde2017-10-09 14:48:53 -0700348 // Set the upgrade status to RESET.
349 Upgrade reset = new Upgrade(
350 resetting.source(),
351 resetting.target(),
352 Upgrade.Status.RESET);
353 changeState(resetting, reset);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700354
slowr0a44fde2017-10-09 14:48:53 -0700355 // Set the upgrade status to INACTIVE.
356 Upgrade inactive = new Upgrade(
357 localVersion,
358 localVersion,
359 Upgrade.Status.INACTIVE);
360 changeState(reset, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700361 }
362
Jordan Halterman5ca07932017-10-07 13:28:22 -0700363 /**
364 * Handles a cluster event.
365 *
366 * @param event the cluster event
367 */
368 protected void handleClusterEvent(ClusterEvent event) {
slowr0a44fde2017-10-09 14:48:53 -0700369 checkPermission(CLUSTER_EVENT);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700370 // If an instance was deactivated, check whether we need to roll back the upgrade.
371 if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED) {
slowr0a44fde2017-10-09 14:48:53 -0700372 Upgrade upgrade = getState();
Jordan Halterman5ca07932017-10-07 13:28:22 -0700373 if (upgrade.status().upgraded()) {
374 // Get the upgraded subset of the cluster and check whether the down node is a member
375 // of the upgraded subset. If so, roll back the upgrade to tolerate the failure.
376 Set<NodeId> upgradedNodes = clusterService.getNodes().stream()
377 .map(ControllerNode::id)
378 .filter(id -> clusterService.getVersion(id).equals(upgrade.target()))
379 .collect(Collectors.toSet());
380 if (upgradedNodes.contains(event.subject().id())) {
Jordan Halterman3c65d1b2018-01-09 13:01:03 -0800381 log.warn("Upgrade failure detected: rolling back upgrade");
Jordan Halterman5ca07932017-10-07 13:28:22 -0700382 rollback();
383 }
384 }
385 }
386 }
387
388 /**
389 * Handles an upgrade state event.
390 *
391 * @param event the upgrade value event
392 */
393 protected void handleUpgradeEvent(AtomicValueEvent<Upgrade> event) {
slowr0a44fde2017-10-09 14:48:53 -0700394 checkPermission(UPGRADE_EVENT);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700395 currentState.set(event.newValue());
396 switch (event.newValue().status()) {
397 case INITIALIZED:
398 post(new UpgradeEvent(UpgradeEvent.Type.INITIALIZED, event.newValue()));
399 break;
400 case UPGRADED:
401 post(new UpgradeEvent(UpgradeEvent.Type.UPGRADED, event.newValue()));
402 break;
403 case COMMITTED:
404 post(new UpgradeEvent(UpgradeEvent.Type.COMMITTED, event.newValue()));
405 break;
406 case ROLLED_BACK:
407 post(new UpgradeEvent(UpgradeEvent.Type.ROLLED_BACK, event.newValue()));
408 break;
409 case RESET:
410 post(new UpgradeEvent(UpgradeEvent.Type.RESET, event.newValue()));
411 break;
412 default:
413 break;
414 }
415 }
416}