blob: ab1b1c3e92020faf8903cf4f5e91f10de204522a [file] [log] [blame]
/*
 * Copyright 2017-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.upgrade.impl;
17
18import java.util.Objects;
Jordan Halterman5ca07932017-10-07 13:28:22 -070019import java.util.Set;
Jordan Halterman980a8c12017-09-22 18:01:19 -070020import java.util.concurrent.atomic.AtomicReference;
Jordan Halterman5ca07932017-10-07 13:28:22 -070021import java.util.stream.Collectors;
Jordan Halterman980a8c12017-09-22 18:01:19 -070022
23import org.apache.felix.scr.annotations.Activate;
24import org.apache.felix.scr.annotations.Component;
25import org.apache.felix.scr.annotations.Deactivate;
26import org.apache.felix.scr.annotations.Reference;
27import org.apache.felix.scr.annotations.ReferenceCardinality;
28import org.apache.felix.scr.annotations.Service;
Jordan Halterman5ca07932017-10-07 13:28:22 -070029import org.onosproject.cluster.ClusterEvent;
30import org.onosproject.cluster.ClusterEventListener;
Jordan Halterman28183ee2017-10-17 17:29:10 -070031import org.onosproject.cluster.ClusterService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070032import org.onosproject.cluster.ControllerNode;
Jordan Halterman28183ee2017-10-17 17:29:10 -070033import org.onosproject.cluster.MembershipService;
Jordan Halterman5ca07932017-10-07 13:28:22 -070034import org.onosproject.cluster.NodeId;
Jordan Halterman980a8c12017-09-22 18:01:19 -070035import org.onosproject.core.Version;
36import org.onosproject.core.VersionService;
37import org.onosproject.event.AbstractListenerManager;
38import org.onosproject.store.serializers.KryoNamespaces;
39import org.onosproject.store.service.AtomicValue;
40import org.onosproject.store.service.AtomicValueEvent;
41import org.onosproject.store.service.AtomicValueEventListener;
42import org.onosproject.store.service.CoordinationService;
43import org.onosproject.store.service.Serializer;
44import org.onosproject.upgrade.Upgrade;
45import org.onosproject.upgrade.UpgradeAdminService;
46import org.onosproject.upgrade.UpgradeEvent;
47import org.onosproject.upgrade.UpgradeEventListener;
48import org.onosproject.upgrade.UpgradeService;
49import org.slf4j.Logger;
50
slowr0a44fde2017-10-09 14:48:53 -070051import static org.onosproject.security.AppGuard.checkPermission;
Jordan Halterman07f052b2017-10-08 14:22:41 -070052import static org.onosproject.security.AppPermission.Type.CLUSTER_EVENT;
53import static org.onosproject.security.AppPermission.Type.UPGRADE_EVENT;
54import static org.onosproject.security.AppPermission.Type.UPGRADE_READ;
55import static org.onosproject.security.AppPermission.Type.UPGRADE_WRITE;
Jordan Halterman980a8c12017-09-22 18:01:19 -070056import static org.slf4j.LoggerFactory.getLogger;
57
/**
 * Upgrade service implementation.
 * <p>
 * This implementation uses the {@link CoordinationService} to store upgrade state in a version-agnostic primitive.
 * Upgrade state can be seen by current and future version nodes.
 */
64@Component(immediate = true)
65@Service
66public class UpgradeManager
67 extends AbstractListenerManager<UpgradeEvent, UpgradeEventListener>
68 implements UpgradeService, UpgradeAdminService {
69
70 private final Logger log = getLogger(getClass());
71
72 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
73 protected VersionService versionService;
74
75 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
76 protected CoordinationService coordinationService;
77
78 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Jordan Halterman28183ee2017-10-17 17:29:10 -070079 protected ClusterService clusterService;
80
81 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
82 protected MembershipService membershipService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070083
84 private Version localVersion;
85 private AtomicValue<Upgrade> state;
86 private final AtomicReference<Upgrade> currentState = new AtomicReference<>();
slowr0a44fde2017-10-09 14:48:53 -070087 private final AtomicValueEventListener<Upgrade> stateListener = this::handleUpgradeEvent;
88 private final ClusterEventListener clusterListener = this::handleClusterEvent;
Jordan Halterman980a8c12017-09-22 18:01:19 -070089
90 @Activate
91 public void activate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -070092 eventDispatcher.addSink(UpgradeEvent.class, listenerRegistry);
93
Jordan Halterman980a8c12017-09-22 18:01:19 -070094 state = coordinationService.<Upgrade>atomicValueBuilder()
95 .withName("onos-upgrade-state")
96 .withSerializer(Serializer.using(KryoNamespaces.API))
97 .build()
98 .asAtomicValue();
99 localVersion = versionService.version();
100
101 currentState.set(state.get());
slowr0a44fde2017-10-09 14:48:53 -0700102 if (getState() == null) {
103 initializeState(new Upgrade(localVersion, localVersion, Upgrade.Status.INACTIVE));
Jordan Halterman980a8c12017-09-22 18:01:19 -0700104 }
105
slowr0a44fde2017-10-09 14:48:53 -0700106 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700107
108 // If the upgrade state is not initialized, ensure this node matches the version of the cluster.
109 if (!upgrade.status().active() && !Objects.equals(upgrade.source(), localVersion)) {
110 log.error("Node version {} inconsistent with cluster version {}", localVersion, upgrade.source());
111 throw new IllegalStateException("Node version " + localVersion +
112 " inconsistent with cluster version " + upgrade.source());
113 }
114
115 // If the upgrade state is initialized then check the node version.
116 if (upgrade.status() == Upgrade.Status.INITIALIZED) {
117 // If the source version equals the target version, attempt to update the target version.
118 if (Objects.equals(upgrade.source(), upgrade.target()) && !Objects.equals(upgrade.target(), localVersion)) {
slowr0a44fde2017-10-09 14:48:53 -0700119 checkPermission(UPGRADE_WRITE);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700120 upgrade = new Upgrade(upgrade.source(), localVersion, upgrade.status());
slowr0a44fde2017-10-09 14:48:53 -0700121 initializeState(upgrade);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700122 }
123 }
124
125 // If the upgrade status is active, verify that the local version matches the upgrade version.
126 if (upgrade.status().active() && !Objects.equals(upgrade.source(), upgrade.target())) {
127 // If the upgrade source/target are not equal, validate that the node's version is consistent
128 // with versions in the upgrade. There are two possibilities: that a not-yet-upgraded node is being
129 // restarted, or that a node has been upgraded, so we need to check that this node is running either
130 // the source or target version.
131 if (!Objects.equals(localVersion, upgrade.source()) && !Objects.equals(localVersion, upgrade.target())) {
132 log.error("Cannot upgrade node to version {}; Upgrade to {} already in progress",
133 localVersion, upgrade.target());
134 throw new IllegalStateException("Cannot upgrade node to version " + localVersion + "; Upgrade to " +
135 upgrade.target() + " already in progress");
136 }
137 }
138
139 state.addListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700140 clusterService.addListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700141 log.info("Started");
142 }
143
144 @Deactivate
145 public void deactivate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700146 eventDispatcher.removeSink(UpgradeEvent.class);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700147 state.removeListener(stateListener);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700148 clusterService.removeListener(clusterListener);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700149 log.info("Stopped");
150 }
151
slowr0a44fde2017-10-09 14:48:53 -0700152 /**
153 * Initializes the state when the cluster starts.
154 * <p>
155 * This method must be called when updating the state in order to check the permissions
156 *
157 * @param newState new state
158 */
159 private void initializeState(Upgrade newState) {
160 checkPermission(UPGRADE_WRITE);
161 currentState.set(newState);
162 state.set(newState);
163 }
164
165 /**
166 * Changes the current state to new one.
167 * <p>
168 * This method must be called when changing between states in order to check the permissions and
169 * to avoid concurrent state modifications
170 *
171 * @param oldState current upgrade state
172 * @param newState new upgrade state
173 *
174 * @throws IllegalStateException if an upgrade is already in progress
175 */
176 private void changeState(Upgrade oldState, Upgrade newState) {
177 checkPermission(UPGRADE_WRITE);
178 if (!state.compareAndSet(oldState, newState)) {
179 throw new IllegalStateException("Concurrent upgrade modification");
180 } else {
181 currentState.set(newState);
182 }
183 }
184
185 @Override
186 public Upgrade getState() {
187 checkPermission(UPGRADE_READ);
188 return currentState.get();
189 }
190
Jordan Halterman980a8c12017-09-22 18:01:19 -0700191 @Override
192 public boolean isUpgrading() {
193 return getState().status().active();
194 }
195
196 @Override
Jordan Halterman980a8c12017-09-22 18:01:19 -0700197 public Version getVersion() {
slowr0a44fde2017-10-09 14:48:53 -0700198 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700199 return upgrade.status().upgraded()
200 ? upgrade.target()
201 : upgrade.source();
202 }
203
204 @Override
205 public boolean isLocalActive() {
206 return localVersion.equals(getVersion());
207 }
208
209 @Override
210 public boolean isLocalUpgraded() {
slowr0a44fde2017-10-09 14:48:53 -0700211 Upgrade upgrade = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700212 return upgrade.status().active()
213 && !upgrade.source().equals(upgrade.target())
214 && localVersion.equals(upgrade.target());
215 }
216
217 @Override
218 public void initialize() {
slowr0a44fde2017-10-09 14:48:53 -0700219 Upgrade inactive = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700220
221 // If the current upgrade status is active, fail initialization.
222 if (inactive.status().active()) {
223 throw new IllegalStateException("Upgrade already active");
224 }
225
226 // Set the upgrade status to INITIALIZING.
227 Upgrade initializing = new Upgrade(
228 localVersion,
229 localVersion,
230 Upgrade.Status.INITIALIZING);
slowr0a44fde2017-10-09 14:48:53 -0700231 changeState(inactive, initializing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700232
slowr0a44fde2017-10-09 14:48:53 -0700233 // Set the upgrade status to INITIALIZED.
234 Upgrade initialized = new Upgrade(
235 initializing.source(),
236 initializing.target(),
237 Upgrade.Status.INITIALIZED);
238 changeState(initializing, initialized);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700239 }
240
241 @Override
242 public void upgrade() {
slowr0a44fde2017-10-09 14:48:53 -0700243 Upgrade initialized = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700244
245 // If the current upgrade status is not INITIALIZED, throw an exception.
246 if (initialized.status() != Upgrade.Status.INITIALIZED) {
247 throw new IllegalStateException("Upgrade not initialized");
248 }
249
250 // Set the upgrade status to UPGRADING.
251 Upgrade upgrading = new Upgrade(
252 initialized.source(),
253 initialized.target(),
254 Upgrade.Status.UPGRADING);
slowr0a44fde2017-10-09 14:48:53 -0700255 changeState(initialized, upgrading);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700256
slowr0a44fde2017-10-09 14:48:53 -0700257 // Set the upgrade status to UPGRADED.
258 Upgrade upgraded = new Upgrade(
259 upgrading.source(),
260 upgrading.target(),
261 Upgrade.Status.UPGRADED);
262 changeState(upgrading, upgraded);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700263 }
264
265 @Override
266 public void commit() {
slowr0a44fde2017-10-09 14:48:53 -0700267 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700268
269 // If the current upgrade status is not UPGRADED, throw an exception.
270 if (upgraded.status() != Upgrade.Status.UPGRADED) {
271 throw new IllegalStateException("Upgrade not performed");
272 }
273
274 // Determine whether any nodes have not been upgraded to the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700275 boolean upgradeComplete = membershipService.getGroups().size() == 1
276 && membershipService.getLocalGroup().version().equals(upgraded.target());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700277
278 // If some nodes have not yet been upgraded, throw an exception.
279 if (!upgradeComplete) {
280 throw new IllegalStateException("Some nodes have not yet been upgraded to version " + upgraded.target());
281 }
282
283 // Set the upgrade status to COMMITTING.
284 Upgrade committing = new Upgrade(
285 upgraded.source(),
286 upgraded.target(),
287 Upgrade.Status.COMMITTING);
slowr0a44fde2017-10-09 14:48:53 -0700288 changeState(upgraded, committing);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700289
slowr0a44fde2017-10-09 14:48:53 -0700290 // Set the upgrade status to COMMITTED.
291 Upgrade committed = new Upgrade(
292 committing.source(),
293 committing.target(),
294 Upgrade.Status.COMMITTED);
295 changeState(committing, committed);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700296
slowr0a44fde2017-10-09 14:48:53 -0700297 // Set the upgrade status to INACTIVE.
298 Upgrade inactive = new Upgrade(
299 localVersion,
300 localVersion,
301 Upgrade.Status.INACTIVE);
302 changeState(committed, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700303 }
304
305 @Override
306 public void rollback() {
slowr0a44fde2017-10-09 14:48:53 -0700307 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700308
309 // If the current upgrade status is not UPGRADED, throw an exception.
310 if (upgraded.status() != Upgrade.Status.UPGRADED) {
311 throw new IllegalStateException("Upgrade not performed");
312 }
313
314 // Set the upgrade status to ROLLING_BACK.
315 Upgrade rollingBack = new Upgrade(
316 upgraded.source(),
317 upgraded.target(),
318 Upgrade.Status.ROLLING_BACK);
slowr0a44fde2017-10-09 14:48:53 -0700319 changeState(upgraded, rollingBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700320
slowr0a44fde2017-10-09 14:48:53 -0700321 // Set the upgrade status to ROLLED_BACK.
322 Upgrade rolledBack = new Upgrade(
323 rollingBack.source(),
324 rollingBack.target(),
325 Upgrade.Status.ROLLED_BACK);
326 changeState(rollingBack, rolledBack);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700327 }
328
329 @Override
330 public void reset() {
slowr0a44fde2017-10-09 14:48:53 -0700331 Upgrade upgraded = getState();
Jordan Halterman980a8c12017-09-22 18:01:19 -0700332
333 // If the current upgrade status is not INITIALIZED or ROLLED_BACK, throw an exception.
334 if (upgraded.status() != Upgrade.Status.INITIALIZED
335 && upgraded.status() != Upgrade.Status.ROLLED_BACK) {
336 throw new IllegalStateException("Upgrade not rolled back");
337 }
338
339 // Determine whether any nodes are still running the target version.
Jordan Halterman28183ee2017-10-17 17:29:10 -0700340 boolean rollbackComplete = membershipService.getGroups().size() == 1
341 && membershipService.getLocalGroup().version().equals(upgraded.source());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700342
343 // If some nodes have not yet been downgraded, throw an exception.
344 if (!rollbackComplete) {
345 throw new IllegalStateException("Some nodes have not yet been downgraded to version " + upgraded.source());
346 }
347
348 // Set the upgrade status to RESETTING.
349 Upgrade resetting = new Upgrade(
350 upgraded.source(),
351 upgraded.target(),
352 Upgrade.Status.RESETTING);
slowr0a44fde2017-10-09 14:48:53 -0700353 changeState(upgraded, resetting);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700354
slowr0a44fde2017-10-09 14:48:53 -0700355 // Set the upgrade status to RESET.
356 Upgrade reset = new Upgrade(
357 resetting.source(),
358 resetting.target(),
359 Upgrade.Status.RESET);
360 changeState(resetting, reset);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700361
slowr0a44fde2017-10-09 14:48:53 -0700362 // Set the upgrade status to INACTIVE.
363 Upgrade inactive = new Upgrade(
364 localVersion,
365 localVersion,
366 Upgrade.Status.INACTIVE);
367 changeState(reset, inactive);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700368 }
369
Jordan Halterman5ca07932017-10-07 13:28:22 -0700370 /**
371 * Handles a cluster event.
372 *
373 * @param event the cluster event
374 */
375 protected void handleClusterEvent(ClusterEvent event) {
slowr0a44fde2017-10-09 14:48:53 -0700376 checkPermission(CLUSTER_EVENT);
Jordan Halterman5ca07932017-10-07 13:28:22 -0700377 // If an instance was deactivated, check whether we need to roll back the upgrade.
378 if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED) {
slowr0a44fde2017-10-09 14:48:53 -0700379 Upgrade upgrade = getState();
Jordan Halterman5ca07932017-10-07 13:28:22 -0700380 if (upgrade.status().upgraded()) {
381 // Get the upgraded subset of the cluster and check whether the down node is a member
382 // of the upgraded subset. If so, roll back the upgrade to tolerate the failure.
383 Set<NodeId> upgradedNodes = clusterService.getNodes().stream()
384 .map(ControllerNode::id)
385 .filter(id -> clusterService.getVersion(id).equals(upgrade.target()))
386 .collect(Collectors.toSet());
387 if (upgradedNodes.contains(event.subject().id())) {
388 rollback();
389 }
390 }
391 }
392 }
393
394 /**
395 * Handles an upgrade state event.
396 *
397 * @param event the upgrade value event
398 */
399 protected void handleUpgradeEvent(AtomicValueEvent<Upgrade> event) {
slowr0a44fde2017-10-09 14:48:53 -0700400 checkPermission(UPGRADE_EVENT);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700401 currentState.set(event.newValue());
402 switch (event.newValue().status()) {
403 case INITIALIZED:
404 post(new UpgradeEvent(UpgradeEvent.Type.INITIALIZED, event.newValue()));
405 break;
406 case UPGRADED:
407 post(new UpgradeEvent(UpgradeEvent.Type.UPGRADED, event.newValue()));
408 break;
409 case COMMITTED:
410 post(new UpgradeEvent(UpgradeEvent.Type.COMMITTED, event.newValue()));
411 break;
412 case ROLLED_BACK:
413 post(new UpgradeEvent(UpgradeEvent.Type.ROLLED_BACK, event.newValue()));
414 break;
415 case RESET:
416 post(new UpgradeEvent(UpgradeEvent.Type.RESET, event.newValue()));
417 break;
418 default:
419 break;
420 }
421 }
422}