blob: 5f22d1594a525b11154949328ceccbc65d292421 [file] [log] [blame]
/*
 * Copyright 2016-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package org.onosproject.store.primitives.impl;
18
Madan Jampani15b8ef52016-02-02 17:35:05 -080019import java.io.File;
Jordan Halterman07f052b2017-10-08 14:22:41 -070020import java.io.IOException;
21import java.nio.file.Files;
Madan Jampanie14a09c2016-02-11 10:43:21 -080022import java.util.List;
Madan Jampani15b8ef52016-02-02 17:35:05 -080023import java.util.Map;
Jordan Halterman07f052b2017-10-08 14:22:41 -070024import java.util.Optional;
Madan Jampani15b8ef52016-02-02 17:35:05 -080025import java.util.Set;
26import java.util.concurrent.CompletableFuture;
Madan Jampani33547452016-02-29 16:45:04 -080027import java.util.concurrent.atomic.AtomicReference;
Madan Jampanie14a09c2016-02-11 10:43:21 -080028import java.util.stream.Collectors;
Madan Jampani15b8ef52016-02-02 17:35:05 -080029
Jordan Halterman2bf177c2017-06-29 01:49:08 -070030import com.google.common.collect.ImmutableSet;
31import com.google.common.collect.Maps;
Madan Jampani15b8ef52016-02-02 17:35:05 -080032import org.apache.felix.scr.annotations.Activate;
33import org.apache.felix.scr.annotations.Component;
Madan Jampani86cb2432016-02-17 11:07:56 -080034import org.apache.felix.scr.annotations.Deactivate;
Madan Jampani15b8ef52016-02-02 17:35:05 -080035import org.apache.felix.scr.annotations.Reference;
36import org.apache.felix.scr.annotations.ReferenceCardinality;
37import org.apache.felix.scr.annotations.Service;
38import org.onlab.util.Tools;
Jordan Halterman07f052b2017-10-08 14:22:41 -070039import org.onosproject.cluster.ClusterEvent;
40import org.onosproject.cluster.ClusterEventListener;
Madan Jampani33547452016-02-29 16:45:04 -080041import org.onosproject.cluster.ClusterMetadata;
42import org.onosproject.cluster.ClusterMetadataDiff;
43import org.onosproject.cluster.ClusterMetadataEvent;
44import org.onosproject.cluster.ClusterMetadataEventListener;
Madan Jampani15b8ef52016-02-02 17:35:05 -080045import org.onosproject.cluster.ClusterMetadataService;
46import org.onosproject.cluster.ClusterService;
Jordan Halterman07f052b2017-10-08 14:22:41 -070047import org.onosproject.cluster.DefaultPartition;
48import org.onosproject.cluster.Member;
49import org.onosproject.cluster.MembershipService;
Madan Jampani15b8ef52016-02-02 17:35:05 -080050import org.onosproject.cluster.NodeId;
Jordan Halterman07f052b2017-10-08 14:22:41 -070051import org.onosproject.cluster.Partition;
Madan Jampani33547452016-02-29 16:45:04 -080052import org.onosproject.cluster.PartitionDiff;
Madan Jampani15b8ef52016-02-02 17:35:05 -080053import org.onosproject.cluster.PartitionId;
Jordan Halterman980a8c12017-09-22 18:01:19 -070054import org.onosproject.core.Version;
Madan Jampani15b8ef52016-02-02 17:35:05 -080055import org.onosproject.event.AbstractListenerManager;
Jordan Halterman28183ee2017-10-17 17:29:10 -070056import org.onosproject.store.cluster.messaging.ClusterCommunicationService;
Madan Jampani15b8ef52016-02-02 17:35:05 -080057import org.onosproject.store.primitives.DistributedPrimitiveCreator;
58import org.onosproject.store.primitives.PartitionAdminService;
59import org.onosproject.store.primitives.PartitionEvent;
60import org.onosproject.store.primitives.PartitionEventListener;
61import org.onosproject.store.primitives.PartitionService;
Madan Jampaniccdf9da2016-05-05 14:37:27 -070062import org.onosproject.store.service.PartitionClientInfo;
Madan Jampanie14a09c2016-02-11 10:43:21 -080063import org.onosproject.store.service.PartitionInfo;
Jordan Halterman07f052b2017-10-08 14:22:41 -070064import org.onosproject.upgrade.Upgrade;
65import org.onosproject.upgrade.UpgradeEvent;
66import org.onosproject.upgrade.UpgradeEventListener;
Jordan Halterman980a8c12017-09-22 18:01:19 -070067import org.onosproject.upgrade.UpgradeService;
Madan Jampani15b8ef52016-02-02 17:35:05 -080068import org.slf4j.Logger;
69
Heedo Kang4a47a302016-02-29 17:40:23 +090070import static org.onosproject.security.AppGuard.checkPermission;
71import static org.onosproject.security.AppPermission.Type.PARTITION_READ;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070072import static org.slf4j.LoggerFactory.getLogger;
Heedo Kang4a47a302016-02-29 17:40:23 +090073
Madan Jampani15b8ef52016-02-02 17:35:05 -080074/**
75 * Implementation of {@code PartitionService} and {@code PartitionAdminService}.
76 */
77@Component
78@Service
79public class PartitionManager extends AbstractListenerManager<PartitionEvent, PartitionEventListener>
80 implements PartitionService, PartitionAdminService {
81
82 private final Logger log = getLogger(getClass());
83
84 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Jordan Halterman28183ee2017-10-17 17:29:10 -070085 protected ClusterCommunicationService clusterCommunicator;
Madan Jampani15b8ef52016-02-02 17:35:05 -080086
87 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
88 protected ClusterMetadataService metadataService;
89
90 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
91 protected ClusterService clusterService;
92
Jordan Halterman980a8c12017-09-22 18:01:19 -070093 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Jordan Halterman07f052b2017-10-08 14:22:41 -070094 protected MembershipService membershipService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070095
96 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
Jordan Halterman07f052b2017-10-08 14:22:41 -070097 protected UpgradeService upgradeService;
Jordan Halterman980a8c12017-09-22 18:01:19 -070098
99 private final Map<PartitionId, StoragePartition> inactivePartitions = Maps.newConcurrentMap();
100 private final Map<PartitionId, StoragePartition> activePartitions = Maps.newConcurrentMap();
Madan Jampani33547452016-02-29 16:45:04 -0800101 private final AtomicReference<ClusterMetadata> currentClusterMetadata = new AtomicReference<>();
Jordan Halterman07f052b2017-10-08 14:22:41 -0700102
103 private final ClusterEventListener clusterListener = new InternalClusterEventListener();
104 private final UpgradeEventListener upgradeListener = new InternalUpgradeEventListener();
105 private final ClusterMetadataEventListener metadataListener = new InternalClusterMetadataListener();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800106
107 @Activate
108 public void activate() {
109 eventDispatcher.addSink(PartitionEvent.class, listenerRegistry);
Madan Jampani33547452016-02-29 16:45:04 -0800110 currentClusterMetadata.set(metadataService.getClusterMetadata());
Jordan Halterman07f052b2017-10-08 14:22:41 -0700111
112 clusterService.addListener(clusterListener);
113 upgradeService.addListener(upgradeListener);
Madan Jampani33547452016-02-29 16:45:04 -0800114 metadataService.addListener(metadataListener);
Madan Jampani15b8ef52016-02-02 17:35:05 -0800115
Jordan Halterman980a8c12017-09-22 18:01:19 -0700116 // If an upgrade is currently in progress and this node is an upgraded node, initialize upgrade partitions.
117 CompletableFuture<Void> openFuture;
118 if (upgradeService.isUpgrading() && upgradeService.isLocalUpgraded()) {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700119 currentClusterMetadata.get()
120 .getPartitions()
Jordan Halterman28183ee2017-10-17 17:29:10 -0700121 .forEach(partition -> {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700122 // Create a default partition and assign it to inactive partitions. This node will join
123 // inactive partitions to participate in consensus for fault tolerance, but the partitions
124 // won't be accessible via client proxies.
125 inactivePartitions.put(partition.getId(), new InactiveStoragePartition(
Jordan Halterman28183ee2017-10-17 17:29:10 -0700126 partition,
Jordan Halterman28183ee2017-10-17 17:29:10 -0700127 clusterCommunicator,
Jordan Halterman07f052b2017-10-08 14:22:41 -0700128 clusterService));
129
130 // Create a forked partition and assign it to active partitions. These partitions will be
131 // forked from commit logs for previous version partitions.
132 Partition forkedPartition = computeInitialPartition(
Jordan Halterman28183ee2017-10-17 17:29:10 -0700133 partition,
Jordan Halterman07f052b2017-10-08 14:22:41 -0700134 upgradeService.getState().target(),
135 getLocalNodes());
136 activePartitions.put(partition.getId(), new ForkedStoragePartition(
137 forkedPartition,
138 partition,
Jordan Halterman28183ee2017-10-17 17:29:10 -0700139 clusterCommunicator,
Jordan Halterman07f052b2017-10-08 14:22:41 -0700140 clusterService));
Jordan Halterman28183ee2017-10-17 17:29:10 -0700141 });
Jordan Halterman980a8c12017-09-22 18:01:19 -0700142
143 // We have to fork existing partitions before we can start inactive partition servers to
144 // avoid duplicate message handlers when both servers are running.
145 openFuture = CompletableFuture.allOf(activePartitions.values().stream()
146 .map(StoragePartition::open)
147 .toArray(CompletableFuture[]::new))
148 .thenCompose(v -> CompletableFuture.allOf(inactivePartitions.values().stream()
149 .map(StoragePartition::open)
150 .toArray(CompletableFuture[]::new)));
151 } else {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700152 currentClusterMetadata.get()
153 .getPartitions()
Jordan Halterman07f052b2017-10-08 14:22:41 -0700154 .forEach(partition -> activePartitions.put(partition.getId(), new ActiveStoragePartition(
Jordan Halterman980a8c12017-09-22 18:01:19 -0700155 partition,
Jordan Halterman980a8c12017-09-22 18:01:19 -0700156 clusterCommunicator,
Jordan Halterman07f052b2017-10-08 14:22:41 -0700157 clusterService)));
Jordan Halterman980a8c12017-09-22 18:01:19 -0700158 openFuture = CompletableFuture.allOf(activePartitions.values().stream()
159 .map(StoragePartition::open)
160 .toArray(CompletableFuture[]::new));
161 }
162
Madan Jampani15b8ef52016-02-02 17:35:05 -0800163 openFuture.join();
164 log.info("Started");
165 }
166
Madan Jampani86cb2432016-02-17 11:07:56 -0800167 @Deactivate
Madan Jampani15b8ef52016-02-02 17:35:05 -0800168 public void deactivate() {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700169 clusterService.removeListener(clusterListener);
170 upgradeService.removeListener(upgradeListener);
Madan Jampani33547452016-02-29 16:45:04 -0800171 metadataService.removeListener(metadataListener);
Madan Jampani15b8ef52016-02-02 17:35:05 -0800172 eventDispatcher.removeSink(PartitionEvent.class);
173
Jordan Halterman980a8c12017-09-22 18:01:19 -0700174 CompletableFuture<Void> closeFuture = CompletableFuture.allOf(
175 CompletableFuture.allOf(inactivePartitions.values().stream()
176 .map(StoragePartition::close)
177 .toArray(CompletableFuture[]::new)),
178 CompletableFuture.allOf(activePartitions.values().stream()
179 .map(StoragePartition::close)
180 .toArray(CompletableFuture[]::new)));
Madan Jampani15b8ef52016-02-02 17:35:05 -0800181 closeFuture.join();
182 log.info("Stopped");
183 }
184
185 @Override
Madan Jampani15b8ef52016-02-02 17:35:05 -0800186 public int getNumberOfPartitions() {
Heedo Kang4a47a302016-02-29 17:40:23 +0900187 checkPermission(PARTITION_READ);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700188 return activePartitions.size();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800189 }
190
191 @Override
192 public Set<PartitionId> getAllPartitionIds() {
Heedo Kang4a47a302016-02-29 17:40:23 +0900193 checkPermission(PARTITION_READ);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700194 return activePartitions.keySet();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800195 }
196
197 @Override
198 public DistributedPrimitiveCreator getDistributedPrimitiveCreator(PartitionId partitionId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900199 checkPermission(PARTITION_READ);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700200 return activePartitions.get(partitionId).client();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800201 }
202
203 @Override
204 public Set<NodeId> getConfiguredMembers(PartitionId partitionId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900205 checkPermission(PARTITION_READ);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700206 StoragePartition partition = activePartitions.get(partitionId);
Madan Jampani15b8ef52016-02-02 17:35:05 -0800207 return ImmutableSet.copyOf(partition.getMembers());
208 }
209
210 @Override
211 public Set<NodeId> getActiveMembersMembers(PartitionId partitionId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900212 checkPermission(PARTITION_READ);
Madan Jampani15b8ef52016-02-02 17:35:05 -0800213 // TODO: This needs to query metadata to determine currently active
214 // members of partition
215 return getConfiguredMembers(partitionId);
216 }
Madan Jampanie14a09c2016-02-11 10:43:21 -0800217
218 @Override
219 public List<PartitionInfo> partitionInfo() {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700220 return activePartitions.values()
Madan Jampanie14a09c2016-02-11 10:43:21 -0800221 .stream()
Sho SHIMIZU5fab6e52016-02-15 11:54:15 -0800222 .flatMap(x -> Tools.stream(x.info()))
Madan Jampanie14a09c2016-02-11 10:43:21 -0800223 .collect(Collectors.toList());
224 }
Madan Jampani33547452016-02-29 16:45:04 -0800225
Jordan Halterman07f052b2017-10-08 14:22:41 -0700226 /**
227 * Returns a list of nodes sorted by time ordered oldest to newest.
228 *
229 * @return a list of nodes sorted by time
230 */
231 private List<NodeId> getLocalNodes() {
232 return membershipService.getLocalGroup()
233 .members()
234 .stream()
235 .map(Member::nodeId)
236 .collect(Collectors.toList());
237 }
238
239 /**
240 * Computes an initial forked partition from the given source partition.
241 *
242 * @param sourcePartition the source partition from which to compute the partition
243 * @param targetVersion the target partition version
244 * @param members the set of members available to the partition
245 * @return the computed forked partition
246 */
247 protected static Partition computeInitialPartition(
248 Partition sourcePartition,
249 Version targetVersion,
250 List<NodeId> members) {
251 return computePartition(sourcePartition, targetVersion, members, 1);
252 }
253
254 /**
255 * Computes a final forked partition from the given source partition.
256 *
257 * @param sourcePartition the source partition from which to compute the partition
258 * @param targetVersion the target partition version
259 * @param members the set of members available to the partition
260 * @return the computed forked partition
261 */
262 protected static Partition computeFinalPartition(
263 Partition sourcePartition,
264 Version targetVersion,
265 List<NodeId> members) {
266 return computePartition(sourcePartition, targetVersion, members, 0);
267 }
268
269 /**
270 * Computes a forked partition from the given source partition.
271 *
272 * @param sourcePartition the source partition from which to compute the partition
273 * @param targetVersion the target partition version
274 * @param members the set of members available to the partition
275 * @param delta the number of additional members to preserve outside the partition
276 * @return the computed forked partition
277 */
278 private static Partition computePartition(
279 Partition sourcePartition,
280 Version targetVersion,
281 List<NodeId> members,
282 int delta) {
283 // Create a collection of members of the forked/isolated partition. Initial membership
284 // will include up to n upgraded nodes until all n nodes in the partition have been upgraded.
285 List<NodeId> sortedMembers = members.stream()
286 .sorted()
287 .collect(Collectors.toList());
288
289 // Create a list of members of the partition that have been upgraded according to the
290 // version isolated cluster membership.
291 List<NodeId> partitionMembers = sortedMembers.stream()
292 .filter(nodeId -> sourcePartition.getMembers().contains(nodeId))
293 .collect(Collectors.toList());
294
295 // If additional members need to be added to the partition to make up a full member list,
296 // add members in sorted order to create deterministic rebalancing of nodes.
297 int totalMembers = sourcePartition.getMembers().size() + delta;
298 if (partitionMembers.size() < totalMembers) {
299 for (int i = partitionMembers.size(); i < totalMembers; i++) {
300 Optional<NodeId> nextMember = sortedMembers.stream()
301 .filter(nodeId -> !partitionMembers.contains(nodeId))
302 .findFirst();
303 if (nextMember.isPresent()) {
304 partitionMembers.add(nextMember.get());
305 } else {
306 break;
307 }
308 }
309 }
310
311 return new DefaultPartition(
312 sourcePartition.getId(),
313 targetVersion,
314 partitionMembers);
315 }
316
317 private void processInstanceReady(NodeId nodeId) {
318 if (upgradeService.isUpgrading() && upgradeService.isLocalUpgraded()) {
319 currentClusterMetadata.get()
320 .getPartitions()
321 .forEach(partition -> {
322 StoragePartition activePartition = activePartitions.get(partition.getId());
323 if (activePartition != null) {
324 Partition newPartition = computeFinalPartition(
325 partition,
326 upgradeService.getState().target(),
327 getLocalNodes());
328 log.info("Updating storage partition {}: {}", partition, newPartition);
329 activePartition.onUpdate(newPartition);
330 }
331 });
332 }
333 }
334
335 private void processUpgradeComplete(Upgrade upgrade) {
336 if (!inactivePartitions.isEmpty()) {
337 List<CompletableFuture<Void>> futures = inactivePartitions.values()
338 .stream()
339 .map(StoragePartition::delete)
340 .collect(Collectors.toList());
341 CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])).thenRun(() -> {
342 try {
343 Files.delete(new File(InactiveStoragePartition.INACTIVE_DIR).toPath());
344 } catch (IOException e) {
345 log.error("Failed to delete partition archive");
346 }
347 });
348 inactivePartitions.clear();
349 }
350 }
351
Madan Jampani33547452016-02-29 16:45:04 -0800352 private void processMetadataUpdate(ClusterMetadata clusterMetadata) {
353 ClusterMetadataDiff diffExaminer =
354 new ClusterMetadataDiff(currentClusterMetadata.get(), clusterMetadata);
355 diffExaminer.partitionDiffs()
356 .values()
357 .stream()
Madan Jampani33547452016-02-29 16:45:04 -0800358 .filter(PartitionDiff::hasChanged)
Jordan Halterman980a8c12017-09-22 18:01:19 -0700359 .forEach(diff -> activePartitions.get(diff.partitionId()).onUpdate(diff.newValue()));
jiangrui9e956a52017-11-07 10:33:24 +0800360 currentClusterMetadata.set(clusterMetadata);
Madan Jampani33547452016-02-29 16:45:04 -0800361 }
362
Jordan Halterman07f052b2017-10-08 14:22:41 -0700363 private class InternalClusterEventListener implements ClusterEventListener {
364 @Override
365 public void event(ClusterEvent event) {
366 if (event.type() == ClusterEvent.Type.INSTANCE_READY) {
367 processInstanceReady(event.subject().id());
368 }
369 }
370 }
371
372 private class InternalUpgradeEventListener implements UpgradeEventListener {
373 @Override
374 public void event(UpgradeEvent event) {
375 if (event.type() == UpgradeEvent.Type.COMMITTED) {
376 processUpgradeComplete(event.subject());
377 }
378 }
379 }
380
Madan Jampani33547452016-02-29 16:45:04 -0800381 private class InternalClusterMetadataListener implements ClusterMetadataEventListener {
382 @Override
383 public void event(ClusterMetadataEvent event) {
384 processMetadataUpdate(event.subject());
385 }
386 }
Madan Jampaniccdf9da2016-05-05 14:37:27 -0700387
388 @Override
389 public List<PartitionClientInfo> partitionClientInfo() {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700390 return activePartitions.values()
Madan Jampaniccdf9da2016-05-05 14:37:27 -0700391 .stream()
392 .map(StoragePartition::client)
393 .map(StoragePartitionClient::clientInfo)
394 .collect(Collectors.toList());
395 }
Madan Jampani2f9cc712016-02-15 19:36:21 -0800396}