blob: eb48f668a2133a28013303b2b92fa3f7a9627544 [file] [log] [blame]
/*
 * Copyright 2016-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.store.primitives.impl;
17
Madan Jampani15b8ef52016-02-02 17:35:05 -080018import java.io.File;
Jordan Halterman980a8c12017-09-22 18:01:19 -070019import java.io.IOException;
Jordan Halterman07f052b2017-10-08 14:22:41 -070020import java.nio.file.FileVisitResult;
Jordan Halterman980a8c12017-09-22 18:01:19 -070021import java.nio.file.Files;
Jordan Halterman07f052b2017-10-08 14:22:41 -070022import java.nio.file.Path;
23import java.nio.file.SimpleFileVisitor;
24import java.nio.file.attribute.BasicFileAttributes;
Jordan Halterman19201232017-09-12 17:20:26 -070025import java.time.Duration;
Madan Jampani15b8ef52016-02-02 17:35:05 -080026import java.util.Collection;
Madan Jampani15b8ef52016-02-02 17:35:05 -080027import java.util.concurrent.CompletableFuture;
Jordan Halterman07f052b2017-10-08 14:22:41 -070028import java.util.stream.Collectors;
Madan Jampani15b8ef52016-02-02 17:35:05 -080029
Jordan Halterman2bf177c2017-06-29 01:49:08 -070030import io.atomix.protocols.raft.RaftServer;
31import io.atomix.protocols.raft.cluster.MemberId;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070032import io.atomix.protocols.raft.storage.RaftStorage;
33import io.atomix.storage.StorageLevel;
Jordan Halterman07f052b2017-10-08 14:22:41 -070034import org.onosproject.cluster.Partition;
Jordan Halterman28183ee2017-10-17 17:29:10 -070035import org.onosproject.store.cluster.messaging.ClusterCommunicationService;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070036import org.onosproject.store.primitives.resources.impl.AtomixSerializerAdapter;
Madan Jampanie14a09c2016-02-11 10:43:21 -080037import org.onosproject.store.service.PartitionInfo;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070038import org.onosproject.store.service.Serializer;
Madan Jampanifc981772016-02-16 09:46:42 -080039import org.slf4j.Logger;
Madan Jampani15b8ef52016-02-02 17:35:05 -080040
Jordan Halterman2bf177c2017-06-29 01:49:08 -070041import static org.slf4j.LoggerFactory.getLogger;
42
Madan Jampani15b8ef52016-02-02 17:35:05 -080043/**
44 * {@link StoragePartition} server.
45 */
46public class StoragePartitionServer implements Managed<StoragePartitionServer> {
47
Madan Jampanifc981772016-02-16 09:46:42 -080048 private final Logger log = getLogger(getClass());
49
Jordan Halterman035231e2017-07-18 08:39:07 -070050 private static final int MAX_SEGMENT_SIZE = 1024 * 1024 * 64;
Jordan Halterman19201232017-09-12 17:20:26 -070051 private static final long ELECTION_TIMEOUT_MILLIS = 2500;
Jordan Halterman21249352018-01-23 12:35:09 -080052 private static final int ELECTION_THRESHOLD = 5;
53 private static final long HEARTBEAT_INTERVAL_MILLIS = 500;
Jordan Halterman19201232017-09-12 17:20:26 -070054
Jordan Halterman2bf177c2017-06-29 01:49:08 -070055 private final MemberId localMemberId;
Madan Jampani15b8ef52016-02-02 17:35:05 -080056 private final StoragePartition partition;
Jordan Halterman28183ee2017-10-17 17:29:10 -070057 private final ClusterCommunicationService clusterCommunicator;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070058 private RaftServer server;
Madan Jampani15b8ef52016-02-02 17:35:05 -080059
Jordan Halterman2bf177c2017-06-29 01:49:08 -070060 public StoragePartitionServer(
Madan Jampani15b8ef52016-02-02 17:35:05 -080061 StoragePartition partition,
Jordan Halterman2bf177c2017-06-29 01:49:08 -070062 MemberId localMemberId,
Jordan Halterman28183ee2017-10-17 17:29:10 -070063 ClusterCommunicationService clusterCommunicator) {
Madan Jampani15b8ef52016-02-02 17:35:05 -080064 this.partition = partition;
Jordan Halterman2bf177c2017-06-29 01:49:08 -070065 this.localMemberId = localMemberId;
Jordan Halterman980a8c12017-09-22 18:01:19 -070066 this.clusterCommunicator = clusterCommunicator;
Madan Jampani15b8ef52016-02-02 17:35:05 -080067 }
68
69 @Override
70 public CompletableFuture<Void> open() {
Jordan Halterman980a8c12017-09-22 18:01:19 -070071 log.info("Starting server for partition {} ({})", partition.getId(), partition.getVersion());
Jordan Halterman2bf177c2017-06-29 01:49:08 -070072 CompletableFuture<RaftServer> serverOpenFuture;
73 if (partition.getMemberIds().contains(localMemberId)) {
Madan Jampani65f24bb2016-03-15 15:16:18 -070074 if (server != null && server.isRunning()) {
Madan Jampani15b8ef52016-02-02 17:35:05 -080075 return CompletableFuture.completedFuture(null);
76 }
77 synchronized (this) {
Madan Jampani630e7ac2016-05-31 11:34:05 -070078 server = buildServer();
Madan Jampani15b8ef52016-02-02 17:35:05 -080079 }
Jordan Halterman2bf177c2017-06-29 01:49:08 -070080 serverOpenFuture = server.bootstrap(partition.getMemberIds());
Madan Jampani15b8ef52016-02-02 17:35:05 -080081 } else {
82 serverOpenFuture = CompletableFuture.completedFuture(null);
83 }
Madan Jampanifc981772016-02-16 09:46:42 -080084 return serverOpenFuture.whenComplete((r, e) -> {
85 if (e == null) {
Jordan Halterman980a8c12017-09-22 18:01:19 -070086 log.info("Successfully started server for partition {} ({})",
87 partition.getId(), partition.getVersion());
Madan Jampanifc981772016-02-16 09:46:42 -080088 } else {
Jordan Halterman980a8c12017-09-22 18:01:19 -070089 log.info("Failed to start server for partition {} ({})",
90 partition.getId(), partition.getVersion(), e);
Madan Jampanifc981772016-02-16 09:46:42 -080091 }
92 }).thenApply(v -> null);
Madan Jampani15b8ef52016-02-02 17:35:05 -080093 }
94
95 @Override
96 public CompletableFuture<Void> close() {
Madan Jampani630e7ac2016-05-31 11:34:05 -070097 return server.shutdown();
Madan Jampani15b8ef52016-02-02 17:35:05 -080098 }
99
Madan Jampani33547452016-02-29 16:45:04 -0800100 /**
101 * Closes the server and exits the partition.
102 * @return future that is completed when the operation is complete
103 */
104 public CompletableFuture<Void> closeAndExit() {
Madan Jampani630e7ac2016-05-31 11:34:05 -0700105 return server.leave();
Madan Jampani33547452016-02-29 16:45:04 -0800106 }
107
Jordan Halterman980a8c12017-09-22 18:01:19 -0700108 /**
Jordan Halterman07f052b2017-10-08 14:22:41 -0700109 * Deletes the server.
110 */
111 public void delete() {
112 try {
113 Files.walkFileTree(partition.getDataFolder().toPath(), new SimpleFileVisitor<Path>() {
114 @Override
115 public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
116 Files.delete(file);
117 return FileVisitResult.CONTINUE;
118 }
119 @Override
120 public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
121 Files.delete(dir);
122 return FileVisitResult.CONTINUE;
123 }
124 });
125 } catch (IOException e) {
126 log.error("Failed to delete partition: {}", e);
127 }
128 }
129
130 /**
Jordan Halterman980a8c12017-09-22 18:01:19 -0700131 * Forks the existing partition into a new partition.
132 *
Jordan Halterman07f052b2017-10-08 14:22:41 -0700133 * @param fromPartition the partition from which to fork the server
Jordan Halterman980a8c12017-09-22 18:01:19 -0700134 * @return future to be completed once the fork operation is complete
135 */
Jordan Halterman07f052b2017-10-08 14:22:41 -0700136 public CompletableFuture<Void> fork(Partition fromPartition) {
137 log.info("Forking server for partition {} ({}->{})",
138 partition.getId(), fromPartition.getVersion(), partition.getVersion());
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700139 RaftServer.Builder builder = RaftServer.newBuilder(localMemberId)
Jordan Halterman07f052b2017-10-08 14:22:41 -0700140 .withName(String.format("partition-%s", fromPartition.getId()))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700141 .withProtocol(new RaftServerCommunicator(
Jordan Halterman07f052b2017-10-08 14:22:41 -0700142 String.format("partition-%s-%s", fromPartition.getId(), fromPartition.getVersion()),
Jordan Halterman980a8c12017-09-22 18:01:19 -0700143 Serializer.using(StorageNamespaces.RAFT_PROTOCOL),
144 clusterCommunicator))
Jordan Halterman19201232017-09-12 17:20:26 -0700145 .withElectionTimeout(Duration.ofMillis(ELECTION_TIMEOUT_MILLIS))
146 .withHeartbeatInterval(Duration.ofMillis(HEARTBEAT_INTERVAL_MILLIS))
Jordan Halterman21249352018-01-23 12:35:09 -0800147 .withElectionThreshold(ELECTION_THRESHOLD)
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700148 .withStorage(RaftStorage.newBuilder()
Jordan Halterman07f052b2017-10-08 14:22:41 -0700149 .withPrefix(String.format("partition-%s", partition.getId()))
Jordan Halterman21249352018-01-23 12:35:09 -0800150 .withStorageLevel(StorageLevel.DISK)
151 .withFlushOnCommit()
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700152 .withSerializer(new AtomixSerializerAdapter(Serializer.using(StorageNamespaces.RAFT_STORAGE)))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700153 .withDirectory(partition.getDataFolder())
154 .withMaxSegmentSize(MAX_SEGMENT_SIZE)
155 .build());
156 StoragePartition.RAFT_SERVICES.forEach(builder::addService);
157 RaftServer server = builder.build();
Jordan Halterman07f052b2017-10-08 14:22:41 -0700158
159 // Create a collection of members currently in the source partition.
160 Collection<MemberId> members = fromPartition.getMembers()
161 .stream()
162 .map(id -> MemberId.from(id.id()))
163 .collect(Collectors.toList());
164
165 // If this node is a member of the partition, join the partition. Otherwise, listen to the partition.
166 CompletableFuture<RaftServer> future = members.contains(localMemberId)
167 ? server.bootstrap(members) : server.listen(members);
168
169 // TODO: We should leave the cluster for nodes that aren't normally members to ensure the source
170 // cluster's configuration is kept consistent for rolling back upgrades, but Atomix deletes configuration
171 // files when a node leaves the cluster so we can't do that here.
172 return future.thenCompose(v -> server.shutdown())
Jordan Halterman980a8c12017-09-22 18:01:19 -0700173 .thenCompose(v -> {
174 // Delete the cluster configuration file from the forked partition.
175 try {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700176 Files.delete(new File(
177 partition.getDataFolder(),
178 String.format("partition-%s.conf", partition.getId())).toPath());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700179 } catch (IOException e) {
180 log.error("Failed to delete partition configuration: {}", e);
181 }
182
183 // Build and bootstrap a new server.
184 this.server = buildServer();
185 return this.server.bootstrap();
186 }).whenComplete((r, e) -> {
187 if (e == null) {
188 log.info("Successfully forked server for partition {} ({}->{})",
Jordan Halterman07f052b2017-10-08 14:22:41 -0700189 partition.getId(), fromPartition.getVersion(), partition.getVersion());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700190 } else {
191 log.info("Failed to fork server for partition {} ({}->{})",
Jordan Halterman07f052b2017-10-08 14:22:41 -0700192 partition.getId(), fromPartition.getVersion(), partition.getVersion(), e);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700193 }
194 }).thenApply(v -> null);
195 }
196
197 private RaftServer buildServer() {
198 RaftServer.Builder builder = RaftServer.newBuilder(localMemberId)
Jordan Halterman07f052b2017-10-08 14:22:41 -0700199 .withName(String.format("partition-%s", partition.getId()))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700200 .withProtocol(new RaftServerCommunicator(
Jordan Halterman07f052b2017-10-08 14:22:41 -0700201 String.format("partition-%s-%s", partition.getId(), partition.getVersion()),
Jordan Halterman980a8c12017-09-22 18:01:19 -0700202 Serializer.using(StorageNamespaces.RAFT_PROTOCOL),
203 clusterCommunicator))
204 .withElectionTimeout(Duration.ofMillis(ELECTION_TIMEOUT_MILLIS))
205 .withHeartbeatInterval(Duration.ofMillis(HEARTBEAT_INTERVAL_MILLIS))
Jordan Halterman21249352018-01-23 12:35:09 -0800206 .withElectionThreshold(ELECTION_THRESHOLD)
Jordan Halterman980a8c12017-09-22 18:01:19 -0700207 .withStorage(RaftStorage.newBuilder()
Jordan Halterman07f052b2017-10-08 14:22:41 -0700208 .withPrefix(String.format("partition-%s", partition.getId()))
Jordan Halterman4ce65e82018-02-15 18:09:38 -0800209 .withStorageLevel(StorageLevel.MAPPED)
210 .withFlushOnCommit(false)
Jordan Halterman980a8c12017-09-22 18:01:19 -0700211 .withSerializer(new AtomixSerializerAdapter(Serializer.using(StorageNamespaces.RAFT_STORAGE)))
212 .withDirectory(partition.getDataFolder())
Jordan Halterman035231e2017-07-18 08:39:07 -0700213 .withMaxSegmentSize(MAX_SEGMENT_SIZE)
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700214 .build());
215 StoragePartition.RAFT_SERVICES.forEach(builder::addService);
216 return builder.build();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800217 }
218
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700219 public CompletableFuture<Void> join(Collection<MemberId> otherMembers) {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700220 log.info("Joining partition {} ({})", partition.getId(), partition.getName());
Madan Jampani630e7ac2016-05-31 11:34:05 -0700221 server = buildServer();
222 return server.join(otherMembers).whenComplete((r, e) -> {
Madan Jampanif172d402016-03-04 00:56:38 -0800223 if (e == null) {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700224 log.info("Successfully joined partition {} ({})", partition.getId(), partition.getName());
Madan Jampanif172d402016-03-04 00:56:38 -0800225 } else {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700226 log.info("Failed to join partition {} ({})", partition.getId(), partition.getName(), e);
Madan Jampanif172d402016-03-04 00:56:38 -0800227 }
228 }).thenApply(v -> null);
229 }
230
Madan Jampani15b8ef52016-02-02 17:35:05 -0800231 @Override
232 public boolean isOpen() {
Madan Jampani65f24bb2016-03-15 15:16:18 -0700233 return server.isRunning();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800234 }
Madan Jampanie14a09c2016-02-11 10:43:21 -0800235
236 /**
237 * Returns the partition information.
238 * @return partition info
239 */
240 public PartitionInfo info() {
241 return new StoragePartitionDetails(partition.getId(),
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700242 server.cluster().getMembers(),
243 server.cluster().getMembers(),
244 server.cluster().getLeader(),
245 server.cluster().getTerm()).toPartitionInfo();
Madan Jampanie14a09c2016-02-11 10:43:21 -0800246 }
Madan Jampani15b8ef52016-02-02 17:35:05 -0800247}