/*
 * Copyright 2016-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.store.primitives.impl;
17
import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.Duration;
import java.util.Collection;
import java.util.Locale;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

import io.atomix.protocols.raft.RaftServer;
import io.atomix.protocols.raft.cluster.MemberId;
import io.atomix.protocols.raft.storage.RaftStorage;
import io.atomix.storage.StorageLevel;
import org.onosproject.cluster.Partition;
import org.onosproject.store.cluster.messaging.ClusterCommunicationService;
import org.onosproject.store.primitives.resources.impl.AtomixSerializerAdapter;
import org.onosproject.store.service.PartitionInfo;
import org.onosproject.store.service.Serializer;
import org.slf4j.Logger;

import static org.slf4j.LoggerFactory.getLogger;
42
Madan Jampani15b8ef52016-02-02 17:35:05 -080043/**
44 * {@link StoragePartition} server.
45 */
46public class StoragePartitionServer implements Managed<StoragePartitionServer> {
47
Madan Jampanifc981772016-02-16 09:46:42 -080048 private final Logger log = getLogger(getClass());
49
Jordan Halterman153449c2018-04-05 18:31:26 -070050 private static final String ELECTION_TIMEOUT_MILLIS_PROPERTY = "onos.cluster.raft.electionTimeoutMillis";
51 private static final String ELECTION_THRESHOLD_PROPERTY = "onos.cluster.raft.electionFailureThreshold";
52 private static final String SESSION_THRESHOLD_PROPERTY = "onos.cluster.raft.sessionFailureThreshold";
53 private static final String HEARTBEAT_INTERVAL_MILLIS_PROPERTY = "onos.cluster.raft.heartbeatIntervalMillis";
54 private static final String MAX_SEGMENT_SIZE_PROPERTY = "onos.cluster.raft.storage.maxSegmentSize";
55 private static final String STORAGE_LEVEL_PROPERTY = "onos.cluster.raft.storage.level";
56 private static final String FLUSH_ON_COMMIT_PROPERTY = "onos.cluster.raft.storage.flushOnCommit";
57
58 private static final long ELECTION_TIMEOUT_MILLIS;
59 private static final int ELECTION_THRESHOLD;
60 private static final int SESSION_THRESHOLD;
61 private static final long HEARTBEAT_INTERVAL_MILLIS;
62 private static final int MAX_SEGMENT_SIZE;
63 private static final StorageLevel STORAGE_LEVEL;
64 private static final boolean FLUSH_ON_COMMIT;
65
66 private static final int DEFAULT_MAX_SEGMENT_SIZE = 1024 * 1024 * 64;
67 private static final long DEFAULT_ELECTION_TIMEOUT_MILLIS = 2500;
68 private static final int DEFAULT_ELECTION_THRESHOLD = 12;
69 private static final int DEFAULT_SESSION_THRESHOLD = 10;
70 private static final long DEFAULT_HEARTBEAT_INTERVAL_MILLIS = 500;
71 private static final StorageLevel DEFAULT_STORAGE_LEVEL = StorageLevel.MAPPED;
72 private static final boolean DEFAULT_FLUSH_ON_COMMIT = false;
73
74 static {
75 int maxSegmentSize;
76 try {
77 maxSegmentSize = Integer.parseInt(System.getProperty(
78 MAX_SEGMENT_SIZE_PROPERTY,
79 String.valueOf(DEFAULT_MAX_SEGMENT_SIZE)));
80 } catch (NumberFormatException e) {
81 maxSegmentSize = DEFAULT_MAX_SEGMENT_SIZE;
82 }
83 MAX_SEGMENT_SIZE = maxSegmentSize;
84
85 long electionTimeoutMillis;
86 try {
87 electionTimeoutMillis = Long.parseLong(System.getProperty(
88 ELECTION_TIMEOUT_MILLIS_PROPERTY,
89 String.valueOf(DEFAULT_ELECTION_TIMEOUT_MILLIS)));
90 } catch (NumberFormatException e) {
91 electionTimeoutMillis = DEFAULT_ELECTION_TIMEOUT_MILLIS;
92 }
93 ELECTION_TIMEOUT_MILLIS = electionTimeoutMillis;
94
95 int electionFailureThreshold;
96 try {
97 electionFailureThreshold = Integer.parseInt(System.getProperty(
98 ELECTION_THRESHOLD_PROPERTY,
99 String.valueOf(DEFAULT_ELECTION_THRESHOLD)));
100 } catch (NumberFormatException e) {
101 electionFailureThreshold = DEFAULT_ELECTION_THRESHOLD;
102 }
103 ELECTION_THRESHOLD = electionFailureThreshold;
104
105 int sessionFailureThreshold;
106 try {
107 sessionFailureThreshold = Integer.parseInt(System.getProperty(
108 SESSION_THRESHOLD_PROPERTY,
109 String.valueOf(DEFAULT_SESSION_THRESHOLD)));
110 } catch (NumberFormatException e) {
111 sessionFailureThreshold = DEFAULT_SESSION_THRESHOLD;
112 }
113 SESSION_THRESHOLD = sessionFailureThreshold;
114
115 long heartbeatIntervalMillis;
116 try {
117 heartbeatIntervalMillis = Long.parseLong(System.getProperty(
118 HEARTBEAT_INTERVAL_MILLIS_PROPERTY,
119 String.valueOf(DEFAULT_HEARTBEAT_INTERVAL_MILLIS)));
120 } catch (NumberFormatException e) {
121 heartbeatIntervalMillis = DEFAULT_HEARTBEAT_INTERVAL_MILLIS;
122 }
123 HEARTBEAT_INTERVAL_MILLIS = heartbeatIntervalMillis;
124
125 StorageLevel storageLevel;
126 try {
127 storageLevel = StorageLevel.valueOf(System.getProperty(
128 STORAGE_LEVEL_PROPERTY,
129 DEFAULT_STORAGE_LEVEL.name()).toUpperCase());
130 } catch (IllegalArgumentException e) {
131 storageLevel = DEFAULT_STORAGE_LEVEL;
132 }
133 STORAGE_LEVEL = storageLevel;
134
135 boolean flushOnCommit;
136 try {
137 flushOnCommit = Boolean.parseBoolean(System.getProperty(
138 FLUSH_ON_COMMIT_PROPERTY,
139 String.valueOf(DEFAULT_FLUSH_ON_COMMIT)));
140 } catch (Exception e) {
141 flushOnCommit = DEFAULT_FLUSH_ON_COMMIT;
142 }
143 FLUSH_ON_COMMIT = flushOnCommit;
144 }
Jordan Halterman19201232017-09-12 17:20:26 -0700145
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700146 private final MemberId localMemberId;
Madan Jampani15b8ef52016-02-02 17:35:05 -0800147 private final StoragePartition partition;
Jordan Halterman28183ee2017-10-17 17:29:10 -0700148 private final ClusterCommunicationService clusterCommunicator;
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700149 private RaftServer server;
Madan Jampani15b8ef52016-02-02 17:35:05 -0800150
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700151 public StoragePartitionServer(
Madan Jampani15b8ef52016-02-02 17:35:05 -0800152 StoragePartition partition,
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700153 MemberId localMemberId,
Jordan Halterman28183ee2017-10-17 17:29:10 -0700154 ClusterCommunicationService clusterCommunicator) {
Madan Jampani15b8ef52016-02-02 17:35:05 -0800155 this.partition = partition;
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700156 this.localMemberId = localMemberId;
Jordan Halterman980a8c12017-09-22 18:01:19 -0700157 this.clusterCommunicator = clusterCommunicator;
Madan Jampani15b8ef52016-02-02 17:35:05 -0800158 }
159
160 @Override
161 public CompletableFuture<Void> open() {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700162 log.info("Starting server for partition {} ({})", partition.getId(), partition.getVersion());
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700163 CompletableFuture<RaftServer> serverOpenFuture;
164 if (partition.getMemberIds().contains(localMemberId)) {
Madan Jampani65f24bb2016-03-15 15:16:18 -0700165 if (server != null && server.isRunning()) {
Madan Jampani15b8ef52016-02-02 17:35:05 -0800166 return CompletableFuture.completedFuture(null);
167 }
168 synchronized (this) {
Madan Jampani630e7ac2016-05-31 11:34:05 -0700169 server = buildServer();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800170 }
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700171 serverOpenFuture = server.bootstrap(partition.getMemberIds());
Madan Jampani15b8ef52016-02-02 17:35:05 -0800172 } else {
173 serverOpenFuture = CompletableFuture.completedFuture(null);
174 }
Madan Jampanifc981772016-02-16 09:46:42 -0800175 return serverOpenFuture.whenComplete((r, e) -> {
176 if (e == null) {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700177 log.info("Successfully started server for partition {} ({})",
178 partition.getId(), partition.getVersion());
Madan Jampanifc981772016-02-16 09:46:42 -0800179 } else {
Jordan Halterman980a8c12017-09-22 18:01:19 -0700180 log.info("Failed to start server for partition {} ({})",
181 partition.getId(), partition.getVersion(), e);
Madan Jampanifc981772016-02-16 09:46:42 -0800182 }
183 }).thenApply(v -> null);
Madan Jampani15b8ef52016-02-02 17:35:05 -0800184 }
185
186 @Override
187 public CompletableFuture<Void> close() {
Madan Jampani630e7ac2016-05-31 11:34:05 -0700188 return server.shutdown();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800189 }
190
Madan Jampani33547452016-02-29 16:45:04 -0800191 /**
192 * Closes the server and exits the partition.
193 * @return future that is completed when the operation is complete
194 */
195 public CompletableFuture<Void> closeAndExit() {
Madan Jampani630e7ac2016-05-31 11:34:05 -0700196 return server.leave();
Madan Jampani33547452016-02-29 16:45:04 -0800197 }
198
Jordan Halterman980a8c12017-09-22 18:01:19 -0700199 /**
Jordan Halterman07f052b2017-10-08 14:22:41 -0700200 * Deletes the server.
201 */
202 public void delete() {
203 try {
204 Files.walkFileTree(partition.getDataFolder().toPath(), new SimpleFileVisitor<Path>() {
205 @Override
206 public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
207 Files.delete(file);
208 return FileVisitResult.CONTINUE;
209 }
210 @Override
211 public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
212 Files.delete(dir);
213 return FileVisitResult.CONTINUE;
214 }
215 });
216 } catch (IOException e) {
217 log.error("Failed to delete partition: {}", e);
218 }
219 }
220
221 /**
Jordan Halterman980a8c12017-09-22 18:01:19 -0700222 * Forks the existing partition into a new partition.
223 *
Jordan Halterman07f052b2017-10-08 14:22:41 -0700224 * @param fromPartition the partition from which to fork the server
Jordan Halterman980a8c12017-09-22 18:01:19 -0700225 * @return future to be completed once the fork operation is complete
226 */
Jordan Halterman07f052b2017-10-08 14:22:41 -0700227 public CompletableFuture<Void> fork(Partition fromPartition) {
228 log.info("Forking server for partition {} ({}->{})",
229 partition.getId(), fromPartition.getVersion(), partition.getVersion());
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700230 RaftServer.Builder builder = RaftServer.newBuilder(localMemberId)
Jordan Halterman07f052b2017-10-08 14:22:41 -0700231 .withName(String.format("partition-%s", fromPartition.getId()))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700232 .withProtocol(new RaftServerCommunicator(
Jordan Halterman07f052b2017-10-08 14:22:41 -0700233 String.format("partition-%s-%s", fromPartition.getId(), fromPartition.getVersion()),
Jordan Halterman980a8c12017-09-22 18:01:19 -0700234 Serializer.using(StorageNamespaces.RAFT_PROTOCOL),
235 clusterCommunicator))
Jordan Halterman19201232017-09-12 17:20:26 -0700236 .withElectionTimeout(Duration.ofMillis(ELECTION_TIMEOUT_MILLIS))
237 .withHeartbeatInterval(Duration.ofMillis(HEARTBEAT_INTERVAL_MILLIS))
Jordan Halterman21249352018-01-23 12:35:09 -0800238 .withElectionThreshold(ELECTION_THRESHOLD)
Jordan Halterman153449c2018-04-05 18:31:26 -0700239 .withSessionFailureThreshold(SESSION_THRESHOLD)
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700240 .withStorage(RaftStorage.newBuilder()
Jordan Halterman07f052b2017-10-08 14:22:41 -0700241 .withPrefix(String.format("partition-%s", partition.getId()))
Jordan Halterman153449c2018-04-05 18:31:26 -0700242 .withStorageLevel(STORAGE_LEVEL)
243 .withFlushOnCommit(FLUSH_ON_COMMIT)
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700244 .withSerializer(new AtomixSerializerAdapter(Serializer.using(StorageNamespaces.RAFT_STORAGE)))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700245 .withDirectory(partition.getDataFolder())
246 .withMaxSegmentSize(MAX_SEGMENT_SIZE)
247 .build());
248 StoragePartition.RAFT_SERVICES.forEach(builder::addService);
249 RaftServer server = builder.build();
Jordan Halterman07f052b2017-10-08 14:22:41 -0700250
251 // Create a collection of members currently in the source partition.
252 Collection<MemberId> members = fromPartition.getMembers()
253 .stream()
254 .map(id -> MemberId.from(id.id()))
255 .collect(Collectors.toList());
256
257 // If this node is a member of the partition, join the partition. Otherwise, listen to the partition.
258 CompletableFuture<RaftServer> future = members.contains(localMemberId)
259 ? server.bootstrap(members) : server.listen(members);
260
261 // TODO: We should leave the cluster for nodes that aren't normally members to ensure the source
262 // cluster's configuration is kept consistent for rolling back upgrades, but Atomix deletes configuration
263 // files when a node leaves the cluster so we can't do that here.
264 return future.thenCompose(v -> server.shutdown())
Jordan Halterman980a8c12017-09-22 18:01:19 -0700265 .thenCompose(v -> {
266 // Delete the cluster configuration file from the forked partition.
267 try {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700268 Files.delete(new File(
269 partition.getDataFolder(),
270 String.format("partition-%s.conf", partition.getId())).toPath());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700271 } catch (IOException e) {
272 log.error("Failed to delete partition configuration: {}", e);
273 }
274
275 // Build and bootstrap a new server.
276 this.server = buildServer();
277 return this.server.bootstrap();
278 }).whenComplete((r, e) -> {
279 if (e == null) {
280 log.info("Successfully forked server for partition {} ({}->{})",
Jordan Halterman07f052b2017-10-08 14:22:41 -0700281 partition.getId(), fromPartition.getVersion(), partition.getVersion());
Jordan Halterman980a8c12017-09-22 18:01:19 -0700282 } else {
283 log.info("Failed to fork server for partition {} ({}->{})",
Jordan Halterman07f052b2017-10-08 14:22:41 -0700284 partition.getId(), fromPartition.getVersion(), partition.getVersion(), e);
Jordan Halterman980a8c12017-09-22 18:01:19 -0700285 }
286 }).thenApply(v -> null);
287 }
288
289 private RaftServer buildServer() {
290 RaftServer.Builder builder = RaftServer.newBuilder(localMemberId)
Jordan Halterman07f052b2017-10-08 14:22:41 -0700291 .withName(String.format("partition-%s", partition.getId()))
Jordan Halterman980a8c12017-09-22 18:01:19 -0700292 .withProtocol(new RaftServerCommunicator(
Jordan Halterman07f052b2017-10-08 14:22:41 -0700293 String.format("partition-%s-%s", partition.getId(), partition.getVersion()),
Jordan Halterman980a8c12017-09-22 18:01:19 -0700294 Serializer.using(StorageNamespaces.RAFT_PROTOCOL),
295 clusterCommunicator))
296 .withElectionTimeout(Duration.ofMillis(ELECTION_TIMEOUT_MILLIS))
297 .withHeartbeatInterval(Duration.ofMillis(HEARTBEAT_INTERVAL_MILLIS))
Jordan Halterman21249352018-01-23 12:35:09 -0800298 .withElectionThreshold(ELECTION_THRESHOLD)
Jordan Halterman153449c2018-04-05 18:31:26 -0700299 .withSessionFailureThreshold(SESSION_THRESHOLD)
Jordan Halterman980a8c12017-09-22 18:01:19 -0700300 .withStorage(RaftStorage.newBuilder()
Jordan Halterman07f052b2017-10-08 14:22:41 -0700301 .withPrefix(String.format("partition-%s", partition.getId()))
Jordan Halterman153449c2018-04-05 18:31:26 -0700302 .withStorageLevel(STORAGE_LEVEL)
303 .withFlushOnCommit(FLUSH_ON_COMMIT)
Jordan Halterman980a8c12017-09-22 18:01:19 -0700304 .withSerializer(new AtomixSerializerAdapter(Serializer.using(StorageNamespaces.RAFT_STORAGE)))
305 .withDirectory(partition.getDataFolder())
Jordan Halterman035231e2017-07-18 08:39:07 -0700306 .withMaxSegmentSize(MAX_SEGMENT_SIZE)
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700307 .build());
308 StoragePartition.RAFT_SERVICES.forEach(builder::addService);
309 return builder.build();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800310 }
311
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700312 public CompletableFuture<Void> join(Collection<MemberId> otherMembers) {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700313 log.info("Joining partition {} ({})", partition.getId(), partition.getName());
Madan Jampani630e7ac2016-05-31 11:34:05 -0700314 server = buildServer();
315 return server.join(otherMembers).whenComplete((r, e) -> {
Madan Jampanif172d402016-03-04 00:56:38 -0800316 if (e == null) {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700317 log.info("Successfully joined partition {} ({})", partition.getId(), partition.getName());
Madan Jampanif172d402016-03-04 00:56:38 -0800318 } else {
Jordan Halterman07f052b2017-10-08 14:22:41 -0700319 log.info("Failed to join partition {} ({})", partition.getId(), partition.getName(), e);
Madan Jampanif172d402016-03-04 00:56:38 -0800320 }
321 }).thenApply(v -> null);
322 }
323
Madan Jampani15b8ef52016-02-02 17:35:05 -0800324 @Override
325 public boolean isOpen() {
Madan Jampani65f24bb2016-03-15 15:16:18 -0700326 return server.isRunning();
Madan Jampani15b8ef52016-02-02 17:35:05 -0800327 }
Madan Jampanie14a09c2016-02-11 10:43:21 -0800328
329 /**
330 * Returns the partition information.
331 * @return partition info
332 */
333 public PartitionInfo info() {
334 return new StoragePartitionDetails(partition.getId(),
Jordan Halterman2bf177c2017-06-29 01:49:08 -0700335 server.cluster().getMembers(),
336 server.cluster().getMembers(),
337 server.cluster().getLeader(),
338 server.cluster().getTerm()).toPartitionInfo();
Madan Jampanie14a09c2016-02-11 10:43:21 -0800339 }
Madan Jampani15b8ef52016-02-02 17:35:05 -0800340}