blob: ab266b6e76fae3bd50ba8db04931ed94348f5555 [file] [log] [blame]
/*
 * Copyright 2016-present Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.store.intent.impl;
17
18import org.apache.felix.scr.annotations.Activate;
19import org.apache.felix.scr.annotations.Component;
20import org.apache.felix.scr.annotations.Deactivate;
21import org.apache.felix.scr.annotations.Reference;
22import org.apache.felix.scr.annotations.ReferenceCardinality;
23import org.apache.felix.scr.annotations.Service;
24import org.onosproject.cluster.ClusterService;
25import org.onosproject.cluster.Leadership;
26import org.onosproject.cluster.LeadershipEvent;
27import org.onosproject.cluster.LeadershipEventListener;
28import org.onosproject.cluster.LeadershipService;
Brian O'Connor5eb77c82015-03-02 18:09:39 -080029import org.onosproject.cluster.NodeId;
Brian O'Connor69d6ac72015-05-29 16:24:06 -070030import org.onosproject.event.EventDeliveryService;
31import org.onosproject.event.ListenerRegistry;
Madan Jampani1c965102016-01-13 14:34:16 -080032import org.onosproject.net.intent.IntentPartitionEvent;
33import org.onosproject.net.intent.IntentPartitionEventListener;
34import org.onosproject.net.intent.IntentPartitionService;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080035import org.onosproject.net.intent.Key;
Jonathan Hart74c83132015-02-02 18:37:57 -080036import org.slf4j.Logger;
37import org.slf4j.LoggerFactory;
38
Yuta HIGUCHI1624df12016-07-21 16:54:33 -070039import static org.onlab.util.Tools.groupedThreads;
40
Jonathan Hartdc9d7b82015-02-22 17:59:50 -080041import java.util.List;
Brian O'Connor5eb77c82015-03-02 18:09:39 -080042import java.util.Objects;
Jonathan Hartf2fda812015-02-17 15:21:03 -080043import java.util.concurrent.Executors;
44import java.util.concurrent.ScheduledExecutorService;
45import java.util.concurrent.TimeUnit;
Madan Jampani4732c1b2015-05-19 17:11:50 -070046import java.util.concurrent.atomic.AtomicBoolean;
Jonathan Hartdc9d7b82015-02-22 17:59:50 -080047import java.util.stream.Collectors;
Madan Jampania4a59942016-05-02 11:25:34 -070048import java.util.stream.IntStream;
Jonathan Hart74c83132015-02-02 18:37:57 -080049
Jonathan Hart74c83132015-02-02 18:37:57 -080050/**
51 * Manages the assignment of intent keyspace partitions to instances.
52 */
53@Component(immediate = true)
54@Service
Madan Jampani1c965102016-01-13 14:34:16 -080055public class IntentPartitionManager implements IntentPartitionService {
Jonathan Hart74c83132015-02-02 18:37:57 -080056
Madan Jampani1c965102016-01-13 14:34:16 -080057 private static final Logger log = LoggerFactory.getLogger(IntentPartitionManager.class);
Jonathan Hart74c83132015-02-02 18:37:57 -080058
59 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
60 protected LeadershipService leadershipService;
61
62 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
63 protected ClusterService clusterService;
64
Brian O'Connor69d6ac72015-05-29 16:24:06 -070065 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
66 protected EventDeliveryService eventDispatcher;
67
Madan Jampani4732c1b2015-05-19 17:11:50 -070068 protected final AtomicBoolean rebalanceScheduled = new AtomicBoolean(false);
69
Jonathan Hart7061acd2015-03-04 13:15:32 -080070 static final int NUM_PARTITIONS = 14;
Jonathan Hartf2fda812015-02-17 15:21:03 -080071 private static final int BACKOFF_TIME = 2;
Madan Jampani4732c1b2015-05-19 17:11:50 -070072 private static final int CHECK_PARTITION_BALANCE_PERIOD_SEC = 10;
73 private static final int RETRY_AFTER_DELAY_SEC = 5;
Jonathan Hart74c83132015-02-02 18:37:57 -080074
75 private static final String ELECTION_PREFIX = "intent-partition-";
76
Madan Jampania9673fd2016-02-02 13:01:29 -080077 protected NodeId localNodeId;
Madan Jampani1c965102016-01-13 14:34:16 -080078 private ListenerRegistry<IntentPartitionEvent, IntentPartitionEventListener> listenerRegistry;
Jonathan Hart74c83132015-02-02 18:37:57 -080079 private LeadershipEventListener leaderListener = new InternalLeadershipListener();
80
Jonathan Hartf2fda812015-02-17 15:21:03 -080081 private ScheduledExecutorService executor = Executors
Yuta HIGUCHI1624df12016-07-21 16:54:33 -070082 .newScheduledThreadPool(1, groupedThreads("IntentPartition", "balancer-%d", log));
Jonathan Hart74c83132015-02-02 18:37:57 -080083
84 @Activate
85 public void activate() {
Madan Jampania9673fd2016-02-02 13:01:29 -080086 localNodeId = clusterService.getLocalNode().id();
Jonathan Hart74c83132015-02-02 18:37:57 -080087 leadershipService.addListener(leaderListener);
88
Brian O'Connor69d6ac72015-05-29 16:24:06 -070089 listenerRegistry = new ListenerRegistry<>();
Madan Jampani1c965102016-01-13 14:34:16 -080090 eventDispatcher.addSink(IntentPartitionEvent.class, listenerRegistry);
Brian O'Connor69d6ac72015-05-29 16:24:06 -070091
Jonathan Hart74c83132015-02-02 18:37:57 -080092 for (int i = 0; i < NUM_PARTITIONS; i++) {
Jonathan Hartf2fda812015-02-17 15:21:03 -080093 leadershipService.runForLeadership(getPartitionPath(i));
Madan Jampania9673fd2016-02-02 13:01:29 -080094 log.debug("Registered to run for {}", getPartitionPath(i));
Jonathan Hart74c83132015-02-02 18:37:57 -080095 }
Jonathan Hartf2fda812015-02-17 15:21:03 -080096
Madan Jampani4732c1b2015-05-19 17:11:50 -070097 executor.scheduleAtFixedRate(() -> scheduleRebalance(0), 0,
98 CHECK_PARTITION_BALANCE_PERIOD_SEC, TimeUnit.SECONDS);
Madan Jampania9673fd2016-02-02 13:01:29 -080099 log.info("Started");
Jonathan Hart74c83132015-02-02 18:37:57 -0800100 }
101
102 @Deactivate
103 public void deactivate() {
Jonathan Hartac48a952015-02-25 14:11:55 -0800104 executor.shutdownNow();
105
Madan Jampani1c965102016-01-13 14:34:16 -0800106 eventDispatcher.removeSink(IntentPartitionEvent.class);
Jonathan Hart74c83132015-02-02 18:37:57 -0800107 leadershipService.removeListener(leaderListener);
Madan Jampania9673fd2016-02-02 13:01:29 -0800108 log.info("Stopped");
Jonathan Hartf2fda812015-02-17 15:21:03 -0800109 }
110
Jonathan Hart7061acd2015-03-04 13:15:32 -0800111 /**
112 * Sets the specified executor to be used for scheduling background tasks.
113 *
114 * @param executor scheduled executor service for background tasks
115 * @return this PartitionManager
116 */
Sho SHIMIZUb8147732016-01-15 13:13:31 -0800117 IntentPartitionManager withScheduledExecutor(ScheduledExecutorService executor) {
Jonathan Hart7061acd2015-03-04 13:15:32 -0800118 this.executor = executor;
119 return this;
120 }
121
Jonathan Hartf2fda812015-02-17 15:21:03 -0800122 private String getPartitionPath(int i) {
123 return ELECTION_PREFIX + i;
Jonathan Hart74c83132015-02-02 18:37:57 -0800124 }
125
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800126 private String getPartitionPath(PartitionId id) {
127 return getPartitionPath(id.value());
128 }
129
Jonathan Hart5ec32ba2015-02-05 13:33:58 -0800130 private PartitionId getPartitionForKey(Key intentKey) {
Brian O'Connor1fdfacd2015-02-18 20:52:06 -0800131 int partition = Math.abs((int) intentKey.hash()) % NUM_PARTITIONS;
132 //TODO investigate Guava consistent hash method
133 // ... does it add significant computational complexity? is it worth it?
134 //int partition = consistentHash(intentKey.hash(), NUM_PARTITIONS);
135 PartitionId id = new PartitionId(partition);
Brian O'Connor1fdfacd2015-02-18 20:52:06 -0800136 return id;
Jonathan Hart74c83132015-02-02 18:37:57 -0800137 }
138
139 @Override
Jonathan Hart5ec32ba2015-02-05 13:33:58 -0800140 public boolean isMine(Key intentKey) {
Madan Jampania4a59942016-05-02 11:25:34 -0700141 return Objects.equals(leadershipService.getLeadership(getPartitionPath(getPartitionForKey(intentKey)))
142 .leaderNodeId(),
Madan Jampania9673fd2016-02-02 13:01:29 -0800143 localNodeId);
Brian O'Connor5eb77c82015-03-02 18:09:39 -0800144 }
145
146 @Override
147 public NodeId getLeader(Key intentKey) {
148 return leadershipService.getLeader(getPartitionPath(getPartitionForKey(intentKey)));
Jonathan Hart74c83132015-02-02 18:37:57 -0800149 }
150
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700151 @Override
Madan Jampani1c965102016-01-13 14:34:16 -0800152 public void addListener(IntentPartitionEventListener listener) {
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700153 listenerRegistry.addListener(listener);
154 }
155
156 @Override
Madan Jampani1c965102016-01-13 14:34:16 -0800157 public void removeListener(IntentPartitionEventListener listener) {
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700158 listenerRegistry.removeListener(listener);
159 }
160
Sho SHIMIZUb8147732016-01-15 13:13:31 -0800161 void doRebalance() {
Madan Jampani4732c1b2015-05-19 17:11:50 -0700162 rebalanceScheduled.set(false);
Jonathan Hartf2fda812015-02-17 15:21:03 -0800163 try {
Madan Jampani4732c1b2015-05-19 17:11:50 -0700164 rebalance();
Jonathan Hartf2fda812015-02-17 15:21:03 -0800165 } catch (Exception e) {
Madan Jampani4732c1b2015-05-19 17:11:50 -0700166 log.warn("Exception caught during rebalance task. Will retry in " + RETRY_AFTER_DELAY_SEC + " seconds", e);
167 scheduleRebalance(RETRY_AFTER_DELAY_SEC);
Jonathan Hartf2fda812015-02-17 15:21:03 -0800168 }
169 }
170
Jonathan Hartf2fda812015-02-17 15:21:03 -0800171 /**
172 * Determine whether we have more than our fair share of partitions, and if
173 * so, relinquish leadership of some of them for a little while to let
174 * other instances take over.
175 */
Madan Jampani4732c1b2015-05-19 17:11:50 -0700176 private void rebalance() {
Jonathan Hartf2fda812015-02-17 15:21:03 -0800177 int activeNodes = (int) clusterService.getNodes()
178 .stream()
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800179 .filter(node -> clusterService.getState(node.id()).isActive())
Jonathan Hartf2fda812015-02-17 15:21:03 -0800180 .count();
181
182 int myShare = (int) Math.ceil((double) NUM_PARTITIONS / activeNodes);
183
Madan Jampania4d2c722016-06-06 16:39:06 -0700184 // First make sure this node is a candidate for all partitions.
185 IntStream.range(0, NUM_PARTITIONS)
186 .mapToObj(this::getPartitionPath)
187 .map(leadershipService::getLeadership)
188 .filter(leadership -> !leadership.candidates().contains(localNodeId))
189 .map(Leadership::topic)
190 .forEach(leadershipService::runForLeadership);
191
Madan Jampania4a59942016-05-02 11:25:34 -0700192 List<String> myPartitions = IntStream.range(0, NUM_PARTITIONS)
193 .mapToObj(this::getPartitionPath)
194 .map(leadershipService::getLeadership)
195 .filter(Objects::nonNull)
196 .filter(leadership -> localNodeId.equals(leadership.leaderNodeId()))
197 .map(Leadership::topic)
198 .collect(Collectors.toList());
Jonathan Hartf2fda812015-02-17 15:21:03 -0800199
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800200 int relinquish = myPartitions.size() - myShare;
Jonathan Hartf2fda812015-02-17 15:21:03 -0800201
Madan Jampania4d2c722016-06-06 16:39:06 -0700202
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800203 for (int i = 0; i < relinquish; i++) {
Madan Jampania4a59942016-05-02 11:25:34 -0700204 String topic = myPartitions.get(i);
Madan Jampani783d3d22016-06-13 17:40:02 -0700205 // Wait till all active nodes are in contention for partition ownership.
206 // This avoids too many relinquish/reclaim cycles.
207 if (leadershipService.getCandidates(topic).size() == activeNodes) {
208 leadershipService.withdraw(topic);
209 executor.schedule(() -> recontest(topic), BACKOFF_TIME, TimeUnit.SECONDS);
210 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800211 }
212 }
213
Madan Jampani4732c1b2015-05-19 17:11:50 -0700214 private void scheduleRebalance(int afterDelaySec) {
215 if (rebalanceScheduled.compareAndSet(false, true)) {
216 executor.schedule(this::doRebalance, afterDelaySec, TimeUnit.SECONDS);
217 }
218 }
219
Jonathan Hartf2fda812015-02-17 15:21:03 -0800220 /**
221 * Try and recontest for leadership of a partition.
222 *
223 * @param path topic name to recontest
224 */
225 private void recontest(String path) {
226 leadershipService.runForLeadership(path);
227 }
228
Jonathan Hart74c83132015-02-02 18:37:57 -0800229 private final class InternalLeadershipListener implements LeadershipEventListener {
230
231 @Override
232 public void event(LeadershipEvent event) {
233 Leadership leadership = event.subject();
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800234
Madan Jampania9673fd2016-02-02 13:01:29 -0800235 if (Objects.equals(leadership.leaderNodeId(), localNodeId) &&
Jonathan Hart74c83132015-02-02 18:37:57 -0800236 leadership.topic().startsWith(ELECTION_PREFIX)) {
237
Madan Jampani1c965102016-01-13 14:34:16 -0800238 eventDispatcher.post(new IntentPartitionEvent(IntentPartitionEvent.Type.LEADER_CHANGED,
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700239 leadership.topic()));
Jonathan Hart74c83132015-02-02 18:37:57 -0800240 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800241
Madan Jampani620f70d2016-01-30 22:22:47 -0800242 if (event.type() == LeadershipEvent.Type.CANDIDATES_CHANGED) {
243 scheduleRebalance(0);
244 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800245 }
246 }
Jonathan Hart74c83132015-02-02 18:37:57 -0800247}