/*
 * Copyright 2016-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.onosproject.store.intent.impl;

import org.onosproject.cluster.ClusterService;
import org.onosproject.cluster.Leadership;
import org.onosproject.cluster.LeadershipEvent;
import org.onosproject.cluster.LeadershipEventListener;
import org.onosproject.cluster.LeadershipService;
import org.onosproject.cluster.NodeId;
import org.onosproject.event.EventDeliveryService;
import org.onosproject.event.ListenerRegistry;
import org.onosproject.net.intent.WorkPartitionEvent;
import org.onosproject.net.intent.WorkPartitionEventListener;
import org.onosproject.net.intent.WorkPartitionService;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Reference;
import org.osgi.service.component.annotations.ReferenceCardinality;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Objects;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static org.onlab.util.Tools.groupedThreads;

Jonathan Hart74c83132015-02-02 18:37:57 -080049/**
Madan Jampani3b8101a2016-09-15 13:22:01 -070050 * Manages the assignment of work partitions to instances.
Jonathan Hart74c83132015-02-02 18:37:57 -080051 */
Ray Milkeyd84f89b2018-08-17 14:54:17 -070052@Component(immediate = true, service = WorkPartitionService.class)
Madan Jampani3b8101a2016-09-15 13:22:01 -070053public class WorkPartitionManager implements WorkPartitionService {
Jonathan Hart74c83132015-02-02 18:37:57 -080054
Madan Jampani3b8101a2016-09-15 13:22:01 -070055 private static final Logger log = LoggerFactory.getLogger(WorkPartitionManager.class);
Jonathan Hart74c83132015-02-02 18:37:57 -080056
Ray Milkeyd84f89b2018-08-17 14:54:17 -070057 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jonathan Hart74c83132015-02-02 18:37:57 -080058 protected LeadershipService leadershipService;
59
Ray Milkeyd84f89b2018-08-17 14:54:17 -070060 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Jonathan Hart74c83132015-02-02 18:37:57 -080061 protected ClusterService clusterService;
62
Ray Milkeyd84f89b2018-08-17 14:54:17 -070063 @Reference(cardinality = ReferenceCardinality.MANDATORY)
Brian O'Connor69d6ac72015-05-29 16:24:06 -070064 protected EventDeliveryService eventDispatcher;
65
Madan Jampani4732c1b2015-05-19 17:11:50 -070066 protected final AtomicBoolean rebalanceScheduled = new AtomicBoolean(false);
67
Jonathan Hart7061acd2015-03-04 13:15:32 -080068 static final int NUM_PARTITIONS = 14;
Jonathan Hartf2fda812015-02-17 15:21:03 -080069 private static final int BACKOFF_TIME = 2;
Madan Jampani4732c1b2015-05-19 17:11:50 -070070 private static final int CHECK_PARTITION_BALANCE_PERIOD_SEC = 10;
71 private static final int RETRY_AFTER_DELAY_SEC = 5;
Jonathan Hart74c83132015-02-02 18:37:57 -080072
Madan Jampani3b8101a2016-09-15 13:22:01 -070073 private static final String ELECTION_PREFIX = "work-partition-";
Jonathan Hart74c83132015-02-02 18:37:57 -080074
Madan Jampania9673fd2016-02-02 13:01:29 -080075 protected NodeId localNodeId;
Madan Jampani3b8101a2016-09-15 13:22:01 -070076 private ListenerRegistry<WorkPartitionEvent, WorkPartitionEventListener> listenerRegistry;
Jonathan Hart74c83132015-02-02 18:37:57 -080077 private LeadershipEventListener leaderListener = new InternalLeadershipListener();
78
Jonathan Hartf2fda812015-02-17 15:21:03 -080079 private ScheduledExecutorService executor = Executors
Madan Jampani3b8101a2016-09-15 13:22:01 -070080 .newScheduledThreadPool(1, groupedThreads("work-parition", "balancer-%d", log));
Jonathan Hart74c83132015-02-02 18:37:57 -080081
82 @Activate
83 public void activate() {
Madan Jampania9673fd2016-02-02 13:01:29 -080084 localNodeId = clusterService.getLocalNode().id();
Jonathan Hart74c83132015-02-02 18:37:57 -080085 leadershipService.addListener(leaderListener);
86
Brian O'Connor69d6ac72015-05-29 16:24:06 -070087 listenerRegistry = new ListenerRegistry<>();
Madan Jampani3b8101a2016-09-15 13:22:01 -070088 eventDispatcher.addSink(WorkPartitionEvent.class, listenerRegistry);
Brian O'Connor69d6ac72015-05-29 16:24:06 -070089
Jonathan Hart74c83132015-02-02 18:37:57 -080090 for (int i = 0; i < NUM_PARTITIONS; i++) {
Jonathan Hartf2fda812015-02-17 15:21:03 -080091 leadershipService.runForLeadership(getPartitionPath(i));
Madan Jampania9673fd2016-02-02 13:01:29 -080092 log.debug("Registered to run for {}", getPartitionPath(i));
Jonathan Hart74c83132015-02-02 18:37:57 -080093 }
Jonathan Hartf2fda812015-02-17 15:21:03 -080094
Madan Jampani4732c1b2015-05-19 17:11:50 -070095 executor.scheduleAtFixedRate(() -> scheduleRebalance(0), 0,
96 CHECK_PARTITION_BALANCE_PERIOD_SEC, TimeUnit.SECONDS);
Madan Jampania9673fd2016-02-02 13:01:29 -080097 log.info("Started");
Jonathan Hart74c83132015-02-02 18:37:57 -080098 }
99
100 @Deactivate
101 public void deactivate() {
Jonathan Hartac48a952015-02-25 14:11:55 -0800102 executor.shutdownNow();
103
Madan Jampani3b8101a2016-09-15 13:22:01 -0700104 eventDispatcher.removeSink(WorkPartitionEvent.class);
Jonathan Hart74c83132015-02-02 18:37:57 -0800105 leadershipService.removeListener(leaderListener);
Madan Jampania9673fd2016-02-02 13:01:29 -0800106 log.info("Stopped");
Jonathan Hartf2fda812015-02-17 15:21:03 -0800107 }
108
Jonathan Hart7061acd2015-03-04 13:15:32 -0800109 /**
110 * Sets the specified executor to be used for scheduling background tasks.
111 *
112 * @param executor scheduled executor service for background tasks
Madan Jampani3b8101a2016-09-15 13:22:01 -0700113 * @return this WorkPartitionManager
Jonathan Hart7061acd2015-03-04 13:15:32 -0800114 */
Madan Jampani3b8101a2016-09-15 13:22:01 -0700115 WorkPartitionManager withScheduledExecutor(ScheduledExecutorService executor) {
Jonathan Hart7061acd2015-03-04 13:15:32 -0800116 this.executor = executor;
117 return this;
118 }
119
Jonathan Hartf2fda812015-02-17 15:21:03 -0800120 private String getPartitionPath(int i) {
121 return ELECTION_PREFIX + i;
Jonathan Hart74c83132015-02-02 18:37:57 -0800122 }
123
Madan Jampani3b8101a2016-09-15 13:22:01 -0700124 @Override
125 public <K> boolean isMine(K id, Function<K, Long> hasher) {
126 return Objects.equals(localNodeId, getLeader(id, hasher));
Jonathan Hart74c83132015-02-02 18:37:57 -0800127 }
128
129 @Override
Madan Jampani3b8101a2016-09-15 13:22:01 -0700130 public <K> NodeId getLeader(K id, Function<K, Long> hasher) {
131 int partition = Math.abs(hasher.apply(id).intValue()) % NUM_PARTITIONS;
132 PartitionId partitionId = new PartitionId(partition);
133 return leadershipService.getLeadership(getPartitionPath(partitionId.value())).leaderNodeId();
Brian O'Connor5eb77c82015-03-02 18:09:39 -0800134 }
135
136 @Override
Madan Jampani3b8101a2016-09-15 13:22:01 -0700137 public void addListener(WorkPartitionEventListener listener) {
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700138 listenerRegistry.addListener(listener);
139 }
140
141 @Override
Madan Jampani3b8101a2016-09-15 13:22:01 -0700142 public void removeListener(WorkPartitionEventListener listener) {
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700143 listenerRegistry.removeListener(listener);
144 }
145
Sho SHIMIZUb8147732016-01-15 13:13:31 -0800146 void doRebalance() {
Madan Jampani4732c1b2015-05-19 17:11:50 -0700147 rebalanceScheduled.set(false);
Jonathan Hartf2fda812015-02-17 15:21:03 -0800148 try {
Madan Jampani4732c1b2015-05-19 17:11:50 -0700149 rebalance();
Jonathan Hartf2fda812015-02-17 15:21:03 -0800150 } catch (Exception e) {
Ray Milkey07b59352017-02-16 15:42:41 -0800151 log.warn("{} caught during rebalance task. Will retry in " +
Ray Milkeyac0a82d2017-02-16 08:58:59 -0800152 RETRY_AFTER_DELAY_SEC + " seconds", e.getMessage());
Madan Jampani4732c1b2015-05-19 17:11:50 -0700153 scheduleRebalance(RETRY_AFTER_DELAY_SEC);
Jonathan Hartf2fda812015-02-17 15:21:03 -0800154 }
155 }
156
Jonathan Hartf2fda812015-02-17 15:21:03 -0800157 /**
158 * Determine whether we have more than our fair share of partitions, and if
159 * so, relinquish leadership of some of them for a little while to let
160 * other instances take over.
161 */
Madan Jampani4732c1b2015-05-19 17:11:50 -0700162 private void rebalance() {
Jonathan Hartf2fda812015-02-17 15:21:03 -0800163 int activeNodes = (int) clusterService.getNodes()
164 .stream()
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800165 .filter(node -> clusterService.getState(node.id()).isActive())
Jonathan Hartf2fda812015-02-17 15:21:03 -0800166 .count();
167
168 int myShare = (int) Math.ceil((double) NUM_PARTITIONS / activeNodes);
169
Madan Jampania4d2c722016-06-06 16:39:06 -0700170 // First make sure this node is a candidate for all partitions.
171 IntStream.range(0, NUM_PARTITIONS)
172 .mapToObj(this::getPartitionPath)
173 .map(leadershipService::getLeadership)
174 .filter(leadership -> !leadership.candidates().contains(localNodeId))
175 .map(Leadership::topic)
176 .forEach(leadershipService::runForLeadership);
177
Madan Jampania4a59942016-05-02 11:25:34 -0700178 List<String> myPartitions = IntStream.range(0, NUM_PARTITIONS)
179 .mapToObj(this::getPartitionPath)
180 .map(leadershipService::getLeadership)
181 .filter(Objects::nonNull)
182 .filter(leadership -> localNodeId.equals(leadership.leaderNodeId()))
183 .map(Leadership::topic)
184 .collect(Collectors.toList());
Jonathan Hartf2fda812015-02-17 15:21:03 -0800185
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800186 int relinquish = myPartitions.size() - myShare;
Jonathan Hartf2fda812015-02-17 15:21:03 -0800187
Madan Jampania4d2c722016-06-06 16:39:06 -0700188
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800189 for (int i = 0; i < relinquish; i++) {
Madan Jampania4a59942016-05-02 11:25:34 -0700190 String topic = myPartitions.get(i);
Madan Jampani783d3d22016-06-13 17:40:02 -0700191 // Wait till all active nodes are in contention for partition ownership.
192 // This avoids too many relinquish/reclaim cycles.
193 if (leadershipService.getCandidates(topic).size() == activeNodes) {
194 leadershipService.withdraw(topic);
195 executor.schedule(() -> recontest(topic), BACKOFF_TIME, TimeUnit.SECONDS);
196 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800197 }
198 }
199
Madan Jampani4732c1b2015-05-19 17:11:50 -0700200 private void scheduleRebalance(int afterDelaySec) {
201 if (rebalanceScheduled.compareAndSet(false, true)) {
202 executor.schedule(this::doRebalance, afterDelaySec, TimeUnit.SECONDS);
203 }
204 }
205
Jonathan Hartf2fda812015-02-17 15:21:03 -0800206 /**
207 * Try and recontest for leadership of a partition.
208 *
209 * @param path topic name to recontest
210 */
211 private void recontest(String path) {
212 leadershipService.runForLeadership(path);
213 }
214
Jonathan Hart74c83132015-02-02 18:37:57 -0800215 private final class InternalLeadershipListener implements LeadershipEventListener {
216
217 @Override
218 public void event(LeadershipEvent event) {
219 Leadership leadership = event.subject();
Jonathan Hartdc9d7b82015-02-22 17:59:50 -0800220
Madan Jampania9673fd2016-02-02 13:01:29 -0800221 if (Objects.equals(leadership.leaderNodeId(), localNodeId) &&
Jonathan Hart74c83132015-02-02 18:37:57 -0800222 leadership.topic().startsWith(ELECTION_PREFIX)) {
223
Madan Jampani3b8101a2016-09-15 13:22:01 -0700224 eventDispatcher.post(new WorkPartitionEvent(WorkPartitionEvent.Type.LEADER_CHANGED,
Brian O'Connor69d6ac72015-05-29 16:24:06 -0700225 leadership.topic()));
Jonathan Hart74c83132015-02-02 18:37:57 -0800226 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800227
Madan Jampani620f70d2016-01-30 22:22:47 -0800228 if (event.type() == LeadershipEvent.Type.CANDIDATES_CHANGED) {
229 scheduleRebalance(0);
230 }
Jonathan Hartf2fda812015-02-17 15:21:03 -0800231 }
232 }
Jonathan Hart74c83132015-02-02 18:37:57 -0800233}