Refactor IntentPartitionService into WorkPartitionService

Change-Id: Ic5cf1978b7fce55b34f84eae9b03c8f9ddcfb9c1
diff --git a/core/store/dist/src/main/java/org/onosproject/store/intent/impl/WorkPartitionManager.java b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/WorkPartitionManager.java
new file mode 100644
index 0000000..337bc55
--- /dev/null
+++ b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/WorkPartitionManager.java
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2016-present Open Networking Laboratory
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.onosproject.store.intent.impl;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.felix.scr.annotations.Service;
+import org.onosproject.cluster.ClusterService;
+import org.onosproject.cluster.Leadership;
+import org.onosproject.cluster.LeadershipEvent;
+import org.onosproject.cluster.LeadershipEventListener;
+import org.onosproject.cluster.LeadershipService;
+import org.onosproject.cluster.NodeId;
+import org.onosproject.event.EventDeliveryService;
+import org.onosproject.event.ListenerRegistry;
+import org.onosproject.net.intent.WorkPartitionEvent;
+import org.onosproject.net.intent.WorkPartitionEventListener;
+import org.onosproject.net.intent.WorkPartitionService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.onlab.util.Tools.groupedThreads;
+
+/**
+ * Manages the assignment of work partitions to instances.
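+ * <p>
+ * A typical consumer hashes each unit of work to a partition and processes only
+ * the items whose partition this node currently leads, for example (illustrative
+ * usage; the hashing function is supplied by the caller):
+ * <pre>
+ * if (workPartitionService.isMine(intent.key(), Key::hash)) {
+ *     // this instance is responsible for the intent
+ * }
+ * </pre>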
+ */
+@Component(immediate = true)
+@Service
+public class WorkPartitionManager implements WorkPartitionService {
+
+    private static final Logger log = LoggerFactory.getLogger(WorkPartitionManager.class);
+
+    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
+    protected LeadershipService leadershipService;
+
+    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
+    protected ClusterService clusterService;
+
+    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
+    protected EventDeliveryService eventDispatcher;
+
+    protected final AtomicBoolean rebalanceScheduled = new AtomicBoolean(false);
+
+    static final int NUM_PARTITIONS = 14;
+    private static final int BACKOFF_TIME_SEC = 2;
+    private static final int CHECK_PARTITION_BALANCE_PERIOD_SEC = 10;
+    private static final int RETRY_AFTER_DELAY_SEC = 5;
+
+    private static final String ELECTION_PREFIX = "work-partition-";
+
+    protected NodeId localNodeId;
+    private ListenerRegistry<WorkPartitionEvent, WorkPartitionEventListener> listenerRegistry;
+    private LeadershipEventListener leaderListener = new InternalLeadershipListener();
+
+    private ScheduledExecutorService executor = Executors
+            .newScheduledThreadPool(1, groupedThreads("work-partition", "balancer-%d", log));
+
+    @Activate
+    public void activate() {
+        localNodeId = clusterService.getLocalNode().id();
+        leadershipService.addListener(leaderListener);
+
+        listenerRegistry = new ListenerRegistry<>();
+        eventDispatcher.addSink(WorkPartitionEvent.class, listenerRegistry);
+
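+        // Contend for leadership of every partition topic up front; the periodic
+        // rebalance task later evens out how many partitions each node leads.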
+        for (int i = 0; i < NUM_PARTITIONS; i++) {
+            leadershipService.runForLeadership(getPartitionPath(i));
+            log.debug("Registered to run for {}", getPartitionPath(i));
+        }
+
+        executor.scheduleAtFixedRate(() -> scheduleRebalance(0), 0,
+                                     CHECK_PARTITION_BALANCE_PERIOD_SEC, TimeUnit.SECONDS);
+        log.info("Started");
+    }
+
+    @Deactivate
+    public void deactivate() {
+        executor.shutdownNow();
+
+        eventDispatcher.removeSink(WorkPartitionEvent.class);
+        leadershipService.removeListener(leaderListener);
+        log.info("Stopped");
+    }
+
+    /**
+     * Sets the specified executor to be used for scheduling background tasks.
+     *
+     * @param executor scheduled executor service for background tasks
+     * @return this WorkPartitionManager
+     */
+    WorkPartitionManager withScheduledExecutor(ScheduledExecutorService executor) {
+        this.executor = executor;
+        return this;
+    }
+
+    private String getPartitionPath(int i) {
+        return ELECTION_PREFIX + i;
+    }
+
+    @Override
+    public <K> boolean isMine(K id, Function<K, Long> hasher) {
+        return Objects.equals(localNodeId, getLeader(id, hasher));
+    }
+
+    @Override
+    public <K> NodeId getLeader(K id, Function<K, Long> hasher) {
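+        // Map the item's hash onto one of the NUM_PARTITIONS leadership topics;
+        // floorMod keeps the index non-negative even for negative hash values.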
+        int partition = Math.floorMod(hasher.apply(id).intValue(), NUM_PARTITIONS);
+        return leadershipService.getLeadership(getPartitionPath(partition)).leaderNodeId();
+    }
+
+    @Override
+    public void addListener(WorkPartitionEventListener listener) {
+        listenerRegistry.addListener(listener);
+    }
+
+    @Override
+    public void removeListener(WorkPartitionEventListener listener) {
+        listenerRegistry.removeListener(listener);
+    }
+
+    void doRebalance() {
+        rebalanceScheduled.set(false);
+        try {
+            rebalance();
+        } catch (Exception e) {
+            log.warn("Exception caught during rebalance task. Will retry in " + RETRY_AFTER_DELAY_SEC + " seconds", e);
+            scheduleRebalance(RETRY_AFTER_DELAY_SEC);
+        }
+    }
+
+    /**
+     * Determines whether this node leads more than its fair share of partitions
+     * and, if so, relinquishes leadership of some of them for a short period so
+     * that other instances can take over.
+     */
+    private void rebalance() {
+        int activeNodes = (int) clusterService.getNodes()
+                .stream()
+                .filter(node -> clusterService.getState(node.id()).isActive())
+                .count();
+
+        int myShare = (int) Math.ceil((double) NUM_PARTITIONS / activeNodes);
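+        // e.g. 14 partitions across 3 active nodes gives a fair share of ceil(14/3) = 5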
+
+        // First make sure this node is a candidate for all partitions.
+        IntStream.range(0, NUM_PARTITIONS)
+                 .mapToObj(this::getPartitionPath)
+                 .map(leadershipService::getLeadership)
+                 .filter(leadership -> !leadership.candidates().contains(localNodeId))
+                 .map(Leadership::topic)
+                 .forEach(leadershipService::runForLeadership);
+
+        List<String> myPartitions = IntStream.range(0, NUM_PARTITIONS)
+                                             .mapToObj(this::getPartitionPath)
+                                             .map(leadershipService::getLeadership)
+                                             .filter(Objects::nonNull)
+                                             .filter(leadership -> localNodeId.equals(leadership.leaderNodeId()))
+                                             .map(Leadership::topic)
+                                             .collect(Collectors.toList());
+
+        int relinquish = myPartitions.size() - myShare;
+
+        for (int i = 0; i < relinquish; i++) {
+            String topic = myPartitions.get(i);
+            // Wait until all active nodes are contending for partition ownership;
+            // this avoids excessive relinquish/reclaim cycles.
+            if (leadershipService.getCandidates(topic).size() == activeNodes) {
+                leadershipService.withdraw(topic);
+                executor.schedule(() -> recontest(topic), BACKOFF_TIME_SEC, TimeUnit.SECONDS);
+            }
+        }
+    }
+
+    private void scheduleRebalance(int afterDelaySec) {
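+        // compareAndSet collapses concurrent requests into one pending rebalance run;
+        // doRebalance() clears the flag before executing, so later requests reschedule.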
+        if (rebalanceScheduled.compareAndSet(false, true)) {
+            executor.schedule(this::doRebalance, afterDelaySec, TimeUnit.SECONDS);
+        }
+    }
+
+    /**
+     * Re-enters the leadership contest for a partition after the back-off period.
+     *
+     * @param path topic name to recontest
+     */
+    private void recontest(String path) {
+        leadershipService.runForLeadership(path);
+    }
+
+    private final class InternalLeadershipListener implements LeadershipEventListener {
+
+        @Override
+        public void event(LeadershipEvent event) {
+            Leadership leadership = event.subject();
+
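+            // Notify listeners when this node is reported as leader of a work partition.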
+            if (Objects.equals(leadership.leaderNodeId(), localNodeId) &&
+                    leadership.topic().startsWith(ELECTION_PREFIX)) {
+
+                eventDispatcher.post(new WorkPartitionEvent(
+                        WorkPartitionEvent.Type.LEADER_CHANGED, leadership.topic()));
+            }
+
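+            // A change in the candidate set (e.g. a node joining or leaving) may leave
+            // partitions unevenly distributed, so schedule a rebalance check.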
+            if (event.type() == LeadershipEvent.Type.CANDIDATES_CHANGED) {
+                scheduleRebalance(0);
+            }
+        }
+    }
+}