Automatically rebalance intent key partitions on cluster change.

Also sort the output of the leaders command by leader IP.

Change-Id: Ie85896a4f6f50489ebd7994c905808ce34fca94c
diff --git a/cli/src/main/java/org/onosproject/cli/net/LeaderCommand.java b/cli/src/main/java/org/onosproject/cli/net/LeaderCommand.java
index 0800942..b9e9c7a 100644
--- a/cli/src/main/java/org/onosproject/cli/net/LeaderCommand.java
+++ b/cli/src/main/java/org/onosproject/cli/net/LeaderCommand.java
@@ -20,6 +20,7 @@
 import org.onosproject.cluster.Leadership;
 import org.onosproject.cluster.LeadershipService;
 
+import java.util.Comparator;
 import java.util.Map;
 
 /**
@@ -29,17 +30,32 @@
         description = "Finds the leader for particular topic.")
 public class LeaderCommand extends AbstractShellCommand {
 
-    private static final String FMT = "%-20s: %15s %5s";
+    private static final String FMT = "%-20s: %15s %15s";
 
     @Override
     protected void execute() {
         LeadershipService leaderService = get(LeadershipService.class);
         Map<String, Leadership> leaderBoard = leaderService.getLeaderBoard();
         print(FMT, "Topic", "Leader", "Epoch");
-        for (String topic : leaderBoard.keySet()) {
-            Leadership leadership = leaderBoard.get(topic);
-            print(FMT, topic, leadership.leader(), leadership.epoch());
-        }
+
+        Comparator<Leadership> leadershipComparator =
+                (e1, e2) -> {
+                    if (e1.leader() == null && e2.leader() == null) {
+                        return 0;
+                    }
+                    if (e1.leader() == null) {
+                        return 1;
+                    }
+                    if (e2.leader() == null) {
+                        return -1;
+                    }
+                    return e1.leader().toString().compareTo(e2.leader().toString());
+                };
+
+        leaderBoard.values()
+                .stream()
+                .sorted(leadershipComparator)
+                .forEach(l -> print(FMT, l.topic(), l.leader(), l.epoch()));
     }
 
 }
diff --git a/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionId.java b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionId.java
index 1ed2e4f..885361f 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionId.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionId.java
@@ -24,17 +24,26 @@
  * processed by a single ONOS instance at a time.
  */
 public class PartitionId {
-    private final long id;
+    private final int id;
 
     /**
      * Creates a new partition ID.
      *
      * @param id the partition ID
      */
-    PartitionId(long id) {
+    PartitionId(int id) {
         this.id = id;
     }
 
+    /**
+     * Returns the integer ID value.
+     *
+     * @return ID value
+     */
+    public int value() {
+        return id;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (!(o instanceof PartitionId)) {
diff --git a/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionManager.java b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionManager.java
index a709a13..611ed21 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionManager.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/intent/impl/PartitionManager.java
@@ -21,7 +21,10 @@
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.ReferenceCardinality;
 import org.apache.felix.scr.annotations.Service;
+import org.onosproject.cluster.ClusterEvent;
+import org.onosproject.cluster.ClusterEventListener;
 import org.onosproject.cluster.ClusterService;
+import org.onosproject.cluster.ControllerNode;
 import org.onosproject.cluster.Leadership;
 import org.onosproject.cluster.LeadershipEvent;
 import org.onosproject.cluster.LeadershipEventListener;
@@ -31,8 +34,12 @@
 import org.slf4j.LoggerFactory;
 
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Manages the assignment of intent keyspace partitions to instances.
@@ -49,35 +56,48 @@
     @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
     protected ClusterService clusterService;
 
-    // TODO make configurable
-    private static final int NUM_PARTITIONS = 100;
+    private static final int NUM_PARTITIONS = 32;
+    private static final int BACKOFF_TIME = 2;
+    private static final int CHECK_PERIOD = 10;
 
     private static final String ELECTION_PREFIX = "intent-partition-";
 
     private LeadershipEventListener leaderListener = new InternalLeadershipListener();
+    private ClusterEventListener clusterListener = new InternalClusterEventListener();
 
-    private Set<PartitionId> myPartitions;
+    private final Set<PartitionId> myPartitions
+            = Collections.newSetFromMap(new ConcurrentHashMap<>());
+
+    private ScheduledExecutorService executor = Executors
+            .newScheduledThreadPool(1);
 
     @Activate
     public void activate() {
-        myPartitions = Collections.newSetFromMap(new ConcurrentHashMap<>());
-
         leadershipService.addListener(leaderListener);
+        clusterService.addListener(clusterListener);
 
         for (int i = 0; i < NUM_PARTITIONS; i++) {
-            leadershipService.runForLeadership(ELECTION_PREFIX + i);
+            leadershipService.runForLeadership(getPartitionPath(i));
         }
+
+        executor.scheduleAtFixedRate(this::doRelinquish, 0,
+                                     CHECK_PERIOD, TimeUnit.SECONDS);
     }
 
     @Deactivate
     public void deactivate() {
         leadershipService.removeListener(leaderListener);
+        clusterService.removeListener(clusterListener);
+    }
+
+    private String getPartitionPath(int i) {
+        return ELECTION_PREFIX + i;
     }
 
     private PartitionId getPartitionForKey(Key intentKey) {
         log.debug("Getting partition for {}: {}", intentKey,
-                  new PartitionId(Math.abs(intentKey.hash()) % NUM_PARTITIONS));
-        return new PartitionId(Math.abs(intentKey.hash()) % NUM_PARTITIONS);
+                  new PartitionId((int) (Math.abs(intentKey.hash()) % NUM_PARTITIONS)));
+        return new PartitionId((int) (Math.abs(intentKey.hash()) % NUM_PARTITIONS));
     }
 
     @Override
@@ -85,6 +105,58 @@
         return myPartitions.contains(getPartitionForKey(intentKey));
     }
 
+    private void doRelinquish() {
+        try {
+            relinquish();
+        } catch (Exception e) {
+            log.warn("Exception caught during relinquish task", e);
+        }
+    }
+
+
+    /**
+     * Determine whether we have more than our fair share of partitions, and if
+     * so, relinquish leadership of some of them for a little while to let
+     * other instances take over.
+     */
+    private void relinquish() {
+        int activeNodes = (int) clusterService.getNodes()
+                .stream()
+                .filter(n -> clusterService.getState(n.id())
+                        == ControllerNode.State.ACTIVE)
+                .count();
+
+        int myShare = (int) Math.ceil((double) NUM_PARTITIONS / activeNodes);
+
+        synchronized (myPartitions) {
+            int relinquish = myPartitions.size() - myShare;
+
+            if (relinquish <= 0) {
+                return;
+            }
+
+            Iterator<PartitionId> it = myPartitions.iterator();
+            for (int i = 0; i < relinquish; i++) {
+                PartitionId id = it.next();
+                it.remove();
+
+                leadershipService.withdraw(getPartitionPath(id.value()));
+
+                executor.schedule(() -> recontest(getPartitionPath(id.value())),
+                                  BACKOFF_TIME, TimeUnit.SECONDS);
+            }
+        }
+    }
+
+    /**
+     * Try and recontest for leadership of a partition.
+     *
+     * @param path topic name to recontest
+     */
+    private void recontest(String path) {
+        leadershipService.runForLeadership(path);
+    }
+
     private final class InternalLeadershipListener implements LeadershipEventListener {
 
         @Override
@@ -109,12 +181,26 @@
                     return;
                 }
 
-                if (event.type() == LeadershipEvent.Type.LEADER_ELECTED) {
-                    myPartitions.add(new PartitionId(partitionId));
-                } else if (event.type() == LeadershipEvent.Type.LEADER_BOOTED) {
-                    myPartitions.remove(new PartitionId(partitionId));
+                synchronized (myPartitions) {
+                    if (event.type() == LeadershipEvent.Type.LEADER_ELECTED) {
+                        myPartitions.add(new PartitionId(partitionId));
+                    } else if (event.type() == LeadershipEvent.Type.LEADER_BOOTED) {
+                        myPartitions.remove(new PartitionId(partitionId));
+                    }
                 }
+
+                // See if we need to let some partitions go
+                relinquish();
             }
         }
     }
+
+    private final class InternalClusterEventListener implements
+            ClusterEventListener {
+
+        @Override
+        public void event(ClusterEvent event) {
+            relinquish();
+        }
+    }
 }