Bugfix in routing logic to avoid null groups; Improvement in group handling.

Two main changes for the bug-fix:
    - avoid doing a full-reroute in the event of link failure as this will leave behind stale state in stores
      if event has been preceded by mastership change leading to the nuking of the ecmpSpg for one of the
      link's devices; instead do a rehash
    - when full-reroute is attempted, do it only once, with a complete nuke of the next-obj store

Improvement in group handling allows for a max number of retries for a group that failed to be added.
Earlier behavior was to try only once, and if it fails, it gets removed from the group-store. Now it
is removed after a couple of retries - so total 3 attempts to program the group.

Change-Id: I54ca8203cef779463522b01353540d12f8be3c91
diff --git a/core/api/src/main/java/org/onosproject/net/group/DefaultGroup.java b/core/api/src/main/java/org/onosproject/net/group/DefaultGroup.java
index 90da929..d0777ec 100644
--- a/core/api/src/main/java/org/onosproject/net/group/DefaultGroup.java
+++ b/core/api/src/main/java/org/onosproject/net/group/DefaultGroup.java
@@ -36,6 +36,7 @@
     private long referenceCount;
     private GroupId id;
     private int age;
+    private int failedRetryCount;
 
     /**
      * Initializes default values.
@@ -50,6 +51,7 @@
         bytes = 0;
         referenceCount = 0;
         age = 0;
+        failedRetryCount = 0;
     }
 
     /**
@@ -135,6 +137,11 @@
         return age;
     }
 
+    @Override
+    public int failedRetryCount() {
+        return failedRetryCount;
+    }
+
     /**
      * Sets the new state for this entry.
      *
@@ -190,6 +197,16 @@
         return referenceCount;
     }
 
+    @Override
+    public void incrFailedRetryCount() {
+        failedRetryCount++;
+    }
+
+    @Override
+    public void setFailedRetryCount(int failedRetryCount) {
+        this.failedRetryCount = failedRetryCount;
+    }
+
     /*
      * The deviceId, type and buckets are used for hash.
      *
diff --git a/core/api/src/main/java/org/onosproject/net/group/Group.java b/core/api/src/main/java/org/onosproject/net/group/Group.java
index 39c6a3d..dd2aa63 100644
--- a/core/api/src/main/java/org/onosproject/net/group/Group.java
+++ b/core/api/src/main/java/org/onosproject/net/group/Group.java
@@ -105,4 +105,12 @@
      * @return the age of the group as an integer
      */
     int age();
+
+    /**
+     * Returns the count for the number of attempts at programming a failed
+     * group.
+     *
+     * @return the count for the number of failed attempts at programming this group
+     */
+    int failedRetryCount();
 }
diff --git a/core/api/src/main/java/org/onosproject/net/group/StoredGroupEntry.java b/core/api/src/main/java/org/onosproject/net/group/StoredGroupEntry.java
index 6d22fa7..760fc5a 100644
--- a/core/api/src/main/java/org/onosproject/net/group/StoredGroupEntry.java
+++ b/core/api/src/main/java/org/onosproject/net/group/StoredGroupEntry.java
@@ -72,4 +72,20 @@
      * @param referenceCount reference count
      */
     void setReferenceCount(long referenceCount);
+
+    /**
+     * Increments the count for the number of failed attempts in programming
+     * this group.
+     *
+     */
+    void incrFailedRetryCount();
+
+    /**
+     * Sets the count for the number of failed attempts in programming this
+     * group.
+     *
+     * @param failedRetryCount count for number of failed attempts in
+     *            programming this group
+     */
+    void setFailedRetryCount(int failedRetryCount);
 }
diff --git a/core/api/src/test/java/org/onosproject/net/group/DefaultGroupTest.java b/core/api/src/test/java/org/onosproject/net/group/DefaultGroupTest.java
index 0c7df81..e8fb7b3 100644
--- a/core/api/src/test/java/org/onosproject/net/group/DefaultGroupTest.java
+++ b/core/api/src/test/java/org/onosproject/net/group/DefaultGroupTest.java
@@ -85,6 +85,7 @@
         assertThat(group1.referenceCount(), is(0L));
         assertThat(group1.buckets(), is(groupBuckets));
         assertThat(group1.state(), is(Group.GroupState.PENDING_ADD));
+        assertThat(group1.failedRetryCount(), is(0));
     }
 
     /**
@@ -101,5 +102,22 @@
         assertThat(group.referenceCount(), is(0L));
         assertThat(group.deviceId(), is(NetTestTools.did("1")));
         assertThat(group.buckets(), is(groupBuckets));
+        assertThat(group.failedRetryCount(), is(0));
+    }
+
+    /**
+     * Test failedRetryCount field.
+     */
+    @Test
+    public void checkFailedRetyCount() {
+        assertThat(group1.failedRetryCount(), is(0));
+        group1.incrFailedRetryCount();
+        assertThat(group1.failedRetryCount(), is(1));
+        group1.setFailedRetryCount(3);
+        assertThat(group1.failedRetryCount(), is(3));
+        group1.incrFailedRetryCount();
+        assertThat(group1.failedRetryCount(), is(4));
+        group1.setFailedRetryCount(0);
+        assertThat(group1.failedRetryCount(), is(0));
     }
 }
diff --git a/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java b/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
index 58417a8..6e8b126 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
@@ -112,6 +112,7 @@
     private static final boolean GARBAGE_COLLECT = false;
     private static final int GC_THRESH = 6;
     private static final boolean ALLOW_EXTRANEOUS_GROUPS = true;
+    private static final int MAX_FAILED_ATTEMPTS = 3;
 
     private final int dummyId = 0xffffffff;
     private final GroupId dummyGroupId = new GroupId(dummyId);
@@ -152,6 +153,7 @@
             extraneousGroupEntriesById = new ConcurrentHashMap<>();
     private ExecutorService messageHandlingExecutor;
     private static final int MESSAGE_HANDLER_THREAD_POOL_SIZE = 1;
+
     private final HashMap<DeviceId, Boolean> deviceAuditStatus = new HashMap<>();
 
     private final AtomicInteger groupIdGen = new AtomicInteger();
@@ -941,6 +943,7 @@
                 existing.setPackets(group.packets());
                 existing.setBytes(group.bytes());
                 existing.setReferenceCount(group.referenceCount());
+                existing.setFailedRetryCount(0);
                 if ((existing.state() == GroupState.PENDING_ADD) ||
                         (existing.state() == GroupState.PENDING_ADD_RETRY)) {
                     log.trace("addOrUpdateGroupEntry: group entry {} in device {} moving from {} to ADDED",
@@ -969,6 +972,7 @@
                              + "happening for a non-existing entry in the map");
         }
 
+        // XXX if map is going to trigger event, is this one needed?
         if (event != null) {
             notifyDelegate(event);
         }
@@ -1082,9 +1086,10 @@
             return;
         }
 
-        log.warn("groupOperationFailed: group operation {} failed"
+        log.warn("groupOperationFailed: group operation {} failed in state {} "
                          + "for group {} in device {} with code {}",
                  operation.opType(),
+                 existing.state(),
                  existing.id(),
                  existing.deviceId(),
                  operation.failureCode());
@@ -1107,9 +1112,26 @@
                         existing.buckets());
             }
         }
+        if (operation.failureCode() == GroupOperation.GroupMsgErrorCode.INVALID_GROUP) {
+            existing.incrFailedRetryCount();
+            if (existing.failedRetryCount() < MAX_FAILED_ATTEMPTS) {
+                log.warn("Group {} programming failed {} of {} times in dev {}, "
+                        + "retrying ..", existing.id(),
+                         existing.failedRetryCount(), MAX_FAILED_ATTEMPTS,
+                         deviceId);
+                return;
+            }
+            log.warn("Group {} programming failed {} of {} times in dev {}, "
+                    + "removing group from store", existing.id(),
+                     existing.failedRetryCount(), MAX_FAILED_ATTEMPTS,
+                     deviceId);
+            // fall through to case
+        }
+
         switch (operation.opType()) {
             case ADD:
-                if (existing.state() == GroupState.PENDING_ADD) {
+                if (existing.state() == GroupState.PENDING_ADD
+                    || existing.state() == GroupState.PENDING_ADD_RETRY) {
                     notifyDelegate(new GroupEvent(Type.GROUP_ADD_FAILED, existing));
                     log.warn("groupOperationFailed: cleaningup "
                                      + "group {} from store in device {}....",
diff --git a/core/store/dist/src/test/java/org/onosproject/store/group/impl/DistributedGroupStoreTest.java b/core/store/dist/src/test/java/org/onosproject/store/group/impl/DistributedGroupStoreTest.java
index fd9b484..0a44359 100644
--- a/core/store/dist/src/test/java/org/onosproject/store/group/impl/DistributedGroupStoreTest.java
+++ b/core/store/dist/src/test/java/org/onosproject/store/group/impl/DistributedGroupStoreTest.java
@@ -36,6 +36,7 @@
 import org.onosproject.net.group.DefaultGroupDescription;
 import org.onosproject.net.group.DefaultGroupKey;
 import org.onosproject.net.group.Group;
+import org.onosproject.net.group.Group.GroupState;
 import org.onosproject.net.group.GroupBucket;
 import org.onosproject.net.group.GroupBuckets;
 import org.onosproject.net.group.GroupDescription;
@@ -44,6 +45,7 @@
 import org.onosproject.net.group.GroupOperation;
 import org.onosproject.net.group.GroupStore;
 import org.onosproject.net.group.GroupStoreDelegate;
+import org.onosproject.net.group.GroupOperation.GroupMsgErrorCode;
 import org.onosproject.store.cluster.messaging.ClusterCommunicationServiceAdapter;
 import org.onosproject.store.service.ConsistentMap;
 import org.onosproject.store.service.TestStorageService;
@@ -379,6 +381,85 @@
     }
 
     /**
+     * Tests group operation failed interface, with error codes for failures.
+     */
+    @Test
+    public void testGroupOperationFailedWithErrorCode() {
+        TestDelegate delegate = new TestDelegate();
+        groupStore.setDelegate(delegate);
+        groupStore.deviceInitialAuditCompleted(deviceId1, true);
+        groupStore.storeGroupDescription(groupDescription1);
+        groupStore.deviceInitialAuditCompleted(deviceId2, true);
+        groupStore.storeGroupDescription(groupDescription2);
+
+        List<GroupEvent> eventsAfterAdds = delegate.eventsSeen();
+        assertThat(eventsAfterAdds, hasSize(2));
+        eventsAfterAdds.forEach(event -> assertThat(event
+                .type(), is(GroupEvent.Type.GROUP_ADD_REQUESTED)));
+        delegate.resetEvents();
+
+        // test group exists
+        GroupOperation opAdd = GroupOperation
+                .createAddGroupOperation(groupId1, INDIRECT, buckets);
+        GroupOperation addFailedExists = GroupOperation
+                .createFailedGroupOperation(opAdd, GroupMsgErrorCode.GROUP_EXISTS);
+        groupStore.groupOperationFailed(deviceId1, addFailedExists);
+
+        List<GroupEvent> eventsAfterAddFailed = delegate.eventsSeen();
+        assertThat(eventsAfterAddFailed, hasSize(2));
+        assertThat(eventsAfterAddFailed.get(0).type(),
+                   is(GroupEvent.Type.GROUP_ADDED));
+        assertThat(eventsAfterAddFailed.get(1).type(),
+                   is(GroupEvent.Type.GROUP_ADDED));
+        Group g1 = groupStore.getGroup(deviceId1, groupId1);
+        assertEquals(0, g1.failedRetryCount());
+        delegate.resetEvents();
+
+        // test invalid group
+        Group g2 = groupStore.getGroup(deviceId2, groupId2);
+        assertEquals(0, g2.failedRetryCount());
+        assertEquals(GroupState.PENDING_ADD, g2.state());
+        GroupOperation opAdd1 = GroupOperation
+                .createAddGroupOperation(groupId2, INDIRECT, buckets);
+        GroupOperation addFailedInvalid = GroupOperation
+                .createFailedGroupOperation(opAdd1, GroupMsgErrorCode.INVALID_GROUP);
+
+        groupStore.groupOperationFailed(deviceId2, addFailedInvalid);
+        groupStore.pushGroupMetrics(deviceId2, ImmutableList.of());
+        List<GroupEvent> eventsAfterAddFailed1 = delegate.eventsSeen();
+        assertThat(eventsAfterAddFailed1, hasSize(1));
+        assertThat(eventsAfterAddFailed.get(0).type(),
+                   is(GroupEvent.Type.GROUP_ADD_REQUESTED));
+        g2 = groupStore.getGroup(deviceId2, groupId2);
+        assertEquals(1, g2.failedRetryCount());
+        assertEquals(GroupState.PENDING_ADD_RETRY, g2.state());
+        delegate.resetEvents();
+
+        groupStore.groupOperationFailed(deviceId2, addFailedInvalid);
+        groupStore.pushGroupMetrics(deviceId2, ImmutableList.of());
+        List<GroupEvent> eventsAfterAddFailed2 = delegate.eventsSeen();
+        assertThat(eventsAfterAddFailed2, hasSize(1));
+        assertThat(eventsAfterAddFailed.get(0).type(),
+                   is(GroupEvent.Type.GROUP_ADD_REQUESTED));
+        g2 = groupStore.getGroup(deviceId2, groupId2);
+        assertEquals(2, g2.failedRetryCount());
+        assertEquals(GroupState.PENDING_ADD_RETRY, g2.state());
+        delegate.resetEvents();
+
+        groupStore.groupOperationFailed(deviceId2, addFailedInvalid);
+        groupStore.pushGroupMetrics(deviceId2, ImmutableList.of());
+        List<GroupEvent> eventsAfterAddFailed3 = delegate.eventsSeen();
+        assertThat(eventsAfterAddFailed3, hasSize(2));
+        assertThat(eventsAfterAddFailed.get(0).type(),
+                   is(GroupEvent.Type.GROUP_ADD_FAILED));
+        assertThat(eventsAfterAddFailed.get(1).type(),
+                   is(GroupEvent.Type.GROUP_REMOVED));
+        g2 = groupStore.getGroup(deviceId2, groupId2);
+        assertEquals(null, g2);
+        delegate.resetEvents();
+    }
+
+    /**
      * Tests extraneous group operations.
      */
     @Test