Bugfix in routing logic to avoid null groups; Improvement in group handling.

Two main changes for the bug-fix:
    - avoid doing a full-reroute in the event of link failure as this will leave behind stale state in stores
      if event has been preceded by mastership change leading to the nuking of the ecmpSpg for one of the
      link's devices; instead do a rehash
    - when full-reroute is attempted, do it only once, with a complete nuke of the next-obj store

Improvement in group handling allows for a max number of retries for a group that failed to be added.
Earlier behavior was to try only once, and if it fails, it gets removed from the group-store. Now it
is removed after a couple of retries - so total 3 attempts to program the group.

Change-Id: I54ca8203cef779463522b01353540d12f8be3c91
diff --git a/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java b/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
index 58417a8..6e8b126 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/group/impl/DistributedGroupStore.java
@@ -112,6 +112,7 @@
     private static final boolean GARBAGE_COLLECT = false;
     private static final int GC_THRESH = 6;
     private static final boolean ALLOW_EXTRANEOUS_GROUPS = true;
+    private static final int MAX_FAILED_ATTEMPTS = 3;
 
     private final int dummyId = 0xffffffff;
     private final GroupId dummyGroupId = new GroupId(dummyId);
@@ -152,6 +153,7 @@
             extraneousGroupEntriesById = new ConcurrentHashMap<>();
     private ExecutorService messageHandlingExecutor;
     private static final int MESSAGE_HANDLER_THREAD_POOL_SIZE = 1;
+
     private final HashMap<DeviceId, Boolean> deviceAuditStatus = new HashMap<>();
 
     private final AtomicInteger groupIdGen = new AtomicInteger();
@@ -941,6 +943,7 @@
                 existing.setPackets(group.packets());
                 existing.setBytes(group.bytes());
                 existing.setReferenceCount(group.referenceCount());
+                existing.setFailedRetryCount(0);
                 if ((existing.state() == GroupState.PENDING_ADD) ||
                         (existing.state() == GroupState.PENDING_ADD_RETRY)) {
                     log.trace("addOrUpdateGroupEntry: group entry {} in device {} moving from {} to ADDED",
@@ -969,6 +972,7 @@
                              + "happening for a non-existing entry in the map");
         }
 
+        // XXX if map is going to trigger event, is this one needed?
         if (event != null) {
             notifyDelegate(event);
         }
@@ -1082,9 +1086,10 @@
             return;
         }
 
-        log.warn("groupOperationFailed: group operation {} failed"
+        log.warn("groupOperationFailed: group operation {} failed in state {} "
                          + "for group {} in device {} with code {}",
                  operation.opType(),
+                 existing.state(),
                  existing.id(),
                  existing.deviceId(),
                  operation.failureCode());
@@ -1107,9 +1112,26 @@
                         existing.buckets());
             }
         }
+        if (operation.failureCode() == GroupOperation.GroupMsgErrorCode.INVALID_GROUP) {
+            existing.incrFailedRetryCount();
+            if (existing.failedRetryCount() < MAX_FAILED_ATTEMPTS) {
+                log.warn("Group {} programming failed {} of {} times in dev {}, "
+                        + "retrying ..", existing.id(),
+                         existing.failedRetryCount(), MAX_FAILED_ATTEMPTS,
+                         deviceId);
+                return;
+            }
+            log.warn("Group {} programming failed {} of {} times in dev {}, "
+                    + "removing group from store", existing.id(),
+                     existing.failedRetryCount(), MAX_FAILED_ATTEMPTS,
+                     deviceId);
+            // fall through to case
+        }
+
         switch (operation.opType()) {
             case ADD:
-                if (existing.state() == GroupState.PENDING_ADD) {
+                if (existing.state() == GroupState.PENDING_ADD
+                    || existing.state() == GroupState.PENDING_ADD_RETRY) {
                     notifyDelegate(new GroupEvent(Type.GROUP_ADD_FAILED, existing));
                     log.warn("groupOperationFailed: cleaningup "
                                      + "group {} from store in device {}....",