Fix the case of dropped flowEntries that can occur when the current backup is down and no alternate backup exists
Change-Id: If584cb4bb99aa3930ac30ff6b85a5e2ed56071f0
diff --git a/core/store/dist/src/main/java/org/onosproject/store/flow/impl/NewDistributedFlowRuleStore.java b/core/store/dist/src/main/java/org/onosproject/store/flow/impl/NewDistributedFlowRuleStore.java
index 580c985..828b5a0 100644
--- a/core/store/dist/src/main/java/org/onosproject/store/flow/impl/NewDistributedFlowRuleStore.java
+++ b/core/store/dist/src/main/java/org/onosproject/store/flow/impl/NewDistributedFlowRuleStore.java
@@ -614,13 +614,28 @@
// ignore since this event is for a device this node does not manage.
return;
}
- NodeId latestBackupNode = getBackupNode(deviceId);
- NodeId existingBackupNode = lastBackupNodes.get(deviceId);
- if (Objects.equal(latestBackupNode, existingBackupNode)) {
+ NodeId newBackupNode = getBackupNode(deviceId);
+ NodeId currentBackupNode = lastBackupNodes.get(deviceId);
+ if (Objects.equal(newBackupNode, currentBackupNode)) {
// ignore since backup location hasn't changed.
return;
}
- backupSenderExecutor.schedule(() -> backupFlowEntries(latestBackupNode, Sets.newHashSet(deviceId)),
+ if (currentBackupNode != null && newBackupNode == null) {
+ // Current backup node is most likely down and no alternate backup node
+ // has been chosen. Clear current backup location so that we can resume
+ // backups when either current backup comes online or a different backup node
+ // is chosen.
+ log.warn("Lost backup location {} for deviceId {} and no alternate backup node exists. "
+ + "Flows can be lost if the master goes down", currentBackupNode, deviceId);
+ lastBackupNodes.remove(deviceId);
+ lastBackupTimes.remove(deviceId);
+ return;
+ // TODO: Pick any available node as backup and ensure hand-off occurs when
+ // a new master is elected.
+ }
+ log.info("Backup location for {} has changed from {} to {}.",
+ deviceId, currentBackupNode, newBackupNode);
+ backupSenderExecutor.schedule(() -> backupFlowEntries(newBackupNode, Sets.newHashSet(deviceId)),
0,
TimeUnit.SECONDS);
}
@@ -712,8 +727,9 @@
Long lastBackupTime = lastBackupTimes.get(deviceId);
Long lastUpdateTime = lastUpdateTimes.get(deviceId);
NodeId lastBackupNode = lastBackupNodes.get(deviceId);
+ NodeId newBackupNode = getBackupNode(deviceId);
return lastBackupTime == null
- || !Objects.equal(lastBackupNode, getBackupNode(deviceId))
+ || !Objects.equal(lastBackupNode, newBackupNode)
|| (lastUpdateTime != null && lastUpdateTime > lastBackupTime);
})
.collect(Collectors.toSet());
@@ -735,7 +751,7 @@
}
private void onBackupReceipt(Map<DeviceId, Map<FlowId, Set<StoredFlowEntry>>> flowTables) {
- log.debug("Received flows for {} to backup", flowTables.keySet());
+ log.debug("Received flowEntries for {} to backup", flowTables.keySet());
Set<DeviceId> managedDevices = mastershipService.getDevicesOf(local);
// Only process those devices are that not managed by the local node.
Maps.filterKeys(flowTables, deviceId -> !managedDevices.contains(deviceId))