blob: 1ce88748c63053e256742d65b8d32f1315fe1eb8 [file] [log] [blame]
sangho80f11cb2015-04-01 13:05:26 -07001/*
Brian O'Connor0947d7e2017-08-03 21:12:30 -07002 * Copyright 2015-present Open Networking Foundation
sangho80f11cb2015-04-01 13:05:26 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.onosproject.segmentrouting;
17
Saurav Das62ae6792017-05-15 15:34:25 -070018import com.google.common.collect.ImmutableMap;
19import com.google.common.collect.ImmutableMap.Builder;
Charles Chanc22cef32016-04-29 14:38:22 -070020import com.google.common.collect.ImmutableSet;
Saurav Das1b391d52016-11-29 14:27:25 -080021import com.google.common.collect.Lists;
sanghofb7c7292015-04-13 15:15:58 -070022import com.google.common.collect.Maps;
23import com.google.common.collect.Sets;
Saurav Dasfbe74572017-08-03 18:30:35 -070024
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -070025import org.onlab.packet.EthType;
Charles Chan19b70032019-04-17 14:20:26 -070026import com.google.common.collect.Streams;
sangho9b169e32015-04-14 16:27:13 -070027import org.onlab.packet.Ip4Address;
Pier Ventreadb4ae62016-11-23 09:57:42 -080028import org.onlab.packet.Ip6Address;
sangho80f11cb2015-04-01 13:05:26 -070029import org.onlab.packet.IpPrefix;
Charles Chan910be6a2017-08-23 14:46:43 -070030import org.onlab.packet.MacAddress;
31import org.onlab.packet.VlanId;
piera9941192019-04-24 16:12:47 +020032import org.onlab.util.PredictableExecutor;
33import org.onlab.util.PredictableExecutor.PickyCallable;
Saurav Das261c3002017-06-13 15:35:54 -070034import org.onosproject.cluster.NodeId;
Saurav Das00e553b2018-04-21 17:19:48 -070035import org.onosproject.mastership.MastershipEvent;
Charles Chanc22cef32016-04-29 14:38:22 -070036import org.onosproject.net.ConnectPoint;
sangho80f11cb2015-04-01 13:05:26 -070037import org.onosproject.net.Device;
38import org.onosproject.net.DeviceId;
sanghofb7c7292015-04-13 15:15:58 -070039import org.onosproject.net.Link;
Charles Chan910be6a2017-08-23 14:46:43 -070040import org.onosproject.net.PortNumber;
Charles Chan319d1a22015-11-03 10:42:14 -080041import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
42import org.onosproject.segmentrouting.config.DeviceConfiguration;
Saurav Das62ae6792017-05-15 15:34:25 -070043import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Charles Chand66d6712018-03-29 16:03:41 -070044import org.onosproject.store.serializers.KryoNamespaces;
45import org.onosproject.store.service.Serializer;
sangho80f11cb2015-04-01 13:05:26 -070046import org.slf4j.Logger;
47import org.slf4j.LoggerFactory;
48
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070049import java.time.Instant;
sangho80f11cb2015-04-01 13:05:26 -070050import java.util.ArrayList;
Charles Chand66d6712018-03-29 16:03:41 -070051import java.util.Collections;
sangho80f11cb2015-04-01 13:05:26 -070052import java.util.HashMap;
53import java.util.HashSet;
Saurav Das261c3002017-06-13 15:35:54 -070054import java.util.Iterator;
Charles Chand66d6712018-03-29 16:03:41 -070055import java.util.List;
Saurav Das261c3002017-06-13 15:35:54 -070056import java.util.Map;
Saurav Dasd1872b02016-12-02 15:43:47 -080057import java.util.Objects;
Charles Chan6dbcd252018-04-02 11:46:38 -070058import java.util.Optional;
sangho80f11cb2015-04-01 13:05:26 -070059import java.util.Set;
piera9941192019-04-24 16:12:47 +020060import java.util.concurrent.CompletableFuture;
61import java.util.concurrent.ExecutionException;
62import java.util.concurrent.ExecutorService;
63import java.util.concurrent.Future;
Saurav Das07c74602016-04-27 18:35:50 -070064import java.util.concurrent.ScheduledExecutorService;
65import java.util.concurrent.TimeUnit;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090066import java.util.concurrent.locks.Lock;
67import java.util.concurrent.locks.ReentrantLock;
Charles Chan19b70032019-04-17 14:20:26 -070068import java.util.stream.Collectors;
Saurav Dasdc7f2752018-03-18 21:28:15 -070069import java.util.stream.Stream;
70
Pier Ventreadb4ae62016-11-23 09:57:42 -080071import static com.google.common.base.Preconditions.checkNotNull;
72import static java.util.concurrent.Executors.newScheduledThreadPool;
73import static org.onlab.util.Tools.groupedThreads;
sangho80f11cb2015-04-01 13:05:26 -070074
Charles Chanb7f75ac2016-01-11 18:28:54 -080075/**
76 * Default routing handler that is responsible for route computing and
77 * routing rule population.
78 */
sangho80f11cb2015-04-01 13:05:26 -070079public class DefaultRoutingHandler {
Saurav Dasf9332192017-02-18 14:05:44 -080080 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey092e9e22018-02-01 13:49:47 -080081 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Dasf9332192017-02-18 14:05:44 -080082 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasfbe74572017-08-03 18:30:35 -070083 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Das00e553b2018-04-21 17:19:48 -070084 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Saurav Das68e1b6a2018-06-11 17:02:31 -070085 private static final long PURGE_DELAY = 1000; // ms
Charles Chanc22cef32016-04-29 14:38:22 -070086 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sangho80f11cb2015-04-01 13:05:26 -070087
88 private SegmentRoutingManager srManager;
89 private RoutingRulePopulator rulePopulator;
Shashikanth VH0637b162015-12-11 01:32:44 +053090 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
91 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho9b169e32015-04-14 16:27:13 -070092 private DeviceConfiguration config;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090093 private final Lock statusLock = new ReentrantLock();
94 private volatile Status populationStatus;
Yuta HIGUCHIebee2f12016-07-21 16:54:33 -070095 private ScheduledExecutorService executorService
Saurav Dasd1872b02016-12-02 15:43:47 -080096 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das49368392018-04-23 18:42:12 -070097 private ScheduledExecutorService executorServiceMstChg
98 = newScheduledThreadPool(1, groupedThreads("masterChg", "mstch-%d", log));
Saurav Das68e1b6a2018-06-11 17:02:31 -070099 private ScheduledExecutorService executorServiceFRR
100 = newScheduledThreadPool(1, groupedThreads("fullRR", "fullRR-%d", log));
piera9941192019-04-24 16:12:47 +0200101 // Route populators - 0 will leverage available processors
102 private static final int DEFAULT_THREADS = 0;
103 private ExecutorService routePopulators;
Saurav Das49368392018-04-23 18:42:12 -0700104
Saurav Das00e553b2018-04-21 17:19:48 -0700105 private Instant lastRoutingChange = Instant.EPOCH;
Saurav Das68e1b6a2018-06-11 17:02:31 -0700106 private Instant lastFullReroute = Instant.EPOCH;
sangho80f11cb2015-04-01 13:05:26 -0700107
Saurav Das00e553b2018-04-21 17:19:48 -0700108 // Distributed store to keep track of ONOS instance that should program the
109 // device pair. There should be only one instance (the king) that programs the same pair.
Charles Chand66d6712018-03-29 16:03:41 -0700110 Map<Set<DeviceId>, NodeId> shouldProgram;
Charles Chanfbcb8812018-04-18 18:41:05 -0700111 Map<DeviceId, Boolean> shouldProgramCache;
Charles Chand66d6712018-03-29 16:03:41 -0700112
Saurav Das00e553b2018-04-21 17:19:48 -0700113 // Local store to keep track of all devices that this instance was responsible
114 // for programming in the last run. Helps to determine if mastership changed
115 // during a run - only relevant for programming as a result of topo change.
116 Set<DeviceId> lastProgrammed;
117
/**
 * Lifecycle states of a default routing population run.
 */
public enum Status {
    /** No population run has been started yet. */
    IDLE,

    /** A population run is in progress. */
    STARTED,

    /** The population run was aborted due to errors, mostly for groups not found. */
    ABORTED,

    /** The population run finished successfully. */
    SUCCEEDED
}
131
/**
 * Builds a routing handler bound to the given segment routing manager.
 *
 * <p>The "sr-should-program" consistent map is created first, then the
 * manager-derived state is initialised through {@link #update}, and only
 * afterwards is the route-populator executor created.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.shouldProgram = srManager.storageService
            .<Set<DeviceId>, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build()
            .asJavaMap();
    this.shouldProgramCache = Maps.newConcurrentMap();

    update(srManager);

    this.routePopulators = new PredictableExecutor(
            DEFAULT_THREADS, groupedThreads("onos/sr", "r-populator-%d", log));
}
148
149 /**
150 * Updates a DefaultRoutingHandler object.
151 *
152 * @param srManager SegmentRoutingManager object
153 */
154 void update(SegmentRoutingManager srManager) {
sangho80f11cb2015-04-01 13:05:26 -0700155 this.srManager = srManager;
156 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho9b169e32015-04-14 16:27:13 -0700157 this.config = checkNotNull(srManager.deviceConfiguration);
sangho80f11cb2015-04-01 13:05:26 -0700158 this.populationStatus = Status.IDLE;
sanghofb7c7292015-04-13 15:15:58 -0700159 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Das00e553b2018-04-21 17:19:48 -0700160 this.lastProgrammed = Sets.newConcurrentHashSet();
sangho80f11cb2015-04-01 13:05:26 -0700161 }
162
163 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700164 * Returns an immutable copy of the current ECMP shortest-path graph as
165 * computed by this controller instance.
166 *
Saurav Das261c3002017-06-13 15:35:54 -0700167 * @return immutable copy of the current ECMP graph
Saurav Das62ae6792017-05-15 15:34:25 -0700168 */
169 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
170 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
171 currentEcmpSpgMap.entrySet().forEach(entry -> {
172 if (entry.getValue() != null) {
173 builder.put(entry.getKey(), entry.getValue());
174 }
175 });
176 return builder.build();
177 }
178
Saurav Dasfbe74572017-08-03 18:30:35 -0700179 /**
180 * Acquires the lock used when making routing changes.
181 */
182 public void acquireRoutingLock() {
183 statusLock.lock();
184 }
185
186 /**
187 * Releases the lock used when making routing changes.
188 */
189 public void releaseRoutingLock() {
190 statusLock.unlock();
191 }
192
193 /**
194 * Determines if routing in the network has been stable in the last
195 * STABLITY_THRESHOLD seconds, by comparing the current time to the last
196 * routing change timestamp.
197 *
198 * @return true if stable
199 */
200 public boolean isRoutingStable() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700201 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
202 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700203 log.trace("Routing stable since {}s", now - last);
Saurav Dasfbe74572017-08-03 18:30:35 -0700204 return (now - last) > STABLITY_THRESHOLD;
205 }
206
Saurav Das49368392018-04-23 18:42:12 -0700207 /**
208 * Gracefully shuts down the defaultRoutingHandler. Typically called when
209 * the app is deactivated
210 */
211 public void shutdown() {
212 executorService.shutdown();
213 executorServiceMstChg.shutdown();
Saurav Das68e1b6a2018-06-11 17:02:31 -0700214 executorServiceFRR.shutdown();
piera9941192019-04-24 16:12:47 +0200215 routePopulators.shutdown();
Saurav Das49368392018-04-23 18:42:12 -0700216 }
Saurav Dasfbe74572017-08-03 18:30:35 -0700217
Saurav Das261c3002017-06-13 15:35:54 -0700218 //////////////////////////////////////
219 // Route path handling
220 //////////////////////////////////////
221
Saurav Dase6c448a2018-01-18 12:07:33 -0800222 /* The following three methods represent the three major ways in which
223 * route-path handling is triggered in the network
Saurav Das261c3002017-06-13 15:35:54 -0700224 * a) due to configuration change
225 * b) due to route-added event
226 * c) due to change in the topology
227 */
228
Saurav Das62ae6792017-05-15 15:34:25 -0700229 /**
Saurav Das261c3002017-06-13 15:35:54 -0700230 * Populates all routing rules to all switches. Typically triggered at
231 * startup or after a configuration event.
sangho80f11cb2015-04-01 13:05:26 -0700232 */
Saurav Das62ae6792017-05-15 15:34:25 -0700233 public void populateAllRoutingRules() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700234 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900235 statusLock.lock();
236 try {
Saurav Das261c3002017-06-13 15:35:54 -0700237 if (populationStatus == Status.STARTED) {
238 log.warn("Previous rule population is not finished. Cannot"
239 + " proceed with populateAllRoutingRules");
240 return;
241 }
242
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900243 populationStatus = Status.STARTED;
244 rulePopulator.resetCounter();
Saurav Das261c3002017-06-13 15:35:54 -0700245 log.info("Starting to populate all routing rules");
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900246 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sangho80f11cb2015-04-01 13:05:26 -0700247
Saurav Das261c3002017-06-13 15:35:54 -0700248 // take a snapshot of the topology
249 updatedEcmpSpgMap = new HashMap<>();
250 Set<EdgePair> edgePairs = new HashSet<>();
251 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart61e24e12017-11-30 18:23:42 -0800252 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das261c3002017-06-13 15:35:54 -0700253 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart61e24e12017-11-30 18:23:42 -0800254 new EcmpShortestPathGraph(dstSw, srManager);
255 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700256 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
257 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700258 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700259 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
260 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
261 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700262 }
Charles Chand66d6712018-03-29 16:03:41 -0700263
264 if (!shouldProgram(dstSw)) {
Saurav Das00e553b2018-04-21 17:19:48 -0700265 lastProgrammed.remove(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900266 continue;
Saurav Das00e553b2018-04-21 17:19:48 -0700267 } else {
268 lastProgrammed.add(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900269 }
Saurav Das00e553b2018-04-21 17:19:48 -0700270 // To do a full reroute, assume all route-paths have changed
Charles Chand66d6712018-03-29 16:03:41 -0700271 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart61e24e12017-11-30 18:23:42 -0800272 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
273 if (targetSw.equals(dev)) {
Saurav Das261c3002017-06-13 15:35:54 -0700274 continue;
275 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800276 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das261c3002017-06-13 15:35:54 -0700277 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900278 }
Saurav Das261c3002017-06-13 15:35:54 -0700279 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900280
Saurav Das261c3002017-06-13 15:35:54 -0700281 if (!redoRouting(routeChanges, edgePairs, null)) {
282 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
283 populationStatus = Status.ABORTED;
284 log.warn("Failed to repopulate all routing rules.");
285 return;
sangho80f11cb2015-04-01 13:05:26 -0700286 }
287
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900288 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
289 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700290 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900291 rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700292 return;
pierdebd15c2019-04-19 20:55:53 +0200293 } catch (Exception e) {
294 log.error("populateAllRoutingRules thrown an exception: {}",
295 e.getMessage(), e);
296 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900297 } finally {
298 statusLock.unlock();
sangho80f11cb2015-04-01 13:05:26 -0700299 }
sangho80f11cb2015-04-01 13:05:26 -0700300 }
301
sanghofb7c7292015-04-13 15:15:58 -0700302 /**
Saurav Das261c3002017-06-13 15:35:54 -0700303 * Populate rules from all other edge devices to the connect-point(s)
304 * specified for the given subnets.
305 *
306 * @param cpts connect point(s) of the subnets being added
307 * @param subnets subnets being added
Charles Chan910be6a2017-08-23 14:46:43 -0700308 */
309 // XXX refactor
Saurav Das261c3002017-06-13 15:35:54 -0700310 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan6db55b92017-09-11 15:21:57 -0700311 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
312 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
313 return;
314 }
315
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700316 lastRoutingChange = Instant.now();
Saurav Das261c3002017-06-13 15:35:54 -0700317 statusLock.lock();
318 try {
319 if (populationStatus == Status.STARTED) {
320 log.warn("Previous rule population is not finished. Cannot"
321 + " proceed with routing rules for added routes");
322 return;
323 }
324 populationStatus = Status.STARTED;
325 rulePopulator.resetCounter();
Charles Chan910be6a2017-08-23 14:46:43 -0700326 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
327 subnets, cpts);
Saurav Das6430f412018-01-25 09:49:01 -0800328 // In principle an update to a subnet/prefix should not require a
329 // new ECMPspg calculation as it is not a topology event. As a
330 // result, we use the current/existing ECMPspg in the updated map
331 // used by the redoRouting method.
Saurav Das6de6ffd2018-02-09 09:15:03 -0800332 if (updatedEcmpSpgMap == null) {
333 updatedEcmpSpgMap = new HashMap<>();
334 }
Saurav Das6430f412018-01-25 09:49:01 -0800335 currentEcmpSpgMap.entrySet().forEach(entry -> {
336 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase321cff2018-02-09 17:26:45 -0800337 if (log.isTraceEnabled()) {
338 log.trace("Root switch: {}", entry.getKey());
339 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Das6430f412018-01-25 09:49:01 -0800340 }
341 });
Saurav Das261c3002017-06-13 15:35:54 -0700342 Set<EdgePair> edgePairs = new HashSet<>();
343 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
344 boolean handleRouting = false;
345
346 if (cpts.size() == 2) {
347 // ensure connect points are edge-pairs
348 Iterator<ConnectPoint> iter = cpts.iterator();
349 DeviceId dev1 = iter.next().deviceId();
Charles Chan6dbcd252018-04-02 11:46:38 -0700350 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
351 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
352 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700353 } else {
354 log.warn("Connectpoints {} for subnets {} not on "
355 + "pair-devices.. aborting populateSubnet", cpts, subnets);
356 populationStatus = Status.ABORTED;
357 return;
358 }
359 for (ConnectPoint cp : cpts) {
Saurav Das6430f412018-01-25 09:49:01 -0800360 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
361 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700362 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800363 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
364 log.warn("populateSubnet: no updated graph for dev:{}"
365 + " ... creating", cp.deviceId());
366 }
Charles Chand66d6712018-03-29 16:03:41 -0700367 if (!shouldProgram(cp.deviceId())) {
Saurav Das261c3002017-06-13 15:35:54 -0700368 continue;
369 }
370 handleRouting = true;
371 }
372 } else {
373 // single connect point
374 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Das6430f412018-01-25 09:49:01 -0800375 if (updatedEcmpSpgMap.get(dstSw) == null) {
376 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700377 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800378 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
379 log.warn("populateSubnet: no updated graph for dev:{}"
380 + " ... creating", dstSw);
381 }
Charles Chand66d6712018-03-29 16:03:41 -0700382 handleRouting = shouldProgram(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700383 }
384
385 if (!handleRouting) {
386 log.debug("This instance is not handling ecmp routing to the "
387 + "connectPoint(s) {}", cpts);
388 populationStatus = Status.ABORTED;
389 return;
390 }
391
392 // if it gets here, this instance should handle routing for the
393 // connectpoint(s). Assume all route-paths have to be updated to
394 // the connectpoint(s) with the following exceptions
395 // 1. if target is non-edge no need for routing rules
396 // 2. if target is one of the connectpoints
397 for (ConnectPoint cp : cpts) {
398 DeviceId dstSw = cp.deviceId();
399 for (Device targetSw : srManager.deviceService.getDevices()) {
400 boolean isEdge = false;
401 try {
402 isEdge = config.isEdgeDevice(targetSw.id());
403 } catch (DeviceConfigNotFoundException e) {
Charles Chaneaf3c9b2018-02-16 17:20:54 -0800404 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
405 continue;
Saurav Das261c3002017-06-13 15:35:54 -0700406 }
Charles Chan6dbcd252018-04-02 11:46:38 -0700407 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700408 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chan6dbcd252018-04-02 11:46:38 -0700409 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das261c3002017-06-13 15:35:54 -0700410 continue;
411 }
412 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
413 }
414 }
415
416 if (!redoRouting(routeChanges, edgePairs, subnets)) {
417 log.debug("populateSubnet: populationStatus is ABORTED");
418 populationStatus = Status.ABORTED;
419 log.warn("Failed to repopulate the rules for subnet.");
420 return;
421 }
422
423 log.debug("populateSubnet: populationStatus is SUCCEEDED");
424 populationStatus = Status.SUCCEEDED;
425 log.info("Completed subnet population. Total # of rules pushed : {}",
426 rulePopulator.getCounter());
427 return;
428
pierdebd15c2019-04-19 20:55:53 +0200429 } catch (Exception e) {
430 log.error("populateSubnet thrown an exception: {}",
431 e.getMessage(), e);
432 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700433 } finally {
434 statusLock.unlock();
435 }
436 }
437
438 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700439 * Populates the routing rules or makes hash group changes according to the
440 * route-path changes due to link failure, switch failure or link up. This
441 * method should only be called for one of these three possible event-types.
Saurav Dasdc7f2752018-03-18 21:28:15 -0700442 * Note that when a switch goes away, all of its links fail as well, but
443 * this is handled as a single switch removal event.
sanghofb7c7292015-04-13 15:15:58 -0700444 *
Saurav Dasdc7f2752018-03-18 21:28:15 -0700445 * @param linkDown the single failed link, or null for other conditions such
446 * as link-up or a removed switch
Saurav Das62ae6792017-05-15 15:34:25 -0700447 * @param linkUp the single link up, or null for other conditions such as
Saurav Dasdc7f2752018-03-18 21:28:15 -0700448 * link-down or a removed switch
449 * @param switchDown the removed switch, or null for other conditions such
450 * as link-down or link-up
451 * @param seenBefore true if this event is for a linkUp or linkDown for a
452 * seen link
453 */
454 // TODO This method should be refactored into three separated methods
Charles Chan9d2dd552018-06-19 20:56:33 -0700455 public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
456 DeviceId switchDown, boolean seenBefore) {
Saurav Dasdc7f2752018-03-18 21:28:15 -0700457 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
458 .count() != 1) {
Saurav Das62ae6792017-05-15 15:34:25 -0700459 log.warn("Only one event can be handled for link status change .. aborting");
460 return;
461 }
Saurav Dasdc7f2752018-03-18 21:28:15 -0700462
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700463 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900464 statusLock.lock();
465 try {
sanghofb7c7292015-04-13 15:15:58 -0700466
467 if (populationStatus == Status.STARTED) {
Saurav Das261c3002017-06-13 15:35:54 -0700468 log.warn("Previous rule population is not finished. Cannot"
Saurav Das6430f412018-01-25 09:49:01 -0800469 + " proceeed with routingRules for Topology change");
Saurav Das62ae6792017-05-15 15:34:25 -0700470 return;
sanghofb7c7292015-04-13 15:15:58 -0700471 }
472
Saurav Das261c3002017-06-13 15:35:54 -0700473 // Take snapshots of the topology
sangho28d0b6d2015-05-07 13:30:57 -0700474 updatedEcmpSpgMap = new HashMap<>();
Saurav Das261c3002017-06-13 15:35:54 -0700475 Set<EdgePair> edgePairs = new HashSet<>();
sangho28d0b6d2015-05-07 13:30:57 -0700476 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH0637b162015-12-11 01:32:44 +0530477 EcmpShortestPathGraph ecmpSpgUpdated =
478 new EcmpShortestPathGraph(sw.id(), srManager);
sangho28d0b6d2015-05-07 13:30:57 -0700479 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700480 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
481 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700482 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700483 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
484 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
485 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700486 }
sangho28d0b6d2015-05-07 13:30:57 -0700487 }
488
Saurav Das6430f412018-01-25 09:49:01 -0800489 log.info("Starting to populate routing rules from Topology change");
sanghodf0153f2015-05-05 14:13:34 -0700490
sanghofb7c7292015-04-13 15:15:58 -0700491 Set<ArrayList<DeviceId>> routeChanges;
Saurav Das62ae6792017-05-15 15:34:25 -0700492 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700493 + "populationStatus is STARTED");
sanghofb7c7292015-04-13 15:15:58 -0700494 populationStatus = Status.STARTED;
Saurav Das6430f412018-01-25 09:49:01 -0800495 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
496 boolean hashGroupsChanged = false;
Saurav Das1b391d52016-11-29 14:27:25 -0800497 // try optimized re-routing
Saurav Das62ae6792017-05-15 15:34:25 -0700498 if (linkDown == null) {
499 // either a linkUp or a switchDown - compute all route changes by
500 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dascea556f2018-03-05 14:37:16 -0800501 routeChanges = computeRouteChange(switchDown);
Saurav Das62ae6792017-05-15 15:34:25 -0700502
pier572d4a92019-04-25 18:51:51 +0200503 // deal with linkUp
504 if (linkUp != null) {
505 // deal with linkUp of a seen-before link
506 if (seenBefore) {
507 // link previously seen before
508 // do hash-bucket changes instead of a re-route
509 processHashGroupChangeForLinkUp(routeChanges);
510 // clear out routesChanges so a re-route is not attempted
511 routeChanges = ImmutableSet.of();
512 hashGroupsChanged = true;
513 } else {
514 // do hash-bucket changes first, method will return changed routes;
515 // for each route not changed it will perform a reroute
516 Set<ArrayList<DeviceId>> changedRoutes = processHashGroupChangeForLinkUp(routeChanges);
517 Set<ArrayList<DeviceId>> routeChangesTemp = getExpandedRoutes(routeChanges);
518 changedRoutes.forEach(routeChangesTemp::remove);
519 // if routesChanges is empty a re-route is not attempted
520 routeChanges = routeChangesTemp;
521 for (ArrayList<DeviceId> route : routeChanges) {
522 log.debug("remaining routes Target -> Root");
523 if (route.size() == 1) {
524 log.debug(" : all -> {}", route.get(0));
525 } else {
526 log.debug(" : {} -> {}", route.get(0), route.get(1));
527 }
528 }
529 // Mark hash groups as changed
530 if (!changedRoutes.isEmpty()) {
531 hashGroupsChanged = true;
532 }
533 }
534
Saurav Das62ae6792017-05-15 15:34:25 -0700535 }
536
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700537 //deal with switchDown
538 if (switchDown != null) {
pier572d4a92019-04-25 18:51:51 +0200539 processHashGroupChangeForFailure(routeChanges, switchDown);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700540 // clear out routesChanges so a re-route is not attempted
541 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800542 hashGroupsChanged = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700543 }
sanghofb7c7292015-04-13 15:15:58 -0700544 } else {
Saurav Das62ae6792017-05-15 15:34:25 -0700545 // link has gone down
546 // Compare existing ECMP SPG only with the link that went down
547 routeChanges = computeDamagedRoutes(linkDown);
pier572d4a92019-04-25 18:51:51 +0200548 processHashGroupChangeForFailure(routeChanges, null);
Saurav Das68e1b6a2018-06-11 17:02:31 -0700549 // clear out routesChanges so a re-route is not attempted
550 routeChanges = ImmutableSet.of();
551 hashGroupsChanged = true;
Saurav Dasb149be12016-06-07 10:08:06 -0700552 }
553
sanghofb7c7292015-04-13 15:15:58 -0700554 if (routeChanges.isEmpty()) {
Saurav Das6430f412018-01-25 09:49:01 -0800555 if (hashGroupsChanged) {
556 log.info("Hash-groups changed for link status change");
557 } else {
558 log.info("No re-route or re-hash attempted for the link"
559 + " status change");
560 updatedEcmpSpgMap.keySet().forEach(devId -> {
561 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
562 log.debug("Updating ECMPspg for remaining dev:{}", devId);
563 });
564 }
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700565 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700566 populationStatus = Status.SUCCEEDED;
Saurav Das62ae6792017-05-15 15:34:25 -0700567 return;
sanghofb7c7292015-04-13 15:15:58 -0700568 }
569
pier572d4a92019-04-25 18:51:51 +0200570 if (hashGroupsChanged) {
571 log.debug("Hash-groups changed for link status change");
572 }
573
Saurav Das62ae6792017-05-15 15:34:25 -0700574 // reroute of routeChanges
Saurav Das261c3002017-06-13 15:35:54 -0700575 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700576 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700577 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700578 log.info("Completed repopulation of rules for link-status change."
579 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700580 return;
sanghofb7c7292015-04-13 15:15:58 -0700581 } else {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700582 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sanghofb7c7292015-04-13 15:15:58 -0700583 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700584 log.warn("Failed to repopulate the rules for link status change.");
Saurav Das62ae6792017-05-15 15:34:25 -0700585 return;
sanghofb7c7292015-04-13 15:15:58 -0700586 }
pierdebd15c2019-04-19 20:55:53 +0200587 } catch (Exception e) {
588 log.error("populateRoutingRulesForLinkStatusChange thrown an exception: {}",
589 e.getMessage(), e);
590 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900591 } finally {
592 statusLock.unlock();
sanghofb7c7292015-04-13 15:15:58 -0700593 }
594 }
595
Saurav Das62ae6792017-05-15 15:34:25 -0700596 /**
Saurav Das261c3002017-06-13 15:35:54 -0700597 * Processes a set a route-path changes by reprogramming routing rules and
598 * creating new hash-groups or editing them if necessary. This method also
599 * determines the next-hops for the route-path from the src-switch (target)
600 * of the path towards the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -0700601 *
Saurav Das261c3002017-06-13 15:35:54 -0700602 * @param routeChanges a set of route-path changes, where each route-path is
603 * a list with its first element the src-switch (target)
604 * of the path, and the second element the dst-switch of
605 * the path.
606 * @param edgePairs a set of edge-switches that are paired by configuration
607 * @param subnets a set of prefixes that need to be populated in the routing
608 * table of the target switch in the route-path. Can be null,
609 * in which case all the prefixes belonging to the dst-switch
610 * will be populated in the target switch
611 * @return true if successful in repopulating all routes
Saurav Das62ae6792017-05-15 15:34:25 -0700612 */
Saurav Das261c3002017-06-13 15:35:54 -0700613 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
614 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
615 // first make every entry two-elements
pier572d4a92019-04-25 18:51:51 +0200616 Set<ArrayList<DeviceId>> changedRoutes = getExpandedRoutes(routeChanges);
617 // no valid routes - fail fast
618 if (changedRoutes.isEmpty()) {
619 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700620 }
621
622 // now process changedRoutes according to edgePairs
623 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
624 return false; //abort routing and fail fast
625 }
626
627 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Das6430f412018-01-25 09:49:01 -0800628 Set<DeviceId> updatedDevices = Sets.newHashSet();
629 if (!redoRoutingIndividualDests(subnets, changedRoutes,
630 updatedDevices)) {
Saurav Das261c3002017-06-13 15:35:54 -0700631 return false; //abort routing and fail fast
632 }
633
Saurav Das261c3002017-06-13 15:35:54 -0700634 // update ecmpSPG for all edge-pairs
635 for (EdgePair ep : edgePairs) {
636 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
637 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
638 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
639 }
Saurav Das6430f412018-01-25 09:49:01 -0800640
641 // here is where we update all devices not touched by this instance
642 updatedEcmpSpgMap.keySet().stream()
643 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
644 .filter(devId -> !updatedDevices.contains(devId))
645 .forEach(devId -> {
646 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
647 log.debug("Updating ECMPspg for remaining dev:{}", devId);
648 });
Saurav Das261c3002017-06-13 15:35:54 -0700649 return true;
650 }
651
652 /**
653 * Programs targetSw in the changedRoutes for given prefixes reachable by
654 * an edgePair. If no prefixes are given, the method will use configured
655 * subnets/prefixes. If some configured subnets belong only to a specific
656 * destination in the edgePair, then the target switch will be programmed
657 * only to that destination.
658 *
659 * @param edgePairs set of edge-pairs for which target will be programmed
660 * @param subnets a set of prefixes that need to be populated in the routing
661 * table of the target switch in the changedRoutes. Can be null,
662 * in which case all the configured prefixes belonging to the
663 * paired switches will be populated in the target switch
664 * @param changedRoutes a set of route-path changes, where each route-path is
665 * a list with its first element the src-switch (target)
666 * of the path, and the second element the dst-switch of
667 * the path.
668 * @return true if successful
669 */
piera9941192019-04-24 16:12:47 +0200670 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs, Set<IpPrefix> subnets,
671 Set<ArrayList<DeviceId>> changedRoutes) {
Saurav Das261c3002017-06-13 15:35:54 -0700672 for (EdgePair ep : edgePairs) {
673 // temp store for a target's changedRoutes to this edge-pair
674 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
675 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
676 while (i.hasNext()) {
677 ArrayList<DeviceId> route = i.next();
678 DeviceId dstSw = route.get(1);
679 if (ep.includes(dstSw)) {
680 // routeChange for edge pair found
681 // sort by target iff target is edge and remove from changedRoutes
682 DeviceId targetSw = route.get(0);
683 try {
684 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
685 continue;
686 }
687 } catch (DeviceConfigNotFoundException e) {
688 log.warn(e.getMessage() + "aborting redoRouting");
689 return false;
690 }
691 // route is from another edge to this edge-pair
692 if (targetRoutes.containsKey(targetSw)) {
693 targetRoutes.get(targetSw).add(route);
694 } else {
695 Set<ArrayList<DeviceId>> temp = new HashSet<>();
696 temp.add(route);
697 targetRoutes.put(targetSw, temp);
698 }
699 i.remove();
700 }
701 }
702 // so now for this edgepair we have a per target set of routechanges
703 // process target->edgePair route
piera9941192019-04-24 16:12:47 +0200704 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das261c3002017-06-13 15:35:54 -0700705 for (Map.Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
706 targetRoutes.entrySet()) {
707 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
708 entry.getKey(), ep);
piera9941192019-04-24 16:12:47 +0200709 futures.add(routePopulators.submit(new RedoRoutingEdgePair(entry.getKey(), entry.getValue(),
710 subnets, ep)));
711 }
712 if (!checkJobs(futures)) {
713 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700714 }
715 // if it gets here it has succeeded for all targets to this edge-pair
716 }
717 return true;
718 }
719
piera9941192019-04-24 16:12:47 +0200720 private final class RedoRoutingEdgePair implements PickyCallable<Boolean> {
721 private DeviceId targetSw;
722 private Set<ArrayList<DeviceId>> routes;
723 private Set<IpPrefix> subnets;
724 private EdgePair ep;
725
726 /**
727 * Builds a RedoRoutingEdgePair task which provides a result.
728 *
729 * @param targetSw the target switch
730 * @param routes the changed routes
731 * @param subnets the subnets
732 * @param ep the edge pair
733 */
734 RedoRoutingEdgePair(DeviceId targetSw, Set<ArrayList<DeviceId>> routes,
735 Set<IpPrefix> subnets, EdgePair ep) {
736 this.targetSw = targetSw;
737 this.routes = routes;
738 this.subnets = subnets;
739 this.ep = ep;
740 }
741
742 @Override
743 public Boolean call() throws Exception {
744 return redoRoutingEdgePair();
745 }
746
747 @Override
748 public int hint() {
749 return targetSw.hashCode();
750 }
751
752 private boolean redoRoutingEdgePair() {
753 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
754 routes.forEach(route -> {
755 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
756 log.debug("route: target {} -> dst {} found with next-hops {}",
757 route.get(0), route.get(1), nhops);
758 perDstNextHops.put(route.get(1), nhops);
759 });
760
761 List<Set<IpPrefix>> batchedSubnetDev1, batchedSubnetDev2;
762 if (subnets != null) {
763 batchedSubnetDev1 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
764 batchedSubnetDev2 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
765 } else {
766 batchedSubnetDev1 = config.getBatchedSubnets(ep.dev1);
767 batchedSubnetDev2 = config.getBatchedSubnets(ep.dev2);
768 }
769 List<Set<IpPrefix>> batchedSubnetBoth = Streams
770 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.intersection(a, b))
771 .filter(set -> !set.isEmpty())
772 .collect(Collectors.toList());
773 List<Set<IpPrefix>> batchedSubnetDev1Only = Streams
774 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(a, b))
775 .filter(set -> !set.isEmpty())
776 .collect(Collectors.toList());
777 List<Set<IpPrefix>> batchedSubnetDev2Only = Streams
778 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(b, a))
779 .filter(set -> !set.isEmpty())
780 .collect(Collectors.toList());
781
782 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
783 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
784
785 // handle routing to subnets common to edge-pair
786 // only if the targetSw is not part of the edge-pair and there
787 // exists a next hop to at least one of the devices in the edge-pair
788 if (!ep.includes(targetSw)
789 && ((nhDev1 != null && !nhDev1.isEmpty()) || (nhDev2 != null && !nhDev2.isEmpty()))) {
790 log.trace("getSubnets on both {} and {}: {}", ep.dev1, ep.dev2, batchedSubnetBoth);
791 for (Set<IpPrefix> prefixes : batchedSubnetBoth) {
792 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, ep.dev2,
793 perDstNextHops, prefixes)) {
794 return false; // abort everything and fail fast
795 }
796 }
797
798 }
799 // handle routing to subnets that only belong to dev1 only if
800 // a next-hop exists from the target to dev1
801 if (!batchedSubnetDev1Only.isEmpty() &&
802 batchedSubnetDev1Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
803 nhDev1 != null && !nhDev1.isEmpty()) {
804 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
805 onlyDev1NextHops.put(ep.dev1, nhDev1);
806 log.trace("getSubnets on {} only: {}", ep.dev1, batchedSubnetDev1Only);
807 for (Set<IpPrefix> prefixes : batchedSubnetDev1Only) {
808 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, null,
809 onlyDev1NextHops, prefixes)) {
810 return false; // abort everything and fail fast
811 }
812 }
813 }
814 // handle routing to subnets that only belong to dev2 only if
815 // a next-hop exists from the target to dev2
816 if (!batchedSubnetDev2Only.isEmpty() &&
817 batchedSubnetDev2Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
818 nhDev2 != null && !nhDev2.isEmpty()) {
819 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
820 onlyDev2NextHops.put(ep.dev2, nhDev2);
821 log.trace("getSubnets on {} only: {}", ep.dev2, batchedSubnetDev2Only);
822 for (Set<IpPrefix> prefixes : batchedSubnetDev2Only) {
823 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev2, null,
824 onlyDev2NextHops, prefixes)) {
825 return false; // abort everything and fail fast
826 }
827 }
828 }
829 return true;
830 }
831 }
832
Saurav Das261c3002017-06-13 15:35:54 -0700833 /**
834 * Programs targetSw in the changedRoutes for given prefixes reachable by
835 * a destination switch that is not part of an edge-pair.
836 * If no prefixes are given, the method will use configured subnets/prefixes.
837 *
838 * @param subnets a set of prefixes that need to be populated in the routing
839 * table of the target switch in the changedRoutes. Can be null,
840 * in which case all the configured prefixes belonging to the
841 * paired switches will be populated in the target switch
842 * @param changedRoutes a set of route-path changes, where each route-path is
843 * a list with its first element the src-switch (target)
844 * of the path, and the second element the dst-switch of
845 * the path.
846 * @return true if successful
847 */
piera9941192019-04-24 16:12:47 +0200848 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets, Set<ArrayList<DeviceId>> changedRoutes,
Saurav Das6430f412018-01-25 09:49:01 -0800849 Set<DeviceId> updatedDevices) {
Saurav Das261c3002017-06-13 15:35:54 -0700850 // aggregate route-path changes for each dst device
851 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
852 new HashMap<>();
853 for (ArrayList<DeviceId> route: changedRoutes) {
854 DeviceId dstSw = route.get(1);
855 ArrayList<ArrayList<DeviceId>> deviceRoutes =
856 routesBydevice.get(dstSw);
857 if (deviceRoutes == null) {
858 deviceRoutes = new ArrayList<>();
859 routesBydevice.put(dstSw, deviceRoutes);
860 }
861 deviceRoutes.add(route);
862 }
piera9941192019-04-24 16:12:47 +0200863 // iterate over the impacted devices
Saurav Das261c3002017-06-13 15:35:54 -0700864 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
865 ArrayList<ArrayList<DeviceId>> deviceRoutes =
866 routesBydevice.get(impactedDstDevice);
piera9941192019-04-24 16:12:47 +0200867 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das261c3002017-06-13 15:35:54 -0700868 for (ArrayList<DeviceId> route: deviceRoutes) {
869 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
870 route.get(0), route.get(1));
piera9941192019-04-24 16:12:47 +0200871 futures.add(routePopulators.submit(new RedoRoutingIndividualDest(subnets, route)));
872 }
873 // check the execution of each job
874 if (!checkJobs(futures)) {
875 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700876 }
877 //Only if all the flows for all impacted routes to a
878 //specific target are pushed successfully, update the
879 //ECMP graph for that target. Or else the next event
880 //would not see any changes in the ECMP graphs.
881 //In another case, the target switch has gone away, so
882 //routes can't be installed. In that case, the current map
883 //is updated here, without any flows being pushed.
884 currentEcmpSpgMap.put(impactedDstDevice,
885 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Das6430f412018-01-25 09:49:01 -0800886 updatedDevices.add(impactedDstDevice);
Saurav Das261c3002017-06-13 15:35:54 -0700887 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
888 }
889 return true;
890 }
891
piera9941192019-04-24 16:12:47 +0200892 private final class RedoRoutingIndividualDest implements PickyCallable<Boolean> {
893 private DeviceId targetSw;
894 private ArrayList<DeviceId> route;
895 private Set<IpPrefix> subnets;
896
897 /**
898 * Builds a RedoRoutingIndividualDest task, which provides a result.
899 *
900 * @param subnets a set of prefixes
901 * @param route a route-path change
902 */
903 RedoRoutingIndividualDest(Set<IpPrefix> subnets, ArrayList<DeviceId> route) {
904 this.targetSw = route.get(0);
905 this.route = route;
906 this.subnets = subnets;
907 }
908
909 @Override
910 public Boolean call() throws Exception {
911 DeviceId dstSw = route.get(1); // same as impactedDstDevice
912 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
913 if (nextHops.isEmpty()) {
914 log.debug("Could not find next hop from target:{} --> dst {} "
915 + "skipping this route", targetSw, dstSw);
916 return true;
917 }
918 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
919 nhops.put(dstSw, nextHops);
920 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
921 (subnets == null) ? Sets.newHashSet() : subnets)) {
922 return false; // abort routing and fail fast
923 }
924 log.debug("Populating flow rules from target: {} to dst: {}"
925 + " is successful", targetSw, dstSw);
926 return true;
927 }
928
929 @Override
930 public int hint() {
931 return targetSw.hashCode();
932 }
933 }
934
Saurav Das261c3002017-06-13 15:35:54 -0700935 /**
936 * Populate ECMP rules for subnets from target to destination via nexthops.
937 *
938 * @param targetSw Device ID of target switch in which rules will be programmed
939 * @param destSw1 Device ID of final destination switch to which the rules will forward
940 * @param destSw2 Device ID of paired destination switch to which the rules will forward
941 * A null deviceId indicates packets should only be sent to destSw1
Saurav Das97241862018-02-14 14:14:54 -0800942 * @param nextHops Map of a set of next hops per destSw
Saurav Das261c3002017-06-13 15:35:54 -0700943 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
944 * @return true if it succeeds in populating rules
945 */ // refactor
piera9941192019-04-24 16:12:47 +0200946 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destSw1, DeviceId destSw2,
947 Map<DeviceId, Set<DeviceId>> nextHops, Set<IpPrefix> subnets) {
Saurav Das261c3002017-06-13 15:35:54 -0700948 boolean result;
949 // If both target switch and dest switch are edge routers, then set IP
950 // rule for both subnet and router IP.
951 boolean targetIsEdge;
952 boolean dest1IsEdge;
953 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
954 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
955
956 try {
957 targetIsEdge = config.isEdgeDevice(targetSw);
958 dest1IsEdge = config.isEdgeDevice(destSw1);
959 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
960 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
961 if (destSw2 != null) {
962 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
963 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
964 }
965 } catch (DeviceConfigNotFoundException e) {
966 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Das62ae6792017-05-15 15:34:25 -0700967 return false;
968 }
Saurav Das261c3002017-06-13 15:35:54 -0700969
970 if (targetIsEdge && dest1IsEdge) {
Charles Chan19b70032019-04-17 14:20:26 -0700971 List<Set<IpPrefix>> batchedSubnets;
972 if (subnets != null && !subnets.isEmpty()) {
973 batchedSubnets = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
974 } else {
975 batchedSubnets = config.getBatchedSubnets(destSw1);
976 }
Saurav Das97241862018-02-14 14:14:54 -0800977 // XXX - Rethink this - ignoring routerIPs in all other switches
978 // even edge to edge switches
Saurav Das261c3002017-06-13 15:35:54 -0700979 /*subnets.add(dest1RouterIpv4.toIpPrefix());
980 if (dest1RouterIpv6 != null) {
981 subnets.add(dest1RouterIpv6.toIpPrefix());
982 }
983 if (destSw2 != null && dest2RouterIpv4 != null) {
984 subnets.add(dest2RouterIpv4.toIpPrefix());
985 if (dest2RouterIpv6 != null) {
986 subnets.add(dest2RouterIpv6.toIpPrefix());
987 }
988 }*/
Charles Chan19b70032019-04-17 14:20:26 -0700989 log.trace("getSubnets on {}: {}", destSw1, batchedSubnets);
990 for (Set<IpPrefix> prefixes : batchedSubnets) {
991 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
992 + "for subnets {}", targetSw, destSw1,
993 (destSw2 != null) ? ("& " + destSw2) : "",
994 prefixes);
995 if (!rulePopulator.populateIpRuleForSubnet(targetSw, prefixes, destSw1, destSw2, nextHops)) {
996 return false;
997 }
Saurav Das261c3002017-06-13 15:35:54 -0700998 }
Saurav Das62ae6792017-05-15 15:34:25 -0700999 }
Saurav Das261c3002017-06-13 15:35:54 -07001000
1001 if (!targetIsEdge && dest1IsEdge) {
1002 // MPLS rules in all non-edge target devices. These rules are for
1003 // individual destinations, even if the dsts are part of edge-pairs.
1004 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1005 + "all MPLS rules", targetSw, destSw1);
piera9941192019-04-24 16:12:47 +02001006 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Saurav Das261c3002017-06-13 15:35:54 -07001007 if (!result) {
1008 return false;
1009 }
1010 if (dest1RouterIpv6 != null) {
Saurav Das97241862018-02-14 14:14:54 -08001011 int v4sid = 0, v6sid = 0;
1012 try {
1013 v4sid = config.getIPv4SegmentId(destSw1);
1014 v6sid = config.getIPv6SegmentId(destSw1);
1015 } catch (DeviceConfigNotFoundException e) {
1016 log.warn(e.getMessage());
1017 }
1018 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001019 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Saurav Das97241862018-02-14 14:14:54 -08001020 dest1RouterIpv6);
1021 if (!result) {
1022 return false;
1023 }
Saurav Das261c3002017-06-13 15:35:54 -07001024 }
1025 }
1026 }
1027
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001028 if (!targetIsEdge && !dest1IsEdge) {
1029 // MPLS rules for inter-connected spines
1030 // can be merged with above if, left it here for clarity
1031 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1032 + "all MPLS rules", targetSw, destSw1);
1033
piera9941192019-04-24 16:12:47 +02001034 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001035 if (!result) {
1036 return false;
1037 }
1038
1039 if (dest1RouterIpv6 != null) {
1040 int v4sid = 0, v6sid = 0;
1041 try {
1042 v4sid = config.getIPv4SegmentId(destSw1);
1043 v6sid = config.getIPv6SegmentId(destSw1);
1044 } catch (DeviceConfigNotFoundException e) {
1045 log.warn(e.getMessage());
1046 }
1047 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001048 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001049 dest1RouterIpv6);
1050 if (!result) {
1051 return false;
1052 }
1053 }
1054 }
1055 }
1056
Saurav Das261c3002017-06-13 15:35:54 -07001057 // To save on ECMP groups
1058 // avoid MPLS rules in non-edge-devices to non-edge-devices
1059 // avoid MPLS transit rules in edge-devices
1060 // avoid loopback IP rules in edge-devices to non-edge-devices
1061 return true;
Saurav Das62ae6792017-05-15 15:34:25 -07001062 }
1063
1064 /**
pier572d4a92019-04-25 18:51:51 +02001065 * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
Saurav Das62ae6792017-05-15 15:34:25 -07001066 *
1067 * @param routeChanges a set of route-path changes, where each route-path is
1068 * a list with its first element the src-switch of the path
1069 * and the second element the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -07001070 * @param failedSwitch the switchId if the route changes are for a failed switch,
1071 * otherwise null
1072 */
pier572d4a92019-04-25 18:51:51 +02001073 private void processHashGroupChangeForFailure(Set<ArrayList<DeviceId>> routeChanges,
1074 DeviceId failedSwitch) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001075 // first, ensure each routeChanges entry has two elements
pier572d4a92019-04-25 18:51:51 +02001076 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
Saurav Das6430f412018-01-25 09:49:01 -08001077 boolean someFailed = false;
pier572d4a92019-04-25 18:51:51 +02001078 boolean success;
Saurav Das6430f412018-01-25 09:49:01 -08001079 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001080 for (ArrayList<DeviceId> route : changedRoutes) {
1081 DeviceId targetSw = route.get(0);
1082 DeviceId dstSw = route.get(1);
pier572d4a92019-04-25 18:51:51 +02001083 success = fixHashGroupsForRoute(route, true);
1084 // it's possible that we cannot fix hash groups for a route
1085 // if the target switch has failed. Nevertheless the ecmp graph
1086 // for the impacted switch must still be updated.
1087 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
1088 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1089 currentEcmpSpgMap.remove(targetSw);
1090 log.debug("Updating ECMPspg for dst:{} removing failed switch "
1091 + "target:{}", dstSw, targetSw);
1092 updatedDevices.add(targetSw);
1093 updatedDevices.add(dstSw);
1094 continue;
1095 }
1096 //linkfailed - update both sides
1097 if (success) {
1098 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1099 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1100 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
1101 + " or switchdown", dstSw, targetSw);
1102 updatedDevices.add(targetSw);
1103 updatedDevices.add(dstSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001104 } else {
pier572d4a92019-04-25 18:51:51 +02001105 someFailed = true;
Saurav Das62ae6792017-05-15 15:34:25 -07001106 }
1107 }
Saurav Das6430f412018-01-25 09:49:01 -08001108 if (!someFailed) {
1109 // here is where we update all devices not touched by this instance
1110 updatedEcmpSpgMap.keySet().stream()
1111 .filter(devId -> !updatedDevices.contains(devId))
1112 .forEach(devId -> {
1113 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1114 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1115 });
1116 }
Saurav Das62ae6792017-05-15 15:34:25 -07001117 }
1118
1119 /**
pier572d4a92019-04-25 18:51:51 +02001120 * Processes a set a route-path changes due to link up by editing hash groups.
1121 *
1122 * @param routeChanges a set of route-path changes, where each route-path is
1123 * a list with its first element the src-switch of the path
1124 * and the second element the dst-switch of the path.
1125 * @return set of changed routes
1126 */
1127 private Set<ArrayList<DeviceId>> processHashGroupChangeForLinkUp(Set<ArrayList<DeviceId>> routeChanges) {
1128 // Stores changed routes
1129 Set<ArrayList<DeviceId>> doneRoutes = new HashSet<>();
1130 // first, ensure each routeChanges entry has two elements
1131 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
1132 boolean someFailed = false;
1133 boolean success;
1134 Set<DeviceId> updatedDevices = Sets.newHashSet();
1135 for (ArrayList<DeviceId> route : changedRoutes) {
1136 DeviceId targetSw = route.get(0);
1137 DeviceId dstSw = route.get(1);
1138 // linkup - fix (if possible)
1139 success = fixHashGroupsForRoute(route, false);
1140 if (success) {
1141 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1142 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1143 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
1144 targetSw, dstSw);
1145 updatedDevices.add(targetSw);
1146 updatedDevices.add(dstSw);
1147 doneRoutes.add(route);
1148 } else {
1149 someFailed = true;
1150 }
1151
1152 }
1153 if (!someFailed) {
1154 // here is where we update all devices not touched by this instance
1155 updatedEcmpSpgMap.keySet().stream()
1156 .filter(devId -> !updatedDevices.contains(devId))
1157 .forEach(devId -> {
1158 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1159 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1160 });
1161 }
1162 return doneRoutes;
1163 }
1164
1165 /**
Saurav Das62ae6792017-05-15 15:34:25 -07001166 * Edits hash groups in the src-switch (targetSw) of a route-path by
1167 * calling the groupHandler to either add or remove buckets in an existing
1168 * hash group.
1169 *
1170 * @param route a single list representing a route-path where the first element
1171 * is the src-switch (targetSw) of the route-path and the
1172 * second element is the dst-switch
1173 * @param revoke true if buckets in the hash-groups need to be removed;
1174 * false if buckets in the hash-groups need to be added
1175 * @return true if the hash group editing is successful
1176 */
1177 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1178 boolean revoke) {
1179 DeviceId targetSw = route.get(0);
1180 if (route.size() < 2) {
1181 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1182 return false;
1183 }
1184 DeviceId destSw = route.get(1);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001185 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001186 targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001187 // figure out the new next hops at the targetSw towards the destSw
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001188 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001189 // call group handler to change hash group at targetSw
1190 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1191 if (grpHandler == null) {
1192 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1193 + " {} hash group buckets for route:{} ", targetSw,
1194 (revoke) ? "revoke" : "repopulate", route);
1195 return false;
1196 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001197 log.debug("{} hash-groups buckets For Route {} -> {} to new next-hops {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001198 (revoke) ? "revoke" : "repopulating",
1199 targetSw, destSw, nextHops);
1200 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1201 destSw, true)
1202 : grpHandler.fixHashGroups(targetSw, nextHops,
1203 destSw, false);
1204 }
1205
1206 /**
Saurav Das261c3002017-06-13 15:35:54 -07001207 * Start the flow rule population process if it was never started. The
1208 * process finishes successfully when all flow rules are set and stops with
1209 * ABORTED status when any groups required for flows is not set yet.
Saurav Das62ae6792017-05-15 15:34:25 -07001210 */
Saurav Das261c3002017-06-13 15:35:54 -07001211 public void startPopulationProcess() {
1212 statusLock.lock();
1213 try {
1214 if (populationStatus == Status.IDLE
1215 || populationStatus == Status.SUCCEEDED
1216 || populationStatus == Status.ABORTED) {
1217 populateAllRoutingRules();
sangho28d0b6d2015-05-07 13:30:57 -07001218 } else {
Saurav Das261c3002017-06-13 15:35:54 -07001219 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1220 populationStatus);
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001221 }
Saurav Das261c3002017-06-13 15:35:54 -07001222 } finally {
1223 statusLock.unlock();
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001224 }
sanghofb7c7292015-04-13 15:15:58 -07001225 }
1226
Saurav Dasb149be12016-06-07 10:08:06 -07001227 /**
Saurav Das261c3002017-06-13 15:35:54 -07001228 * Revoke rules of given subnet in all edge switches.
1229 *
1230 * @param subnets subnet being removed
1231 * @return true if succeed
1232 */
1233 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
piera9941192019-04-24 16:12:47 +02001234 DeviceId targetSw;
1235 List<Future<Boolean>> futures = Lists.newArrayList();
1236 for (Device sw : srManager.deviceService.getAvailableDevices()) {
1237 targetSw = sw.id();
1238 if (shouldProgram(targetSw)) {
1239 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1240 } else {
1241 futures.add(CompletableFuture.completedFuture(true));
1242 }
1243 }
1244 // check the execution of each job
1245 return checkJobs(futures);
1246 }
1247
1248 private final class RevokeSubnet implements PickyCallable<Boolean> {
1249 private DeviceId targetSw;
1250 private Set<IpPrefix> subnets;
1251
1252 /**
1253 * Builds a RevokeSubnet task, which provides a result.
1254 *
1255 * @param subnets a set of prefixes
1256 * @param targetSw target switch
1257 */
1258 RevokeSubnet(DeviceId targetSw, Set<IpPrefix> subnets) {
1259 this.targetSw = targetSw;
1260 this.subnets = subnets;
1261 }
1262
1263 @Override
1264 public Boolean call() throws Exception {
1265 return srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets);
1266 }
1267
1268 @Override
1269 public int hint() {
1270 return targetSw.hashCode();
Saurav Das261c3002017-06-13 15:35:54 -07001271 }
1272 }
1273
1274 /**
Charles Chan910be6a2017-08-23 14:46:43 -07001275 * Populates IP rules for a route that has direct connection to the switch
1276 * if the current instance is the master of the switch.
1277 *
1278 * @param deviceId device ID of the device that next hop attaches to
1279 * @param prefix IP prefix of the route
1280 * @param hostMac MAC address of the next hop
1281 * @param hostVlanId Vlan ID of the nexthop
1282 * @param outPort port where the next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001283 * @param directHost host is of type direct or indirect
Charles Chan910be6a2017-08-23 14:46:43 -07001284 */
1285 void populateRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001286 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001287 if (shouldProgram(deviceId)) {
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001288 srManager.routingRulePopulator.populateRoute(deviceId, prefix, hostMac, hostVlanId, outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001289 }
1290 }
1291
1292 /**
1293 * Removes IP rules for a route when the next hop is gone.
1294 * if the current instance is the master of the switch.
1295 *
1296 * @param deviceId device ID of the device that next hop attaches to
1297 * @param prefix IP prefix of the route
1298 * @param hostMac MAC address of the next hop
1299 * @param hostVlanId Vlan ID of the nexthop
1300 * @param outPort port that next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001301 * @param directHost host is of type direct or indirect
Charles Chan910be6a2017-08-23 14:46:43 -07001302 */
1303 void revokeRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001304 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001305 if (shouldProgram(deviceId)) {
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001306 srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId, outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001307 }
1308 }
1309
Charles Chand66d6712018-03-29 16:03:41 -07001310 void populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1311 if (shouldProgram(deviceId)) {
1312 srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
1313 }
1314 }
1315
1316 void revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1317 if (shouldProgram(deviceId)) {
1318 srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
1319 }
1320 }
1321
1322 void updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1323 VlanId vlanId, boolean popVlan, boolean install) {
1324 if (shouldProgram(deviceId)) {
1325 srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
1326 }
1327 }
1328
1329 void updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix, MacAddress hostMac,
1330 VlanId vlanId, boolean popVlan, boolean install) {
1331 if (shouldProgram(deviceId)) {
1332 srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
1333 vlanId, popVlan, install);
1334 }
1335 }
1336
Charles Chan910be6a2017-08-23 14:46:43 -07001337 /**
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001338 * Populates IP rules for a route when the next hop is double-tagged.
1339 *
1340 * @param deviceId device ID that next hop attaches to
1341 * @param prefix IP prefix of the route
1342 * @param hostMac MAC address of the next hop
1343 * @param innerVlan Inner Vlan ID of the next hop
1344 * @param outerVlan Outer Vlan ID of the next hop
1345 * @param outerTpid Outer TPID of the next hop
1346 * @param outPort port that the next hop attaches to
1347 */
1348 void populateDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1349 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1350 if (srManager.mastershipService.isLocalMaster(deviceId)) {
Charles Chan61c086d2019-07-26 17:46:15 -07001351 srManager.routingRulePopulator.populateDoubleTaggedRoute(
1352 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1353 srManager.routingRulePopulator.processDoubleTaggedFilter(
1354 deviceId, outPort, outerVlan, innerVlan, true);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001355 }
1356 }
1357
1358 /**
1359 * Revokes IP rules for a route when the next hop is double-tagged.
1360 *
1361 * @param deviceId device ID that next hop attaches to
1362 * @param prefix IP prefix of the route
1363 * @param hostMac MAC address of the next hop
1364 * @param innerVlan Inner Vlan ID of the next hop
1365 * @param outerVlan Outer Vlan ID of the next hop
1366 * @param outerTpid Outer TPID of the next hop
1367 * @param outPort port that the next hop attaches to
1368 */
1369 void revokeDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1370 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1371 // Revoke route either if this node have the mastership (when device is available) or
1372 // if this node is the leader (even when device is unavailable)
1373 if (!srManager.mastershipService.isLocalMaster(deviceId)) {
1374 if (srManager.deviceService.isAvailable(deviceId)) {
1375 // Master node will revoke specified rule.
1376 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1377 return;
1378 }
1379
1380 // isLocalMaster will return false when the device is unavailable.
1381 // Verify if this node is the leader in that case.
1382 NodeId leader = srManager.leadershipService.runForLeadership(
1383 deviceId.toString()).leaderNodeId();
1384 if (!srManager.clusterService.getLocalNode().id().equals(leader)) {
1385 // Leader node will revoke specified rule.
1386 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1387 return;
1388 }
1389 }
1390
Charles Chan61c086d2019-07-26 17:46:15 -07001391 srManager.routingRulePopulator.revokeDoubleTaggedRoute(deviceId, prefix, hostMac,
1392 innerVlan, outerVlan, outerTpid, outPort);
1393 srManager.routingRulePopulator.processDoubleTaggedFilter(deviceId, outPort, outerVlan, innerVlan, false);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001394 }
1395
1396
1397 /**
Saurav Das261c3002017-06-13 15:35:54 -07001398 * Remove ECMP graph entry for the given device. Typically called when
1399 * device is no longer available.
1400 *
1401 * @param deviceId the device for which graphs need to be purged
1402 */
Charles Chanfbcb8812018-04-18 18:41:05 -07001403 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Das6430f412018-01-25 09:49:01 -08001404 statusLock.lock();
1405 try {
Saurav Das6430f412018-01-25 09:49:01 -08001406 if (populationStatus == Status.STARTED) {
1407 log.warn("Previous rule population is not finished. Cannot"
1408 + " proceeed with purgeEcmpGraph for {}", deviceId);
1409 return;
1410 }
1411 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1412 currentEcmpSpgMap.remove(deviceId);
1413 if (updatedEcmpSpgMap != null) {
1414 updatedEcmpSpgMap.remove(deviceId);
1415 }
1416 } finally {
1417 statusLock.unlock();
Saurav Das261c3002017-06-13 15:35:54 -07001418 }
1419 }
1420
Saurav Das00e553b2018-04-21 17:19:48 -07001421 /**
1422 * Attempts a full reroute of route-paths if topology has changed relatively
1423 * close to a mastership change event. Does not do a reroute if mastership
1424 * change is due to reasons other than a ONOS cluster event - for example a
1425 * call to balance-masters, or a switch up/down event.
1426 *
1427 * @param devId the device identifier for which mastership has changed
1428 * @param me the mastership event
1429 */
1430 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1431 // give small delay to absorb mastership events that are caused by
1432 // device that has disconnected from cluster
Saurav Das49368392018-04-23 18:42:12 -07001433 executorServiceMstChg.schedule(new MasterChange(devId, me),
1434 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
Saurav Das00e553b2018-04-21 17:19:48 -07001435 }
1436
1437 protected final class MasterChange implements Runnable {
1438 private DeviceId devId;
1439 private MastershipEvent me;
1440 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1441 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
Saurav Dasec683dc2018-04-27 18:42:30 -07001442 private static final long EDGE_PORT_EVENT_THRESHOLD = 10000; //ms
Saurav Das68e1b6a2018-06-11 17:02:31 -07001443 private static final long FULL_REROUTE_THRESHOLD = 10000; // ms
Saurav Das00e553b2018-04-21 17:19:48 -07001444
1445 MasterChange(DeviceId devId, MastershipEvent me) {
1446 this.devId = devId;
1447 this.me = me;
1448 }
1449
1450 @Override
1451 public void run() {
1452 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1453 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1454
1455 // ignore event for lost switch if cluster event hasn't happened -
1456 // device down event will handle it
1457 if ((me.roleInfo().master() == null
1458 || !srManager.deviceService.isAvailable(devId))
1459 && !clusterEvent) {
1460 log.debug("Full reroute not required for lost device: {}/{} "
1461 + "clusterEvent/timeSince: {}/{}",
1462 devId, me.roleInfo(), clusterEvent, lce);
1463 return;
1464 }
1465
1466 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1467 long lde = Instant.now().toEpochMilli() - update;
1468 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1469
1470 // ignore event for recently connected switch if cluster event hasn't
1471 // happened - link up events will handle it
1472 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1473 && !clusterEvent) {
1474 log.debug("Full reroute not required for recently available"
1475 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1476 + "clusterEvent/timeSince: {}/{}",
1477 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1478 return;
1479 }
1480
Saurav Dasec683dc2018-04-27 18:42:30 -07001481 long lepe = Instant.now().toEpochMilli()
1482 - srManager.lastEdgePortEvent.toEpochMilli();
1483 boolean edgePortEvent = lepe < EDGE_PORT_EVENT_THRESHOLD;
1484
Saurav Das00e553b2018-04-21 17:19:48 -07001485 // if it gets here, then mastership change is likely due to onos
1486 // instance failure, or network partition in onos cluster
1487 // normally a mastership change like this does not require re-programming
1488 // but if topology changes happen at the same time then we may miss events
1489 if (!isRoutingStable() && clusterEvent) {
Saurav Dasec683dc2018-04-27 18:42:30 -07001490 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
Saurav Das00e553b2018-04-21 17:19:48 -07001491 + "due to clusterEvent {} ms ago .. attempting full reroute",
1492 devId, me.roleInfo(), lce);
1493 if (srManager.mastershipService.isLocalMaster(devId)) {
1494 // old master could have died when populating filters
1495 populatePortAddressingRules(devId);
1496 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001497 // old master could have died when creating groups
Saurav Das00e553b2018-04-21 17:19:48 -07001498 // XXX right now we have no fine-grained way to only make changes
Saurav Das68e1b6a2018-06-11 17:02:31 -07001499 // for the route paths affected by this device. Thus we do a
1500 // full reroute after purging all hash groups. We also try to do
1501 // it only once, irrespective of the number of devices
1502 // that changed mastership when their master instance died.
1503 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
1504 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
1505 if (doFullReroute) {
1506 lastFullReroute = Instant.now();
1507 for (Device dev : srManager.deviceService.getDevices()) {
1508 if (shouldProgram(dev.id())) {
1509 srManager.purgeHashedNextObjectiveStore(dev.id());
1510 }
1511 }
1512 // give small delay to ensure entire store is purged
1513 executorServiceFRR.schedule(new FullRerouteAfterPurge(),
1514 PURGE_DELAY,
1515 TimeUnit.MILLISECONDS);
1516 } else {
1517 log.warn("Full reroute attempted {} ms ago .. skipping", lfrr);
1518 }
Saurav Dasec683dc2018-04-27 18:42:30 -07001519
1520 } else if (edgePortEvent && clusterEvent) {
1521 log.warn("Mastership changed for dev: {}/{} due to clusterEvent {} ms ago "
1522 + "while edge-port event happened {} ms ago "
1523 + " .. reprogramming all edge-ports",
1524 devId, me.roleInfo(), lce, lepe);
1525 if (shouldProgram(devId)) {
1526 srManager.deviceService.getPorts(devId).stream()
1527 .filter(p -> srManager.interfaceService
1528 .isConfigured(new ConnectPoint(devId, p.number())))
1529 .forEach(p -> srManager.processPortUpdated(devId, p));
1530 }
1531
Saurav Das00e553b2018-04-21 17:19:48 -07001532 } else {
1533 log.debug("Stable route-paths .. full reroute not attempted for "
1534 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1535 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1536 deviceEvent, lde, clusterEvent, lce);
1537 }
1538 }
1539 }
1540
Saurav Das68e1b6a2018-06-11 17:02:31 -07001541 /**
1542 * Performs a full reroute of routing rules in all the switches. Assumes
1543 * caller has purged hash groups from the nextObjective store, otherwise
1544 * re-uses ones available in the store.
1545 */
1546 protected final class FullRerouteAfterPurge implements Runnable {
1547 @Override
1548 public void run() {
1549 populateAllRoutingRules();
1550 }
1551 }
1552
1553
Saurav Das261c3002017-06-13 15:35:54 -07001554 //////////////////////////////////////
1555 // Routing helper methods and classes
1556 //////////////////////////////////////
1557
1558 /**
Saurav Das68e1b6a2018-06-11 17:02:31 -07001559 * Computes set of affected routes due to failed link. Assumes previous ecmp
1560 * shortest-path graph exists for a switch in order to compute affected
1561 * routes. If such a graph does not exist, the method returns null.
Saurav Dasb149be12016-06-07 10:08:06 -07001562 *
1563 * @param linkFail the failed link
1564 * @return the set of affected routes which may be empty if no routes were
Saurav Das68e1b6a2018-06-11 17:02:31 -07001565 * affected
Saurav Dasb149be12016-06-07 10:08:06 -07001566 */
sanghofb7c7292015-04-13 15:15:58 -07001567 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sanghofb7c7292015-04-13 15:15:58 -07001568 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1569
1570 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001571 log.debug("Computing the impacted routes for device {} due to link fail",
1572 sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001573 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001574 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001575 continue;
1576 }
Charles Chand66d6712018-03-29 16:03:41 -07001577 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001578 // check for mastership change since last run
1579 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001580 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001581 + " ... nuking current ECMPspg", sw.id());
1582 currentEcmpSpgMap.remove(sw.id());
1583 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001584 lastProgrammed.add(sw.id());
1585
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001586 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1587 if (ecmpSpg == null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001588 log.warn("No existing ECMP graph for switch {}. Assuming "
1589 + "all route-paths have changed towards it.", rootSw);
1590 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
1591 if (targetSw.equals(rootSw)) {
1592 continue;
1593 }
1594 routes.add(Lists.newArrayList(targetSw, rootSw));
1595 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1596 }
1597 continue;
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001598 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001599
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001600 if (log.isDebugEnabled()) {
1601 log.debug("Root switch: {}", rootSw);
1602 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1603 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1604 }
1605 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1606 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1607 // figure out if the broken link affected any route-paths in this graph
1608 for (Integer itrIdx : switchVia.keySet()) {
1609 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1610 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1611 switchVia.get(itrIdx);
1612 for (DeviceId targetSw : swViaMap.keySet()) {
1613 log.trace("TargetSwitch {} --> RootSwitch {}",
1614 targetSw, rootSw);
Saurav Dasb149be12016-06-07 10:08:06 -07001615 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1616 log.trace(" Via:");
Pier Ventreadb4ae62016-11-23 09:57:42 -08001617 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb149be12016-06-07 10:08:06 -07001618 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001619 Set<ArrayList<DeviceId>> subLinks =
1620 computeLinks(targetSw, rootSw, swViaMap);
1621 for (ArrayList<DeviceId> alink: subLinks) {
1622 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1623 alink.get(1).equals(linkFail.dst().deviceId()))
1624 ||
1625 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1626 alink.get(1).equals(linkFail.src().deviceId()))) {
1627 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1628 ArrayList<DeviceId> aRoute = new ArrayList<>();
1629 aRoute.add(targetSw); // switch with rules to populate
1630 aRoute.add(rootSw); // towards this destination
1631 routes.add(aRoute);
1632 break;
1633 }
sanghofb7c7292015-04-13 15:15:58 -07001634 }
1635 }
1636 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001637
sanghofb7c7292015-04-13 15:15:58 -07001638 }
sangho28d0b6d2015-05-07 13:30:57 -07001639
sanghofb7c7292015-04-13 15:15:58 -07001640 }
sanghofb7c7292015-04-13 15:15:58 -07001641 return routes;
1642 }
1643
Saurav Das1b391d52016-11-29 14:27:25 -08001644 /**
1645 * Computes set of affected routes due to new links or failed switches.
1646 *
Saurav Dasdc7f2752018-03-18 21:28:15 -07001647 * @param failedSwitch deviceId of failed switch if any
Saurav Das1b391d52016-11-29 14:27:25 -08001648 * @return the set of affected routes which may be empty if no routes were
1649 * affected
1650 */
Saurav Dascea556f2018-03-05 14:37:16 -08001651 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das261c3002017-06-13 15:35:54 -07001652 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das1b391d52016-11-29 14:27:25 -08001653 ImmutableSet.builder();
sanghofb7c7292015-04-13 15:15:58 -07001654
1655 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das261c3002017-06-13 15:35:54 -07001656 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001657 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001658 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001659 continue;
1660 }
Charles Chand66d6712018-03-29 16:03:41 -07001661 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das261c3002017-06-13 15:35:54 -07001662 if (log.isTraceEnabled()) {
1663 log.trace("Device links for dev: {}", rootSw);
1664 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1665 log.trace("{} -> {} ", link.src().deviceId(),
1666 link.dst().deviceId());
1667 }
Saurav Dasb149be12016-06-07 10:08:06 -07001668 }
Saurav Das00e553b2018-04-21 17:19:48 -07001669 // check for mastership change since last run
1670 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001671 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001672 + " ... nuking current ECMPspg", sw.id());
1673 currentEcmpSpgMap.remove(sw.id());
1674 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001675 lastProgrammed.add(sw.id());
Saurav Das261c3002017-06-13 15:35:54 -07001676 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1677 if (currEcmpSpg == null) {
1678 log.debug("No existing ECMP graph for device {}.. adding self as "
1679 + "changed route", rootSw);
1680 changedRtBldr.add(Lists.newArrayList(rootSw));
1681 continue;
1682 }
1683 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Dasdebcf882018-04-06 20:16:01 -07001684 if (newEcmpSpg == null) {
1685 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1686 continue;
1687 }
Saurav Das261c3002017-06-13 15:35:54 -07001688 if (log.isDebugEnabled()) {
1689 log.debug("Root switch: {}", rootSw);
1690 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1691 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1692 }
1693 // first use the updated/new map to compare to current/existing map
1694 // as new links may have come up
1695 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1696 // then use the current/existing map to compare to updated/new map
1697 // as switch may have been removed
1698 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho28d0b6d2015-05-07 13:30:57 -07001699 }
Saurav Das1b391d52016-11-29 14:27:25 -08001700 }
sanghofb7c7292015-04-13 15:15:58 -07001701
Saurav Dascea556f2018-03-05 14:37:16 -08001702 // handle clearing state for a failed switch in case the switch does
1703 // not have a pair, or the pair is not available
1704 if (failedSwitch != null) {
Charles Chan6dbcd252018-04-02 11:46:38 -07001705 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1706 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001707 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1708 srManager.deviceService.getDevices().forEach(dev -> {
1709 if (!dev.id().equals(failedSwitch) &&
1710 srManager.mastershipService.isLocalMaster(dev.id())) {
1711 log.debug(" : {}", dev.id());
1712 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1713 }
1714 });
1715 }
1716 }
1717
Saurav Das261c3002017-06-13 15:35:54 -07001718 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das1b391d52016-11-29 14:27:25 -08001719 for (ArrayList<DeviceId> route: changedRoutes) {
1720 log.debug("Route changes Target -> Root");
1721 if (route.size() == 1) {
1722 log.debug(" : all -> {}", route.get(0));
1723 } else {
1724 log.debug(" : {} -> {}", route.get(0), route.get(1));
1725 }
1726 }
1727 return changedRoutes;
1728 }
1729
pier572d4a92019-04-25 18:51:51 +02001730 // Utility method to expands the route changes in two elements array using
1731 // the ECMP graph. Caller represents all to dst switch routes with an
1732 // array containing only the dst switch.
1733 private Set<ArrayList<DeviceId>> getExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1734 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1735 // Ensure each routeChanges entry has two elements
1736 for (ArrayList<DeviceId> route : routeChanges) {
1737 if (route.size() == 1) {
1738 DeviceId dstSw = route.get(0);
1739 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
1740 if (ec == null) {
1741 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
1742 return Collections.emptySet();
1743 }
1744 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
1745 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
1746 changedRoutes.add(Lists.newArrayList(target, dstSw));
1747 });
1748 });
1749 } else {
1750 DeviceId targetSw = route.get(0);
1751 DeviceId dstSw = route.get(1);
1752 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
1753 }
1754 }
1755 return changedRoutes;
1756 }
1757
1758 // Utility method to expands the route changes in two elements array using
1759 // the available devices. Caller represents all to dst switch routes with an
1760 // array containing only the dst switch.
1761 private Set<ArrayList<DeviceId>> getAllExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1762 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1763 // Ensure each routeChanges entry has two elements
1764 for (ArrayList<DeviceId> route : routeChanges) {
1765 if (route.size() == 1) {
1766 // route-path changes are from everyone else to this switch
1767 DeviceId dstSw = route.get(0);
1768 srManager.deviceService.getAvailableDevices().forEach(sw -> {
1769 if (!sw.id().equals(dstSw)) {
1770 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
1771 }
1772 });
1773 } else {
1774 changedRoutes.add(route);
1775 }
1776 }
1777 return changedRoutes;
1778 }
1779
Saurav Das1b391d52016-11-29 14:27:25 -08001780 /**
1781 * For the root switch, searches all the target nodes reachable in the base
1782 * graph, and compares paths to the ones in the comp graph.
1783 *
1784 * @param base the graph that is indexed for all reachable target nodes
1785 * from the root node
1786 * @param comp the graph that the base graph is compared to
1787 * @param rootSw both ecmp graphs are calculated for the root node
1788 * @return all the routes that have changed in the base graph
1789 */
1790 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1791 EcmpShortestPathGraph comp,
1792 DeviceId rootSw) {
1793 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1794 ImmutableSet.builder();
1795 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1796 base.getAllLearnedSwitchesAndVia();
1797 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1798 comp.getAllLearnedSwitchesAndVia();
1799 for (Integer itrIdx : baseMap.keySet()) {
1800 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1801 baseMap.get(itrIdx);
1802 for (DeviceId targetSw : baseViaMap.keySet()) {
1803 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1804 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1805 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Das62ae6792017-05-15 15:34:25 -07001806 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das1b391d52016-11-29 14:27:25 -08001807 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das261c3002017-06-13 15:35:54 -07001808 route.add(targetSw); // switch with rules to populate
1809 route.add(rootSw); // towards this destination
Saurav Das1b391d52016-11-29 14:27:25 -08001810 changedRoutesBuilder.add(route);
sanghofb7c7292015-04-13 15:15:58 -07001811 }
1812 }
sangho28d0b6d2015-05-07 13:30:57 -07001813 }
Saurav Das1b391d52016-11-29 14:27:25 -08001814 return changedRoutesBuilder.build();
sanghofb7c7292015-04-13 15:15:58 -07001815 }
1816
Saurav Das261c3002017-06-13 15:35:54 -07001817 /**
1818 * Returns the ECMP paths traversed to reach the target switch.
1819 *
1820 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1821 * @param targetSw the switch to reach from the root switch
1822 * @return the nodes traversed on ECMP paths to the target switch
1823 */
sanghofb7c7292015-04-13 15:15:58 -07001824 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das1b391d52016-11-29 14:27:25 -08001825 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sanghofb7c7292015-04-13 15:15:58 -07001826 for (Integer itrIdx : switchVia.keySet()) {
1827 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1828 switchVia.get(itrIdx);
Saurav Das1b391d52016-11-29 14:27:25 -08001829 if (swViaMap.get(targetSw) == null) {
sanghofb7c7292015-04-13 15:15:58 -07001830 continue;
1831 } else {
Saurav Das1b391d52016-11-29 14:27:25 -08001832 return swViaMap.get(targetSw);
sanghofb7c7292015-04-13 15:15:58 -07001833 }
1834 }
1835
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001836 return null;
sanghofb7c7292015-04-13 15:15:58 -07001837 }
1838
Saurav Das261c3002017-06-13 15:35:54 -07001839 /**
1840 * Utility method to break down a path from src to dst device into a collection
1841 * of links.
1842 *
1843 * @param src src device of the path
1844 * @param dst dst device of the path
1845 * @param viaMap path taken from src to dst device
1846 * @return collection of links in the path
1847 */
sanghofb7c7292015-04-13 15:15:58 -07001848 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1849 DeviceId dst,
1850 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1851 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1852 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1853 DeviceId linkSrc = src;
1854 DeviceId linkDst = dst;
1855 for (DeviceId viaDevice: via) {
1856 ArrayList<DeviceId> link = new ArrayList<>();
1857 linkDst = viaDevice;
1858 link.add(linkSrc);
1859 link.add(linkDst);
1860 subLinks.add(link);
1861 linkSrc = viaDevice;
1862 }
1863 ArrayList<DeviceId> link = new ArrayList<>();
1864 link.add(linkSrc);
1865 link.add(dst);
1866 subLinks.add(link);
1867 }
1868
1869 return subLinks;
1870 }
1871
Charles Chanc22cef32016-04-29 14:38:22 -07001872 /**
Charles Chand66d6712018-03-29 16:03:41 -07001873 * Determines whether this controller instance should program the
Saurav Das261c3002017-06-13 15:35:54 -07001874 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
Charles Chand66d6712018-03-29 16:03:41 -07001875 * <p>
1876 * Once an instance is elected, it will be the only instance responsible for programming
1877 * both devices in the pair until it goes down.
Charles Chanc22cef32016-04-29 14:38:22 -07001878 *
Saurav Das261c3002017-06-13 15:35:54 -07001879 * @param deviceId device identifier to consider for routing
Charles Chand66d6712018-03-29 16:03:41 -07001880 * @return true if current instance should handle the routing for given device
Charles Chanc22cef32016-04-29 14:38:22 -07001881 */
Charles Chand66d6712018-03-29 16:03:41 -07001882 boolean shouldProgram(DeviceId deviceId) {
Charles Chanfbcb8812018-04-18 18:41:05 -07001883 Boolean cached = shouldProgramCache.get(deviceId);
1884 if (cached != null) {
Saurav Das00e553b2018-04-21 17:19:48 -07001885 log.debug("shouldProgram dev:{} cached:{}", deviceId, cached);
Charles Chanfbcb8812018-04-18 18:41:05 -07001886 return cached;
1887 }
1888
Charles Chand66d6712018-03-29 16:03:41 -07001889 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
sangho80f11cb2015-04-01 13:05:26 -07001890
Charles Chand66d6712018-03-29 16:03:41 -07001891 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
1892 NodeId masterNodeId = srManager.mastershipService.getMasterFor(deviceId);
1893 Optional<NodeId> pairMasterNodeId = pairDeviceId.map(srManager.mastershipService::getMasterFor);
Saurav Das68e1b6a2018-06-11 17:02:31 -07001894 log.debug("Evaluate shouldProgram {}/pair={}. currentNodeId={}, master={}, pairMaster={}",
Charles Chand66d6712018-03-29 16:03:41 -07001895 deviceId, pairDeviceId, currentNodeId, masterNodeId, pairMasterNodeId);
1896
1897 // No pair device configured. Only handle when current instance is the master of the device
1898 if (!pairDeviceId.isPresent()) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001899 log.debug("No pair device. currentNodeId={}, master={}", currentNodeId, masterNodeId);
Charles Chand66d6712018-03-29 16:03:41 -07001900 return currentNodeId.equals(masterNodeId);
sangho80f11cb2015-04-01 13:05:26 -07001901 }
Charles Chand66d6712018-03-29 16:03:41 -07001902
1903 // Should not handle if current instance is not the master of either switch
1904 if (!currentNodeId.equals(masterNodeId) &&
1905 !(pairMasterNodeId.isPresent() && currentNodeId.equals(pairMasterNodeId.get()))) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001906 log.debug("Current nodeId {} is neither the master of target device {} nor pair device {}",
Charles Chand66d6712018-03-29 16:03:41 -07001907 currentNodeId, deviceId, pairDeviceId);
1908 return false;
1909 }
1910
1911 Set<DeviceId> key = Sets.newHashSet(deviceId, pairDeviceId.get());
1912
1913 NodeId king = shouldProgram.compute(key, ((k, v) -> {
1914 if (v == null) {
1915 // There is no value in the map. Elect a node
1916 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1917 } else {
1918 if (v.equals(masterNodeId) || v.equals(pairMasterNodeId.orElse(null))) {
1919 // Use the node in the map if it is still alive and is a master of any of the two switches
1920 return v;
1921 } else {
1922 // Previously elected node is no longer the master of either switch. Re-elect a node.
1923 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1924 }
1925 }
1926 }));
1927
1928 if (king != null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001929 log.debug("{} is king, should handle routing for {}/pair={}", king, deviceId, pairDeviceId);
Charles Chanfbcb8812018-04-18 18:41:05 -07001930 shouldProgramCache.put(deviceId, king.equals(currentNodeId));
Charles Chand66d6712018-03-29 16:03:41 -07001931 return king.equals(currentNodeId);
1932 } else {
1933 log.error("Fail to elect a king for {}/pair={}. Abort.", deviceId, pairDeviceId);
Charles Chanfbcb8812018-04-18 18:41:05 -07001934 shouldProgramCache.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07001935 return false;
1936 }
1937 }
1938
1939 /**
1940 * Elects a node who should take responsibility of programming devices.
1941 * @param nodeIds list of candidate node ID
1942 *
1943 * @return NodeId of the node that gets elected, or null if none of the node can be elected
1944 */
1945 private NodeId elect(List<NodeId> nodeIds) {
1946 // Remove all null elements. This could happen when some device has no master
1947 nodeIds.removeAll(Collections.singleton(null));
1948 nodeIds.sort(null);
1949 return nodeIds.size() == 0 ? null : nodeIds.get(0);
1950 }
1951
Charles Chanfbcb8812018-04-18 18:41:05 -07001952 void invalidateShouldProgramCache(DeviceId deviceId) {
1953 shouldProgramCache.remove(deviceId);
1954 }
1955
Charles Chand66d6712018-03-29 16:03:41 -07001956 /**
1957 * Returns a set of device ID, containing given device and its pair device if exist.
1958 *
1959 * @param deviceId Device ID
1960 * @return a set of device ID, containing given device and its pair device if exist.
1961 */
1962 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
1963 Set<DeviceId> ret = Sets.newHashSet(deviceId);
1964 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
1965 return ret;
sangho80f11cb2015-04-01 13:05:26 -07001966 }
1967
Charles Chanc22cef32016-04-29 14:38:22 -07001968 /**
Saurav Das261c3002017-06-13 15:35:54 -07001969 * Returns the set of deviceIds which are the next hops from the targetSw
1970 * to the dstSw according to the latest ECMP spg.
1971 *
1972 * @param targetSw the switch for which the next-hops are desired
1973 * @param dstSw the switch to which the next-hops lead to from the targetSw
1974 * @return set of next hop deviceIds, could be empty if no next hops are found
1975 */
1976 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
1977 boolean targetIsEdge = false;
1978 try {
1979 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
1980 } catch (DeviceConfigNotFoundException e) {
1981 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
1982 + "continuing to getNextHops", targetSw);
1983 }
1984
1985 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
1986 if (ecmpSpg == null) {
1987 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
1988 return ImmutableSet.of();
1989 }
1990 HashMap<Integer,
1991 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
1992 ecmpSpg.getAllLearnedSwitchesAndVia();
1993 for (Integer itrIdx : switchVia.keySet()) {
1994 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1995 switchVia.get(itrIdx);
1996 for (DeviceId target : swViaMap.keySet()) {
1997 if (!target.equals(targetSw)) {
1998 continue;
1999 }
Saurav Das49368392018-04-23 18:42:12 -07002000 // optimization for spines to not use leaves to get
2001 // to a spine or other leaves. Also leaves should not use other
2002 // leaves to get to the destination
2003 if ((!targetIsEdge && itrIdx > 1) || targetIsEdge) {
Saurav Das97241862018-02-14 14:14:54 -08002004 boolean pathdevIsEdge = false;
2005 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
Saurav Das49368392018-04-23 18:42:12 -07002006 log.debug("Evaluating next-hop in path: {}", via);
Saurav Das97241862018-02-14 14:14:54 -08002007 for (DeviceId pathdev : via) {
2008 try {
2009 pathdevIsEdge = srManager.deviceConfiguration
2010 .isEdgeDevice(pathdev);
2011 } catch (DeviceConfigNotFoundException e) {
2012 log.warn(e.getMessage());
2013 }
2014 if (pathdevIsEdge) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002015 log.debug("Avoiding {} hop path for targetSw:{}"
Saurav Das97241862018-02-14 14:14:54 -08002016 + " --> dstSw:{} which goes through an edge"
2017 + " device {} in path {}", itrIdx,
2018 targetSw, dstSw, pathdev, via);
2019 return ImmutableSet.of();
2020 }
2021 }
2022 }
Saurav Das261c3002017-06-13 15:35:54 -07002023 }
2024 Set<DeviceId> nextHops = new HashSet<>();
2025 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
2026 if (via.isEmpty()) {
2027 // the dstSw is the next-hop from the targetSw
2028 nextHops.add(dstSw);
2029 } else {
2030 // first elem is next-hop in each ECMP path
2031 nextHops.add(via.get(0));
2032 }
2033 }
Saurav Das49368392018-04-23 18:42:12 -07002034 log.debug("target {} --> dst: {} has next-hops:{}", targetSw,
2035 dstSw, nextHops);
Saurav Das261c3002017-06-13 15:35:54 -07002036 return nextHops;
2037 }
2038 }
Saurav Das49368392018-04-23 18:42:12 -07002039 log.debug("No next hops found for target:{} --> dst: {}", targetSw, dstSw);
Saurav Das261c3002017-06-13 15:35:54 -07002040 return ImmutableSet.of(); //no next-hops found
2041 }
2042
Saurav Das261c3002017-06-13 15:35:54 -07002043 //////////////////////////////////////
2044 // Filtering rule creation
2045 //////////////////////////////////////
2046
2047 /**
Saurav Dasf9332192017-02-18 14:05:44 -08002048 * Populates filtering rules for port, and punting rules
2049 * for gateway IPs, loopback IPs and arp/ndp traffic.
2050 * Should only be called by the master instance for this device/port.
sangho80f11cb2015-04-01 13:05:26 -07002051 *
2052 * @param deviceId Switch ID to set the rules
2053 */
Charles Chanfbcb8812018-04-18 18:41:05 -07002054 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das07c74602016-04-27 18:35:50 -07002055 // Although device is added, sometimes device store does not have the
2056 // ports for this device yet. It results in missing filtering rules in the
2057 // switch. We will attempt it a few times. If it still does not work,
2058 // user can manually repopulate using CLI command sr-reroute-network
Charles Chan18fa4252017-02-08 16:10:40 -08002059 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002060 if (firstRun == null) {
2061 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das07c74602016-04-27 18:35:50 -07002062 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002063 executorService.schedule(new RetryFilters(deviceId, firstRun),
2064 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sangho80f11cb2015-04-01 13:05:26 -07002065 }
2066
2067 /**
Saurav Dasd1872b02016-12-02 15:43:47 -08002068 * RetryFilters populates filtering objectives for a device and keeps retrying
2069 * till the number of ports filtered are constant for a predefined number
2070 * of attempts.
2071 */
2072 protected final class RetryFilters implements Runnable {
2073 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
2074 DeviceId devId;
2075 int counter;
2076 PortFilterInfo prevRun;
2077
2078 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das07c74602016-04-27 18:35:50 -07002079 devId = deviceId;
Saurav Dasd1872b02016-12-02 15:43:47 -08002080 prevRun = previousRun;
2081 counter = 0;
Saurav Das07c74602016-04-27 18:35:50 -07002082 }
2083
2084 @Override
2085 public void run() {
Charles Chan077314e2017-06-22 14:27:17 -07002086 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chan18fa4252017-02-08 16:10:40 -08002087 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002088 boolean sameResult = prevRun.equals(thisRun);
2089 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
2090 thisRun, sameResult);
Ray Milkey614352e2018-02-26 09:36:31 -08002091 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Dasf9332192017-02-18 14:05:44 -08002092 // exponentially increasing intervals for retries
2093 executorService.schedule(this,
2094 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
2095 TimeUnit.MILLISECONDS);
Saurav Dasd1872b02016-12-02 15:43:47 -08002096 if (!sameResult) {
2097 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
2098 }
Saurav Das07c74602016-04-27 18:35:50 -07002099 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002100 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das07c74602016-04-27 18:35:50 -07002101 }
Saurav Das07c74602016-04-27 18:35:50 -07002102 }
piera9941192019-04-24 16:12:47 +02002103
2104 // Check jobs completion. It returns false if one of the job fails
2105 // and cancel the remaining
2106 private boolean checkJobs(List<Future<Boolean>> futures) {
2107 boolean completed = true;
2108 for (Future<Boolean> future : futures) {
2109 try {
2110 if (completed) {
2111 if (!future.get()) {
2112 completed = false;
2113 }
2114 } else {
2115 future.cancel(true);
2116 }
2117 } catch (InterruptedException | ExecutionException e) {
2118 completed = false;
2119 }
2120 }
2121 return completed;
2122 }
sangho80f11cb2015-04-01 13:05:26 -07002123}