blob: 81fad17d69a5c7a62fa292ffb941e9b3bee628ae [file] [log] [blame]
sanghob35a6192015-04-01 13:05:26 -07001/*
Brian O'Connora09fe5b2017-08-03 21:12:30 -07002 * Copyright 2015-present Open Networking Foundation
sanghob35a6192015-04-01 13:05:26 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.onosproject.segmentrouting;
17
Saurav Dasc88d4662017-05-15 15:34:25 -070018import com.google.common.collect.ImmutableMap;
19import com.google.common.collect.ImmutableMap.Builder;
Charles Chan93e71ba2016-04-29 14:38:22 -070020import com.google.common.collect.ImmutableSet;
Saurav Das4e3224f2016-11-29 14:27:25 -080021import com.google.common.collect.Lists;
sangho20eff1d2015-04-13 15:15:58 -070022import com.google.common.collect.Maps;
23import com.google.common.collect.Sets;
Saurav Dasceccf242017-08-03 18:30:35 -070024
Jonghwan Hyuna76bf032018-04-09 09:40:50 -070025import org.onlab.packet.EthType;
Charles Chan022d6672019-04-17 14:20:26 -070026import com.google.common.collect.Streams;
sangho666cd6d2015-04-14 16:27:13 -070027import org.onlab.packet.Ip4Address;
Pier Ventree0ae7a32016-11-23 09:57:42 -080028import org.onlab.packet.Ip6Address;
sanghob35a6192015-04-01 13:05:26 -070029import org.onlab.packet.IpPrefix;
Charles Chan2fde6d42017-08-23 14:46:43 -070030import org.onlab.packet.MacAddress;
31import org.onlab.packet.VlanId;
pier8b4ba992019-04-24 16:12:47 +020032import org.onlab.util.PredictableExecutor;
33import org.onlab.util.PredictableExecutor.PickyCallable;
Saurav Das7bcbe702017-06-13 15:35:54 -070034import org.onosproject.cluster.NodeId;
Saurav Dasc6dc1772018-04-21 17:19:48 -070035import org.onosproject.mastership.MastershipEvent;
Charles Chan93e71ba2016-04-29 14:38:22 -070036import org.onosproject.net.ConnectPoint;
sanghob35a6192015-04-01 13:05:26 -070037import org.onosproject.net.Device;
38import org.onosproject.net.DeviceId;
sangho20eff1d2015-04-13 15:15:58 -070039import org.onosproject.net.Link;
Charles Chan2fde6d42017-08-23 14:46:43 -070040import org.onosproject.net.PortNumber;
Charles Chan0b4e6182015-11-03 10:42:14 -080041import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
42import org.onosproject.segmentrouting.config.DeviceConfiguration;
Saurav Dasc88d4662017-05-15 15:34:25 -070043import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Jonghwan Hyuna76bf032018-04-09 09:40:50 -070044import org.onosproject.segmentrouting.storekey.DummyVlanIdStoreKey;
Charles Chan2ff1bac2018-03-29 16:03:41 -070045import org.onosproject.store.serializers.KryoNamespaces;
46import org.onosproject.store.service.Serializer;
sanghob35a6192015-04-01 13:05:26 -070047import org.slf4j.Logger;
48import org.slf4j.LoggerFactory;
49
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -070050import java.time.Instant;
sanghob35a6192015-04-01 13:05:26 -070051import java.util.ArrayList;
Charles Chan2ff1bac2018-03-29 16:03:41 -070052import java.util.Collections;
sanghob35a6192015-04-01 13:05:26 -070053import java.util.HashMap;
54import java.util.HashSet;
Saurav Das7bcbe702017-06-13 15:35:54 -070055import java.util.Iterator;
Charles Chan2ff1bac2018-03-29 16:03:41 -070056import java.util.List;
Saurav Das7bcbe702017-06-13 15:35:54 -070057import java.util.Map;
Saurav Dasd2fded02016-12-02 15:43:47 -080058import java.util.Objects;
Charles Chanba6c5752018-04-02 11:46:38 -070059import java.util.Optional;
sanghob35a6192015-04-01 13:05:26 -070060import java.util.Set;
pier8b4ba992019-04-24 16:12:47 +020061import java.util.concurrent.CompletableFuture;
62import java.util.concurrent.ExecutionException;
63import java.util.concurrent.ExecutorService;
64import java.util.concurrent.Future;
Saurav Das59232cf2016-04-27 18:35:50 -070065import java.util.concurrent.ScheduledExecutorService;
66import java.util.concurrent.TimeUnit;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090067import java.util.concurrent.locks.Lock;
68import java.util.concurrent.locks.ReentrantLock;
Charles Chan022d6672019-04-17 14:20:26 -070069import java.util.stream.Collectors;
Saurav Das604ab3a2018-03-18 21:28:15 -070070import java.util.stream.Stream;
71
Pier Ventree0ae7a32016-11-23 09:57:42 -080072import static com.google.common.base.Preconditions.checkNotNull;
73import static java.util.concurrent.Executors.newScheduledThreadPool;
74import static org.onlab.util.Tools.groupedThreads;
sanghob35a6192015-04-01 13:05:26 -070075
/**
 * Default routing handler that is responsible for route computing and
 * routing rule population.
 */
sanghob35a6192015-04-01 13:05:26 -070080public class DefaultRoutingHandler {
Saurav Das018605f2017-02-18 14:05:44 -080081 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey3717e602018-02-01 13:49:47 -080082 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Das018605f2017-02-18 14:05:44 -080083 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasceccf242017-08-03 18:30:35 -070084 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Dasc6dc1772018-04-21 17:19:48 -070085 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Saurav Dasf1027d42018-06-11 17:02:31 -070086 private static final long PURGE_DELAY = 1000; // ms
Charles Chan93e71ba2016-04-29 14:38:22 -070087 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sanghob35a6192015-04-01 13:05:26 -070088
89 private SegmentRoutingManager srManager;
90 private RoutingRulePopulator rulePopulator;
Shashikanth VH013a7bc2015-12-11 01:32:44 +053091 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
92 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho666cd6d2015-04-14 16:27:13 -070093 private DeviceConfiguration config;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090094 private final Lock statusLock = new ReentrantLock();
95 private volatile Status populationStatus;
Yuta HIGUCHI1624df12016-07-21 16:54:33 -070096 private ScheduledExecutorService executorService
Saurav Dasd2fded02016-12-02 15:43:47 -080097 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das60ca8d52018-04-23 18:42:12 -070098 private ScheduledExecutorService executorServiceMstChg
99 = newScheduledThreadPool(1, groupedThreads("masterChg", "mstch-%d", log));
Saurav Dasf1027d42018-06-11 17:02:31 -0700100 private ScheduledExecutorService executorServiceFRR
101 = newScheduledThreadPool(1, groupedThreads("fullRR", "fullRR-%d", log));
pier8b4ba992019-04-24 16:12:47 +0200102 // Route populators - 0 will leverage available processors
103 private static final int DEFAULT_THREADS = 0;
104 private ExecutorService routePopulators;
Saurav Das60ca8d52018-04-23 18:42:12 -0700105
Saurav Dasc6dc1772018-04-21 17:19:48 -0700106 private Instant lastRoutingChange = Instant.EPOCH;
Saurav Dasf1027d42018-06-11 17:02:31 -0700107 private Instant lastFullReroute = Instant.EPOCH;
sanghob35a6192015-04-01 13:05:26 -0700108
Saurav Dasc6dc1772018-04-21 17:19:48 -0700109 // Distributed store to keep track of ONOS instance that should program the
110 // device pair. There should be only one instance (the king) that programs the same pair.
Charles Chan2ff1bac2018-03-29 16:03:41 -0700111 Map<Set<DeviceId>, NodeId> shouldProgram;
Charles Chana8487b02018-04-18 18:41:05 -0700112 Map<DeviceId, Boolean> shouldProgramCache;
Charles Chan2ff1bac2018-03-29 16:03:41 -0700113
Saurav Dasc6dc1772018-04-21 17:19:48 -0700114 // Local store to keep track of all devices that this instance was responsible
115 // for programming in the last run. Helps to determine if mastership changed
116 // during a run - only relevant for programming as a result of topo change.
117 Set<DeviceId> lastProgrammed;
118
/**
 * Represents the default routing population status.
 */
public enum Status {
    /** Population process is not started yet. */
    IDLE,

    /** Population process started. */
    STARTED,

    /** Population process was aborted due to errors, mostly for groups not found. */
    ABORTED,

    /** Population process was finished successfully. */
    SUCCEEDED
}
132
/**
 * Creates a DefaultRoutingHandler object.
 * <p>
 * Builds the "sr-should-program" map through the manager's storage service,
 * creates the local should-program cache, initialises the remaining state
 * via {@link #update(SegmentRoutingManager)} and finally creates the
 * route-populator executor.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.shouldProgram = srManager.storageService.<Set<DeviceId>, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build().asJavaMap();
    this.shouldProgramCache = Maps.newConcurrentMap();
    update(srManager);
    this.routePopulators = new PredictableExecutor(DEFAULT_THREADS,
            groupedThreads("onos/sr", "r-populator-%d", log));
}
149
150 /**
151 * Updates a DefaultRoutingHandler object.
152 *
153 * @param srManager SegmentRoutingManager object
154 */
155 void update(SegmentRoutingManager srManager) {
sanghob35a6192015-04-01 13:05:26 -0700156 this.srManager = srManager;
157 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho666cd6d2015-04-14 16:27:13 -0700158 this.config = checkNotNull(srManager.deviceConfiguration);
sanghob35a6192015-04-01 13:05:26 -0700159 this.populationStatus = Status.IDLE;
sangho20eff1d2015-04-13 15:15:58 -0700160 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Dasc6dc1772018-04-21 17:19:48 -0700161 this.lastProgrammed = Sets.newConcurrentHashSet();
sanghob35a6192015-04-01 13:05:26 -0700162 }
163
164 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700165 * Returns an immutable copy of the current ECMP shortest-path graph as
166 * computed by this controller instance.
167 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700168 * @return immutable copy of the current ECMP graph
Saurav Dasc88d4662017-05-15 15:34:25 -0700169 */
170 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
171 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
172 currentEcmpSpgMap.entrySet().forEach(entry -> {
173 if (entry.getValue() != null) {
174 builder.put(entry.getKey(), entry.getValue());
175 }
176 });
177 return builder.build();
178 }
179
Saurav Dasceccf242017-08-03 18:30:35 -0700180 /**
181 * Acquires the lock used when making routing changes.
182 */
183 public void acquireRoutingLock() {
184 statusLock.lock();
185 }
186
187 /**
188 * Releases the lock used when making routing changes.
189 */
190 public void releaseRoutingLock() {
191 statusLock.unlock();
192 }
193
194 /**
195 * Determines if routing in the network has been stable in the last
196 * STABLITY_THRESHOLD seconds, by comparing the current time to the last
197 * routing change timestamp.
198 *
199 * @return true if stable
200 */
201 public boolean isRoutingStable() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700202 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
203 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700204 log.trace("Routing stable since {}s", now - last);
Saurav Dasceccf242017-08-03 18:30:35 -0700205 return (now - last) > STABLITY_THRESHOLD;
206 }
207
Saurav Das60ca8d52018-04-23 18:42:12 -0700208 /**
209 * Gracefully shuts down the defaultRoutingHandler. Typically called when
210 * the app is deactivated
211 */
212 public void shutdown() {
213 executorService.shutdown();
214 executorServiceMstChg.shutdown();
Saurav Dasf1027d42018-06-11 17:02:31 -0700215 executorServiceFRR.shutdown();
pier8b4ba992019-04-24 16:12:47 +0200216 routePopulators.shutdown();
Saurav Das60ca8d52018-04-23 18:42:12 -0700217 }
Saurav Dasceccf242017-08-03 18:30:35 -0700218
//////////////////////////////////////
//  Route path handling
//////////////////////////////////////

/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network
 *      a) due to configuration change
 *      b) due to route-added event
 *      c) due to change in the topology
 */
229
Saurav Dasc88d4662017-05-15 15:34:25 -0700230 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700231 * Populates all routing rules to all switches. Typically triggered at
232 * startup or after a configuration event.
sanghob35a6192015-04-01 13:05:26 -0700233 */
Saurav Dasc88d4662017-05-15 15:34:25 -0700234 public void populateAllRoutingRules() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700235 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900236 statusLock.lock();
237 try {
Saurav Das7bcbe702017-06-13 15:35:54 -0700238 if (populationStatus == Status.STARTED) {
239 log.warn("Previous rule population is not finished. Cannot"
240 + " proceed with populateAllRoutingRules");
241 return;
242 }
243
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900244 populationStatus = Status.STARTED;
245 rulePopulator.resetCounter();
Saurav Das7bcbe702017-06-13 15:35:54 -0700246 log.info("Starting to populate all routing rules");
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900247 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sanghob35a6192015-04-01 13:05:26 -0700248
Saurav Das7bcbe702017-06-13 15:35:54 -0700249 // take a snapshot of the topology
250 updatedEcmpSpgMap = new HashMap<>();
251 Set<EdgePair> edgePairs = new HashSet<>();
252 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800253 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700254 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800255 new EcmpShortestPathGraph(dstSw, srManager);
256 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700257 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
258 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700259 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700260 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
261 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
262 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700263 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700264
265 if (!shouldProgram(dstSw)) {
Saurav Dasc6dc1772018-04-21 17:19:48 -0700266 lastProgrammed.remove(dstSw);
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900267 continue;
Saurav Dasc6dc1772018-04-21 17:19:48 -0700268 } else {
269 lastProgrammed.add(dstSw);
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900270 }
Saurav Dasc6dc1772018-04-21 17:19:48 -0700271 // To do a full reroute, assume all route-paths have changed
Charles Chan2ff1bac2018-03-29 16:03:41 -0700272 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800273 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
274 if (targetSw.equals(dev)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700275 continue;
276 }
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800277 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das7bcbe702017-06-13 15:35:54 -0700278 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900279 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700280 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900281
Saurav Das7bcbe702017-06-13 15:35:54 -0700282 if (!redoRouting(routeChanges, edgePairs, null)) {
283 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
284 populationStatus = Status.ABORTED;
285 log.warn("Failed to repopulate all routing rules.");
286 return;
sanghob35a6192015-04-01 13:05:26 -0700287 }
288
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900289 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
290 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700291 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900292 rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700293 return;
pier4bc3fa92019-04-19 20:55:53 +0200294 } catch (Exception e) {
295 log.error("populateAllRoutingRules thrown an exception: {}",
296 e.getMessage(), e);
297 populationStatus = Status.ABORTED;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900298 } finally {
299 statusLock.unlock();
sanghob35a6192015-04-01 13:05:26 -0700300 }
sanghob35a6192015-04-01 13:05:26 -0700301 }
302
sangho20eff1d2015-04-13 15:15:58 -0700303 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700304 * Populate rules from all other edge devices to the connect-point(s)
305 * specified for the given subnets.
306 *
307 * @param cpts connect point(s) of the subnets being added
308 * @param subnets subnets being added
Charles Chan2fde6d42017-08-23 14:46:43 -0700309 */
310 // XXX refactor
Saurav Das7bcbe702017-06-13 15:35:54 -0700311 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan71e64f12017-09-11 15:21:57 -0700312 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
313 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
314 return;
315 }
316
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700317 lastRoutingChange = Instant.now();
Saurav Das7bcbe702017-06-13 15:35:54 -0700318 statusLock.lock();
319 try {
320 if (populationStatus == Status.STARTED) {
321 log.warn("Previous rule population is not finished. Cannot"
322 + " proceed with routing rules for added routes");
323 return;
324 }
325 populationStatus = Status.STARTED;
326 rulePopulator.resetCounter();
Charles Chan2fde6d42017-08-23 14:46:43 -0700327 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
328 subnets, cpts);
Saurav Dasc568c342018-01-25 09:49:01 -0800329 // In principle an update to a subnet/prefix should not require a
330 // new ECMPspg calculation as it is not a topology event. As a
331 // result, we use the current/existing ECMPspg in the updated map
332 // used by the redoRouting method.
Saurav Das15a81782018-02-09 09:15:03 -0800333 if (updatedEcmpSpgMap == null) {
334 updatedEcmpSpgMap = new HashMap<>();
335 }
Saurav Dasc568c342018-01-25 09:49:01 -0800336 currentEcmpSpgMap.entrySet().forEach(entry -> {
337 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase7f51012018-02-09 17:26:45 -0800338 if (log.isTraceEnabled()) {
339 log.trace("Root switch: {}", entry.getKey());
340 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Dasc568c342018-01-25 09:49:01 -0800341 }
342 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700343 Set<EdgePair> edgePairs = new HashSet<>();
344 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
345 boolean handleRouting = false;
346
347 if (cpts.size() == 2) {
348 // ensure connect points are edge-pairs
349 Iterator<ConnectPoint> iter = cpts.iterator();
350 DeviceId dev1 = iter.next().deviceId();
Charles Chanba6c5752018-04-02 11:46:38 -0700351 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
352 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
353 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700354 } else {
355 log.warn("Connectpoints {} for subnets {} not on "
356 + "pair-devices.. aborting populateSubnet", cpts, subnets);
357 populationStatus = Status.ABORTED;
358 return;
359 }
360 for (ConnectPoint cp : cpts) {
Saurav Dasc568c342018-01-25 09:49:01 -0800361 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
362 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700363 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800364 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
365 log.warn("populateSubnet: no updated graph for dev:{}"
366 + " ... creating", cp.deviceId());
367 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700368 if (!shouldProgram(cp.deviceId())) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700369 continue;
370 }
371 handleRouting = true;
372 }
373 } else {
374 // single connect point
375 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Dasc568c342018-01-25 09:49:01 -0800376 if (updatedEcmpSpgMap.get(dstSw) == null) {
377 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700378 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800379 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
380 log.warn("populateSubnet: no updated graph for dev:{}"
381 + " ... creating", dstSw);
382 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700383 handleRouting = shouldProgram(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700384 }
385
386 if (!handleRouting) {
387 log.debug("This instance is not handling ecmp routing to the "
388 + "connectPoint(s) {}", cpts);
389 populationStatus = Status.ABORTED;
390 return;
391 }
392
393 // if it gets here, this instance should handle routing for the
394 // connectpoint(s). Assume all route-paths have to be updated to
395 // the connectpoint(s) with the following exceptions
396 // 1. if target is non-edge no need for routing rules
397 // 2. if target is one of the connectpoints
398 for (ConnectPoint cp : cpts) {
399 DeviceId dstSw = cp.deviceId();
400 for (Device targetSw : srManager.deviceService.getDevices()) {
401 boolean isEdge = false;
402 try {
403 isEdge = config.isEdgeDevice(targetSw.id());
404 } catch (DeviceConfigNotFoundException e) {
Charles Chan92726132018-02-16 17:20:54 -0800405 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
406 continue;
Saurav Das7bcbe702017-06-13 15:35:54 -0700407 }
Charles Chanba6c5752018-04-02 11:46:38 -0700408 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700409 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chanba6c5752018-04-02 11:46:38 -0700410 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700411 continue;
412 }
413 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
414 }
415 }
416
417 if (!redoRouting(routeChanges, edgePairs, subnets)) {
418 log.debug("populateSubnet: populationStatus is ABORTED");
419 populationStatus = Status.ABORTED;
420 log.warn("Failed to repopulate the rules for subnet.");
421 return;
422 }
423
424 log.debug("populateSubnet: populationStatus is SUCCEEDED");
425 populationStatus = Status.SUCCEEDED;
426 log.info("Completed subnet population. Total # of rules pushed : {}",
427 rulePopulator.getCounter());
428 return;
429
pier4bc3fa92019-04-19 20:55:53 +0200430 } catch (Exception e) {
431 log.error("populateSubnet thrown an exception: {}",
432 e.getMessage(), e);
433 populationStatus = Status.ABORTED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700434 } finally {
435 statusLock.unlock();
436 }
437 }
438
439 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700440 * Populates the routing rules or makes hash group changes according to the
441 * route-path changes due to link failure, switch failure or link up. This
442 * method should only be called for one of these three possible event-types.
Saurav Das604ab3a2018-03-18 21:28:15 -0700443 * Note that when a switch goes away, all of its links fail as well, but
444 * this is handled as a single switch removal event.
sangho20eff1d2015-04-13 15:15:58 -0700445 *
Saurav Das604ab3a2018-03-18 21:28:15 -0700446 * @param linkDown the single failed link, or null for other conditions such
447 * as link-up or a removed switch
Saurav Dasc88d4662017-05-15 15:34:25 -0700448 * @param linkUp the single link up, or null for other conditions such as
Saurav Das604ab3a2018-03-18 21:28:15 -0700449 * link-down or a removed switch
450 * @param switchDown the removed switch, or null for other conditions such
451 * as link-down or link-up
452 * @param seenBefore true if this event is for a linkUp or linkDown for a
453 * seen link
454 */
455 // TODO This method should be refactored into three separated methods
Charles Chan15281332018-06-19 20:56:33 -0700456 public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
457 DeviceId switchDown, boolean seenBefore) {
Saurav Das604ab3a2018-03-18 21:28:15 -0700458 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
459 .count() != 1) {
Saurav Dasc88d4662017-05-15 15:34:25 -0700460 log.warn("Only one event can be handled for link status change .. aborting");
461 return;
462 }
Saurav Das604ab3a2018-03-18 21:28:15 -0700463
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700464 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900465 statusLock.lock();
466 try {
sangho20eff1d2015-04-13 15:15:58 -0700467
468 if (populationStatus == Status.STARTED) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700469 log.warn("Previous rule population is not finished. Cannot"
Saurav Dasc568c342018-01-25 09:49:01 -0800470 + " proceeed with routingRules for Topology change");
Saurav Dasc88d4662017-05-15 15:34:25 -0700471 return;
sangho20eff1d2015-04-13 15:15:58 -0700472 }
473
Saurav Das7bcbe702017-06-13 15:35:54 -0700474 // Take snapshots of the topology
sangho45b009c2015-05-07 13:30:57 -0700475 updatedEcmpSpgMap = new HashMap<>();
Saurav Das7bcbe702017-06-13 15:35:54 -0700476 Set<EdgePair> edgePairs = new HashSet<>();
sangho45b009c2015-05-07 13:30:57 -0700477 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH013a7bc2015-12-11 01:32:44 +0530478 EcmpShortestPathGraph ecmpSpgUpdated =
479 new EcmpShortestPathGraph(sw.id(), srManager);
sangho45b009c2015-05-07 13:30:57 -0700480 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700481 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
482 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700483 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700484 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
485 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
486 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700487 }
sangho45b009c2015-05-07 13:30:57 -0700488 }
489
Saurav Dasc568c342018-01-25 09:49:01 -0800490 log.info("Starting to populate routing rules from Topology change");
sangho52abe3a2015-05-05 14:13:34 -0700491
sangho20eff1d2015-04-13 15:15:58 -0700492 Set<ArrayList<DeviceId>> routeChanges;
Saurav Dasc88d4662017-05-15 15:34:25 -0700493 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700494 + "populationStatus is STARTED");
sangho20eff1d2015-04-13 15:15:58 -0700495 populationStatus = Status.STARTED;
Saurav Dasc568c342018-01-25 09:49:01 -0800496 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
497 boolean hashGroupsChanged = false;
Saurav Das4e3224f2016-11-29 14:27:25 -0800498 // try optimized re-routing
Saurav Dasc88d4662017-05-15 15:34:25 -0700499 if (linkDown == null) {
500 // either a linkUp or a switchDown - compute all route changes by
501 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dase0d4c872018-03-05 14:37:16 -0800502 routeChanges = computeRouteChange(switchDown);
Saurav Dasc88d4662017-05-15 15:34:25 -0700503
pier1c2ca732019-04-25 18:51:51 +0200504 // deal with linkUp
505 if (linkUp != null) {
506 // deal with linkUp of a seen-before link
507 if (seenBefore) {
508 // link previously seen before
509 // do hash-bucket changes instead of a re-route
510 processHashGroupChangeForLinkUp(routeChanges);
511 // clear out routesChanges so a re-route is not attempted
512 routeChanges = ImmutableSet.of();
513 hashGroupsChanged = true;
514 } else {
515 // do hash-bucket changes first, method will return changed routes;
516 // for each route not changed it will perform a reroute
517 Set<ArrayList<DeviceId>> changedRoutes = processHashGroupChangeForLinkUp(routeChanges);
518 Set<ArrayList<DeviceId>> routeChangesTemp = getExpandedRoutes(routeChanges);
519 changedRoutes.forEach(routeChangesTemp::remove);
520 // if routesChanges is empty a re-route is not attempted
521 routeChanges = routeChangesTemp;
522 for (ArrayList<DeviceId> route : routeChanges) {
523 log.debug("remaining routes Target -> Root");
524 if (route.size() == 1) {
525 log.debug(" : all -> {}", route.get(0));
526 } else {
527 log.debug(" : {} -> {}", route.get(0), route.get(1));
528 }
529 }
530 // Mark hash groups as changed
531 if (!changedRoutes.isEmpty()) {
532 hashGroupsChanged = true;
533 }
534 }
535
Saurav Dasc88d4662017-05-15 15:34:25 -0700536 }
537
Saurav Das9df5b7c2017-08-14 16:44:43 -0700538 //deal with switchDown
539 if (switchDown != null) {
pier1c2ca732019-04-25 18:51:51 +0200540 processHashGroupChangeForFailure(routeChanges, switchDown);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700541 // clear out routesChanges so a re-route is not attempted
542 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800543 hashGroupsChanged = true;
Saurav Das9df5b7c2017-08-14 16:44:43 -0700544 }
sangho20eff1d2015-04-13 15:15:58 -0700545 } else {
Saurav Dasc88d4662017-05-15 15:34:25 -0700546 // link has gone down
547 // Compare existing ECMP SPG only with the link that went down
548 routeChanges = computeDamagedRoutes(linkDown);
pier1c2ca732019-04-25 18:51:51 +0200549 processHashGroupChangeForFailure(routeChanges, null);
Saurav Dasf1027d42018-06-11 17:02:31 -0700550 // clear out routesChanges so a re-route is not attempted
551 routeChanges = ImmutableSet.of();
552 hashGroupsChanged = true;
Saurav Dasb5c236e2016-06-07 10:08:06 -0700553 }
554
sangho20eff1d2015-04-13 15:15:58 -0700555 if (routeChanges.isEmpty()) {
Saurav Dasc568c342018-01-25 09:49:01 -0800556 if (hashGroupsChanged) {
557 log.info("Hash-groups changed for link status change");
558 } else {
559 log.info("No re-route or re-hash attempted for the link"
560 + " status change");
561 updatedEcmpSpgMap.keySet().forEach(devId -> {
562 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
563 log.debug("Updating ECMPspg for remaining dev:{}", devId);
564 });
565 }
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700566 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700567 populationStatus = Status.SUCCEEDED;
Saurav Dasc88d4662017-05-15 15:34:25 -0700568 return;
sangho20eff1d2015-04-13 15:15:58 -0700569 }
570
pier1c2ca732019-04-25 18:51:51 +0200571 if (hashGroupsChanged) {
572 log.debug("Hash-groups changed for link status change");
573 }
574
Saurav Dasc88d4662017-05-15 15:34:25 -0700575 // reroute of routeChanges
Saurav Das7bcbe702017-06-13 15:35:54 -0700576 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700577 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700578 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700579 log.info("Completed repopulation of rules for link-status change."
580 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700581 return;
sangho20eff1d2015-04-13 15:15:58 -0700582 } else {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700583 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sangho20eff1d2015-04-13 15:15:58 -0700584 populationStatus = Status.ABORTED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700585 log.warn("Failed to repopulate the rules for link status change.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700586 return;
sangho20eff1d2015-04-13 15:15:58 -0700587 }
pier4bc3fa92019-04-19 20:55:53 +0200588 } catch (Exception e) {
589 log.error("populateRoutingRulesForLinkStatusChange thrown an exception: {}",
590 e.getMessage(), e);
591 populationStatus = Status.ABORTED;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900592 } finally {
593 statusLock.unlock();
sangho20eff1d2015-04-13 15:15:58 -0700594 }
595 }
596
Saurav Dasc88d4662017-05-15 15:34:25 -0700597 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700598 * Processes a set a route-path changes by reprogramming routing rules and
599 * creating new hash-groups or editing them if necessary. This method also
600 * determines the next-hops for the route-path from the src-switch (target)
601 * of the path towards the dst-switch of the path.
Saurav Dasc88d4662017-05-15 15:34:25 -0700602 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700603 * @param routeChanges a set of route-path changes, where each route-path is
604 * a list with its first element the src-switch (target)
605 * of the path, and the second element the dst-switch of
606 * the path.
607 * @param edgePairs a set of edge-switches that are paired by configuration
608 * @param subnets a set of prefixes that need to be populated in the routing
609 * table of the target switch in the route-path. Can be null,
610 * in which case all the prefixes belonging to the dst-switch
611 * will be populated in the target switch
612 * @return true if successful in repopulating all routes
Saurav Dasc88d4662017-05-15 15:34:25 -0700613 */
Saurav Das7bcbe702017-06-13 15:35:54 -0700614 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
615 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
616 // first make every entry two-elements
pier1c2ca732019-04-25 18:51:51 +0200617 Set<ArrayList<DeviceId>> changedRoutes = getExpandedRoutes(routeChanges);
618 // no valid routes - fail fast
619 if (changedRoutes.isEmpty()) {
620 return false;
Saurav Das7bcbe702017-06-13 15:35:54 -0700621 }
622
623 // now process changedRoutes according to edgePairs
624 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
625 return false; //abort routing and fail fast
626 }
627
628 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Dasc568c342018-01-25 09:49:01 -0800629 Set<DeviceId> updatedDevices = Sets.newHashSet();
630 if (!redoRoutingIndividualDests(subnets, changedRoutes,
631 updatedDevices)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700632 return false; //abort routing and fail fast
633 }
634
Saurav Das7bcbe702017-06-13 15:35:54 -0700635 // update ecmpSPG for all edge-pairs
636 for (EdgePair ep : edgePairs) {
637 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
638 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
639 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
640 }
Saurav Dasc568c342018-01-25 09:49:01 -0800641
642 // here is where we update all devices not touched by this instance
643 updatedEcmpSpgMap.keySet().stream()
644 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
645 .filter(devId -> !updatedDevices.contains(devId))
646 .forEach(devId -> {
647 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
648 log.debug("Updating ECMPspg for remaining dev:{}", devId);
649 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700650 return true;
651 }
652
653 /**
654 * Programs targetSw in the changedRoutes for given prefixes reachable by
655 * an edgePair. If no prefixes are given, the method will use configured
656 * subnets/prefixes. If some configured subnets belong only to a specific
657 * destination in the edgePair, then the target switch will be programmed
658 * only to that destination.
659 *
660 * @param edgePairs set of edge-pairs for which target will be programmed
661 * @param subnets a set of prefixes that need to be populated in the routing
662 * table of the target switch in the changedRoutes. Can be null,
663 * in which case all the configured prefixes belonging to the
664 * paired switches will be populated in the target switch
665 * @param changedRoutes a set of route-path changes, where each route-path is
666 * a list with its first element the src-switch (target)
667 * of the path, and the second element the dst-switch of
668 * the path.
669 * @return true if successful
670 */
pier8b4ba992019-04-24 16:12:47 +0200671 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs, Set<IpPrefix> subnets,
672 Set<ArrayList<DeviceId>> changedRoutes) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700673 for (EdgePair ep : edgePairs) {
674 // temp store for a target's changedRoutes to this edge-pair
675 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
676 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
677 while (i.hasNext()) {
678 ArrayList<DeviceId> route = i.next();
679 DeviceId dstSw = route.get(1);
680 if (ep.includes(dstSw)) {
681 // routeChange for edge pair found
682 // sort by target iff target is edge and remove from changedRoutes
683 DeviceId targetSw = route.get(0);
684 try {
685 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
686 continue;
687 }
688 } catch (DeviceConfigNotFoundException e) {
689 log.warn(e.getMessage() + "aborting redoRouting");
690 return false;
691 }
692 // route is from another edge to this edge-pair
693 if (targetRoutes.containsKey(targetSw)) {
694 targetRoutes.get(targetSw).add(route);
695 } else {
696 Set<ArrayList<DeviceId>> temp = new HashSet<>();
697 temp.add(route);
698 targetRoutes.put(targetSw, temp);
699 }
700 i.remove();
701 }
702 }
703 // so now for this edgepair we have a per target set of routechanges
704 // process target->edgePair route
pier8b4ba992019-04-24 16:12:47 +0200705 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das7bcbe702017-06-13 15:35:54 -0700706 for (Map.Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
707 targetRoutes.entrySet()) {
708 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
709 entry.getKey(), ep);
pier8b4ba992019-04-24 16:12:47 +0200710 futures.add(routePopulators.submit(new RedoRoutingEdgePair(entry.getKey(), entry.getValue(),
711 subnets, ep)));
712 }
713 if (!checkJobs(futures)) {
714 return false;
Saurav Das7bcbe702017-06-13 15:35:54 -0700715 }
716 // if it gets here it has succeeded for all targets to this edge-pair
717 }
718 return true;
719 }
720
pier8b4ba992019-04-24 16:12:47 +0200721 private final class RedoRoutingEdgePair implements PickyCallable<Boolean> {
722 private DeviceId targetSw;
723 private Set<ArrayList<DeviceId>> routes;
724 private Set<IpPrefix> subnets;
725 private EdgePair ep;
726
727 /**
728 * Builds a RedoRoutingEdgePair task which provides a result.
729 *
730 * @param targetSw the target switch
731 * @param routes the changed routes
732 * @param subnets the subnets
733 * @param ep the edge pair
734 */
735 RedoRoutingEdgePair(DeviceId targetSw, Set<ArrayList<DeviceId>> routes,
736 Set<IpPrefix> subnets, EdgePair ep) {
737 this.targetSw = targetSw;
738 this.routes = routes;
739 this.subnets = subnets;
740 this.ep = ep;
741 }
742
743 @Override
744 public Boolean call() throws Exception {
745 return redoRoutingEdgePair();
746 }
747
748 @Override
749 public int hint() {
750 return targetSw.hashCode();
751 }
752
753 private boolean redoRoutingEdgePair() {
754 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
755 routes.forEach(route -> {
756 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
757 log.debug("route: target {} -> dst {} found with next-hops {}",
758 route.get(0), route.get(1), nhops);
759 perDstNextHops.put(route.get(1), nhops);
760 });
761
762 List<Set<IpPrefix>> batchedSubnetDev1, batchedSubnetDev2;
763 if (subnets != null) {
764 batchedSubnetDev1 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
765 batchedSubnetDev2 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
766 } else {
767 batchedSubnetDev1 = config.getBatchedSubnets(ep.dev1);
768 batchedSubnetDev2 = config.getBatchedSubnets(ep.dev2);
769 }
770 List<Set<IpPrefix>> batchedSubnetBoth = Streams
771 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.intersection(a, b))
772 .filter(set -> !set.isEmpty())
773 .collect(Collectors.toList());
774 List<Set<IpPrefix>> batchedSubnetDev1Only = Streams
775 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(a, b))
776 .filter(set -> !set.isEmpty())
777 .collect(Collectors.toList());
778 List<Set<IpPrefix>> batchedSubnetDev2Only = Streams
779 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(b, a))
780 .filter(set -> !set.isEmpty())
781 .collect(Collectors.toList());
782
783 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
784 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
785
786 // handle routing to subnets common to edge-pair
787 // only if the targetSw is not part of the edge-pair and there
788 // exists a next hop to at least one of the devices in the edge-pair
789 if (!ep.includes(targetSw)
790 && ((nhDev1 != null && !nhDev1.isEmpty()) || (nhDev2 != null && !nhDev2.isEmpty()))) {
791 log.trace("getSubnets on both {} and {}: {}", ep.dev1, ep.dev2, batchedSubnetBoth);
792 for (Set<IpPrefix> prefixes : batchedSubnetBoth) {
793 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, ep.dev2,
794 perDstNextHops, prefixes)) {
795 return false; // abort everything and fail fast
796 }
797 }
798
799 }
800 // handle routing to subnets that only belong to dev1 only if
801 // a next-hop exists from the target to dev1
802 if (!batchedSubnetDev1Only.isEmpty() &&
803 batchedSubnetDev1Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
804 nhDev1 != null && !nhDev1.isEmpty()) {
805 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
806 onlyDev1NextHops.put(ep.dev1, nhDev1);
807 log.trace("getSubnets on {} only: {}", ep.dev1, batchedSubnetDev1Only);
808 for (Set<IpPrefix> prefixes : batchedSubnetDev1Only) {
809 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, null,
810 onlyDev1NextHops, prefixes)) {
811 return false; // abort everything and fail fast
812 }
813 }
814 }
815 // handle routing to subnets that only belong to dev2 only if
816 // a next-hop exists from the target to dev2
817 if (!batchedSubnetDev2Only.isEmpty() &&
818 batchedSubnetDev2Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
819 nhDev2 != null && !nhDev2.isEmpty()) {
820 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
821 onlyDev2NextHops.put(ep.dev2, nhDev2);
822 log.trace("getSubnets on {} only: {}", ep.dev2, batchedSubnetDev2Only);
823 for (Set<IpPrefix> prefixes : batchedSubnetDev2Only) {
824 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev2, null,
825 onlyDev2NextHops, prefixes)) {
826 return false; // abort everything and fail fast
827 }
828 }
829 }
830 return true;
831 }
832 }
833
Saurav Das7bcbe702017-06-13 15:35:54 -0700834 /**
835 * Programs targetSw in the changedRoutes for given prefixes reachable by
836 * a destination switch that is not part of an edge-pair.
837 * If no prefixes are given, the method will use configured subnets/prefixes.
838 *
839 * @param subnets a set of prefixes that need to be populated in the routing
840 * table of the target switch in the changedRoutes. Can be null,
841 * in which case all the configured prefixes belonging to the
842 * paired switches will be populated in the target switch
843 * @param changedRoutes a set of route-path changes, where each route-path is
844 * a list with its first element the src-switch (target)
845 * of the path, and the second element the dst-switch of
846 * the path.
847 * @return true if successful
848 */
pier8b4ba992019-04-24 16:12:47 +0200849 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets, Set<ArrayList<DeviceId>> changedRoutes,
Saurav Dasc568c342018-01-25 09:49:01 -0800850 Set<DeviceId> updatedDevices) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700851 // aggregate route-path changes for each dst device
852 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
853 new HashMap<>();
854 for (ArrayList<DeviceId> route: changedRoutes) {
855 DeviceId dstSw = route.get(1);
856 ArrayList<ArrayList<DeviceId>> deviceRoutes =
857 routesBydevice.get(dstSw);
858 if (deviceRoutes == null) {
859 deviceRoutes = new ArrayList<>();
860 routesBydevice.put(dstSw, deviceRoutes);
861 }
862 deviceRoutes.add(route);
863 }
pier8b4ba992019-04-24 16:12:47 +0200864 // iterate over the impacted devices
Saurav Das7bcbe702017-06-13 15:35:54 -0700865 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
866 ArrayList<ArrayList<DeviceId>> deviceRoutes =
867 routesBydevice.get(impactedDstDevice);
pier8b4ba992019-04-24 16:12:47 +0200868 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das7bcbe702017-06-13 15:35:54 -0700869 for (ArrayList<DeviceId> route: deviceRoutes) {
870 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
871 route.get(0), route.get(1));
pier8b4ba992019-04-24 16:12:47 +0200872 futures.add(routePopulators.submit(new RedoRoutingIndividualDest(subnets, route)));
873 }
874 // check the execution of each job
875 if (!checkJobs(futures)) {
876 return false;
Saurav Das7bcbe702017-06-13 15:35:54 -0700877 }
878 //Only if all the flows for all impacted routes to a
879 //specific target are pushed successfully, update the
880 //ECMP graph for that target. Or else the next event
881 //would not see any changes in the ECMP graphs.
882 //In another case, the target switch has gone away, so
883 //routes can't be installed. In that case, the current map
884 //is updated here, without any flows being pushed.
885 currentEcmpSpgMap.put(impactedDstDevice,
886 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Dasc568c342018-01-25 09:49:01 -0800887 updatedDevices.add(impactedDstDevice);
Saurav Das7bcbe702017-06-13 15:35:54 -0700888 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
889 }
890 return true;
891 }
892
pier8b4ba992019-04-24 16:12:47 +0200893 private final class RedoRoutingIndividualDest implements PickyCallable<Boolean> {
894 private DeviceId targetSw;
895 private ArrayList<DeviceId> route;
896 private Set<IpPrefix> subnets;
897
898 /**
899 * Builds a RedoRoutingIndividualDest task, which provides a result.
900 *
901 * @param subnets a set of prefixes
902 * @param route a route-path change
903 */
904 RedoRoutingIndividualDest(Set<IpPrefix> subnets, ArrayList<DeviceId> route) {
905 this.targetSw = route.get(0);
906 this.route = route;
907 this.subnets = subnets;
908 }
909
910 @Override
911 public Boolean call() throws Exception {
912 DeviceId dstSw = route.get(1); // same as impactedDstDevice
913 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
914 if (nextHops.isEmpty()) {
915 log.debug("Could not find next hop from target:{} --> dst {} "
916 + "skipping this route", targetSw, dstSw);
917 return true;
918 }
919 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
920 nhops.put(dstSw, nextHops);
921 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
922 (subnets == null) ? Sets.newHashSet() : subnets)) {
923 return false; // abort routing and fail fast
924 }
925 log.debug("Populating flow rules from target: {} to dst: {}"
926 + " is successful", targetSw, dstSw);
927 return true;
928 }
929
930 @Override
931 public int hint() {
932 return targetSw.hashCode();
933 }
934 }
935
Saurav Das7bcbe702017-06-13 15:35:54 -0700936 /**
937 * Populate ECMP rules for subnets from target to destination via nexthops.
938 *
939 * @param targetSw Device ID of target switch in which rules will be programmed
940 * @param destSw1 Device ID of final destination switch to which the rules will forward
941 * @param destSw2 Device ID of paired destination switch to which the rules will forward
942 * A null deviceId indicates packets should only be sent to destSw1
Saurav Dasa4020382018-02-14 14:14:54 -0800943 * @param nextHops Map of a set of next hops per destSw
Saurav Das7bcbe702017-06-13 15:35:54 -0700944 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
945 * @return true if it succeeds in populating rules
946 */ // refactor
pier8b4ba992019-04-24 16:12:47 +0200947 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destSw1, DeviceId destSw2,
948 Map<DeviceId, Set<DeviceId>> nextHops, Set<IpPrefix> subnets) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700949 boolean result;
950 // If both target switch and dest switch are edge routers, then set IP
951 // rule for both subnet and router IP.
952 boolean targetIsEdge;
953 boolean dest1IsEdge;
954 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
955 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
956
957 try {
958 targetIsEdge = config.isEdgeDevice(targetSw);
959 dest1IsEdge = config.isEdgeDevice(destSw1);
960 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
961 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
962 if (destSw2 != null) {
963 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
964 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
965 }
966 } catch (DeviceConfigNotFoundException e) {
967 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700968 return false;
969 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700970
971 if (targetIsEdge && dest1IsEdge) {
Charles Chan022d6672019-04-17 14:20:26 -0700972 List<Set<IpPrefix>> batchedSubnets;
973 if (subnets != null && !subnets.isEmpty()) {
974 batchedSubnets = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
975 } else {
976 batchedSubnets = config.getBatchedSubnets(destSw1);
977 }
Saurav Dasa4020382018-02-14 14:14:54 -0800978 // XXX - Rethink this - ignoring routerIPs in all other switches
979 // even edge to edge switches
Saurav Das7bcbe702017-06-13 15:35:54 -0700980 /*subnets.add(dest1RouterIpv4.toIpPrefix());
981 if (dest1RouterIpv6 != null) {
982 subnets.add(dest1RouterIpv6.toIpPrefix());
983 }
984 if (destSw2 != null && dest2RouterIpv4 != null) {
985 subnets.add(dest2RouterIpv4.toIpPrefix());
986 if (dest2RouterIpv6 != null) {
987 subnets.add(dest2RouterIpv6.toIpPrefix());
988 }
989 }*/
Charles Chan022d6672019-04-17 14:20:26 -0700990 log.trace("getSubnets on {}: {}", destSw1, batchedSubnets);
991 for (Set<IpPrefix> prefixes : batchedSubnets) {
992 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
993 + "for subnets {}", targetSw, destSw1,
994 (destSw2 != null) ? ("& " + destSw2) : "",
995 prefixes);
996 if (!rulePopulator.populateIpRuleForSubnet(targetSw, prefixes, destSw1, destSw2, nextHops)) {
997 return false;
998 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700999 }
Saurav Dasc88d4662017-05-15 15:34:25 -07001000 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001001
1002 if (!targetIsEdge && dest1IsEdge) {
1003 // MPLS rules in all non-edge target devices. These rules are for
1004 // individual destinations, even if the dsts are part of edge-pairs.
1005 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1006 + "all MPLS rules", targetSw, destSw1);
pier8b4ba992019-04-24 16:12:47 +02001007 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Saurav Das7bcbe702017-06-13 15:35:54 -07001008 if (!result) {
1009 return false;
1010 }
1011 if (dest1RouterIpv6 != null) {
Saurav Dasa4020382018-02-14 14:14:54 -08001012 int v4sid = 0, v6sid = 0;
1013 try {
1014 v4sid = config.getIPv4SegmentId(destSw1);
1015 v6sid = config.getIPv6SegmentId(destSw1);
1016 } catch (DeviceConfigNotFoundException e) {
1017 log.warn(e.getMessage());
1018 }
1019 if (v4sid != v6sid) {
pier8b4ba992019-04-24 16:12:47 +02001020 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Saurav Dasa4020382018-02-14 14:14:54 -08001021 dest1RouterIpv6);
1022 if (!result) {
1023 return false;
1024 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001025 }
1026 }
1027 }
1028
Andreas Pantelopoulosff691b72018-03-12 16:30:20 -07001029 if (!targetIsEdge && !dest1IsEdge) {
1030 // MPLS rules for inter-connected spines
1031 // can be merged with above if, left it here for clarity
1032 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1033 + "all MPLS rules", targetSw, destSw1);
1034
pier8b4ba992019-04-24 16:12:47 +02001035 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Andreas Pantelopoulosff691b72018-03-12 16:30:20 -07001036 if (!result) {
1037 return false;
1038 }
1039
1040 if (dest1RouterIpv6 != null) {
1041 int v4sid = 0, v6sid = 0;
1042 try {
1043 v4sid = config.getIPv4SegmentId(destSw1);
1044 v6sid = config.getIPv6SegmentId(destSw1);
1045 } catch (DeviceConfigNotFoundException e) {
1046 log.warn(e.getMessage());
1047 }
1048 if (v4sid != v6sid) {
pier8b4ba992019-04-24 16:12:47 +02001049 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Andreas Pantelopoulosff691b72018-03-12 16:30:20 -07001050 dest1RouterIpv6);
1051 if (!result) {
1052 return false;
1053 }
1054 }
1055 }
1056 }
1057
Saurav Das7bcbe702017-06-13 15:35:54 -07001058 // To save on ECMP groups
1059 // avoid MPLS rules in non-edge-devices to non-edge-devices
1060 // avoid MPLS transit rules in edge-devices
1061 // avoid loopback IP rules in edge-devices to non-edge-devices
1062 return true;
Saurav Dasc88d4662017-05-15 15:34:25 -07001063 }
1064
1065 /**
pier1c2ca732019-04-25 18:51:51 +02001066 * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
Saurav Dasc88d4662017-05-15 15:34:25 -07001067 *
1068 * @param routeChanges a set of route-path changes, where each route-path is
1069 * a list with its first element the src-switch of the path
1070 * and the second element the dst-switch of the path.
Saurav Dasc88d4662017-05-15 15:34:25 -07001071 * @param failedSwitch the switchId if the route changes are for a failed switch,
1072 * otherwise null
1073 */
pier1c2ca732019-04-25 18:51:51 +02001074 private void processHashGroupChangeForFailure(Set<ArrayList<DeviceId>> routeChanges,
1075 DeviceId failedSwitch) {
Saurav Das9df5b7c2017-08-14 16:44:43 -07001076 // first, ensure each routeChanges entry has two elements
pier1c2ca732019-04-25 18:51:51 +02001077 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
Saurav Dasc568c342018-01-25 09:49:01 -08001078 boolean someFailed = false;
pier1c2ca732019-04-25 18:51:51 +02001079 boolean success;
Saurav Dasc568c342018-01-25 09:49:01 -08001080 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Das9df5b7c2017-08-14 16:44:43 -07001081 for (ArrayList<DeviceId> route : changedRoutes) {
1082 DeviceId targetSw = route.get(0);
1083 DeviceId dstSw = route.get(1);
pier1c2ca732019-04-25 18:51:51 +02001084 success = fixHashGroupsForRoute(route, true);
1085 // it's possible that we cannot fix hash groups for a route
1086 // if the target switch has failed. Nevertheless the ecmp graph
1087 // for the impacted switch must still be updated.
1088 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
1089 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1090 currentEcmpSpgMap.remove(targetSw);
1091 log.debug("Updating ECMPspg for dst:{} removing failed switch "
1092 + "target:{}", dstSw, targetSw);
1093 updatedDevices.add(targetSw);
1094 updatedDevices.add(dstSw);
1095 continue;
1096 }
1097 //linkfailed - update both sides
1098 if (success) {
1099 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1100 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1101 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
1102 + " or switchdown", dstSw, targetSw);
1103 updatedDevices.add(targetSw);
1104 updatedDevices.add(dstSw);
Saurav Das9df5b7c2017-08-14 16:44:43 -07001105 } else {
pier1c2ca732019-04-25 18:51:51 +02001106 someFailed = true;
Saurav Dasc88d4662017-05-15 15:34:25 -07001107 }
1108 }
Saurav Dasc568c342018-01-25 09:49:01 -08001109 if (!someFailed) {
1110 // here is where we update all devices not touched by this instance
1111 updatedEcmpSpgMap.keySet().stream()
1112 .filter(devId -> !updatedDevices.contains(devId))
1113 .forEach(devId -> {
1114 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1115 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1116 });
1117 }
Saurav Dasc88d4662017-05-15 15:34:25 -07001118 }
1119
1120 /**
pier1c2ca732019-04-25 18:51:51 +02001121 * Processes a set a route-path changes due to link up by editing hash groups.
1122 *
1123 * @param routeChanges a set of route-path changes, where each route-path is
1124 * a list with its first element the src-switch of the path
1125 * and the second element the dst-switch of the path.
1126 * @return set of changed routes
1127 */
1128 private Set<ArrayList<DeviceId>> processHashGroupChangeForLinkUp(Set<ArrayList<DeviceId>> routeChanges) {
1129 // Stores changed routes
1130 Set<ArrayList<DeviceId>> doneRoutes = new HashSet<>();
1131 // first, ensure each routeChanges entry has two elements
1132 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
1133 boolean someFailed = false;
1134 boolean success;
1135 Set<DeviceId> updatedDevices = Sets.newHashSet();
1136 for (ArrayList<DeviceId> route : changedRoutes) {
1137 DeviceId targetSw = route.get(0);
1138 DeviceId dstSw = route.get(1);
1139 // linkup - fix (if possible)
1140 success = fixHashGroupsForRoute(route, false);
1141 if (success) {
1142 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1143 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1144 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
1145 targetSw, dstSw);
1146 updatedDevices.add(targetSw);
1147 updatedDevices.add(dstSw);
1148 doneRoutes.add(route);
1149 } else {
1150 someFailed = true;
1151 }
1152
1153 }
1154 if (!someFailed) {
1155 // here is where we update all devices not touched by this instance
1156 updatedEcmpSpgMap.keySet().stream()
1157 .filter(devId -> !updatedDevices.contains(devId))
1158 .forEach(devId -> {
1159 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1160 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1161 });
1162 }
1163 return doneRoutes;
1164 }
1165
1166 /**
Saurav Dasc88d4662017-05-15 15:34:25 -07001167 * Edits hash groups in the src-switch (targetSw) of a route-path by
1168 * calling the groupHandler to either add or remove buckets in an existing
1169 * hash group.
1170 *
1171 * @param route a single list representing a route-path where the first element
1172 * is the src-switch (targetSw) of the route-path and the
1173 * second element is the dst-switch
1174 * @param revoke true if buckets in the hash-groups need to be removed;
1175 * false if buckets in the hash-groups need to be added
1176 * @return true if the hash group editing is successful
1177 */
1178 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1179 boolean revoke) {
1180 DeviceId targetSw = route.get(0);
1181 if (route.size() < 2) {
1182 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1183 return false;
1184 }
1185 DeviceId destSw = route.get(1);
Saurav Das9df5b7c2017-08-14 16:44:43 -07001186 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Dasc88d4662017-05-15 15:34:25 -07001187 targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001188 // figure out the new next hops at the targetSw towards the destSw
Saurav Das9df5b7c2017-08-14 16:44:43 -07001189 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001190 // call group handler to change hash group at targetSw
1191 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1192 if (grpHandler == null) {
1193 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1194 + " {} hash group buckets for route:{} ", targetSw,
1195 (revoke) ? "revoke" : "repopulate", route);
1196 return false;
1197 }
Saurav Dasf1027d42018-06-11 17:02:31 -07001198 log.debug("{} hash-groups buckets For Route {} -> {} to new next-hops {}",
Saurav Dasc88d4662017-05-15 15:34:25 -07001199 (revoke) ? "revoke" : "repopulating",
1200 targetSw, destSw, nextHops);
1201 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1202 destSw, true)
1203 : grpHandler.fixHashGroups(targetSw, nextHops,
1204 destSw, false);
1205 }
1206
1207 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001208 * Start the flow rule population process if it was never started. The
1209 * process finishes successfully when all flow rules are set and stops with
1210 * ABORTED status when any groups required for flows is not set yet.
Saurav Dasc88d4662017-05-15 15:34:25 -07001211 */
Saurav Das7bcbe702017-06-13 15:35:54 -07001212 public void startPopulationProcess() {
1213 statusLock.lock();
1214 try {
1215 if (populationStatus == Status.IDLE
1216 || populationStatus == Status.SUCCEEDED
1217 || populationStatus == Status.ABORTED) {
1218 populateAllRoutingRules();
sangho45b009c2015-05-07 13:30:57 -07001219 } else {
Saurav Das7bcbe702017-06-13 15:35:54 -07001220 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1221 populationStatus);
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001222 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001223 } finally {
1224 statusLock.unlock();
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001225 }
sangho20eff1d2015-04-13 15:15:58 -07001226 }
1227
Saurav Dasb5c236e2016-06-07 10:08:06 -07001228 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001229 * Revoke rules of given subnet in all edge switches.
1230 *
1231 * @param subnets subnet being removed
1232 * @return true if succeed
1233 */
1234 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
pier8b4ba992019-04-24 16:12:47 +02001235 DeviceId targetSw;
1236 List<Future<Boolean>> futures = Lists.newArrayList();
1237 for (Device sw : srManager.deviceService.getAvailableDevices()) {
1238 targetSw = sw.id();
1239 if (shouldProgram(targetSw)) {
1240 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1241 } else {
1242 futures.add(CompletableFuture.completedFuture(true));
1243 }
1244 }
1245 // check the execution of each job
1246 return checkJobs(futures);
1247 }
1248
1249 private final class RevokeSubnet implements PickyCallable<Boolean> {
1250 private DeviceId targetSw;
1251 private Set<IpPrefix> subnets;
1252
1253 /**
1254 * Builds a RevokeSubnet task, which provides a result.
1255 *
1256 * @param subnets a set of prefixes
1257 * @param targetSw target switch
1258 */
1259 RevokeSubnet(DeviceId targetSw, Set<IpPrefix> subnets) {
1260 this.targetSw = targetSw;
1261 this.subnets = subnets;
1262 }
1263
1264 @Override
1265 public Boolean call() throws Exception {
1266 return srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets);
1267 }
1268
1269 @Override
1270 public int hint() {
1271 return targetSw.hashCode();
Saurav Das7bcbe702017-06-13 15:35:54 -07001272 }
1273 }
1274
1275 /**
Charles Chan2fde6d42017-08-23 14:46:43 -07001276 * Populates IP rules for a route that has direct connection to the switch
1277 * if the current instance is the master of the switch.
1278 *
1279 * @param deviceId device ID of the device that next hop attaches to
1280 * @param prefix IP prefix of the route
1281 * @param hostMac MAC address of the next hop
1282 * @param hostVlanId Vlan ID of the nexthop
1283 * @param outPort port where the next hop attaches to
Ruchi Sahota5d800282019-01-28 01:08:18 +00001284 * @param directHost host is of type direct or indirect
Charles Chan2fde6d42017-08-23 14:46:43 -07001285 */
1286 void populateRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota5d800282019-01-28 01:08:18 +00001287 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chan2ff1bac2018-03-29 16:03:41 -07001288 if (shouldProgram(deviceId)) {
Ruchi Sahota5d800282019-01-28 01:08:18 +00001289 srManager.routingRulePopulator.populateRoute(deviceId, prefix, hostMac, hostVlanId, outPort, directHost);
Charles Chan2fde6d42017-08-23 14:46:43 -07001290 }
1291 }
1292
1293 /**
1294 * Removes IP rules for a route when the next hop is gone.
1295 * if the current instance is the master of the switch.
1296 *
1297 * @param deviceId device ID of the device that next hop attaches to
1298 * @param prefix IP prefix of the route
1299 * @param hostMac MAC address of the next hop
1300 * @param hostVlanId Vlan ID of the nexthop
1301 * @param outPort port that next hop attaches to
Ruchi Sahota5d800282019-01-28 01:08:18 +00001302 * @param directHost host is of type direct or indirect
Charles Chan2fde6d42017-08-23 14:46:43 -07001303 */
1304 void revokeRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota5d800282019-01-28 01:08:18 +00001305 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chan2ff1bac2018-03-29 16:03:41 -07001306 if (shouldProgram(deviceId)) {
Ruchi Sahota5d800282019-01-28 01:08:18 +00001307 srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId, outPort, directHost);
Charles Chan2fde6d42017-08-23 14:46:43 -07001308 }
1309 }
1310
Charles Chan2ff1bac2018-03-29 16:03:41 -07001311 void populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1312 if (shouldProgram(deviceId)) {
1313 srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
1314 }
1315 }
1316
1317 void revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1318 if (shouldProgram(deviceId)) {
1319 srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
1320 }
1321 }
1322
1323 void updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1324 VlanId vlanId, boolean popVlan, boolean install) {
1325 if (shouldProgram(deviceId)) {
1326 srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
1327 }
1328 }
1329
1330 void updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix, MacAddress hostMac,
1331 VlanId vlanId, boolean popVlan, boolean install) {
1332 if (shouldProgram(deviceId)) {
1333 srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
1334 vlanId, popVlan, install);
1335 }
1336 }
1337
Charles Chan2fde6d42017-08-23 14:46:43 -07001338 /**
Jonghwan Hyuna76bf032018-04-09 09:40:50 -07001339 * Populates IP rules for a route when the next hop is double-tagged.
1340 *
1341 * @param deviceId device ID that next hop attaches to
1342 * @param prefix IP prefix of the route
1343 * @param hostMac MAC address of the next hop
1344 * @param innerVlan Inner Vlan ID of the next hop
1345 * @param outerVlan Outer Vlan ID of the next hop
1346 * @param outerTpid Outer TPID of the next hop
1347 * @param outPort port that the next hop attaches to
1348 */
1349 void populateDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1350 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1351 if (srManager.mastershipService.isLocalMaster(deviceId)) {
1352 VlanId dummyVlan = srManager.allocateDummyVlanId(
1353 new ConnectPoint(deviceId, outPort), prefix.address());
1354 if (!dummyVlan.equals(VlanId.NONE)) {
1355 srManager.routingRulePopulator.populateDoubleTaggedRoute(
1356 deviceId, prefix, hostMac, dummyVlan, innerVlan, outerVlan, outerTpid, outPort);
1357 srManager.routingRulePopulator.processDoubleTaggedFilter(
1358 deviceId, outPort, outerVlan, innerVlan, true);
1359 } else {
1360 log.error("Failed to allocate dummy VLAN ID for host {} at {}/{}",
1361 prefix.address(), deviceId, outPort);
1362 }
1363 }
1364 }
1365
1366 /**
1367 * Revokes IP rules for a route when the next hop is double-tagged.
1368 *
1369 * @param deviceId device ID that next hop attaches to
1370 * @param prefix IP prefix of the route
1371 * @param hostMac MAC address of the next hop
1372 * @param innerVlan Inner Vlan ID of the next hop
1373 * @param outerVlan Outer Vlan ID of the next hop
1374 * @param outerTpid Outer TPID of the next hop
1375 * @param outPort port that the next hop attaches to
1376 */
1377 void revokeDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1378 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1379 // Revoke route either if this node have the mastership (when device is available) or
1380 // if this node is the leader (even when device is unavailable)
1381 if (!srManager.mastershipService.isLocalMaster(deviceId)) {
1382 if (srManager.deviceService.isAvailable(deviceId)) {
1383 // Master node will revoke specified rule.
1384 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1385 return;
1386 }
1387
1388 // isLocalMaster will return false when the device is unavailable.
1389 // Verify if this node is the leader in that case.
1390 NodeId leader = srManager.leadershipService.runForLeadership(
1391 deviceId.toString()).leaderNodeId();
1392 if (!srManager.clusterService.getLocalNode().id().equals(leader)) {
1393 // Leader node will revoke specified rule.
1394 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1395 return;
1396 }
1397 }
1398
1399 VlanId dummyVlan = srManager.dummyVlanIdStore().get(new DummyVlanIdStoreKey(
1400 new ConnectPoint(deviceId, outPort), prefix.address()));
1401 if (dummyVlan == null) {
1402 log.error("Failed to get dummyVlanId for host {} at {}/{}.",
1403 prefix.address(), deviceId, outPort);
1404 } else {
1405 srManager.routingRulePopulator.revokeDoubleTaggedRoute(
1406 deviceId, prefix, hostMac, dummyVlan, innerVlan, outerVlan, outerTpid, outPort);
1407 srManager.routingRulePopulator.processDoubleTaggedFilter(
1408 deviceId, outPort, outerVlan, innerVlan, false);
1409 }
1410 }
1411
1412
1413 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001414 * Remove ECMP graph entry for the given device. Typically called when
1415 * device is no longer available.
1416 *
1417 * @param deviceId the device for which graphs need to be purged
1418 */
Charles Chana8487b02018-04-18 18:41:05 -07001419 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Dasc568c342018-01-25 09:49:01 -08001420 statusLock.lock();
1421 try {
Saurav Dasc568c342018-01-25 09:49:01 -08001422 if (populationStatus == Status.STARTED) {
1423 log.warn("Previous rule population is not finished. Cannot"
1424 + " proceeed with purgeEcmpGraph for {}", deviceId);
1425 return;
1426 }
1427 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1428 currentEcmpSpgMap.remove(deviceId);
1429 if (updatedEcmpSpgMap != null) {
1430 updatedEcmpSpgMap.remove(deviceId);
1431 }
1432 } finally {
1433 statusLock.unlock();
Saurav Das7bcbe702017-06-13 15:35:54 -07001434 }
1435 }
1436
Saurav Dasc6dc1772018-04-21 17:19:48 -07001437 /**
1438 * Attempts a full reroute of route-paths if topology has changed relatively
1439 * close to a mastership change event. Does not do a reroute if mastership
1440 * change is due to reasons other than a ONOS cluster event - for example a
1441 * call to balance-masters, or a switch up/down event.
1442 *
1443 * @param devId the device identifier for which mastership has changed
1444 * @param me the mastership event
1445 */
1446 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1447 // give small delay to absorb mastership events that are caused by
1448 // device that has disconnected from cluster
Saurav Das60ca8d52018-04-23 18:42:12 -07001449 executorServiceMstChg.schedule(new MasterChange(devId, me),
1450 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
Saurav Dasc6dc1772018-04-21 17:19:48 -07001451 }
1452
1453 protected final class MasterChange implements Runnable {
1454 private DeviceId devId;
1455 private MastershipEvent me;
1456 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1457 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
Saurav Das41b49a92018-04-27 18:42:30 -07001458 private static final long EDGE_PORT_EVENT_THRESHOLD = 10000; //ms
Saurav Dasf1027d42018-06-11 17:02:31 -07001459 private static final long FULL_REROUTE_THRESHOLD = 10000; // ms
Saurav Dasc6dc1772018-04-21 17:19:48 -07001460
1461 MasterChange(DeviceId devId, MastershipEvent me) {
1462 this.devId = devId;
1463 this.me = me;
1464 }
1465
1466 @Override
1467 public void run() {
1468 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1469 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1470
1471 // ignore event for lost switch if cluster event hasn't happened -
1472 // device down event will handle it
1473 if ((me.roleInfo().master() == null
1474 || !srManager.deviceService.isAvailable(devId))
1475 && !clusterEvent) {
1476 log.debug("Full reroute not required for lost device: {}/{} "
1477 + "clusterEvent/timeSince: {}/{}",
1478 devId, me.roleInfo(), clusterEvent, lce);
1479 return;
1480 }
1481
1482 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1483 long lde = Instant.now().toEpochMilli() - update;
1484 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1485
1486 // ignore event for recently connected switch if cluster event hasn't
1487 // happened - link up events will handle it
1488 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1489 && !clusterEvent) {
1490 log.debug("Full reroute not required for recently available"
1491 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1492 + "clusterEvent/timeSince: {}/{}",
1493 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1494 return;
1495 }
1496
Saurav Das41b49a92018-04-27 18:42:30 -07001497 long lepe = Instant.now().toEpochMilli()
1498 - srManager.lastEdgePortEvent.toEpochMilli();
1499 boolean edgePortEvent = lepe < EDGE_PORT_EVENT_THRESHOLD;
1500
Saurav Dasc6dc1772018-04-21 17:19:48 -07001501 // if it gets here, then mastership change is likely due to onos
1502 // instance failure, or network partition in onos cluster
1503 // normally a mastership change like this does not require re-programming
1504 // but if topology changes happen at the same time then we may miss events
1505 if (!isRoutingStable() && clusterEvent) {
Saurav Das41b49a92018-04-27 18:42:30 -07001506 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
Saurav Dasc6dc1772018-04-21 17:19:48 -07001507 + "due to clusterEvent {} ms ago .. attempting full reroute",
1508 devId, me.roleInfo(), lce);
1509 if (srManager.mastershipService.isLocalMaster(devId)) {
1510 // old master could have died when populating filters
1511 populatePortAddressingRules(devId);
1512 }
Saurav Dasf1027d42018-06-11 17:02:31 -07001513 // old master could have died when creating groups
Saurav Dasc6dc1772018-04-21 17:19:48 -07001514 // XXX right now we have no fine-grained way to only make changes
Saurav Dasf1027d42018-06-11 17:02:31 -07001515 // for the route paths affected by this device. Thus we do a
1516 // full reroute after purging all hash groups. We also try to do
1517 // it only once, irrespective of the number of devices
1518 // that changed mastership when their master instance died.
1519 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
1520 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
1521 if (doFullReroute) {
1522 lastFullReroute = Instant.now();
1523 for (Device dev : srManager.deviceService.getDevices()) {
1524 if (shouldProgram(dev.id())) {
1525 srManager.purgeHashedNextObjectiveStore(dev.id());
1526 }
1527 }
1528 // give small delay to ensure entire store is purged
1529 executorServiceFRR.schedule(new FullRerouteAfterPurge(),
1530 PURGE_DELAY,
1531 TimeUnit.MILLISECONDS);
1532 } else {
1533 log.warn("Full reroute attempted {} ms ago .. skipping", lfrr);
1534 }
Saurav Das41b49a92018-04-27 18:42:30 -07001535
1536 } else if (edgePortEvent && clusterEvent) {
1537 log.warn("Mastership changed for dev: {}/{} due to clusterEvent {} ms ago "
1538 + "while edge-port event happened {} ms ago "
1539 + " .. reprogramming all edge-ports",
1540 devId, me.roleInfo(), lce, lepe);
1541 if (shouldProgram(devId)) {
1542 srManager.deviceService.getPorts(devId).stream()
1543 .filter(p -> srManager.interfaceService
1544 .isConfigured(new ConnectPoint(devId, p.number())))
1545 .forEach(p -> srManager.processPortUpdated(devId, p));
1546 }
1547
Saurav Dasc6dc1772018-04-21 17:19:48 -07001548 } else {
1549 log.debug("Stable route-paths .. full reroute not attempted for "
1550 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1551 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1552 deviceEvent, lde, clusterEvent, lce);
1553 }
1554 }
1555 }
1556
Saurav Dasf1027d42018-06-11 17:02:31 -07001557 /**
1558 * Performs a full reroute of routing rules in all the switches. Assumes
1559 * caller has purged hash groups from the nextObjective store, otherwise
1560 * re-uses ones available in the store.
1561 */
1562 protected final class FullRerouteAfterPurge implements Runnable {
1563 @Override
1564 public void run() {
1565 populateAllRoutingRules();
1566 }
1567 }
1568
1569
Saurav Das7bcbe702017-06-13 15:35:54 -07001570 //////////////////////////////////////
1571 // Routing helper methods and classes
1572 //////////////////////////////////////
1573
1574 /**
Saurav Dasf1027d42018-06-11 17:02:31 -07001575 * Computes set of affected routes due to failed link. Assumes previous ecmp
1576 * shortest-path graph exists for a switch in order to compute affected
1577 * routes. If such a graph does not exist, the method returns null.
Saurav Dasb5c236e2016-06-07 10:08:06 -07001578 *
1579 * @param linkFail the failed link
1580 * @return the set of affected routes which may be empty if no routes were
Saurav Dasf1027d42018-06-11 17:02:31 -07001581 * affected
Saurav Dasb5c236e2016-06-07 10:08:06 -07001582 */
sangho20eff1d2015-04-13 15:15:58 -07001583 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sangho20eff1d2015-04-13 15:15:58 -07001584 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1585
1586 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001587 log.debug("Computing the impacted routes for device {} due to link fail",
1588 sw.id());
Charles Chan2ff1bac2018-03-29 16:03:41 -07001589 if (!shouldProgram(sw.id())) {
Saurav Dasc6dc1772018-04-21 17:19:48 -07001590 lastProgrammed.remove(sw.id());
sangho20eff1d2015-04-13 15:15:58 -07001591 continue;
1592 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001593 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Dasc6dc1772018-04-21 17:19:48 -07001594 // check for mastership change since last run
1595 if (!lastProgrammed.contains(sw.id())) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001596 log.warn("New responsibility for this node to program dev:{}"
Saurav Dasc6dc1772018-04-21 17:19:48 -07001597 + " ... nuking current ECMPspg", sw.id());
1598 currentEcmpSpgMap.remove(sw.id());
1599 }
Saurav Dasf1027d42018-06-11 17:02:31 -07001600 lastProgrammed.add(sw.id());
1601
Saurav Das9df5b7c2017-08-14 16:44:43 -07001602 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1603 if (ecmpSpg == null) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001604 log.warn("No existing ECMP graph for switch {}. Assuming "
1605 + "all route-paths have changed towards it.", rootSw);
1606 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
1607 if (targetSw.equals(rootSw)) {
1608 continue;
1609 }
1610 routes.add(Lists.newArrayList(targetSw, rootSw));
1611 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1612 }
1613 continue;
Saurav Das9df5b7c2017-08-14 16:44:43 -07001614 }
Saurav Dasf1027d42018-06-11 17:02:31 -07001615
Saurav Das9df5b7c2017-08-14 16:44:43 -07001616 if (log.isDebugEnabled()) {
1617 log.debug("Root switch: {}", rootSw);
1618 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1619 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1620 }
1621 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1622 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1623 // figure out if the broken link affected any route-paths in this graph
1624 for (Integer itrIdx : switchVia.keySet()) {
1625 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1626 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1627 switchVia.get(itrIdx);
1628 for (DeviceId targetSw : swViaMap.keySet()) {
1629 log.trace("TargetSwitch {} --> RootSwitch {}",
1630 targetSw, rootSw);
Saurav Dasb5c236e2016-06-07 10:08:06 -07001631 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1632 log.trace(" Via:");
Pier Ventree0ae7a32016-11-23 09:57:42 -08001633 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb5c236e2016-06-07 10:08:06 -07001634 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001635 Set<ArrayList<DeviceId>> subLinks =
1636 computeLinks(targetSw, rootSw, swViaMap);
1637 for (ArrayList<DeviceId> alink: subLinks) {
1638 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1639 alink.get(1).equals(linkFail.dst().deviceId()))
1640 ||
1641 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1642 alink.get(1).equals(linkFail.src().deviceId()))) {
1643 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1644 ArrayList<DeviceId> aRoute = new ArrayList<>();
1645 aRoute.add(targetSw); // switch with rules to populate
1646 aRoute.add(rootSw); // towards this destination
1647 routes.add(aRoute);
1648 break;
1649 }
sangho20eff1d2015-04-13 15:15:58 -07001650 }
1651 }
1652 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001653
sangho20eff1d2015-04-13 15:15:58 -07001654 }
sangho45b009c2015-05-07 13:30:57 -07001655
sangho20eff1d2015-04-13 15:15:58 -07001656 }
sangho20eff1d2015-04-13 15:15:58 -07001657 return routes;
1658 }
1659
Saurav Das4e3224f2016-11-29 14:27:25 -08001660 /**
1661 * Computes set of affected routes due to new links or failed switches.
1662 *
Saurav Das604ab3a2018-03-18 21:28:15 -07001663 * @param failedSwitch deviceId of failed switch if any
Saurav Das4e3224f2016-11-29 14:27:25 -08001664 * @return the set of affected routes which may be empty if no routes were
1665 * affected
1666 */
Saurav Dase0d4c872018-03-05 14:37:16 -08001667 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001668 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das4e3224f2016-11-29 14:27:25 -08001669 ImmutableSet.builder();
sangho20eff1d2015-04-13 15:15:58 -07001670
1671 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001672 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chan2ff1bac2018-03-29 16:03:41 -07001673 if (!shouldProgram(sw.id())) {
Saurav Dasc6dc1772018-04-21 17:19:48 -07001674 lastProgrammed.remove(sw.id());
sangho20eff1d2015-04-13 15:15:58 -07001675 continue;
1676 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001677 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001678 if (log.isTraceEnabled()) {
1679 log.trace("Device links for dev: {}", rootSw);
1680 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1681 log.trace("{} -> {} ", link.src().deviceId(),
1682 link.dst().deviceId());
1683 }
Saurav Dasb5c236e2016-06-07 10:08:06 -07001684 }
Saurav Dasc6dc1772018-04-21 17:19:48 -07001685 // check for mastership change since last run
1686 if (!lastProgrammed.contains(sw.id())) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001687 log.warn("New responsibility for this node to program dev:{}"
Saurav Dasc6dc1772018-04-21 17:19:48 -07001688 + " ... nuking current ECMPspg", sw.id());
1689 currentEcmpSpgMap.remove(sw.id());
1690 }
Saurav Dasf1027d42018-06-11 17:02:31 -07001691 lastProgrammed.add(sw.id());
Saurav Das7bcbe702017-06-13 15:35:54 -07001692 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1693 if (currEcmpSpg == null) {
1694 log.debug("No existing ECMP graph for device {}.. adding self as "
1695 + "changed route", rootSw);
1696 changedRtBldr.add(Lists.newArrayList(rootSw));
1697 continue;
1698 }
1699 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Das5a356042018-04-06 20:16:01 -07001700 if (newEcmpSpg == null) {
1701 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1702 continue;
1703 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001704 if (log.isDebugEnabled()) {
1705 log.debug("Root switch: {}", rootSw);
1706 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1707 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1708 }
1709 // first use the updated/new map to compare to current/existing map
1710 // as new links may have come up
1711 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1712 // then use the current/existing map to compare to updated/new map
1713 // as switch may have been removed
1714 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho45b009c2015-05-07 13:30:57 -07001715 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001716 }
sangho20eff1d2015-04-13 15:15:58 -07001717
Saurav Dase0d4c872018-03-05 14:37:16 -08001718 // handle clearing state for a failed switch in case the switch does
1719 // not have a pair, or the pair is not available
1720 if (failedSwitch != null) {
Charles Chanba6c5752018-04-02 11:46:38 -07001721 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1722 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dase0d4c872018-03-05 14:37:16 -08001723 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1724 srManager.deviceService.getDevices().forEach(dev -> {
1725 if (!dev.id().equals(failedSwitch) &&
1726 srManager.mastershipService.isLocalMaster(dev.id())) {
1727 log.debug(" : {}", dev.id());
1728 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1729 }
1730 });
1731 }
1732 }
1733
Saurav Das7bcbe702017-06-13 15:35:54 -07001734 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das4e3224f2016-11-29 14:27:25 -08001735 for (ArrayList<DeviceId> route: changedRoutes) {
1736 log.debug("Route changes Target -> Root");
1737 if (route.size() == 1) {
1738 log.debug(" : all -> {}", route.get(0));
1739 } else {
1740 log.debug(" : {} -> {}", route.get(0), route.get(1));
1741 }
1742 }
1743 return changedRoutes;
1744 }
1745
pier1c2ca732019-04-25 18:51:51 +02001746 // Utility method to expands the route changes in two elements array using
1747 // the ECMP graph. Caller represents all to dst switch routes with an
1748 // array containing only the dst switch.
1749 private Set<ArrayList<DeviceId>> getExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1750 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1751 // Ensure each routeChanges entry has two elements
1752 for (ArrayList<DeviceId> route : routeChanges) {
1753 if (route.size() == 1) {
1754 DeviceId dstSw = route.get(0);
1755 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
1756 if (ec == null) {
1757 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
1758 return Collections.emptySet();
1759 }
1760 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
1761 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
1762 changedRoutes.add(Lists.newArrayList(target, dstSw));
1763 });
1764 });
1765 } else {
1766 DeviceId targetSw = route.get(0);
1767 DeviceId dstSw = route.get(1);
1768 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
1769 }
1770 }
1771 return changedRoutes;
1772 }
1773
1774 // Utility method to expands the route changes in two elements array using
1775 // the available devices. Caller represents all to dst switch routes with an
1776 // array containing only the dst switch.
1777 private Set<ArrayList<DeviceId>> getAllExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1778 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1779 // Ensure each routeChanges entry has two elements
1780 for (ArrayList<DeviceId> route : routeChanges) {
1781 if (route.size() == 1) {
1782 // route-path changes are from everyone else to this switch
1783 DeviceId dstSw = route.get(0);
1784 srManager.deviceService.getAvailableDevices().forEach(sw -> {
1785 if (!sw.id().equals(dstSw)) {
1786 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
1787 }
1788 });
1789 } else {
1790 changedRoutes.add(route);
1791 }
1792 }
1793 return changedRoutes;
1794 }
1795
Saurav Das4e3224f2016-11-29 14:27:25 -08001796 /**
1797 * For the root switch, searches all the target nodes reachable in the base
1798 * graph, and compares paths to the ones in the comp graph.
1799 *
1800 * @param base the graph that is indexed for all reachable target nodes
1801 * from the root node
1802 * @param comp the graph that the base graph is compared to
1803 * @param rootSw both ecmp graphs are calculated for the root node
1804 * @return all the routes that have changed in the base graph
1805 */
1806 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1807 EcmpShortestPathGraph comp,
1808 DeviceId rootSw) {
1809 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1810 ImmutableSet.builder();
1811 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1812 base.getAllLearnedSwitchesAndVia();
1813 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1814 comp.getAllLearnedSwitchesAndVia();
1815 for (Integer itrIdx : baseMap.keySet()) {
1816 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1817 baseMap.get(itrIdx);
1818 for (DeviceId targetSw : baseViaMap.keySet()) {
1819 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1820 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1821 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Dasc88d4662017-05-15 15:34:25 -07001822 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das4e3224f2016-11-29 14:27:25 -08001823 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das7bcbe702017-06-13 15:35:54 -07001824 route.add(targetSw); // switch with rules to populate
1825 route.add(rootSw); // towards this destination
Saurav Das4e3224f2016-11-29 14:27:25 -08001826 changedRoutesBuilder.add(route);
sangho20eff1d2015-04-13 15:15:58 -07001827 }
1828 }
sangho45b009c2015-05-07 13:30:57 -07001829 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001830 return changedRoutesBuilder.build();
sangho20eff1d2015-04-13 15:15:58 -07001831 }
1832
Saurav Das7bcbe702017-06-13 15:35:54 -07001833 /**
1834 * Returns the ECMP paths traversed to reach the target switch.
1835 *
1836 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1837 * @param targetSw the switch to reach from the root switch
1838 * @return the nodes traversed on ECMP paths to the target switch
1839 */
sangho20eff1d2015-04-13 15:15:58 -07001840 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das4e3224f2016-11-29 14:27:25 -08001841 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sangho20eff1d2015-04-13 15:15:58 -07001842 for (Integer itrIdx : switchVia.keySet()) {
1843 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1844 switchVia.get(itrIdx);
Saurav Das4e3224f2016-11-29 14:27:25 -08001845 if (swViaMap.get(targetSw) == null) {
sangho20eff1d2015-04-13 15:15:58 -07001846 continue;
1847 } else {
Saurav Das4e3224f2016-11-29 14:27:25 -08001848 return swViaMap.get(targetSw);
sangho20eff1d2015-04-13 15:15:58 -07001849 }
1850 }
1851
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001852 return null;
sangho20eff1d2015-04-13 15:15:58 -07001853 }
1854
Saurav Das7bcbe702017-06-13 15:35:54 -07001855 /**
1856 * Utility method to break down a path from src to dst device into a collection
1857 * of links.
1858 *
1859 * @param src src device of the path
1860 * @param dst dst device of the path
1861 * @param viaMap path taken from src to dst device
1862 * @return collection of links in the path
1863 */
sangho20eff1d2015-04-13 15:15:58 -07001864 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1865 DeviceId dst,
1866 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1867 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1868 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1869 DeviceId linkSrc = src;
1870 DeviceId linkDst = dst;
1871 for (DeviceId viaDevice: via) {
1872 ArrayList<DeviceId> link = new ArrayList<>();
1873 linkDst = viaDevice;
1874 link.add(linkSrc);
1875 link.add(linkDst);
1876 subLinks.add(link);
1877 linkSrc = viaDevice;
1878 }
1879 ArrayList<DeviceId> link = new ArrayList<>();
1880 link.add(linkSrc);
1881 link.add(dst);
1882 subLinks.add(link);
1883 }
1884
1885 return subLinks;
1886 }
1887
Charles Chan93e71ba2016-04-29 14:38:22 -07001888 /**
Charles Chan2ff1bac2018-03-29 16:03:41 -07001889 * Determines whether this controller instance should program the
Saurav Das7bcbe702017-06-13 15:35:54 -07001890 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
Charles Chan2ff1bac2018-03-29 16:03:41 -07001891 * <p>
1892 * Once an instance is elected, it will be the only instance responsible for programming
1893 * both devices in the pair until it goes down.
Charles Chan93e71ba2016-04-29 14:38:22 -07001894 *
Saurav Das7bcbe702017-06-13 15:35:54 -07001895 * @param deviceId device identifier to consider for routing
Charles Chan2ff1bac2018-03-29 16:03:41 -07001896 * @return true if current instance should handle the routing for given device
Charles Chan93e71ba2016-04-29 14:38:22 -07001897 */
Charles Chan2ff1bac2018-03-29 16:03:41 -07001898 boolean shouldProgram(DeviceId deviceId) {
Charles Chana8487b02018-04-18 18:41:05 -07001899 Boolean cached = shouldProgramCache.get(deviceId);
1900 if (cached != null) {
Saurav Dasc6dc1772018-04-21 17:19:48 -07001901 log.debug("shouldProgram dev:{} cached:{}", deviceId, cached);
Charles Chana8487b02018-04-18 18:41:05 -07001902 return cached;
1903 }
1904
Charles Chan2ff1bac2018-03-29 16:03:41 -07001905 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
sanghob35a6192015-04-01 13:05:26 -07001906
Charles Chan2ff1bac2018-03-29 16:03:41 -07001907 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
1908 NodeId masterNodeId = srManager.mastershipService.getMasterFor(deviceId);
1909 Optional<NodeId> pairMasterNodeId = pairDeviceId.map(srManager.mastershipService::getMasterFor);
Saurav Dasf1027d42018-06-11 17:02:31 -07001910 log.debug("Evaluate shouldProgram {}/pair={}. currentNodeId={}, master={}, pairMaster={}",
Charles Chan2ff1bac2018-03-29 16:03:41 -07001911 deviceId, pairDeviceId, currentNodeId, masterNodeId, pairMasterNodeId);
1912
1913 // No pair device configured. Only handle when current instance is the master of the device
1914 if (!pairDeviceId.isPresent()) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001915 log.debug("No pair device. currentNodeId={}, master={}", currentNodeId, masterNodeId);
Charles Chan2ff1bac2018-03-29 16:03:41 -07001916 return currentNodeId.equals(masterNodeId);
sanghob35a6192015-04-01 13:05:26 -07001917 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001918
1919 // Should not handle if current instance is not the master of either switch
1920 if (!currentNodeId.equals(masterNodeId) &&
1921 !(pairMasterNodeId.isPresent() && currentNodeId.equals(pairMasterNodeId.get()))) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001922 log.debug("Current nodeId {} is neither the master of target device {} nor pair device {}",
Charles Chan2ff1bac2018-03-29 16:03:41 -07001923 currentNodeId, deviceId, pairDeviceId);
1924 return false;
1925 }
1926
1927 Set<DeviceId> key = Sets.newHashSet(deviceId, pairDeviceId.get());
1928
1929 NodeId king = shouldProgram.compute(key, ((k, v) -> {
1930 if (v == null) {
1931 // There is no value in the map. Elect a node
1932 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1933 } else {
1934 if (v.equals(masterNodeId) || v.equals(pairMasterNodeId.orElse(null))) {
1935 // Use the node in the map if it is still alive and is a master of any of the two switches
1936 return v;
1937 } else {
1938 // Previously elected node is no longer the master of either switch. Re-elect a node.
1939 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1940 }
1941 }
1942 }));
1943
1944 if (king != null) {
Saurav Dasf1027d42018-06-11 17:02:31 -07001945 log.debug("{} is king, should handle routing for {}/pair={}", king, deviceId, pairDeviceId);
Charles Chana8487b02018-04-18 18:41:05 -07001946 shouldProgramCache.put(deviceId, king.equals(currentNodeId));
Charles Chan2ff1bac2018-03-29 16:03:41 -07001947 return king.equals(currentNodeId);
1948 } else {
1949 log.error("Fail to elect a king for {}/pair={}. Abort.", deviceId, pairDeviceId);
Charles Chana8487b02018-04-18 18:41:05 -07001950 shouldProgramCache.remove(deviceId);
Charles Chan2ff1bac2018-03-29 16:03:41 -07001951 return false;
1952 }
1953 }
1954
1955 /**
1956 * Elects a node who should take responsibility of programming devices.
1957 * @param nodeIds list of candidate node ID
1958 *
1959 * @return NodeId of the node that gets elected, or null if none of the node can be elected
1960 */
1961 private NodeId elect(List<NodeId> nodeIds) {
1962 // Remove all null elements. This could happen when some device has no master
1963 nodeIds.removeAll(Collections.singleton(null));
1964 nodeIds.sort(null);
1965 return nodeIds.size() == 0 ? null : nodeIds.get(0);
1966 }
1967
Charles Chana8487b02018-04-18 18:41:05 -07001968 void invalidateShouldProgramCache(DeviceId deviceId) {
1969 shouldProgramCache.remove(deviceId);
1970 }
1971
Charles Chan2ff1bac2018-03-29 16:03:41 -07001972 /**
1973 * Returns a set of device ID, containing given device and its pair device if exist.
1974 *
1975 * @param deviceId Device ID
1976 * @return a set of device ID, containing given device and its pair device if exist.
1977 */
1978 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
1979 Set<DeviceId> ret = Sets.newHashSet(deviceId);
1980 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
1981 return ret;
sanghob35a6192015-04-01 13:05:26 -07001982 }
1983
Charles Chan93e71ba2016-04-29 14:38:22 -07001984 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001985 * Returns the set of deviceIds which are the next hops from the targetSw
1986 * to the dstSw according to the latest ECMP spg.
1987 *
1988 * @param targetSw the switch for which the next-hops are desired
1989 * @param dstSw the switch to which the next-hops lead to from the targetSw
1990 * @return set of next hop deviceIds, could be empty if no next hops are found
1991 */
1992 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
1993 boolean targetIsEdge = false;
1994 try {
1995 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
1996 } catch (DeviceConfigNotFoundException e) {
1997 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
1998 + "continuing to getNextHops", targetSw);
1999 }
2000
2001 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
2002 if (ecmpSpg == null) {
2003 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
2004 return ImmutableSet.of();
2005 }
2006 HashMap<Integer,
2007 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
2008 ecmpSpg.getAllLearnedSwitchesAndVia();
2009 for (Integer itrIdx : switchVia.keySet()) {
2010 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
2011 switchVia.get(itrIdx);
2012 for (DeviceId target : swViaMap.keySet()) {
2013 if (!target.equals(targetSw)) {
2014 continue;
2015 }
Saurav Das60ca8d52018-04-23 18:42:12 -07002016 // optimization for spines to not use leaves to get
2017 // to a spine or other leaves. Also leaves should not use other
2018 // leaves to get to the destination
2019 if ((!targetIsEdge && itrIdx > 1) || targetIsEdge) {
Saurav Dasa4020382018-02-14 14:14:54 -08002020 boolean pathdevIsEdge = false;
2021 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
Saurav Das60ca8d52018-04-23 18:42:12 -07002022 log.debug("Evaluating next-hop in path: {}", via);
Saurav Dasa4020382018-02-14 14:14:54 -08002023 for (DeviceId pathdev : via) {
2024 try {
2025 pathdevIsEdge = srManager.deviceConfiguration
2026 .isEdgeDevice(pathdev);
2027 } catch (DeviceConfigNotFoundException e) {
2028 log.warn(e.getMessage());
2029 }
2030 if (pathdevIsEdge) {
Saurav Dasf1027d42018-06-11 17:02:31 -07002031 log.debug("Avoiding {} hop path for targetSw:{}"
Saurav Dasa4020382018-02-14 14:14:54 -08002032 + " --> dstSw:{} which goes through an edge"
2033 + " device {} in path {}", itrIdx,
2034 targetSw, dstSw, pathdev, via);
2035 return ImmutableSet.of();
2036 }
2037 }
2038 }
Saurav Das7bcbe702017-06-13 15:35:54 -07002039 }
2040 Set<DeviceId> nextHops = new HashSet<>();
2041 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
2042 if (via.isEmpty()) {
2043 // the dstSw is the next-hop from the targetSw
2044 nextHops.add(dstSw);
2045 } else {
2046 // first elem is next-hop in each ECMP path
2047 nextHops.add(via.get(0));
2048 }
2049 }
Saurav Das60ca8d52018-04-23 18:42:12 -07002050 log.debug("target {} --> dst: {} has next-hops:{}", targetSw,
2051 dstSw, nextHops);
Saurav Das7bcbe702017-06-13 15:35:54 -07002052 return nextHops;
2053 }
2054 }
Saurav Das60ca8d52018-04-23 18:42:12 -07002055 log.debug("No next hops found for target:{} --> dst: {}", targetSw, dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -07002056 return ImmutableSet.of(); //no next-hops found
2057 }
2058
Saurav Das7bcbe702017-06-13 15:35:54 -07002059 //////////////////////////////////////
2060 // Filtering rule creation
2061 //////////////////////////////////////
2062
2063 /**
Saurav Das018605f2017-02-18 14:05:44 -08002064 * Populates filtering rules for port, and punting rules
2065 * for gateway IPs, loopback IPs and arp/ndp traffic.
2066 * Should only be called by the master instance for this device/port.
sanghob35a6192015-04-01 13:05:26 -07002067 *
2068 * @param deviceId Switch ID to set the rules
2069 */
Charles Chana8487b02018-04-18 18:41:05 -07002070 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das59232cf2016-04-27 18:35:50 -07002071 // Although device is added, sometimes device store does not have the
2072 // ports for this device yet. It results in missing filtering rules in the
2073 // switch. We will attempt it a few times. If it still does not work,
2074 // user can manually repopulate using CLI command sr-reroute-network
Charles Chanf6ec1532017-02-08 16:10:40 -08002075 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd2fded02016-12-02 15:43:47 -08002076 if (firstRun == null) {
2077 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das59232cf2016-04-27 18:35:50 -07002078 }
Saurav Dasd2fded02016-12-02 15:43:47 -08002079 executorService.schedule(new RetryFilters(deviceId, firstRun),
2080 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sanghob35a6192015-04-01 13:05:26 -07002081 }
2082
2083 /**
Saurav Dasd2fded02016-12-02 15:43:47 -08002084 * RetryFilters populates filtering objectives for a device and keeps retrying
2085 * till the number of ports filtered are constant for a predefined number
2086 * of attempts.
2087 */
2088 protected final class RetryFilters implements Runnable {
2089 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
2090 DeviceId devId;
2091 int counter;
2092 PortFilterInfo prevRun;
2093
2094 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das59232cf2016-04-27 18:35:50 -07002095 devId = deviceId;
Saurav Dasd2fded02016-12-02 15:43:47 -08002096 prevRun = previousRun;
2097 counter = 0;
Saurav Das59232cf2016-04-27 18:35:50 -07002098 }
2099
2100 @Override
2101 public void run() {
Charles Chan7f9737b2017-06-22 14:27:17 -07002102 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chanf6ec1532017-02-08 16:10:40 -08002103 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd2fded02016-12-02 15:43:47 -08002104 boolean sameResult = prevRun.equals(thisRun);
2105 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
2106 thisRun, sameResult);
Ray Milkeyc6c9b172018-02-26 09:36:31 -08002107 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Das018605f2017-02-18 14:05:44 -08002108 // exponentially increasing intervals for retries
2109 executorService.schedule(this,
2110 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
2111 TimeUnit.MILLISECONDS);
Saurav Dasd2fded02016-12-02 15:43:47 -08002112 if (!sameResult) {
2113 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
2114 }
Saurav Das59232cf2016-04-27 18:35:50 -07002115 }
Saurav Dasd2fded02016-12-02 15:43:47 -08002116 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das59232cf2016-04-27 18:35:50 -07002117 }
Saurav Das59232cf2016-04-27 18:35:50 -07002118 }
pier8b4ba992019-04-24 16:12:47 +02002119
2120 // Check jobs completion. It returns false if one of the job fails
2121 // and cancel the remaining
2122 private boolean checkJobs(List<Future<Boolean>> futures) {
2123 boolean completed = true;
2124 for (Future<Boolean> future : futures) {
2125 try {
2126 if (completed) {
2127 if (!future.get()) {
2128 completed = false;
2129 }
2130 } else {
2131 future.cancel(true);
2132 }
2133 } catch (InterruptedException | ExecutionException e) {
2134 completed = false;
2135 }
2136 }
2137 return completed;
2138 }
sanghob35a6192015-04-01 13:05:26 -07002139}