blob: 7a65e95bea79902f28d4f6a4b5897b0a1fb8277e [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.segmentrouting;
17
Saurav Das62ae6792017-05-15 15:34:25 -070018import com.google.common.collect.ImmutableMap;
19import com.google.common.collect.ImmutableMap.Builder;
Charles Chanc22cef32016-04-29 14:38:22 -070020import com.google.common.collect.ImmutableSet;
Saurav Das1b391d52016-11-29 14:27:25 -080021import com.google.common.collect.Lists;
sanghofb7c7292015-04-13 15:15:58 -070022import com.google.common.collect.Maps;
23import com.google.common.collect.Sets;
Saurav Dasfbe74572017-08-03 18:30:35 -070024
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -070025import org.onlab.packet.EthType;
Charles Chan19b70032019-04-17 14:20:26 -070026import com.google.common.collect.Streams;
sangho9b169e32015-04-14 16:27:13 -070027import org.onlab.packet.Ip4Address;
Pier Ventreadb4ae62016-11-23 09:57:42 -080028import org.onlab.packet.Ip6Address;
sangho80f11cb2015-04-01 13:05:26 -070029import org.onlab.packet.IpPrefix;
Charles Chan910be6a2017-08-23 14:46:43 -070030import org.onlab.packet.MacAddress;
31import org.onlab.packet.VlanId;
piera9941192019-04-24 16:12:47 +020032import org.onlab.util.PredictableExecutor;
33import org.onlab.util.PredictableExecutor.PickyCallable;
Saurav Das261c3002017-06-13 15:35:54 -070034import org.onosproject.cluster.NodeId;
Saurav Das00e553b2018-04-21 17:19:48 -070035import org.onosproject.mastership.MastershipEvent;
Charles Chanc22cef32016-04-29 14:38:22 -070036import org.onosproject.net.ConnectPoint;
sangho80f11cb2015-04-01 13:05:26 -070037import org.onosproject.net.Device;
38import org.onosproject.net.DeviceId;
sanghofb7c7292015-04-13 15:15:58 -070039import org.onosproject.net.Link;
Charles Chan910be6a2017-08-23 14:46:43 -070040import org.onosproject.net.PortNumber;
Charles Chan12a8a842020-02-14 13:23:57 -080041import org.onosproject.net.flowobjective.Objective;
Charles Chan319d1a22015-11-03 10:42:14 -080042import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
Saurav Das62ae6792017-05-15 15:34:25 -070043import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Charles Chand66d6712018-03-29 16:03:41 -070044import org.onosproject.store.serializers.KryoNamespaces;
pierf331a492020-01-07 15:39:39 +010045import org.onosproject.store.service.ConsistentMultimap;
Charles Chand66d6712018-03-29 16:03:41 -070046import org.onosproject.store.service.Serializer;
sangho80f11cb2015-04-01 13:05:26 -070047import org.slf4j.Logger;
48import org.slf4j.LoggerFactory;
49
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070050import java.time.Instant;
sangho80f11cb2015-04-01 13:05:26 -070051import java.util.ArrayList;
Charles Chand66d6712018-03-29 16:03:41 -070052import java.util.Collections;
sangho80f11cb2015-04-01 13:05:26 -070053import java.util.HashMap;
54import java.util.HashSet;
Saurav Das261c3002017-06-13 15:35:54 -070055import java.util.Iterator;
Charles Chand66d6712018-03-29 16:03:41 -070056import java.util.List;
Saurav Das261c3002017-06-13 15:35:54 -070057import java.util.Map;
pierf331a492020-01-07 15:39:39 +010058import java.util.Map.Entry;
Saurav Dasd1872b02016-12-02 15:43:47 -080059import java.util.Objects;
Charles Chan6dbcd252018-04-02 11:46:38 -070060import java.util.Optional;
sangho80f11cb2015-04-01 13:05:26 -070061import java.util.Set;
piera9941192019-04-24 16:12:47 +020062import java.util.concurrent.CompletableFuture;
63import java.util.concurrent.ExecutionException;
64import java.util.concurrent.ExecutorService;
65import java.util.concurrent.Future;
Saurav Das07c74602016-04-27 18:35:50 -070066import java.util.concurrent.ScheduledExecutorService;
67import java.util.concurrent.TimeUnit;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090068import java.util.concurrent.locks.Lock;
69import java.util.concurrent.locks.ReentrantLock;
Charles Chan19b70032019-04-17 14:20:26 -070070import java.util.stream.Collectors;
Saurav Dasdc7f2752018-03-18 21:28:15 -070071import java.util.stream.Stream;
72
Pier Ventreadb4ae62016-11-23 09:57:42 -080073import static com.google.common.base.Preconditions.checkNotNull;
74import static java.util.concurrent.Executors.newScheduledThreadPool;
75import static org.onlab.util.Tools.groupedThreads;
sangho80f11cb2015-04-01 13:05:26 -070076
/**
 * Default routing handler that is responsible for route computation and
 * routing rule population.
 */
sangho80f11cb2015-04-01 13:05:26 -070081public class DefaultRoutingHandler {
Saurav Dasf9332192017-02-18 14:05:44 -080082 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey092e9e22018-02-01 13:49:47 -080083 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Dasf9332192017-02-18 14:05:44 -080084 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasfbe74572017-08-03 18:30:35 -070085 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Das00e553b2018-04-21 17:19:48 -070086 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Saurav Das68e1b6a2018-06-11 17:02:31 -070087 private static final long PURGE_DELAY = 1000; // ms
Charles Chanc22cef32016-04-29 14:38:22 -070088 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sangho80f11cb2015-04-01 13:05:26 -070089
90 private SegmentRoutingManager srManager;
91 private RoutingRulePopulator rulePopulator;
Shashikanth VH0637b162015-12-11 01:32:44 +053092 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
93 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho9b169e32015-04-14 16:27:13 -070094 private DeviceConfiguration config;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090095 private final Lock statusLock = new ReentrantLock();
96 private volatile Status populationStatus;
Yuta HIGUCHIebee2f12016-07-21 16:54:33 -070097 private ScheduledExecutorService executorService
Saurav Dasd1872b02016-12-02 15:43:47 -080098 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das49368392018-04-23 18:42:12 -070099 private ScheduledExecutorService executorServiceMstChg
100 = newScheduledThreadPool(1, groupedThreads("masterChg", "mstch-%d", log));
Saurav Das68e1b6a2018-06-11 17:02:31 -0700101 private ScheduledExecutorService executorServiceFRR
102 = newScheduledThreadPool(1, groupedThreads("fullRR", "fullRR-%d", log));
piera9941192019-04-24 16:12:47 +0200103 // Route populators - 0 will leverage available processors
104 private static final int DEFAULT_THREADS = 0;
105 private ExecutorService routePopulators;
Saurav Das49368392018-04-23 18:42:12 -0700106
Saurav Das00e553b2018-04-21 17:19:48 -0700107 private Instant lastRoutingChange = Instant.EPOCH;
Saurav Das68e1b6a2018-06-11 17:02:31 -0700108 private Instant lastFullReroute = Instant.EPOCH;
sangho80f11cb2015-04-01 13:05:26 -0700109
Saurav Das00e553b2018-04-21 17:19:48 -0700110 // Distributed store to keep track of ONOS instance that should program the
111 // device pair. There should be only one instance (the king) that programs the same pair.
Charles Chand66d6712018-03-29 16:03:41 -0700112 Map<Set<DeviceId>, NodeId> shouldProgram;
Charles Chanfbcb8812018-04-18 18:41:05 -0700113 Map<DeviceId, Boolean> shouldProgramCache;
Charles Chand66d6712018-03-29 16:03:41 -0700114
pierf331a492020-01-07 15:39:39 +0100115 // Distributed routes store to keep track of the routes already seen
116 // destination device is the key and target sw is the value
117 ConsistentMultimap<DeviceId, DeviceId> seenBeforeRoutes;
118
Saurav Das00e553b2018-04-21 17:19:48 -0700119 // Local store to keep track of all devices that this instance was responsible
120 // for programming in the last run. Helps to determine if mastership changed
121 // during a run - only relevant for programming as a result of topo change.
122 Set<DeviceId> lastProgrammed;
123
/**
 * Represents the default routing population status.
 */
public enum Status {
    /** Population process is not started yet. */
    IDLE,

    /** Population process started. */
    STARTED,

    /** Population process was aborted due to errors, mostly for groups not found. */
    ABORTED,

    /** Population process was finished successfully. */
    SUCCEEDED
}
137
/**
 * Creates a DefaultRoutingHandler object.
 *
 * Builds the "sr-should-program" consistent map and the "programmed-routes"
 * consistent multimap through the manager's storage service, creates the
 * local should-program cache, binds the handler to the manager via
 * {@link #update(SegmentRoutingManager)} and finally creates the executor
 * used by the route populators.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    // Purely local state first; it does not depend on the storage service.
    this.shouldProgramCache = Maps.newConcurrentMap();

    this.shouldProgram = srManager.storageService
            .<Set<DeviceId>, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build()
            .asJavaMap();

    this.seenBeforeRoutes = srManager.storageService
            .<DeviceId, DeviceId>consistentMultimapBuilder()
            .withName("programmed-routes")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build();

    update(srManager);

    // Created last so that a failure above does not leave an executor behind.
    this.routePopulators = new PredictableExecutor(
            DEFAULT_THREADS, groupedThreads("onos/sr", "r-populator-%d", log));
}
159
160 /**
161 * Updates a DefaultRoutingHandler object.
162 *
163 * @param srManager SegmentRoutingManager object
164 */
165 void update(SegmentRoutingManager srManager) {
sangho80f11cb2015-04-01 13:05:26 -0700166 this.srManager = srManager;
167 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho9b169e32015-04-14 16:27:13 -0700168 this.config = checkNotNull(srManager.deviceConfiguration);
sangho80f11cb2015-04-01 13:05:26 -0700169 this.populationStatus = Status.IDLE;
sanghofb7c7292015-04-13 15:15:58 -0700170 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Das00e553b2018-04-21 17:19:48 -0700171 this.lastProgrammed = Sets.newConcurrentHashSet();
sangho80f11cb2015-04-01 13:05:26 -0700172 }
173
174 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700175 * Returns an immutable copy of the current ECMP shortest-path graph as
176 * computed by this controller instance.
177 *
Saurav Das261c3002017-06-13 15:35:54 -0700178 * @return immutable copy of the current ECMP graph
Saurav Das62ae6792017-05-15 15:34:25 -0700179 */
180 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
181 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
182 currentEcmpSpgMap.entrySet().forEach(entry -> {
183 if (entry.getValue() != null) {
184 builder.put(entry.getKey(), entry.getValue());
185 }
186 });
187 return builder.build();
188 }
189
Saurav Dasfbe74572017-08-03 18:30:35 -0700190 /**
191 * Acquires the lock used when making routing changes.
192 */
193 public void acquireRoutingLock() {
194 statusLock.lock();
195 }
196
197 /**
198 * Releases the lock used when making routing changes.
199 */
200 public void releaseRoutingLock() {
201 statusLock.unlock();
202 }
203
204 /**
205 * Determines if routing in the network has been stable in the last
Charles Chan12a8a842020-02-14 13:23:57 -0800206 * STABILITY_THRESHOLD seconds, by comparing the current time to the last
Saurav Dasfbe74572017-08-03 18:30:35 -0700207 * routing change timestamp.
208 *
209 * @return true if stable
210 */
211 public boolean isRoutingStable() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700212 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
213 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700214 log.trace("Routing stable since {}s", now - last);
Saurav Dasfbe74572017-08-03 18:30:35 -0700215 return (now - last) > STABLITY_THRESHOLD;
216 }
217
Saurav Das49368392018-04-23 18:42:12 -0700218 /**
219 * Gracefully shuts down the defaultRoutingHandler. Typically called when
220 * the app is deactivated
221 */
222 public void shutdown() {
223 executorService.shutdown();
224 executorServiceMstChg.shutdown();
Saurav Das68e1b6a2018-06-11 17:02:31 -0700225 executorServiceFRR.shutdown();
piera9941192019-04-24 16:12:47 +0200226 routePopulators.shutdown();
Saurav Das49368392018-04-23 18:42:12 -0700227 }
Saurav Dasfbe74572017-08-03 18:30:35 -0700228
//////////////////////////////////////
//  Route path handling
//////////////////////////////////////

/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network:
 *      a) due to a configuration change
 *      b) due to a route-added event
 *      c) due to a change in the topology
 */
239
Saurav Das62ae6792017-05-15 15:34:25 -0700240 /**
Saurav Das261c3002017-06-13 15:35:54 -0700241 * Populates all routing rules to all switches. Typically triggered at
242 * startup or after a configuration event.
sangho80f11cb2015-04-01 13:05:26 -0700243 */
Saurav Das62ae6792017-05-15 15:34:25 -0700244 public void populateAllRoutingRules() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700245 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900246 statusLock.lock();
247 try {
Saurav Das261c3002017-06-13 15:35:54 -0700248 if (populationStatus == Status.STARTED) {
249 log.warn("Previous rule population is not finished. Cannot"
250 + " proceed with populateAllRoutingRules");
251 return;
252 }
253
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900254 populationStatus = Status.STARTED;
255 rulePopulator.resetCounter();
Saurav Das261c3002017-06-13 15:35:54 -0700256 log.info("Starting to populate all routing rules");
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900257 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sangho80f11cb2015-04-01 13:05:26 -0700258
Saurav Das261c3002017-06-13 15:35:54 -0700259 // take a snapshot of the topology
260 updatedEcmpSpgMap = new HashMap<>();
261 Set<EdgePair> edgePairs = new HashSet<>();
262 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart61e24e12017-11-30 18:23:42 -0800263 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das261c3002017-06-13 15:35:54 -0700264 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart61e24e12017-11-30 18:23:42 -0800265 new EcmpShortestPathGraph(dstSw, srManager);
266 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700267 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
268 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700269 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700270 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
271 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
272 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700273 }
Charles Chand66d6712018-03-29 16:03:41 -0700274
275 if (!shouldProgram(dstSw)) {
Saurav Das00e553b2018-04-21 17:19:48 -0700276 lastProgrammed.remove(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900277 continue;
Saurav Das00e553b2018-04-21 17:19:48 -0700278 } else {
279 lastProgrammed.add(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900280 }
Saurav Das00e553b2018-04-21 17:19:48 -0700281 // To do a full reroute, assume all route-paths have changed
Charles Chand66d6712018-03-29 16:03:41 -0700282 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart61e24e12017-11-30 18:23:42 -0800283 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
284 if (targetSw.equals(dev)) {
Saurav Das261c3002017-06-13 15:35:54 -0700285 continue;
286 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800287 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das261c3002017-06-13 15:35:54 -0700288 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900289 }
Saurav Das261c3002017-06-13 15:35:54 -0700290 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900291
pierf331a492020-01-07 15:39:39 +0100292 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
Saurav Das261c3002017-06-13 15:35:54 -0700293 if (!redoRouting(routeChanges, edgePairs, null)) {
294 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
295 populationStatus = Status.ABORTED;
296 log.warn("Failed to repopulate all routing rules.");
297 return;
sangho80f11cb2015-04-01 13:05:26 -0700298 }
299
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900300 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
301 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700302 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900303 rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700304 return;
pierdebd15c2019-04-19 20:55:53 +0200305 } catch (Exception e) {
306 log.error("populateAllRoutingRules thrown an exception: {}",
307 e.getMessage(), e);
308 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900309 } finally {
310 statusLock.unlock();
sangho80f11cb2015-04-01 13:05:26 -0700311 }
sangho80f11cb2015-04-01 13:05:26 -0700312 }
313
sanghofb7c7292015-04-13 15:15:58 -0700314 /**
Saurav Das261c3002017-06-13 15:35:54 -0700315 * Populate rules from all other edge devices to the connect-point(s)
316 * specified for the given subnets.
317 *
318 * @param cpts connect point(s) of the subnets being added
319 * @param subnets subnets being added
Charles Chan910be6a2017-08-23 14:46:43 -0700320 */
321 // XXX refactor
Saurav Das261c3002017-06-13 15:35:54 -0700322 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan6db55b92017-09-11 15:21:57 -0700323 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
324 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
325 return;
326 }
327
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700328 lastRoutingChange = Instant.now();
Saurav Das261c3002017-06-13 15:35:54 -0700329 statusLock.lock();
330 try {
331 if (populationStatus == Status.STARTED) {
332 log.warn("Previous rule population is not finished. Cannot"
333 + " proceed with routing rules for added routes");
334 return;
335 }
336 populationStatus = Status.STARTED;
337 rulePopulator.resetCounter();
Charles Chan910be6a2017-08-23 14:46:43 -0700338 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
339 subnets, cpts);
Saurav Das6430f412018-01-25 09:49:01 -0800340 // In principle an update to a subnet/prefix should not require a
341 // new ECMPspg calculation as it is not a topology event. As a
342 // result, we use the current/existing ECMPspg in the updated map
343 // used by the redoRouting method.
Saurav Das6de6ffd2018-02-09 09:15:03 -0800344 if (updatedEcmpSpgMap == null) {
345 updatedEcmpSpgMap = new HashMap<>();
346 }
Saurav Das6430f412018-01-25 09:49:01 -0800347 currentEcmpSpgMap.entrySet().forEach(entry -> {
348 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase321cff2018-02-09 17:26:45 -0800349 if (log.isTraceEnabled()) {
350 log.trace("Root switch: {}", entry.getKey());
351 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Das6430f412018-01-25 09:49:01 -0800352 }
353 });
pierf331a492020-01-07 15:39:39 +0100354 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
Saurav Das261c3002017-06-13 15:35:54 -0700355 Set<EdgePair> edgePairs = new HashSet<>();
356 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
357 boolean handleRouting = false;
358
359 if (cpts.size() == 2) {
360 // ensure connect points are edge-pairs
361 Iterator<ConnectPoint> iter = cpts.iterator();
362 DeviceId dev1 = iter.next().deviceId();
Charles Chan6dbcd252018-04-02 11:46:38 -0700363 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
364 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
365 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700366 } else {
367 log.warn("Connectpoints {} for subnets {} not on "
368 + "pair-devices.. aborting populateSubnet", cpts, subnets);
369 populationStatus = Status.ABORTED;
370 return;
371 }
372 for (ConnectPoint cp : cpts) {
Saurav Das6430f412018-01-25 09:49:01 -0800373 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
374 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700375 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800376 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
377 log.warn("populateSubnet: no updated graph for dev:{}"
378 + " ... creating", cp.deviceId());
379 }
Charles Chand66d6712018-03-29 16:03:41 -0700380 if (!shouldProgram(cp.deviceId())) {
Saurav Das261c3002017-06-13 15:35:54 -0700381 continue;
382 }
383 handleRouting = true;
384 }
385 } else {
386 // single connect point
387 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Das6430f412018-01-25 09:49:01 -0800388 if (updatedEcmpSpgMap.get(dstSw) == null) {
389 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700390 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800391 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
392 log.warn("populateSubnet: no updated graph for dev:{}"
393 + " ... creating", dstSw);
394 }
Charles Chand66d6712018-03-29 16:03:41 -0700395 handleRouting = shouldProgram(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700396 }
397
398 if (!handleRouting) {
399 log.debug("This instance is not handling ecmp routing to the "
400 + "connectPoint(s) {}", cpts);
401 populationStatus = Status.ABORTED;
402 return;
403 }
404
405 // if it gets here, this instance should handle routing for the
406 // connectpoint(s). Assume all route-paths have to be updated to
407 // the connectpoint(s) with the following exceptions
408 // 1. if target is non-edge no need for routing rules
409 // 2. if target is one of the connectpoints
410 for (ConnectPoint cp : cpts) {
411 DeviceId dstSw = cp.deviceId();
412 for (Device targetSw : srManager.deviceService.getDevices()) {
413 boolean isEdge = false;
414 try {
415 isEdge = config.isEdgeDevice(targetSw.id());
416 } catch (DeviceConfigNotFoundException e) {
Charles Chaneaf3c9b2018-02-16 17:20:54 -0800417 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
418 continue;
Saurav Das261c3002017-06-13 15:35:54 -0700419 }
Charles Chan6dbcd252018-04-02 11:46:38 -0700420 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700421 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chan6dbcd252018-04-02 11:46:38 -0700422 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das261c3002017-06-13 15:35:54 -0700423 continue;
424 }
425 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
426 }
427 }
428
429 if (!redoRouting(routeChanges, edgePairs, subnets)) {
430 log.debug("populateSubnet: populationStatus is ABORTED");
431 populationStatus = Status.ABORTED;
432 log.warn("Failed to repopulate the rules for subnet.");
433 return;
434 }
435
436 log.debug("populateSubnet: populationStatus is SUCCEEDED");
437 populationStatus = Status.SUCCEEDED;
438 log.info("Completed subnet population. Total # of rules pushed : {}",
439 rulePopulator.getCounter());
440 return;
441
pierdebd15c2019-04-19 20:55:53 +0200442 } catch (Exception e) {
443 log.error("populateSubnet thrown an exception: {}",
444 e.getMessage(), e);
445 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700446 } finally {
447 statusLock.unlock();
448 }
449 }
450
451 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700452 * Populates the routing rules or makes hash group changes according to the
453 * route-path changes due to link failure, switch failure or link up. This
454 * method should only be called for one of these three possible event-types.
Saurav Dasdc7f2752018-03-18 21:28:15 -0700455 * Note that when a switch goes away, all of its links fail as well, but
456 * this is handled as a single switch removal event.
sanghofb7c7292015-04-13 15:15:58 -0700457 *
Saurav Dasdc7f2752018-03-18 21:28:15 -0700458 * @param linkDown the single failed link, or null for other conditions such
459 * as link-up or a removed switch
Saurav Das62ae6792017-05-15 15:34:25 -0700460 * @param linkUp the single link up, or null for other conditions such as
Saurav Dasdc7f2752018-03-18 21:28:15 -0700461 * link-down or a removed switch
462 * @param switchDown the removed switch, or null for other conditions such
463 * as link-down or link-up
464 * @param seenBefore true if this event is for a linkUp or linkDown for a
465 * seen link
466 */
467 // TODO This method should be refactored into three separated methods
Charles Chan9d2dd552018-06-19 20:56:33 -0700468 public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
469 DeviceId switchDown, boolean seenBefore) {
Saurav Dasdc7f2752018-03-18 21:28:15 -0700470 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
471 .count() != 1) {
Saurav Das62ae6792017-05-15 15:34:25 -0700472 log.warn("Only one event can be handled for link status change .. aborting");
473 return;
474 }
Saurav Dasdc7f2752018-03-18 21:28:15 -0700475
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700476 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900477 statusLock.lock();
478 try {
sanghofb7c7292015-04-13 15:15:58 -0700479
480 if (populationStatus == Status.STARTED) {
Saurav Das261c3002017-06-13 15:35:54 -0700481 log.warn("Previous rule population is not finished. Cannot"
Saurav Das6430f412018-01-25 09:49:01 -0800482 + " proceeed with routingRules for Topology change");
Saurav Das62ae6792017-05-15 15:34:25 -0700483 return;
sanghofb7c7292015-04-13 15:15:58 -0700484 }
485
Saurav Das261c3002017-06-13 15:35:54 -0700486 // Take snapshots of the topology
sangho28d0b6d2015-05-07 13:30:57 -0700487 updatedEcmpSpgMap = new HashMap<>();
Saurav Das261c3002017-06-13 15:35:54 -0700488 Set<EdgePair> edgePairs = new HashSet<>();
sangho28d0b6d2015-05-07 13:30:57 -0700489 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH0637b162015-12-11 01:32:44 +0530490 EcmpShortestPathGraph ecmpSpgUpdated =
491 new EcmpShortestPathGraph(sw.id(), srManager);
sangho28d0b6d2015-05-07 13:30:57 -0700492 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700493 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
494 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700495 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700496 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
497 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
498 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700499 }
sangho28d0b6d2015-05-07 13:30:57 -0700500 }
501
Saurav Das6430f412018-01-25 09:49:01 -0800502 log.info("Starting to populate routing rules from Topology change");
sanghodf0153f2015-05-05 14:13:34 -0700503
sanghofb7c7292015-04-13 15:15:58 -0700504 Set<ArrayList<DeviceId>> routeChanges;
Saurav Das62ae6792017-05-15 15:34:25 -0700505 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700506 + "populationStatus is STARTED");
pierf331a492020-01-07 15:39:39 +0100507 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
sanghofb7c7292015-04-13 15:15:58 -0700508 populationStatus = Status.STARTED;
Saurav Das6430f412018-01-25 09:49:01 -0800509 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
510 boolean hashGroupsChanged = false;
Saurav Das1b391d52016-11-29 14:27:25 -0800511 // try optimized re-routing
Saurav Das62ae6792017-05-15 15:34:25 -0700512 if (linkDown == null) {
513 // either a linkUp or a switchDown - compute all route changes by
514 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dascea556f2018-03-05 14:37:16 -0800515 routeChanges = computeRouteChange(switchDown);
Saurav Das62ae6792017-05-15 15:34:25 -0700516
pier572d4a92019-04-25 18:51:51 +0200517 // deal with linkUp
518 if (linkUp != null) {
519 // deal with linkUp of a seen-before link
520 if (seenBefore) {
521 // link previously seen before
522 // do hash-bucket changes instead of a re-route
523 processHashGroupChangeForLinkUp(routeChanges);
524 // clear out routesChanges so a re-route is not attempted
525 routeChanges = ImmutableSet.of();
526 hashGroupsChanged = true;
527 } else {
528 // do hash-bucket changes first, method will return changed routes;
529 // for each route not changed it will perform a reroute
530 Set<ArrayList<DeviceId>> changedRoutes = processHashGroupChangeForLinkUp(routeChanges);
531 Set<ArrayList<DeviceId>> routeChangesTemp = getExpandedRoutes(routeChanges);
532 changedRoutes.forEach(routeChangesTemp::remove);
533 // if routesChanges is empty a re-route is not attempted
534 routeChanges = routeChangesTemp;
535 for (ArrayList<DeviceId> route : routeChanges) {
536 log.debug("remaining routes Target -> Root");
537 if (route.size() == 1) {
538 log.debug(" : all -> {}", route.get(0));
539 } else {
540 log.debug(" : {} -> {}", route.get(0), route.get(1));
541 }
542 }
543 // Mark hash groups as changed
544 if (!changedRoutes.isEmpty()) {
545 hashGroupsChanged = true;
546 }
547 }
548
Saurav Das62ae6792017-05-15 15:34:25 -0700549 }
550
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700551 //deal with switchDown
552 if (switchDown != null) {
pier572d4a92019-04-25 18:51:51 +0200553 processHashGroupChangeForFailure(routeChanges, switchDown);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700554 // clear out routesChanges so a re-route is not attempted
555 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800556 hashGroupsChanged = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700557 }
sanghofb7c7292015-04-13 15:15:58 -0700558 } else {
Saurav Das62ae6792017-05-15 15:34:25 -0700559 // link has gone down
560 // Compare existing ECMP SPG only with the link that went down
561 routeChanges = computeDamagedRoutes(linkDown);
pier572d4a92019-04-25 18:51:51 +0200562 processHashGroupChangeForFailure(routeChanges, null);
Saurav Das68e1b6a2018-06-11 17:02:31 -0700563 // clear out routesChanges so a re-route is not attempted
564 routeChanges = ImmutableSet.of();
565 hashGroupsChanged = true;
Saurav Dasb149be12016-06-07 10:08:06 -0700566 }
567
sanghofb7c7292015-04-13 15:15:58 -0700568 if (routeChanges.isEmpty()) {
Saurav Das6430f412018-01-25 09:49:01 -0800569 if (hashGroupsChanged) {
570 log.info("Hash-groups changed for link status change");
571 } else {
572 log.info("No re-route or re-hash attempted for the link"
573 + " status change");
574 updatedEcmpSpgMap.keySet().forEach(devId -> {
575 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
576 log.debug("Updating ECMPspg for remaining dev:{}", devId);
577 });
578 }
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700579 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700580 populationStatus = Status.SUCCEEDED;
Saurav Das62ae6792017-05-15 15:34:25 -0700581 return;
sanghofb7c7292015-04-13 15:15:58 -0700582 }
583
pier572d4a92019-04-25 18:51:51 +0200584 if (hashGroupsChanged) {
585 log.debug("Hash-groups changed for link status change");
586 }
587
Saurav Das62ae6792017-05-15 15:34:25 -0700588 // reroute of routeChanges
Saurav Das261c3002017-06-13 15:35:54 -0700589 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700590 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700591 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700592 log.info("Completed repopulation of rules for link-status change."
593 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700594 return;
sanghofb7c7292015-04-13 15:15:58 -0700595 } else {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700596 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sanghofb7c7292015-04-13 15:15:58 -0700597 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700598 log.warn("Failed to repopulate the rules for link status change.");
Saurav Das62ae6792017-05-15 15:34:25 -0700599 return;
sanghofb7c7292015-04-13 15:15:58 -0700600 }
pierdebd15c2019-04-19 20:55:53 +0200601 } catch (Exception e) {
602 log.error("populateRoutingRulesForLinkStatusChange thrown an exception: {}",
603 e.getMessage(), e);
604 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900605 } finally {
606 statusLock.unlock();
sanghofb7c7292015-04-13 15:15:58 -0700607 }
608 }
609
Saurav Das62ae6792017-05-15 15:34:25 -0700610 /**
Saurav Das261c3002017-06-13 15:35:54 -0700611 * Processes a set a route-path changes by reprogramming routing rules and
612 * creating new hash-groups or editing them if necessary. This method also
613 * determines the next-hops for the route-path from the src-switch (target)
614 * of the path towards the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -0700615 *
Saurav Das261c3002017-06-13 15:35:54 -0700616 * @param routeChanges a set of route-path changes, where each route-path is
617 * a list with its first element the src-switch (target)
618 * of the path, and the second element the dst-switch of
619 * the path.
620 * @param edgePairs a set of edge-switches that are paired by configuration
621 * @param subnets a set of prefixes that need to be populated in the routing
622 * table of the target switch in the route-path. Can be null,
623 * in which case all the prefixes belonging to the dst-switch
624 * will be populated in the target switch
625 * @return true if successful in repopulating all routes
Saurav Das62ae6792017-05-15 15:34:25 -0700626 */
Saurav Das261c3002017-06-13 15:35:54 -0700627 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
628 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
629 // first make every entry two-elements
pier572d4a92019-04-25 18:51:51 +0200630 Set<ArrayList<DeviceId>> changedRoutes = getExpandedRoutes(routeChanges);
631 // no valid routes - fail fast
632 if (changedRoutes.isEmpty()) {
633 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700634 }
635
pierf331a492020-01-07 15:39:39 +0100636 // Temporary stores the changed routes
637 Set<ArrayList<DeviceId>> tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700638 // now process changedRoutes according to edgePairs
639 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
640 return false; //abort routing and fail fast
641 }
pierf331a492020-01-07 15:39:39 +0100642 // Calculate the programmed routes pointing to the pairs
643 Set<ArrayList<DeviceId>> programmedPairRoutes = Sets.difference(tempRoutes, changedRoutes);
644 log.debug("Evaluating programmed pair routes");
645 storeSeenBeforeRoutes(programmedPairRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700646
pierf331a492020-01-07 15:39:39 +0100647 // Temporary stores the left routes
648 tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700649 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Das6430f412018-01-25 09:49:01 -0800650 Set<DeviceId> updatedDevices = Sets.newHashSet();
651 if (!redoRoutingIndividualDests(subnets, changedRoutes,
652 updatedDevices)) {
Saurav Das261c3002017-06-13 15:35:54 -0700653 return false; //abort routing and fail fast
654 }
pierf331a492020-01-07 15:39:39 +0100655 // Calculate the individual programmed routes
656 Set<ArrayList<DeviceId>> programmedIndividualRoutes = Sets.difference(tempRoutes, changedRoutes);
657 log.debug("Evaluating individual programmed routes");
658 storeSeenBeforeRoutes(programmedIndividualRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700659
Saurav Das261c3002017-06-13 15:35:54 -0700660 // update ecmpSPG for all edge-pairs
661 for (EdgePair ep : edgePairs) {
662 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
663 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
664 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
665 }
Saurav Das6430f412018-01-25 09:49:01 -0800666
667 // here is where we update all devices not touched by this instance
668 updatedEcmpSpgMap.keySet().stream()
669 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
670 .filter(devId -> !updatedDevices.contains(devId))
671 .forEach(devId -> {
672 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
673 log.debug("Updating ECMPspg for remaining dev:{}", devId);
674 });
Saurav Das261c3002017-06-13 15:35:54 -0700675 return true;
676 }
677
678 /**
pierf331a492020-01-07 15:39:39 +0100679 * Stores the routes seen before. Routes are two-elements arrays.
680 * @param seenRoutes seen before routes
681 */
682 private void storeSeenBeforeRoutes(Set<ArrayList<DeviceId>> seenRoutes) {
683 Set<DeviceId> nextHops;
684 for (ArrayList<DeviceId> route : seenRoutes) {
685 log.debug("Route {} -> {} has been programmed", route.get(0), route.get(1));
686 nextHops = getNextHops(route.get(0), route.get(1));
687 // No valid next hops - cannot be considered a programmed route
688 if (nextHops.isEmpty()) {
689 log.debug("Could not find next hop from target:{} --> dst {} "
690 + "skipping this route", route.get(0), route.get(1));
691 continue;
692 }
693 // Already present - do not add again
694 if (seenBeforeRoutes.containsEntry(route.get(1), route.get(0))) {
695 log.debug("Route from target:{} --> dst {} " +
696 "already present, skipping this route", route.get(0), route.get(1));
697 continue;
698 }
699 seenBeforeRoutes.put(route.get(1), route.get(0));
700 }
701 }
702
703 /**
Saurav Das261c3002017-06-13 15:35:54 -0700704 * Programs targetSw in the changedRoutes for given prefixes reachable by
705 * an edgePair. If no prefixes are given, the method will use configured
706 * subnets/prefixes. If some configured subnets belong only to a specific
707 * destination in the edgePair, then the target switch will be programmed
708 * only to that destination.
709 *
710 * @param edgePairs set of edge-pairs for which target will be programmed
711 * @param subnets a set of prefixes that need to be populated in the routing
712 * table of the target switch in the changedRoutes. Can be null,
713 * in which case all the configured prefixes belonging to the
714 * paired switches will be populated in the target switch
715 * @param changedRoutes a set of route-path changes, where each route-path is
716 * a list with its first element the src-switch (target)
717 * of the path, and the second element the dst-switch of
718 * the path.
719 * @return true if successful
720 */
piera9941192019-04-24 16:12:47 +0200721 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs, Set<IpPrefix> subnets,
722 Set<ArrayList<DeviceId>> changedRoutes) {
Saurav Das261c3002017-06-13 15:35:54 -0700723 for (EdgePair ep : edgePairs) {
724 // temp store for a target's changedRoutes to this edge-pair
725 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
726 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
727 while (i.hasNext()) {
728 ArrayList<DeviceId> route = i.next();
729 DeviceId dstSw = route.get(1);
730 if (ep.includes(dstSw)) {
731 // routeChange for edge pair found
732 // sort by target iff target is edge and remove from changedRoutes
733 DeviceId targetSw = route.get(0);
734 try {
735 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
736 continue;
737 }
738 } catch (DeviceConfigNotFoundException e) {
739 log.warn(e.getMessage() + "aborting redoRouting");
740 return false;
741 }
742 // route is from another edge to this edge-pair
743 if (targetRoutes.containsKey(targetSw)) {
744 targetRoutes.get(targetSw).add(route);
745 } else {
746 Set<ArrayList<DeviceId>> temp = new HashSet<>();
747 temp.add(route);
748 targetRoutes.put(targetSw, temp);
749 }
750 i.remove();
751 }
752 }
753 // so now for this edgepair we have a per target set of routechanges
754 // process target->edgePair route
piera9941192019-04-24 16:12:47 +0200755 List<Future<Boolean>> futures = Lists.newArrayList();
pierf331a492020-01-07 15:39:39 +0100756 for (Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
Saurav Das261c3002017-06-13 15:35:54 -0700757 targetRoutes.entrySet()) {
758 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
759 entry.getKey(), ep);
piera9941192019-04-24 16:12:47 +0200760 futures.add(routePopulators.submit(new RedoRoutingEdgePair(entry.getKey(), entry.getValue(),
761 subnets, ep)));
762 }
763 if (!checkJobs(futures)) {
764 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700765 }
766 // if it gets here it has succeeded for all targets to this edge-pair
767 }
768 return true;
769 }
770
piera9941192019-04-24 16:12:47 +0200771 private final class RedoRoutingEdgePair implements PickyCallable<Boolean> {
772 private DeviceId targetSw;
773 private Set<ArrayList<DeviceId>> routes;
774 private Set<IpPrefix> subnets;
775 private EdgePair ep;
776
777 /**
778 * Builds a RedoRoutingEdgePair task which provides a result.
779 *
780 * @param targetSw the target switch
781 * @param routes the changed routes
782 * @param subnets the subnets
783 * @param ep the edge pair
784 */
785 RedoRoutingEdgePair(DeviceId targetSw, Set<ArrayList<DeviceId>> routes,
786 Set<IpPrefix> subnets, EdgePair ep) {
787 this.targetSw = targetSw;
788 this.routes = routes;
789 this.subnets = subnets;
790 this.ep = ep;
791 }
792
793 @Override
794 public Boolean call() throws Exception {
795 return redoRoutingEdgePair();
796 }
797
798 @Override
799 public int hint() {
800 return targetSw.hashCode();
801 }
802
803 private boolean redoRoutingEdgePair() {
804 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
805 routes.forEach(route -> {
806 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
807 log.debug("route: target {} -> dst {} found with next-hops {}",
808 route.get(0), route.get(1), nhops);
809 perDstNextHops.put(route.get(1), nhops);
810 });
811
812 List<Set<IpPrefix>> batchedSubnetDev1, batchedSubnetDev2;
813 if (subnets != null) {
814 batchedSubnetDev1 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
815 batchedSubnetDev2 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
816 } else {
817 batchedSubnetDev1 = config.getBatchedSubnets(ep.dev1);
818 batchedSubnetDev2 = config.getBatchedSubnets(ep.dev2);
819 }
820 List<Set<IpPrefix>> batchedSubnetBoth = Streams
821 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.intersection(a, b))
822 .filter(set -> !set.isEmpty())
823 .collect(Collectors.toList());
824 List<Set<IpPrefix>> batchedSubnetDev1Only = Streams
825 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(a, b))
826 .filter(set -> !set.isEmpty())
827 .collect(Collectors.toList());
828 List<Set<IpPrefix>> batchedSubnetDev2Only = Streams
829 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(b, a))
830 .filter(set -> !set.isEmpty())
831 .collect(Collectors.toList());
832
833 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
834 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
835
836 // handle routing to subnets common to edge-pair
837 // only if the targetSw is not part of the edge-pair and there
838 // exists a next hop to at least one of the devices in the edge-pair
839 if (!ep.includes(targetSw)
840 && ((nhDev1 != null && !nhDev1.isEmpty()) || (nhDev2 != null && !nhDev2.isEmpty()))) {
841 log.trace("getSubnets on both {} and {}: {}", ep.dev1, ep.dev2, batchedSubnetBoth);
842 for (Set<IpPrefix> prefixes : batchedSubnetBoth) {
843 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, ep.dev2,
844 perDstNextHops, prefixes)) {
845 return false; // abort everything and fail fast
846 }
847 }
848
849 }
850 // handle routing to subnets that only belong to dev1 only if
851 // a next-hop exists from the target to dev1
852 if (!batchedSubnetDev1Only.isEmpty() &&
853 batchedSubnetDev1Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
854 nhDev1 != null && !nhDev1.isEmpty()) {
855 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
856 onlyDev1NextHops.put(ep.dev1, nhDev1);
857 log.trace("getSubnets on {} only: {}", ep.dev1, batchedSubnetDev1Only);
858 for (Set<IpPrefix> prefixes : batchedSubnetDev1Only) {
859 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, null,
860 onlyDev1NextHops, prefixes)) {
861 return false; // abort everything and fail fast
862 }
863 }
864 }
865 // handle routing to subnets that only belong to dev2 only if
866 // a next-hop exists from the target to dev2
867 if (!batchedSubnetDev2Only.isEmpty() &&
868 batchedSubnetDev2Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
869 nhDev2 != null && !nhDev2.isEmpty()) {
870 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
871 onlyDev2NextHops.put(ep.dev2, nhDev2);
872 log.trace("getSubnets on {} only: {}", ep.dev2, batchedSubnetDev2Only);
873 for (Set<IpPrefix> prefixes : batchedSubnetDev2Only) {
874 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev2, null,
875 onlyDev2NextHops, prefixes)) {
876 return false; // abort everything and fail fast
877 }
878 }
879 }
880 return true;
881 }
882 }
883
Saurav Das261c3002017-06-13 15:35:54 -0700884 /**
885 * Programs targetSw in the changedRoutes for given prefixes reachable by
886 * a destination switch that is not part of an edge-pair.
887 * If no prefixes are given, the method will use configured subnets/prefixes.
888 *
889 * @param subnets a set of prefixes that need to be populated in the routing
890 * table of the target switch in the changedRoutes. Can be null,
891 * in which case all the configured prefixes belonging to the
892 * paired switches will be populated in the target switch
893 * @param changedRoutes a set of route-path changes, where each route-path is
894 * a list with its first element the src-switch (target)
895 * of the path, and the second element the dst-switch of
896 * the path.
897 * @return true if successful
898 */
piera9941192019-04-24 16:12:47 +0200899 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets, Set<ArrayList<DeviceId>> changedRoutes,
Saurav Das6430f412018-01-25 09:49:01 -0800900 Set<DeviceId> updatedDevices) {
Saurav Das261c3002017-06-13 15:35:54 -0700901 // aggregate route-path changes for each dst device
902 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
903 new HashMap<>();
904 for (ArrayList<DeviceId> route: changedRoutes) {
905 DeviceId dstSw = route.get(1);
906 ArrayList<ArrayList<DeviceId>> deviceRoutes =
907 routesBydevice.get(dstSw);
908 if (deviceRoutes == null) {
909 deviceRoutes = new ArrayList<>();
910 routesBydevice.put(dstSw, deviceRoutes);
911 }
912 deviceRoutes.add(route);
913 }
piera9941192019-04-24 16:12:47 +0200914 // iterate over the impacted devices
Saurav Das261c3002017-06-13 15:35:54 -0700915 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
916 ArrayList<ArrayList<DeviceId>> deviceRoutes =
917 routesBydevice.get(impactedDstDevice);
piera9941192019-04-24 16:12:47 +0200918 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das261c3002017-06-13 15:35:54 -0700919 for (ArrayList<DeviceId> route: deviceRoutes) {
920 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
921 route.get(0), route.get(1));
piera9941192019-04-24 16:12:47 +0200922 futures.add(routePopulators.submit(new RedoRoutingIndividualDest(subnets, route)));
pierf331a492020-01-07 15:39:39 +0100923 changedRoutes.remove(route);
piera9941192019-04-24 16:12:47 +0200924 }
925 // check the execution of each job
926 if (!checkJobs(futures)) {
927 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700928 }
929 //Only if all the flows for all impacted routes to a
930 //specific target are pushed successfully, update the
931 //ECMP graph for that target. Or else the next event
932 //would not see any changes in the ECMP graphs.
933 //In another case, the target switch has gone away, so
934 //routes can't be installed. In that case, the current map
935 //is updated here, without any flows being pushed.
936 currentEcmpSpgMap.put(impactedDstDevice,
937 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Das6430f412018-01-25 09:49:01 -0800938 updatedDevices.add(impactedDstDevice);
Saurav Das261c3002017-06-13 15:35:54 -0700939 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
940 }
941 return true;
942 }
943
piera9941192019-04-24 16:12:47 +0200944 private final class RedoRoutingIndividualDest implements PickyCallable<Boolean> {
945 private DeviceId targetSw;
946 private ArrayList<DeviceId> route;
947 private Set<IpPrefix> subnets;
948
949 /**
950 * Builds a RedoRoutingIndividualDest task, which provides a result.
951 *
952 * @param subnets a set of prefixes
953 * @param route a route-path change
954 */
955 RedoRoutingIndividualDest(Set<IpPrefix> subnets, ArrayList<DeviceId> route) {
956 this.targetSw = route.get(0);
957 this.route = route;
958 this.subnets = subnets;
959 }
960
961 @Override
962 public Boolean call() throws Exception {
963 DeviceId dstSw = route.get(1); // same as impactedDstDevice
964 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
965 if (nextHops.isEmpty()) {
966 log.debug("Could not find next hop from target:{} --> dst {} "
967 + "skipping this route", targetSw, dstSw);
968 return true;
969 }
970 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
971 nhops.put(dstSw, nextHops);
972 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
973 (subnets == null) ? Sets.newHashSet() : subnets)) {
974 return false; // abort routing and fail fast
975 }
976 log.debug("Populating flow rules from target: {} to dst: {}"
977 + " is successful", targetSw, dstSw);
978 return true;
979 }
980
981 @Override
982 public int hint() {
983 return targetSw.hashCode();
984 }
985 }
986
Saurav Das261c3002017-06-13 15:35:54 -0700987 /**
988 * Populate ECMP rules for subnets from target to destination via nexthops.
989 *
990 * @param targetSw Device ID of target switch in which rules will be programmed
991 * @param destSw1 Device ID of final destination switch to which the rules will forward
992 * @param destSw2 Device ID of paired destination switch to which the rules will forward
993 * A null deviceId indicates packets should only be sent to destSw1
Saurav Das97241862018-02-14 14:14:54 -0800994 * @param nextHops Map of a set of next hops per destSw
Saurav Das261c3002017-06-13 15:35:54 -0700995 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
996 * @return true if it succeeds in populating rules
997 */ // refactor
piera9941192019-04-24 16:12:47 +0200998 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destSw1, DeviceId destSw2,
999 Map<DeviceId, Set<DeviceId>> nextHops, Set<IpPrefix> subnets) {
Saurav Das261c3002017-06-13 15:35:54 -07001000 boolean result;
1001 // If both target switch and dest switch are edge routers, then set IP
1002 // rule for both subnet and router IP.
1003 boolean targetIsEdge;
1004 boolean dest1IsEdge;
1005 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
1006 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
1007
1008 try {
1009 targetIsEdge = config.isEdgeDevice(targetSw);
1010 dest1IsEdge = config.isEdgeDevice(destSw1);
1011 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
1012 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
1013 if (destSw2 != null) {
1014 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
1015 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
1016 }
1017 } catch (DeviceConfigNotFoundException e) {
1018 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Das62ae6792017-05-15 15:34:25 -07001019 return false;
1020 }
Saurav Das261c3002017-06-13 15:35:54 -07001021
1022 if (targetIsEdge && dest1IsEdge) {
Charles Chan19b70032019-04-17 14:20:26 -07001023 List<Set<IpPrefix>> batchedSubnets;
1024 if (subnets != null && !subnets.isEmpty()) {
1025 batchedSubnets = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
1026 } else {
1027 batchedSubnets = config.getBatchedSubnets(destSw1);
1028 }
Saurav Das97241862018-02-14 14:14:54 -08001029 // XXX - Rethink this - ignoring routerIPs in all other switches
1030 // even edge to edge switches
Saurav Das261c3002017-06-13 15:35:54 -07001031 /*subnets.add(dest1RouterIpv4.toIpPrefix());
1032 if (dest1RouterIpv6 != null) {
1033 subnets.add(dest1RouterIpv6.toIpPrefix());
1034 }
1035 if (destSw2 != null && dest2RouterIpv4 != null) {
1036 subnets.add(dest2RouterIpv4.toIpPrefix());
1037 if (dest2RouterIpv6 != null) {
1038 subnets.add(dest2RouterIpv6.toIpPrefix());
1039 }
1040 }*/
Charles Chan19b70032019-04-17 14:20:26 -07001041 log.trace("getSubnets on {}: {}", destSw1, batchedSubnets);
1042 for (Set<IpPrefix> prefixes : batchedSubnets) {
1043 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
1044 + "for subnets {}", targetSw, destSw1,
1045 (destSw2 != null) ? ("& " + destSw2) : "",
1046 prefixes);
1047 if (!rulePopulator.populateIpRuleForSubnet(targetSw, prefixes, destSw1, destSw2, nextHops)) {
1048 return false;
1049 }
Saurav Das261c3002017-06-13 15:35:54 -07001050 }
Saurav Das62ae6792017-05-15 15:34:25 -07001051 }
Saurav Das261c3002017-06-13 15:35:54 -07001052
1053 if (!targetIsEdge && dest1IsEdge) {
1054 // MPLS rules in all non-edge target devices. These rules are for
1055 // individual destinations, even if the dsts are part of edge-pairs.
1056 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1057 + "all MPLS rules", targetSw, destSw1);
piera9941192019-04-24 16:12:47 +02001058 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Saurav Das261c3002017-06-13 15:35:54 -07001059 if (!result) {
1060 return false;
1061 }
1062 if (dest1RouterIpv6 != null) {
Saurav Das97241862018-02-14 14:14:54 -08001063 int v4sid = 0, v6sid = 0;
1064 try {
1065 v4sid = config.getIPv4SegmentId(destSw1);
1066 v6sid = config.getIPv6SegmentId(destSw1);
1067 } catch (DeviceConfigNotFoundException e) {
1068 log.warn(e.getMessage());
1069 }
1070 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001071 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Saurav Das97241862018-02-14 14:14:54 -08001072 dest1RouterIpv6);
1073 if (!result) {
1074 return false;
1075 }
Saurav Das261c3002017-06-13 15:35:54 -07001076 }
1077 }
1078 }
1079
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001080 if (!targetIsEdge && !dest1IsEdge) {
1081 // MPLS rules for inter-connected spines
1082 // can be merged with above if, left it here for clarity
1083 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1084 + "all MPLS rules", targetSw, destSw1);
1085
piera9941192019-04-24 16:12:47 +02001086 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001087 if (!result) {
1088 return false;
1089 }
1090
1091 if (dest1RouterIpv6 != null) {
1092 int v4sid = 0, v6sid = 0;
1093 try {
1094 v4sid = config.getIPv4SegmentId(destSw1);
1095 v6sid = config.getIPv6SegmentId(destSw1);
1096 } catch (DeviceConfigNotFoundException e) {
1097 log.warn(e.getMessage());
1098 }
1099 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001100 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001101 dest1RouterIpv6);
1102 if (!result) {
1103 return false;
1104 }
1105 }
1106 }
1107 }
1108
Saurav Das261c3002017-06-13 15:35:54 -07001109 // To save on ECMP groups
1110 // avoid MPLS rules in non-edge-devices to non-edge-devices
1111 // avoid MPLS transit rules in edge-devices
1112 // avoid loopback IP rules in edge-devices to non-edge-devices
1113 return true;
Saurav Das62ae6792017-05-15 15:34:25 -07001114 }
1115
1116 /**
pier572d4a92019-04-25 18:51:51 +02001117 * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
Saurav Das62ae6792017-05-15 15:34:25 -07001118 *
1119 * @param routeChanges a set of route-path changes, where each route-path is
1120 * a list with its first element the src-switch of the path
1121 * and the second element the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -07001122 * @param failedSwitch the switchId if the route changes are for a failed switch,
1123 * otherwise null
1124 */
pier572d4a92019-04-25 18:51:51 +02001125 private void processHashGroupChangeForFailure(Set<ArrayList<DeviceId>> routeChanges,
1126 DeviceId failedSwitch) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001127 // first, ensure each routeChanges entry has two elements
pier572d4a92019-04-25 18:51:51 +02001128 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
Saurav Das6430f412018-01-25 09:49:01 -08001129 boolean someFailed = false;
pier572d4a92019-04-25 18:51:51 +02001130 boolean success;
Saurav Das6430f412018-01-25 09:49:01 -08001131 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001132 for (ArrayList<DeviceId> route : changedRoutes) {
1133 DeviceId targetSw = route.get(0);
1134 DeviceId dstSw = route.get(1);
pier572d4a92019-04-25 18:51:51 +02001135 success = fixHashGroupsForRoute(route, true);
1136 // it's possible that we cannot fix hash groups for a route
1137 // if the target switch has failed. Nevertheless the ecmp graph
1138 // for the impacted switch must still be updated.
1139 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
1140 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1141 currentEcmpSpgMap.remove(targetSw);
1142 log.debug("Updating ECMPspg for dst:{} removing failed switch "
1143 + "target:{}", dstSw, targetSw);
1144 updatedDevices.add(targetSw);
1145 updatedDevices.add(dstSw);
1146 continue;
pierf331a492020-01-07 15:39:39 +01001147
pier572d4a92019-04-25 18:51:51 +02001148 }
1149 //linkfailed - update both sides
1150 if (success) {
1151 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1152 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1153 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
1154 + " or switchdown", dstSw, targetSw);
1155 updatedDevices.add(targetSw);
1156 updatedDevices.add(dstSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001157 } else {
pier572d4a92019-04-25 18:51:51 +02001158 someFailed = true;
Saurav Das62ae6792017-05-15 15:34:25 -07001159 }
1160 }
Saurav Das6430f412018-01-25 09:49:01 -08001161 if (!someFailed) {
1162 // here is where we update all devices not touched by this instance
1163 updatedEcmpSpgMap.keySet().stream()
1164 .filter(devId -> !updatedDevices.contains(devId))
1165 .forEach(devId -> {
1166 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1167 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1168 });
1169 }
Saurav Das62ae6792017-05-15 15:34:25 -07001170 }
1171
1172 /**
pier572d4a92019-04-25 18:51:51 +02001173 * Processes a set a route-path changes due to link up by editing hash groups.
1174 *
1175 * @param routeChanges a set of route-path changes, where each route-path is
1176 * a list with its first element the src-switch of the path
1177 * and the second element the dst-switch of the path.
1178 * @return set of changed routes
1179 */
1180 private Set<ArrayList<DeviceId>> processHashGroupChangeForLinkUp(Set<ArrayList<DeviceId>> routeChanges) {
1181 // Stores changed routes
1182 Set<ArrayList<DeviceId>> doneRoutes = new HashSet<>();
1183 // first, ensure each routeChanges entry has two elements
1184 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
1185 boolean someFailed = false;
1186 boolean success;
1187 Set<DeviceId> updatedDevices = Sets.newHashSet();
1188 for (ArrayList<DeviceId> route : changedRoutes) {
1189 DeviceId targetSw = route.get(0);
1190 DeviceId dstSw = route.get(1);
1191 // linkup - fix (if possible)
1192 success = fixHashGroupsForRoute(route, false);
1193 if (success) {
1194 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1195 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1196 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
1197 targetSw, dstSw);
1198 updatedDevices.add(targetSw);
1199 updatedDevices.add(dstSw);
1200 doneRoutes.add(route);
1201 } else {
1202 someFailed = true;
1203 }
1204
1205 }
1206 if (!someFailed) {
1207 // here is where we update all devices not touched by this instance
1208 updatedEcmpSpgMap.keySet().stream()
1209 .filter(devId -> !updatedDevices.contains(devId))
1210 .forEach(devId -> {
1211 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1212 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1213 });
1214 }
1215 return doneRoutes;
1216 }
1217
1218 /**
Saurav Das62ae6792017-05-15 15:34:25 -07001219 * Edits hash groups in the src-switch (targetSw) of a route-path by
1220 * calling the groupHandler to either add or remove buckets in an existing
1221 * hash group.
1222 *
1223 * @param route a single list representing a route-path where the first element
1224 * is the src-switch (targetSw) of the route-path and the
1225 * second element is the dst-switch
1226 * @param revoke true if buckets in the hash-groups need to be removed;
1227 * false if buckets in the hash-groups need to be added
1228 * @return true if the hash group editing is successful
1229 */
1230 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1231 boolean revoke) {
1232 DeviceId targetSw = route.get(0);
1233 if (route.size() < 2) {
1234 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1235 return false;
1236 }
1237 DeviceId destSw = route.get(1);
pierf331a492020-01-07 15:39:39 +01001238 if (!seenBeforeRoutes.containsEntry(destSw, targetSw)) {
1239 log.warn("Cannot fixHashGroupsForRoute {} -> {} has not been programmed before",
1240 targetSw, destSw);
1241 return false;
1242 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001243 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001244 targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001245 // figure out the new next hops at the targetSw towards the destSw
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001246 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001247 // call group handler to change hash group at targetSw
1248 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1249 if (grpHandler == null) {
1250 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1251 + " {} hash group buckets for route:{} ", targetSw,
1252 (revoke) ? "revoke" : "repopulate", route);
1253 return false;
1254 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001255 log.debug("{} hash-groups buckets For Route {} -> {} to new next-hops {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001256 (revoke) ? "revoke" : "repopulating",
1257 targetSw, destSw, nextHops);
1258 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1259 destSw, true)
1260 : grpHandler.fixHashGroups(targetSw, nextHops,
1261 destSw, false);
1262 }
1263
1264 /**
Saurav Das261c3002017-06-13 15:35:54 -07001265 * Start the flow rule population process if it was never started. The
1266 * process finishes successfully when all flow rules are set and stops with
1267 * ABORTED status when any groups required for flows is not set yet.
Saurav Das62ae6792017-05-15 15:34:25 -07001268 */
Saurav Das261c3002017-06-13 15:35:54 -07001269 public void startPopulationProcess() {
1270 statusLock.lock();
1271 try {
1272 if (populationStatus == Status.IDLE
1273 || populationStatus == Status.SUCCEEDED
1274 || populationStatus == Status.ABORTED) {
1275 populateAllRoutingRules();
sangho28d0b6d2015-05-07 13:30:57 -07001276 } else {
Saurav Das261c3002017-06-13 15:35:54 -07001277 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1278 populationStatus);
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001279 }
Saurav Das261c3002017-06-13 15:35:54 -07001280 } finally {
1281 statusLock.unlock();
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001282 }
sanghofb7c7292015-04-13 15:15:58 -07001283 }
1284
Saurav Dasb149be12016-06-07 10:08:06 -07001285 /**
Saurav Das261c3002017-06-13 15:35:54 -07001286 * Revoke rules of given subnet in all edge switches.
1287 *
1288 * @param subnets subnet being removed
1289 * @return true if succeed
1290 */
1291 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
piera9941192019-04-24 16:12:47 +02001292 DeviceId targetSw;
1293 List<Future<Boolean>> futures = Lists.newArrayList();
1294 for (Device sw : srManager.deviceService.getAvailableDevices()) {
1295 targetSw = sw.id();
1296 if (shouldProgram(targetSw)) {
1297 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1298 } else {
1299 futures.add(CompletableFuture.completedFuture(true));
1300 }
1301 }
1302 // check the execution of each job
1303 return checkJobs(futures);
1304 }
1305
Shibu Vijayakumar5e26f8c2020-01-07 11:45:09 +00001306 /**
1307 * Revoke rules of given subnets in the given switches.
1308 *
1309 * @param targetSwitches switched from which subnets to be removed
1310 * @param subnets subnet bring removed
1311 * @return true if succeed
1312 */
1313 protected boolean revokeSubnet(Set<DeviceId> targetSwitches, Set<IpPrefix> subnets) {
1314 List<Future<Boolean>> futures = Lists.newArrayList();
1315 for (DeviceId targetSw : targetSwitches) {
1316 if (shouldProgram(targetSw)) {
1317 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1318 } else {
1319 futures.add(CompletableFuture.completedFuture(true));
1320 }
1321 }
1322 // check the execution of each job
1323 return checkJobs(futures);
1324 }
1325
piera9941192019-04-24 16:12:47 +02001326 private final class RevokeSubnet implements PickyCallable<Boolean> {
1327 private DeviceId targetSw;
1328 private Set<IpPrefix> subnets;
1329
1330 /**
1331 * Builds a RevokeSubnet task, which provides a result.
1332 *
1333 * @param subnets a set of prefixes
1334 * @param targetSw target switch
1335 */
1336 RevokeSubnet(DeviceId targetSw, Set<IpPrefix> subnets) {
1337 this.targetSw = targetSw;
1338 this.subnets = subnets;
1339 }
1340
1341 @Override
1342 public Boolean call() throws Exception {
1343 return srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets);
1344 }
1345
1346 @Override
1347 public int hint() {
1348 return targetSw.hashCode();
Saurav Das261c3002017-06-13 15:35:54 -07001349 }
1350 }
1351
1352 /**
Charles Chan910be6a2017-08-23 14:46:43 -07001353 * Populates IP rules for a route that has direct connection to the switch
1354 * if the current instance is the master of the switch.
1355 *
1356 * @param deviceId device ID of the device that next hop attaches to
1357 * @param prefix IP prefix of the route
1358 * @param hostMac MAC address of the next hop
1359 * @param hostVlanId Vlan ID of the nexthop
1360 * @param outPort port where the next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001361 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001362 * @return future that includes the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001363 */
Charles Chan12a8a842020-02-14 13:23:57 -08001364 CompletableFuture<Objective> populateRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac,
1365 VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001366 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001367 return srManager.routingRulePopulator.populateRoute(deviceId, prefix,
1368 hostMac, hostVlanId, outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001369 }
Charles Chan12a8a842020-02-14 13:23:57 -08001370 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001371 }
1372
1373 /**
1374 * Removes IP rules for a route when the next hop is gone.
1375 * if the current instance is the master of the switch.
1376 *
1377 * @param deviceId device ID of the device that next hop attaches to
1378 * @param prefix IP prefix of the route
1379 * @param hostMac MAC address of the next hop
1380 * @param hostVlanId Vlan ID of the nexthop
1381 * @param outPort port that next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001382 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001383 * @return future that carries the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001384 */
Charles Chan12a8a842020-02-14 13:23:57 -08001385 CompletableFuture<Objective> revokeRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001386 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001387 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001388 return srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId,
1389 outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001390 }
Charles Chan12a8a842020-02-14 13:23:57 -08001391 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001392 }
1393
Charles Chan12a8a842020-02-14 13:23:57 -08001394 CompletableFuture<Objective> populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001395 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001396 return srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001397 }
Charles Chan12a8a842020-02-14 13:23:57 -08001398 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001399 }
1400
Charles Chan12a8a842020-02-14 13:23:57 -08001401 CompletableFuture<Objective> revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001402 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001403 return srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001404 }
Charles Chan12a8a842020-02-14 13:23:57 -08001405 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001406 }
1407
pierventrea3989be2021-01-08 16:43:17 +01001408 CompletableFuture<Objective> updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1409 VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001410 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001411 return srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
Charles Chand66d6712018-03-29 16:03:41 -07001412 }
pierventrea3989be2021-01-08 16:43:17 +01001413 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001414 }
1415
pierventrea3989be2021-01-08 16:43:17 +01001416 CompletableFuture<Objective> updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix,
1417 MacAddress hostMac, VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001418 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001419 return srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
Charles Chand66d6712018-03-29 16:03:41 -07001420 vlanId, popVlan, install);
1421 }
pierventrea3989be2021-01-08 16:43:17 +01001422 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001423 }
1424
Charles Chan910be6a2017-08-23 14:46:43 -07001425 /**
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001426 * Populates IP rules for a route when the next hop is double-tagged.
1427 *
1428 * @param deviceId device ID that next hop attaches to
1429 * @param prefix IP prefix of the route
1430 * @param hostMac MAC address of the next hop
1431 * @param innerVlan Inner Vlan ID of the next hop
1432 * @param outerVlan Outer Vlan ID of the next hop
1433 * @param outerTpid Outer TPID of the next hop
1434 * @param outPort port that the next hop attaches to
1435 */
1436 void populateDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1437 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1438 if (srManager.mastershipService.isLocalMaster(deviceId)) {
Charles Chan61c086d2019-07-26 17:46:15 -07001439 srManager.routingRulePopulator.populateDoubleTaggedRoute(
1440 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1441 srManager.routingRulePopulator.processDoubleTaggedFilter(
1442 deviceId, outPort, outerVlan, innerVlan, true);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001443 }
1444 }
1445
1446 /**
1447 * Revokes IP rules for a route when the next hop is double-tagged.
1448 *
1449 * @param deviceId device ID that next hop attaches to
1450 * @param prefix IP prefix of the route
1451 * @param hostMac MAC address of the next hop
1452 * @param innerVlan Inner Vlan ID of the next hop
1453 * @param outerVlan Outer Vlan ID of the next hop
1454 * @param outerTpid Outer TPID of the next hop
1455 * @param outPort port that the next hop attaches to
1456 */
1457 void revokeDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1458 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
1459 // Revoke route either if this node have the mastership (when device is available) or
1460 // if this node is the leader (even when device is unavailable)
1461 if (!srManager.mastershipService.isLocalMaster(deviceId)) {
1462 if (srManager.deviceService.isAvailable(deviceId)) {
1463 // Master node will revoke specified rule.
1464 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1465 return;
1466 }
1467
1468 // isLocalMaster will return false when the device is unavailable.
1469 // Verify if this node is the leader in that case.
1470 NodeId leader = srManager.leadershipService.runForLeadership(
1471 deviceId.toString()).leaderNodeId();
1472 if (!srManager.clusterService.getLocalNode().id().equals(leader)) {
1473 // Leader node will revoke specified rule.
1474 log.debug("This node is not a master for {}, stop revoking route.", deviceId);
1475 return;
1476 }
1477 }
1478
Charles Chan61c086d2019-07-26 17:46:15 -07001479 srManager.routingRulePopulator.revokeDoubleTaggedRoute(deviceId, prefix, hostMac,
1480 innerVlan, outerVlan, outerTpid, outPort);
1481 srManager.routingRulePopulator.processDoubleTaggedFilter(deviceId, outPort, outerVlan, innerVlan, false);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001482 }
1483
pierf331a492020-01-07 15:39:39 +01001484 /**
1485 * Purges seen before routes for a given device.
1486 * @param deviceId the device id
1487 */
1488 void purgeSeenBeforeRoutes(DeviceId deviceId) {
1489 log.debug("Purging seen before routes having as target {}", deviceId);
1490 Set<Entry<DeviceId, DeviceId>> routesToPurge = seenBeforeRoutes.stream()
1491 .filter(entry -> entry.getValue().equals(deviceId))
1492 .collect(Collectors.toSet());
1493 routesToPurge.forEach(entry -> seenBeforeRoutes.remove(entry.getKey(), entry.getValue()));
1494 }
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001495
1496 /**
Saurav Das261c3002017-06-13 15:35:54 -07001497 * Remove ECMP graph entry for the given device. Typically called when
1498 * device is no longer available.
1499 *
1500 * @param deviceId the device for which graphs need to be purged
1501 */
Charles Chanfbcb8812018-04-18 18:41:05 -07001502 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Das6430f412018-01-25 09:49:01 -08001503 statusLock.lock();
1504 try {
Saurav Das6430f412018-01-25 09:49:01 -08001505 if (populationStatus == Status.STARTED) {
1506 log.warn("Previous rule population is not finished. Cannot"
1507 + " proceeed with purgeEcmpGraph for {}", deviceId);
1508 return;
1509 }
1510 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1511 currentEcmpSpgMap.remove(deviceId);
1512 if (updatedEcmpSpgMap != null) {
1513 updatedEcmpSpgMap.remove(deviceId);
1514 }
1515 } finally {
1516 statusLock.unlock();
Saurav Das261c3002017-06-13 15:35:54 -07001517 }
1518 }
1519
Saurav Das00e553b2018-04-21 17:19:48 -07001520 /**
1521 * Attempts a full reroute of route-paths if topology has changed relatively
1522 * close to a mastership change event. Does not do a reroute if mastership
1523 * change is due to reasons other than a ONOS cluster event - for example a
1524 * call to balance-masters, or a switch up/down event.
1525 *
1526 * @param devId the device identifier for which mastership has changed
1527 * @param me the mastership event
1528 */
1529 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1530 // give small delay to absorb mastership events that are caused by
1531 // device that has disconnected from cluster
Saurav Das49368392018-04-23 18:42:12 -07001532 executorServiceMstChg.schedule(new MasterChange(devId, me),
1533 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
Saurav Das00e553b2018-04-21 17:19:48 -07001534 }
1535
1536 protected final class MasterChange implements Runnable {
1537 private DeviceId devId;
1538 private MastershipEvent me;
1539 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1540 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
Saurav Dasec683dc2018-04-27 18:42:30 -07001541 private static final long EDGE_PORT_EVENT_THRESHOLD = 10000; //ms
Saurav Das68e1b6a2018-06-11 17:02:31 -07001542 private static final long FULL_REROUTE_THRESHOLD = 10000; // ms
Saurav Das00e553b2018-04-21 17:19:48 -07001543
1544 MasterChange(DeviceId devId, MastershipEvent me) {
1545 this.devId = devId;
1546 this.me = me;
1547 }
1548
1549 @Override
1550 public void run() {
1551 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1552 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1553
1554 // ignore event for lost switch if cluster event hasn't happened -
1555 // device down event will handle it
1556 if ((me.roleInfo().master() == null
1557 || !srManager.deviceService.isAvailable(devId))
1558 && !clusterEvent) {
1559 log.debug("Full reroute not required for lost device: {}/{} "
1560 + "clusterEvent/timeSince: {}/{}",
1561 devId, me.roleInfo(), clusterEvent, lce);
1562 return;
1563 }
1564
1565 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1566 long lde = Instant.now().toEpochMilli() - update;
1567 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1568
1569 // ignore event for recently connected switch if cluster event hasn't
1570 // happened - link up events will handle it
1571 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1572 && !clusterEvent) {
1573 log.debug("Full reroute not required for recently available"
1574 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1575 + "clusterEvent/timeSince: {}/{}",
1576 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1577 return;
1578 }
1579
Saurav Dasec683dc2018-04-27 18:42:30 -07001580 long lepe = Instant.now().toEpochMilli()
1581 - srManager.lastEdgePortEvent.toEpochMilli();
1582 boolean edgePortEvent = lepe < EDGE_PORT_EVENT_THRESHOLD;
1583
Saurav Das00e553b2018-04-21 17:19:48 -07001584 // if it gets here, then mastership change is likely due to onos
1585 // instance failure, or network partition in onos cluster
1586 // normally a mastership change like this does not require re-programming
1587 // but if topology changes happen at the same time then we may miss events
1588 if (!isRoutingStable() && clusterEvent) {
Saurav Dasec683dc2018-04-27 18:42:30 -07001589 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
Saurav Das00e553b2018-04-21 17:19:48 -07001590 + "due to clusterEvent {} ms ago .. attempting full reroute",
1591 devId, me.roleInfo(), lce);
1592 if (srManager.mastershipService.isLocalMaster(devId)) {
1593 // old master could have died when populating filters
1594 populatePortAddressingRules(devId);
1595 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001596 // old master could have died when creating groups
Saurav Das00e553b2018-04-21 17:19:48 -07001597 // XXX right now we have no fine-grained way to only make changes
Saurav Das68e1b6a2018-06-11 17:02:31 -07001598 // for the route paths affected by this device. Thus we do a
1599 // full reroute after purging all hash groups. We also try to do
1600 // it only once, irrespective of the number of devices
1601 // that changed mastership when their master instance died.
1602 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
1603 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
1604 if (doFullReroute) {
1605 lastFullReroute = Instant.now();
1606 for (Device dev : srManager.deviceService.getDevices()) {
1607 if (shouldProgram(dev.id())) {
1608 srManager.purgeHashedNextObjectiveStore(dev.id());
pierf331a492020-01-07 15:39:39 +01001609 seenBeforeRoutes.removeAll(dev.id());
Saurav Das68e1b6a2018-06-11 17:02:31 -07001610 }
1611 }
1612 // give small delay to ensure entire store is purged
1613 executorServiceFRR.schedule(new FullRerouteAfterPurge(),
1614 PURGE_DELAY,
1615 TimeUnit.MILLISECONDS);
1616 } else {
1617 log.warn("Full reroute attempted {} ms ago .. skipping", lfrr);
1618 }
Saurav Dasec683dc2018-04-27 18:42:30 -07001619
1620 } else if (edgePortEvent && clusterEvent) {
1621 log.warn("Mastership changed for dev: {}/{} due to clusterEvent {} ms ago "
1622 + "while edge-port event happened {} ms ago "
1623 + " .. reprogramming all edge-ports",
1624 devId, me.roleInfo(), lce, lepe);
1625 if (shouldProgram(devId)) {
1626 srManager.deviceService.getPorts(devId).stream()
1627 .filter(p -> srManager.interfaceService
1628 .isConfigured(new ConnectPoint(devId, p.number())))
1629 .forEach(p -> srManager.processPortUpdated(devId, p));
1630 }
1631
Saurav Das00e553b2018-04-21 17:19:48 -07001632 } else {
1633 log.debug("Stable route-paths .. full reroute not attempted for "
1634 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1635 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1636 deviceEvent, lde, clusterEvent, lce);
1637 }
1638 }
1639 }
1640
Saurav Das68e1b6a2018-06-11 17:02:31 -07001641 /**
1642 * Performs a full reroute of routing rules in all the switches. Assumes
1643 * caller has purged hash groups from the nextObjective store, otherwise
1644 * re-uses ones available in the store.
1645 */
1646 protected final class FullRerouteAfterPurge implements Runnable {
1647 @Override
1648 public void run() {
1649 populateAllRoutingRules();
1650 }
1651 }
1652
1653
Saurav Das261c3002017-06-13 15:35:54 -07001654 //////////////////////////////////////
1655 // Routing helper methods and classes
1656 //////////////////////////////////////
1657
1658 /**
Saurav Das68e1b6a2018-06-11 17:02:31 -07001659 * Computes set of affected routes due to failed link. Assumes previous ecmp
1660 * shortest-path graph exists for a switch in order to compute affected
1661 * routes. If such a graph does not exist, the method returns null.
Saurav Dasb149be12016-06-07 10:08:06 -07001662 *
1663 * @param linkFail the failed link
1664 * @return the set of affected routes which may be empty if no routes were
Saurav Das68e1b6a2018-06-11 17:02:31 -07001665 * affected
Saurav Dasb149be12016-06-07 10:08:06 -07001666 */
sanghofb7c7292015-04-13 15:15:58 -07001667 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sanghofb7c7292015-04-13 15:15:58 -07001668 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1669
1670 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001671 log.debug("Computing the impacted routes for device {} due to link fail",
1672 sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001673 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001674 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001675 continue;
1676 }
Charles Chand66d6712018-03-29 16:03:41 -07001677 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001678 // check for mastership change since last run
1679 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001680 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001681 + " ... nuking current ECMPspg", sw.id());
1682 currentEcmpSpgMap.remove(sw.id());
1683 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001684 lastProgrammed.add(sw.id());
1685
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001686 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1687 if (ecmpSpg == null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001688 log.warn("No existing ECMP graph for switch {}. Assuming "
1689 + "all route-paths have changed towards it.", rootSw);
1690 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
1691 if (targetSw.equals(rootSw)) {
1692 continue;
1693 }
1694 routes.add(Lists.newArrayList(targetSw, rootSw));
1695 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1696 }
1697 continue;
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001698 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001699
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001700 if (log.isDebugEnabled()) {
1701 log.debug("Root switch: {}", rootSw);
1702 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1703 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1704 }
1705 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1706 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1707 // figure out if the broken link affected any route-paths in this graph
1708 for (Integer itrIdx : switchVia.keySet()) {
1709 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1710 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1711 switchVia.get(itrIdx);
1712 for (DeviceId targetSw : swViaMap.keySet()) {
1713 log.trace("TargetSwitch {} --> RootSwitch {}",
1714 targetSw, rootSw);
Saurav Dasb149be12016-06-07 10:08:06 -07001715 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1716 log.trace(" Via:");
Pier Ventreadb4ae62016-11-23 09:57:42 -08001717 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb149be12016-06-07 10:08:06 -07001718 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001719 Set<ArrayList<DeviceId>> subLinks =
1720 computeLinks(targetSw, rootSw, swViaMap);
1721 for (ArrayList<DeviceId> alink: subLinks) {
1722 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1723 alink.get(1).equals(linkFail.dst().deviceId()))
1724 ||
1725 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1726 alink.get(1).equals(linkFail.src().deviceId()))) {
1727 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1728 ArrayList<DeviceId> aRoute = new ArrayList<>();
1729 aRoute.add(targetSw); // switch with rules to populate
1730 aRoute.add(rootSw); // towards this destination
1731 routes.add(aRoute);
1732 break;
1733 }
sanghofb7c7292015-04-13 15:15:58 -07001734 }
1735 }
1736 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001737
sanghofb7c7292015-04-13 15:15:58 -07001738 }
sangho28d0b6d2015-05-07 13:30:57 -07001739
sanghofb7c7292015-04-13 15:15:58 -07001740 }
sanghofb7c7292015-04-13 15:15:58 -07001741 return routes;
1742 }
1743
Saurav Das1b391d52016-11-29 14:27:25 -08001744 /**
1745 * Computes set of affected routes due to new links or failed switches.
1746 *
Saurav Dasdc7f2752018-03-18 21:28:15 -07001747 * @param failedSwitch deviceId of failed switch if any
Saurav Das1b391d52016-11-29 14:27:25 -08001748 * @return the set of affected routes which may be empty if no routes were
1749 * affected
1750 */
Saurav Dascea556f2018-03-05 14:37:16 -08001751 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das261c3002017-06-13 15:35:54 -07001752 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das1b391d52016-11-29 14:27:25 -08001753 ImmutableSet.builder();
sanghofb7c7292015-04-13 15:15:58 -07001754
1755 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das261c3002017-06-13 15:35:54 -07001756 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001757 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001758 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001759 continue;
1760 }
Charles Chand66d6712018-03-29 16:03:41 -07001761 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das261c3002017-06-13 15:35:54 -07001762 if (log.isTraceEnabled()) {
1763 log.trace("Device links for dev: {}", rootSw);
1764 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1765 log.trace("{} -> {} ", link.src().deviceId(),
1766 link.dst().deviceId());
1767 }
Saurav Dasb149be12016-06-07 10:08:06 -07001768 }
Saurav Das00e553b2018-04-21 17:19:48 -07001769 // check for mastership change since last run
1770 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001771 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001772 + " ... nuking current ECMPspg", sw.id());
1773 currentEcmpSpgMap.remove(sw.id());
1774 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001775 lastProgrammed.add(sw.id());
Saurav Das261c3002017-06-13 15:35:54 -07001776 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1777 if (currEcmpSpg == null) {
1778 log.debug("No existing ECMP graph for device {}.. adding self as "
1779 + "changed route", rootSw);
1780 changedRtBldr.add(Lists.newArrayList(rootSw));
1781 continue;
1782 }
1783 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Dasdebcf882018-04-06 20:16:01 -07001784 if (newEcmpSpg == null) {
1785 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1786 continue;
1787 }
Saurav Das261c3002017-06-13 15:35:54 -07001788 if (log.isDebugEnabled()) {
1789 log.debug("Root switch: {}", rootSw);
1790 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1791 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1792 }
1793 // first use the updated/new map to compare to current/existing map
1794 // as new links may have come up
1795 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1796 // then use the current/existing map to compare to updated/new map
1797 // as switch may have been removed
1798 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho28d0b6d2015-05-07 13:30:57 -07001799 }
Saurav Das1b391d52016-11-29 14:27:25 -08001800 }
sanghofb7c7292015-04-13 15:15:58 -07001801
Saurav Dascea556f2018-03-05 14:37:16 -08001802 // handle clearing state for a failed switch in case the switch does
1803 // not have a pair, or the pair is not available
1804 if (failedSwitch != null) {
Charles Chan6dbcd252018-04-02 11:46:38 -07001805 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1806 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001807 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1808 srManager.deviceService.getDevices().forEach(dev -> {
1809 if (!dev.id().equals(failedSwitch) &&
1810 srManager.mastershipService.isLocalMaster(dev.id())) {
1811 log.debug(" : {}", dev.id());
1812 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1813 }
1814 });
1815 }
1816 }
1817
Saurav Das261c3002017-06-13 15:35:54 -07001818 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das1b391d52016-11-29 14:27:25 -08001819 for (ArrayList<DeviceId> route: changedRoutes) {
1820 log.debug("Route changes Target -> Root");
1821 if (route.size() == 1) {
1822 log.debug(" : all -> {}", route.get(0));
1823 } else {
1824 log.debug(" : {} -> {}", route.get(0), route.get(1));
1825 }
1826 }
1827 return changedRoutes;
1828 }
1829
pier572d4a92019-04-25 18:51:51 +02001830 // Utility method to expands the route changes in two elements array using
1831 // the ECMP graph. Caller represents all to dst switch routes with an
1832 // array containing only the dst switch.
1833 private Set<ArrayList<DeviceId>> getExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1834 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1835 // Ensure each routeChanges entry has two elements
1836 for (ArrayList<DeviceId> route : routeChanges) {
1837 if (route.size() == 1) {
1838 DeviceId dstSw = route.get(0);
1839 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
1840 if (ec == null) {
1841 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
1842 return Collections.emptySet();
1843 }
1844 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
1845 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
1846 changedRoutes.add(Lists.newArrayList(target, dstSw));
1847 });
1848 });
1849 } else {
1850 DeviceId targetSw = route.get(0);
1851 DeviceId dstSw = route.get(1);
1852 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
1853 }
1854 }
1855 return changedRoutes;
1856 }
1857
1858 // Utility method to expands the route changes in two elements array using
1859 // the available devices. Caller represents all to dst switch routes with an
1860 // array containing only the dst switch.
1861 private Set<ArrayList<DeviceId>> getAllExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1862 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1863 // Ensure each routeChanges entry has two elements
1864 for (ArrayList<DeviceId> route : routeChanges) {
1865 if (route.size() == 1) {
1866 // route-path changes are from everyone else to this switch
1867 DeviceId dstSw = route.get(0);
1868 srManager.deviceService.getAvailableDevices().forEach(sw -> {
1869 if (!sw.id().equals(dstSw)) {
1870 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
1871 }
1872 });
1873 } else {
1874 changedRoutes.add(route);
1875 }
1876 }
1877 return changedRoutes;
1878 }
1879
Saurav Das1b391d52016-11-29 14:27:25 -08001880 /**
1881 * For the root switch, searches all the target nodes reachable in the base
1882 * graph, and compares paths to the ones in the comp graph.
1883 *
1884 * @param base the graph that is indexed for all reachable target nodes
1885 * from the root node
1886 * @param comp the graph that the base graph is compared to
1887 * @param rootSw both ecmp graphs are calculated for the root node
1888 * @return all the routes that have changed in the base graph
1889 */
1890 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1891 EcmpShortestPathGraph comp,
1892 DeviceId rootSw) {
1893 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1894 ImmutableSet.builder();
1895 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1896 base.getAllLearnedSwitchesAndVia();
1897 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1898 comp.getAllLearnedSwitchesAndVia();
1899 for (Integer itrIdx : baseMap.keySet()) {
1900 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1901 baseMap.get(itrIdx);
1902 for (DeviceId targetSw : baseViaMap.keySet()) {
1903 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1904 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1905 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Das62ae6792017-05-15 15:34:25 -07001906 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das1b391d52016-11-29 14:27:25 -08001907 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das261c3002017-06-13 15:35:54 -07001908 route.add(targetSw); // switch with rules to populate
1909 route.add(rootSw); // towards this destination
Saurav Das1b391d52016-11-29 14:27:25 -08001910 changedRoutesBuilder.add(route);
sanghofb7c7292015-04-13 15:15:58 -07001911 }
1912 }
sangho28d0b6d2015-05-07 13:30:57 -07001913 }
Saurav Das1b391d52016-11-29 14:27:25 -08001914 return changedRoutesBuilder.build();
sanghofb7c7292015-04-13 15:15:58 -07001915 }
1916
Saurav Das261c3002017-06-13 15:35:54 -07001917 /**
1918 * Returns the ECMP paths traversed to reach the target switch.
1919 *
1920 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1921 * @param targetSw the switch to reach from the root switch
1922 * @return the nodes traversed on ECMP paths to the target switch
1923 */
sanghofb7c7292015-04-13 15:15:58 -07001924 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das1b391d52016-11-29 14:27:25 -08001925 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sanghofb7c7292015-04-13 15:15:58 -07001926 for (Integer itrIdx : switchVia.keySet()) {
1927 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1928 switchVia.get(itrIdx);
Saurav Das1b391d52016-11-29 14:27:25 -08001929 if (swViaMap.get(targetSw) == null) {
sanghofb7c7292015-04-13 15:15:58 -07001930 continue;
1931 } else {
Saurav Das1b391d52016-11-29 14:27:25 -08001932 return swViaMap.get(targetSw);
sanghofb7c7292015-04-13 15:15:58 -07001933 }
1934 }
1935
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001936 return null;
sanghofb7c7292015-04-13 15:15:58 -07001937 }
1938
Saurav Das261c3002017-06-13 15:35:54 -07001939 /**
1940 * Utility method to break down a path from src to dst device into a collection
1941 * of links.
1942 *
1943 * @param src src device of the path
1944 * @param dst dst device of the path
1945 * @param viaMap path taken from src to dst device
1946 * @return collection of links in the path
1947 */
sanghofb7c7292015-04-13 15:15:58 -07001948 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1949 DeviceId dst,
1950 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1951 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1952 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1953 DeviceId linkSrc = src;
1954 DeviceId linkDst = dst;
1955 for (DeviceId viaDevice: via) {
1956 ArrayList<DeviceId> link = new ArrayList<>();
1957 linkDst = viaDevice;
1958 link.add(linkSrc);
1959 link.add(linkDst);
1960 subLinks.add(link);
1961 linkSrc = viaDevice;
1962 }
1963 ArrayList<DeviceId> link = new ArrayList<>();
1964 link.add(linkSrc);
1965 link.add(dst);
1966 subLinks.add(link);
1967 }
1968
1969 return subLinks;
1970 }
1971
Charles Chanc22cef32016-04-29 14:38:22 -07001972 /**
Charles Chand66d6712018-03-29 16:03:41 -07001973 * Determines whether this controller instance should program the
Saurav Das261c3002017-06-13 15:35:54 -07001974 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
Charles Chand66d6712018-03-29 16:03:41 -07001975 * <p>
1976 * Once an instance is elected, it will be the only instance responsible for programming
1977 * both devices in the pair until it goes down.
Charles Chanc22cef32016-04-29 14:38:22 -07001978 *
Saurav Das261c3002017-06-13 15:35:54 -07001979 * @param deviceId device identifier to consider for routing
Charles Chand66d6712018-03-29 16:03:41 -07001980 * @return true if current instance should handle the routing for given device
Charles Chanc22cef32016-04-29 14:38:22 -07001981 */
Charles Chand66d6712018-03-29 16:03:41 -07001982 boolean shouldProgram(DeviceId deviceId) {
Charles Chanfbcb8812018-04-18 18:41:05 -07001983 Boolean cached = shouldProgramCache.get(deviceId);
1984 if (cached != null) {
Saurav Das00e553b2018-04-21 17:19:48 -07001985 log.debug("shouldProgram dev:{} cached:{}", deviceId, cached);
Charles Chanfbcb8812018-04-18 18:41:05 -07001986 return cached;
1987 }
1988
Charles Chand66d6712018-03-29 16:03:41 -07001989 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
sangho80f11cb2015-04-01 13:05:26 -07001990
Charles Chand66d6712018-03-29 16:03:41 -07001991 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
1992 NodeId masterNodeId = srManager.mastershipService.getMasterFor(deviceId);
1993 Optional<NodeId> pairMasterNodeId = pairDeviceId.map(srManager.mastershipService::getMasterFor);
Saurav Das68e1b6a2018-06-11 17:02:31 -07001994 log.debug("Evaluate shouldProgram {}/pair={}. currentNodeId={}, master={}, pairMaster={}",
Charles Chand66d6712018-03-29 16:03:41 -07001995 deviceId, pairDeviceId, currentNodeId, masterNodeId, pairMasterNodeId);
1996
1997 // No pair device configured. Only handle when current instance is the master of the device
1998 if (!pairDeviceId.isPresent()) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001999 log.debug("No pair device. currentNodeId={}, master={}", currentNodeId, masterNodeId);
Charles Chand66d6712018-03-29 16:03:41 -07002000 return currentNodeId.equals(masterNodeId);
sangho80f11cb2015-04-01 13:05:26 -07002001 }
Charles Chand66d6712018-03-29 16:03:41 -07002002
2003 // Should not handle if current instance is not the master of either switch
2004 if (!currentNodeId.equals(masterNodeId) &&
2005 !(pairMasterNodeId.isPresent() && currentNodeId.equals(pairMasterNodeId.get()))) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002006 log.debug("Current nodeId {} is neither the master of target device {} nor pair device {}",
Charles Chand66d6712018-03-29 16:03:41 -07002007 currentNodeId, deviceId, pairDeviceId);
2008 return false;
2009 }
2010
2011 Set<DeviceId> key = Sets.newHashSet(deviceId, pairDeviceId.get());
2012
2013 NodeId king = shouldProgram.compute(key, ((k, v) -> {
2014 if (v == null) {
2015 // There is no value in the map. Elect a node
2016 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
2017 } else {
2018 if (v.equals(masterNodeId) || v.equals(pairMasterNodeId.orElse(null))) {
2019 // Use the node in the map if it is still alive and is a master of any of the two switches
2020 return v;
2021 } else {
2022 // Previously elected node is no longer the master of either switch. Re-elect a node.
2023 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
2024 }
2025 }
2026 }));
2027
2028 if (king != null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002029 log.debug("{} is king, should handle routing for {}/pair={}", king, deviceId, pairDeviceId);
Charles Chanfbcb8812018-04-18 18:41:05 -07002030 shouldProgramCache.put(deviceId, king.equals(currentNodeId));
Charles Chand66d6712018-03-29 16:03:41 -07002031 return king.equals(currentNodeId);
2032 } else {
2033 log.error("Fail to elect a king for {}/pair={}. Abort.", deviceId, pairDeviceId);
Charles Chanfbcb8812018-04-18 18:41:05 -07002034 shouldProgramCache.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07002035 return false;
2036 }
2037 }
2038
2039 /**
2040 * Elects a node who should take responsibility of programming devices.
2041 * @param nodeIds list of candidate node ID
2042 *
2043 * @return NodeId of the node that gets elected, or null if none of the node can be elected
2044 */
2045 private NodeId elect(List<NodeId> nodeIds) {
2046 // Remove all null elements. This could happen when some device has no master
2047 nodeIds.removeAll(Collections.singleton(null));
2048 nodeIds.sort(null);
2049 return nodeIds.size() == 0 ? null : nodeIds.get(0);
2050 }
2051
Charles Chanfbcb8812018-04-18 18:41:05 -07002052 void invalidateShouldProgramCache(DeviceId deviceId) {
2053 shouldProgramCache.remove(deviceId);
2054 }
2055
Charles Chand66d6712018-03-29 16:03:41 -07002056 /**
2057 * Returns a set of device ID, containing given device and its pair device if exist.
2058 *
2059 * @param deviceId Device ID
2060 * @return a set of device ID, containing given device and its pair device if exist.
2061 */
2062 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
2063 Set<DeviceId> ret = Sets.newHashSet(deviceId);
2064 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
2065 return ret;
sangho80f11cb2015-04-01 13:05:26 -07002066 }
2067
Charles Chanc22cef32016-04-29 14:38:22 -07002068 /**
Saurav Das261c3002017-06-13 15:35:54 -07002069 * Returns the set of deviceIds which are the next hops from the targetSw
2070 * to the dstSw according to the latest ECMP spg.
2071 *
2072 * @param targetSw the switch for which the next-hops are desired
2073 * @param dstSw the switch to which the next-hops lead to from the targetSw
2074 * @return set of next hop deviceIds, could be empty if no next hops are found
2075 */
2076 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
2077 boolean targetIsEdge = false;
2078 try {
2079 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
2080 } catch (DeviceConfigNotFoundException e) {
2081 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
2082 + "continuing to getNextHops", targetSw);
2083 }
2084
2085 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
2086 if (ecmpSpg == null) {
2087 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
2088 return ImmutableSet.of();
2089 }
2090 HashMap<Integer,
2091 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
2092 ecmpSpg.getAllLearnedSwitchesAndVia();
2093 for (Integer itrIdx : switchVia.keySet()) {
2094 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
2095 switchVia.get(itrIdx);
2096 for (DeviceId target : swViaMap.keySet()) {
2097 if (!target.equals(targetSw)) {
2098 continue;
2099 }
Saurav Das49368392018-04-23 18:42:12 -07002100 // optimization for spines to not use leaves to get
2101 // to a spine or other leaves. Also leaves should not use other
2102 // leaves to get to the destination
2103 if ((!targetIsEdge && itrIdx > 1) || targetIsEdge) {
Saurav Das97241862018-02-14 14:14:54 -08002104 boolean pathdevIsEdge = false;
2105 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
Saurav Das49368392018-04-23 18:42:12 -07002106 log.debug("Evaluating next-hop in path: {}", via);
Saurav Das97241862018-02-14 14:14:54 -08002107 for (DeviceId pathdev : via) {
2108 try {
2109 pathdevIsEdge = srManager.deviceConfiguration
2110 .isEdgeDevice(pathdev);
2111 } catch (DeviceConfigNotFoundException e) {
2112 log.warn(e.getMessage());
2113 }
2114 if (pathdevIsEdge) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002115 log.debug("Avoiding {} hop path for targetSw:{}"
Saurav Das97241862018-02-14 14:14:54 -08002116 + " --> dstSw:{} which goes through an edge"
2117 + " device {} in path {}", itrIdx,
2118 targetSw, dstSw, pathdev, via);
2119 return ImmutableSet.of();
2120 }
2121 }
2122 }
Saurav Das261c3002017-06-13 15:35:54 -07002123 }
2124 Set<DeviceId> nextHops = new HashSet<>();
2125 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
2126 if (via.isEmpty()) {
2127 // the dstSw is the next-hop from the targetSw
2128 nextHops.add(dstSw);
2129 } else {
2130 // first elem is next-hop in each ECMP path
2131 nextHops.add(via.get(0));
2132 }
2133 }
Saurav Das49368392018-04-23 18:42:12 -07002134 log.debug("target {} --> dst: {} has next-hops:{}", targetSw,
2135 dstSw, nextHops);
Saurav Das261c3002017-06-13 15:35:54 -07002136 return nextHops;
2137 }
2138 }
Saurav Das49368392018-04-23 18:42:12 -07002139 log.debug("No next hops found for target:{} --> dst: {}", targetSw, dstSw);
Saurav Das261c3002017-06-13 15:35:54 -07002140 return ImmutableSet.of(); //no next-hops found
2141 }
2142
Saurav Das261c3002017-06-13 15:35:54 -07002143 //////////////////////////////////////
2144 // Filtering rule creation
2145 //////////////////////////////////////
2146
2147 /**
Saurav Dasf9332192017-02-18 14:05:44 -08002148 * Populates filtering rules for port, and punting rules
2149 * for gateway IPs, loopback IPs and arp/ndp traffic.
2150 * Should only be called by the master instance for this device/port.
sangho80f11cb2015-04-01 13:05:26 -07002151 *
2152 * @param deviceId Switch ID to set the rules
2153 */
Charles Chanfbcb8812018-04-18 18:41:05 -07002154 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das07c74602016-04-27 18:35:50 -07002155 // Although device is added, sometimes device store does not have the
2156 // ports for this device yet. It results in missing filtering rules in the
2157 // switch. We will attempt it a few times. If it still does not work,
2158 // user can manually repopulate using CLI command sr-reroute-network
Charles Chan18fa4252017-02-08 16:10:40 -08002159 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002160 if (firstRun == null) {
2161 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das07c74602016-04-27 18:35:50 -07002162 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002163 executorService.schedule(new RetryFilters(deviceId, firstRun),
2164 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sangho80f11cb2015-04-01 13:05:26 -07002165 }
2166
2167 /**
Saurav Dasd1872b02016-12-02 15:43:47 -08002168 * RetryFilters populates filtering objectives for a device and keeps retrying
2169 * till the number of ports filtered are constant for a predefined number
2170 * of attempts.
2171 */
2172 protected final class RetryFilters implements Runnable {
2173 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
2174 DeviceId devId;
2175 int counter;
2176 PortFilterInfo prevRun;
2177
2178 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das07c74602016-04-27 18:35:50 -07002179 devId = deviceId;
Saurav Dasd1872b02016-12-02 15:43:47 -08002180 prevRun = previousRun;
2181 counter = 0;
Saurav Das07c74602016-04-27 18:35:50 -07002182 }
2183
2184 @Override
2185 public void run() {
Charles Chan077314e2017-06-22 14:27:17 -07002186 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chan18fa4252017-02-08 16:10:40 -08002187 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002188 boolean sameResult = prevRun.equals(thisRun);
2189 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
2190 thisRun, sameResult);
Ray Milkey614352e2018-02-26 09:36:31 -08002191 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Dasf9332192017-02-18 14:05:44 -08002192 // exponentially increasing intervals for retries
2193 executorService.schedule(this,
2194 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
2195 TimeUnit.MILLISECONDS);
Saurav Dasd1872b02016-12-02 15:43:47 -08002196 if (!sameResult) {
2197 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
2198 }
Saurav Das07c74602016-04-27 18:35:50 -07002199 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002200 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das07c74602016-04-27 18:35:50 -07002201 }
Saurav Das07c74602016-04-27 18:35:50 -07002202 }
piera9941192019-04-24 16:12:47 +02002203
2204 // Check jobs completion. It returns false if one of the job fails
2205 // and cancel the remaining
2206 private boolean checkJobs(List<Future<Boolean>> futures) {
2207 boolean completed = true;
2208 for (Future<Boolean> future : futures) {
2209 try {
2210 if (completed) {
2211 if (!future.get()) {
2212 completed = false;
2213 }
2214 } else {
2215 future.cancel(true);
2216 }
2217 } catch (InterruptedException | ExecutionException e) {
2218 completed = false;
2219 }
2220 }
2221 return completed;
2222 }
sangho80f11cb2015-04-01 13:05:26 -07002223}