blob: 96f26cc588ae0cc4665c69ab6efffe112ba76429 [file] [log] [blame]
sangho80f11cb2015-04-01 13:05:26 -07001/*
Brian O'Connor0947d7e2017-08-03 21:12:30 -07002 * Copyright 2015-present Open Networking Foundation
sangho80f11cb2015-04-01 13:05:26 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.onosproject.segmentrouting;
17
Saurav Dasd1872b02016-12-02 15:43:47 -080018import com.google.common.base.MoreObjects;
Saurav Das62ae6792017-05-15 15:34:25 -070019import com.google.common.collect.ImmutableMap;
20import com.google.common.collect.ImmutableMap.Builder;
Charles Chanc22cef32016-04-29 14:38:22 -070021import com.google.common.collect.ImmutableSet;
Saurav Das1b391d52016-11-29 14:27:25 -080022import com.google.common.collect.Lists;
sanghofb7c7292015-04-13 15:15:58 -070023import com.google.common.collect.Maps;
24import com.google.common.collect.Sets;
Saurav Dasfbe74572017-08-03 18:30:35 -070025
sangho9b169e32015-04-14 16:27:13 -070026import org.onlab.packet.Ip4Address;
Pier Ventreadb4ae62016-11-23 09:57:42 -080027import org.onlab.packet.Ip6Address;
sangho80f11cb2015-04-01 13:05:26 -070028import org.onlab.packet.IpPrefix;
Charles Chan910be6a2017-08-23 14:46:43 -070029import org.onlab.packet.MacAddress;
30import org.onlab.packet.VlanId;
Saurav Das261c3002017-06-13 15:35:54 -070031import org.onosproject.cluster.NodeId;
Charles Chanc22cef32016-04-29 14:38:22 -070032import org.onosproject.net.ConnectPoint;
sangho80f11cb2015-04-01 13:05:26 -070033import org.onosproject.net.Device;
34import org.onosproject.net.DeviceId;
sanghofb7c7292015-04-13 15:15:58 -070035import org.onosproject.net.Link;
Charles Chan910be6a2017-08-23 14:46:43 -070036import org.onosproject.net.PortNumber;
Charles Chan319d1a22015-11-03 10:42:14 -080037import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
38import org.onosproject.segmentrouting.config.DeviceConfiguration;
Saurav Das62ae6792017-05-15 15:34:25 -070039import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
sangho80f11cb2015-04-01 13:05:26 -070040import org.slf4j.Logger;
41import org.slf4j.LoggerFactory;
42
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070043import java.time.Instant;
sangho80f11cb2015-04-01 13:05:26 -070044import java.util.ArrayList;
45import java.util.HashMap;
46import java.util.HashSet;
Saurav Das261c3002017-06-13 15:35:54 -070047import java.util.Iterator;
48import java.util.Map;
Saurav Dasd1872b02016-12-02 15:43:47 -080049import java.util.Objects;
sangho80f11cb2015-04-01 13:05:26 -070050import java.util.Set;
Saurav Das07c74602016-04-27 18:35:50 -070051import java.util.concurrent.ScheduledExecutorService;
52import java.util.concurrent.TimeUnit;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090053import java.util.concurrent.locks.Lock;
54import java.util.concurrent.locks.ReentrantLock;
Saurav Dasdc7f2752018-03-18 21:28:15 -070055import java.util.stream.Stream;
56
Saurav Dasd1872b02016-12-02 15:43:47 -080057import static com.google.common.base.MoreObjects.toStringHelper;
Pier Ventreadb4ae62016-11-23 09:57:42 -080058import static com.google.common.base.Preconditions.checkNotNull;
59import static java.util.concurrent.Executors.newScheduledThreadPool;
60import static org.onlab.util.Tools.groupedThreads;
sangho80f11cb2015-04-01 13:05:26 -070061
Charles Chanb7f75ac2016-01-11 18:28:54 -080062/**
63 * Default routing handler that is responsible for route computing and
64 * routing rule population.
65 */
sangho80f11cb2015-04-01 13:05:26 -070066public class DefaultRoutingHandler {
Saurav Dasf9332192017-02-18 14:05:44 -080067 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey092e9e22018-02-01 13:49:47 -080068 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Dasf9332192017-02-18 14:05:44 -080069 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasfbe74572017-08-03 18:30:35 -070070 private static final long STABLITY_THRESHOLD = 10; //secs
Charles Chanc22cef32016-04-29 14:38:22 -070071 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sangho80f11cb2015-04-01 13:05:26 -070072
73 private SegmentRoutingManager srManager;
74 private RoutingRulePopulator rulePopulator;
Shashikanth VH0637b162015-12-11 01:32:44 +053075 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
76 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho9b169e32015-04-14 16:27:13 -070077 private DeviceConfiguration config;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090078 private final Lock statusLock = new ReentrantLock();
79 private volatile Status populationStatus;
Yuta HIGUCHIebee2f12016-07-21 16:54:33 -070080 private ScheduledExecutorService executorService
Saurav Dasd1872b02016-12-02 15:43:47 -080081 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070082 private Instant lastRoutingChange;
sangho80f11cb2015-04-01 13:05:26 -070083
/**
 * Lifecycle states of the default routing population process.
 */
public enum Status {
    /** The population process has not been started yet. */
    IDLE,

    /** The population process has started. */
    STARTED,

    /**
     * The population process was aborted due to errors, mostly for groups
     * not found.
     */
    ABORTED,

    /** The population process finished successfully. */
    SUCCEEDED
}
101
/**
 * Builds a routing handler bound to the given segment routing manager.
 * The handler starts in the {@link Status#IDLE} state with an empty map of
 * current ECMP graphs.
 *
 * @param srManager SegmentRoutingManager object; its routingRulePopulator
 *                  and deviceConfiguration members must not be null
 */
public DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.srManager = srManager;
    // Fail fast if the manager has not been fully wired up yet.
    this.config = checkNotNull(srManager.deviceConfiguration);
    this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
    this.currentEcmpSpgMap = Maps.newHashMap();
    this.populationStatus = Status.IDLE;
}
114
115 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700116 * Returns an immutable copy of the current ECMP shortest-path graph as
117 * computed by this controller instance.
118 *
Saurav Das261c3002017-06-13 15:35:54 -0700119 * @return immutable copy of the current ECMP graph
Saurav Das62ae6792017-05-15 15:34:25 -0700120 */
121 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
122 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
123 currentEcmpSpgMap.entrySet().forEach(entry -> {
124 if (entry.getValue() != null) {
125 builder.put(entry.getKey(), entry.getValue());
126 }
127 });
128 return builder.build();
129 }
130
Saurav Dasfbe74572017-08-03 18:30:35 -0700131 /**
132 * Acquires the lock used when making routing changes.
133 */
134 public void acquireRoutingLock() {
135 statusLock.lock();
136 }
137
138 /**
139 * Releases the lock used when making routing changes.
140 */
141 public void releaseRoutingLock() {
142 statusLock.unlock();
143 }
144
145 /**
146 * Determines if routing in the network has been stable in the last
147 * STABLITY_THRESHOLD seconds, by comparing the current time to the last
148 * routing change timestamp.
149 *
150 * @return true if stable
151 */
152 public boolean isRoutingStable() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700153 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
154 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700155 log.trace("Routing stable since {}s", now - last);
Saurav Dasfbe74572017-08-03 18:30:35 -0700156 return (now - last) > STABLITY_THRESHOLD;
157 }
158
159
Saurav Das261c3002017-06-13 15:35:54 -0700160 //////////////////////////////////////
161 // Route path handling
162 //////////////////////////////////////
163
Saurav Dase6c448a2018-01-18 12:07:33 -0800164 /* The following three methods represent the three major ways in which
165 * route-path handling is triggered in the network
Saurav Das261c3002017-06-13 15:35:54 -0700166 * a) due to configuration change
167 * b) due to route-added event
168 * c) due to change in the topology
169 */
170
Saurav Das62ae6792017-05-15 15:34:25 -0700171 /**
Saurav Das261c3002017-06-13 15:35:54 -0700172 * Populates all routing rules to all switches. Typically triggered at
173 * startup or after a configuration event.
sangho80f11cb2015-04-01 13:05:26 -0700174 */
Saurav Das62ae6792017-05-15 15:34:25 -0700175 public void populateAllRoutingRules() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700176 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900177 statusLock.lock();
178 try {
Saurav Das261c3002017-06-13 15:35:54 -0700179 if (populationStatus == Status.STARTED) {
180 log.warn("Previous rule population is not finished. Cannot"
181 + " proceed with populateAllRoutingRules");
182 return;
183 }
184
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900185 populationStatus = Status.STARTED;
186 rulePopulator.resetCounter();
Saurav Das261c3002017-06-13 15:35:54 -0700187 log.info("Starting to populate all routing rules");
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900188 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sangho80f11cb2015-04-01 13:05:26 -0700189
Saurav Das261c3002017-06-13 15:35:54 -0700190 // take a snapshot of the topology
191 updatedEcmpSpgMap = new HashMap<>();
192 Set<EdgePair> edgePairs = new HashSet<>();
193 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart61e24e12017-11-30 18:23:42 -0800194 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das261c3002017-06-13 15:35:54 -0700195 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart61e24e12017-11-30 18:23:42 -0800196 new EcmpShortestPathGraph(dstSw, srManager);
197 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
198 DeviceId pairDev = getPairDev(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700199 if (pairDev != null) {
200 // pairDev may not be available yet, but we still need to add
201 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev, srManager);
202 updatedEcmpSpgMap.put(pairDev, ecmpSpgUpdated);
Jonathan Hart61e24e12017-11-30 18:23:42 -0800203 edgePairs.add(new EdgePair(dstSw, pairDev));
Saurav Das261c3002017-06-13 15:35:54 -0700204 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800205 DeviceId ret = shouldHandleRouting(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700206 if (ret == null) {
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900207 continue;
208 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800209 Set<DeviceId> devsToProcess = Sets.newHashSet(dstSw, ret);
Saurav Das261c3002017-06-13 15:35:54 -0700210 // To do a full reroute, assume all routes have changed
211 for (DeviceId dev : devsToProcess) {
Jonathan Hart61e24e12017-11-30 18:23:42 -0800212 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
213 if (targetSw.equals(dev)) {
Saurav Das261c3002017-06-13 15:35:54 -0700214 continue;
215 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800216 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das261c3002017-06-13 15:35:54 -0700217 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900218 }
Saurav Das261c3002017-06-13 15:35:54 -0700219 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900220
Saurav Das261c3002017-06-13 15:35:54 -0700221 if (!redoRouting(routeChanges, edgePairs, null)) {
222 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
223 populationStatus = Status.ABORTED;
224 log.warn("Failed to repopulate all routing rules.");
225 return;
sangho80f11cb2015-04-01 13:05:26 -0700226 }
227
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900228 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
229 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700230 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900231 rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700232 return;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900233 } finally {
234 statusLock.unlock();
sangho80f11cb2015-04-01 13:05:26 -0700235 }
sangho80f11cb2015-04-01 13:05:26 -0700236 }
237
sanghofb7c7292015-04-13 15:15:58 -0700238 /**
Saurav Das261c3002017-06-13 15:35:54 -0700239 * Populate rules from all other edge devices to the connect-point(s)
240 * specified for the given subnets.
241 *
242 * @param cpts connect point(s) of the subnets being added
243 * @param subnets subnets being added
Charles Chan910be6a2017-08-23 14:46:43 -0700244 */
245 // XXX refactor
Saurav Das261c3002017-06-13 15:35:54 -0700246 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan6db55b92017-09-11 15:21:57 -0700247 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
248 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
249 return;
250 }
251
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700252 lastRoutingChange = Instant.now();
Saurav Das261c3002017-06-13 15:35:54 -0700253 statusLock.lock();
254 try {
255 if (populationStatus == Status.STARTED) {
256 log.warn("Previous rule population is not finished. Cannot"
257 + " proceed with routing rules for added routes");
258 return;
259 }
260 populationStatus = Status.STARTED;
261 rulePopulator.resetCounter();
Charles Chan910be6a2017-08-23 14:46:43 -0700262 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
263 subnets, cpts);
Saurav Das6430f412018-01-25 09:49:01 -0800264 // In principle an update to a subnet/prefix should not require a
265 // new ECMPspg calculation as it is not a topology event. As a
266 // result, we use the current/existing ECMPspg in the updated map
267 // used by the redoRouting method.
Saurav Das6de6ffd2018-02-09 09:15:03 -0800268 if (updatedEcmpSpgMap == null) {
269 updatedEcmpSpgMap = new HashMap<>();
270 }
Saurav Das6430f412018-01-25 09:49:01 -0800271 currentEcmpSpgMap.entrySet().forEach(entry -> {
272 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase321cff2018-02-09 17:26:45 -0800273 if (log.isTraceEnabled()) {
274 log.trace("Root switch: {}", entry.getKey());
275 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Das6430f412018-01-25 09:49:01 -0800276 }
277 });
Saurav Das261c3002017-06-13 15:35:54 -0700278 Set<EdgePair> edgePairs = new HashSet<>();
279 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
280 boolean handleRouting = false;
281
282 if (cpts.size() == 2) {
283 // ensure connect points are edge-pairs
284 Iterator<ConnectPoint> iter = cpts.iterator();
285 DeviceId dev1 = iter.next().deviceId();
286 DeviceId pairDev = getPairDev(dev1);
287 if (iter.next().deviceId().equals(pairDev)) {
288 edgePairs.add(new EdgePair(dev1, pairDev));
289 } else {
290 log.warn("Connectpoints {} for subnets {} not on "
291 + "pair-devices.. aborting populateSubnet", cpts, subnets);
292 populationStatus = Status.ABORTED;
293 return;
294 }
295 for (ConnectPoint cp : cpts) {
Saurav Das6430f412018-01-25 09:49:01 -0800296 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
297 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700298 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800299 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
300 log.warn("populateSubnet: no updated graph for dev:{}"
301 + " ... creating", cp.deviceId());
302 }
Saurav Das261c3002017-06-13 15:35:54 -0700303 DeviceId retId = shouldHandleRouting(cp.deviceId());
304 if (retId == null) {
305 continue;
306 }
307 handleRouting = true;
308 }
309 } else {
310 // single connect point
311 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Das6430f412018-01-25 09:49:01 -0800312 if (updatedEcmpSpgMap.get(dstSw) == null) {
313 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700314 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800315 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
316 log.warn("populateSubnet: no updated graph for dev:{}"
317 + " ... creating", dstSw);
318 }
Saurav Das261c3002017-06-13 15:35:54 -0700319 if (srManager.mastershipService.isLocalMaster(dstSw)) {
320 handleRouting = true;
321 }
322 }
323
324 if (!handleRouting) {
325 log.debug("This instance is not handling ecmp routing to the "
326 + "connectPoint(s) {}", cpts);
327 populationStatus = Status.ABORTED;
328 return;
329 }
330
331 // if it gets here, this instance should handle routing for the
332 // connectpoint(s). Assume all route-paths have to be updated to
333 // the connectpoint(s) with the following exceptions
334 // 1. if target is non-edge no need for routing rules
335 // 2. if target is one of the connectpoints
336 for (ConnectPoint cp : cpts) {
337 DeviceId dstSw = cp.deviceId();
338 for (Device targetSw : srManager.deviceService.getDevices()) {
339 boolean isEdge = false;
340 try {
341 isEdge = config.isEdgeDevice(targetSw.id());
342 } catch (DeviceConfigNotFoundException e) {
Charles Chaneaf3c9b2018-02-16 17:20:54 -0800343 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
344 continue;
Saurav Das261c3002017-06-13 15:35:54 -0700345 }
346 if (dstSw.equals(targetSw.id()) || !isEdge ||
347 (cpts.size() == 2 &&
348 targetSw.id().equals(getPairDev(dstSw)))) {
349 continue;
350 }
351 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
352 }
353 }
354
355 if (!redoRouting(routeChanges, edgePairs, subnets)) {
356 log.debug("populateSubnet: populationStatus is ABORTED");
357 populationStatus = Status.ABORTED;
358 log.warn("Failed to repopulate the rules for subnet.");
359 return;
360 }
361
362 log.debug("populateSubnet: populationStatus is SUCCEEDED");
363 populationStatus = Status.SUCCEEDED;
364 log.info("Completed subnet population. Total # of rules pushed : {}",
365 rulePopulator.getCounter());
366 return;
367
368 } finally {
369 statusLock.unlock();
370 }
371 }
372
373 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700374 * Populates the routing rules or makes hash group changes according to the
375 * route-path changes due to link failure, switch failure or link up. This
376 * method should only be called for one of these three possible event-types.
Saurav Dasdc7f2752018-03-18 21:28:15 -0700377 * Note that when a switch goes away, all of its links fail as well, but
378 * this is handled as a single switch removal event.
sanghofb7c7292015-04-13 15:15:58 -0700379 *
Saurav Dasdc7f2752018-03-18 21:28:15 -0700380 * @param linkDown the single failed link, or null for other conditions such
381 * as link-up or a removed switch
Saurav Das62ae6792017-05-15 15:34:25 -0700382 * @param linkUp the single link up, or null for other conditions such as
Saurav Dasdc7f2752018-03-18 21:28:15 -0700383 * link-down or a removed switch
384 * @param switchDown the removed switch, or null for other conditions such
385 * as link-down or link-up
386 * @param seenBefore true if this event is for a linkUp or linkDown for a
387 * seen link
388 */
389 // TODO This method should be refactored into three separated methods
Saurav Das62ae6792017-05-15 15:34:25 -0700390 public void populateRoutingRulesForLinkStatusChange(Link linkDown,
391 Link linkUp,
Saurav Dasdc7f2752018-03-18 21:28:15 -0700392 DeviceId switchDown,
393 boolean seenBefore) {
394 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
395 .count() != 1) {
Saurav Das62ae6792017-05-15 15:34:25 -0700396 log.warn("Only one event can be handled for link status change .. aborting");
397 return;
398 }
Saurav Dasdc7f2752018-03-18 21:28:15 -0700399
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700400 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900401 statusLock.lock();
402 try {
sanghofb7c7292015-04-13 15:15:58 -0700403
404 if (populationStatus == Status.STARTED) {
Saurav Das261c3002017-06-13 15:35:54 -0700405 log.warn("Previous rule population is not finished. Cannot"
Saurav Das6430f412018-01-25 09:49:01 -0800406 + " proceeed with routingRules for Topology change");
Saurav Das62ae6792017-05-15 15:34:25 -0700407 return;
sanghofb7c7292015-04-13 15:15:58 -0700408 }
409
Saurav Das261c3002017-06-13 15:35:54 -0700410 // Take snapshots of the topology
sangho28d0b6d2015-05-07 13:30:57 -0700411 updatedEcmpSpgMap = new HashMap<>();
Saurav Das261c3002017-06-13 15:35:54 -0700412 Set<EdgePair> edgePairs = new HashSet<>();
sangho28d0b6d2015-05-07 13:30:57 -0700413 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH0637b162015-12-11 01:32:44 +0530414 EcmpShortestPathGraph ecmpSpgUpdated =
415 new EcmpShortestPathGraph(sw.id(), srManager);
sangho28d0b6d2015-05-07 13:30:57 -0700416 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Saurav Das261c3002017-06-13 15:35:54 -0700417 DeviceId pairDev = getPairDev(sw.id());
418 if (pairDev != null) {
419 // pairDev may not be available yet, but we still need to add
420 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev, srManager);
421 updatedEcmpSpgMap.put(pairDev, ecmpSpgUpdated);
422 edgePairs.add(new EdgePair(sw.id(), pairDev));
423 }
sangho28d0b6d2015-05-07 13:30:57 -0700424 }
425
Saurav Das6430f412018-01-25 09:49:01 -0800426 log.info("Starting to populate routing rules from Topology change");
sanghodf0153f2015-05-05 14:13:34 -0700427
sanghofb7c7292015-04-13 15:15:58 -0700428 Set<ArrayList<DeviceId>> routeChanges;
Saurav Das62ae6792017-05-15 15:34:25 -0700429 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700430 + "populationStatus is STARTED");
sanghofb7c7292015-04-13 15:15:58 -0700431 populationStatus = Status.STARTED;
Saurav Das6430f412018-01-25 09:49:01 -0800432 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
433 boolean hashGroupsChanged = false;
Saurav Das1b391d52016-11-29 14:27:25 -0800434 // try optimized re-routing
Saurav Das62ae6792017-05-15 15:34:25 -0700435 if (linkDown == null) {
436 // either a linkUp or a switchDown - compute all route changes by
437 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dascea556f2018-03-05 14:37:16 -0800438 routeChanges = computeRouteChange(switchDown);
Saurav Das62ae6792017-05-15 15:34:25 -0700439
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700440 // deal with linkUp of a seen-before link
Saurav Dasdc7f2752018-03-18 21:28:15 -0700441 if (linkUp != null && seenBefore) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700442 // link previously seen before
443 // do hash-bucket changes instead of a re-route
444 processHashGroupChange(routeChanges, false, null);
445 // clear out routesChanges so a re-route is not attempted
446 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800447 hashGroupsChanged = true;
Saurav Das62ae6792017-05-15 15:34:25 -0700448 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700449 // for a linkUp of a never-seen-before link
450 // let it fall through to a reroute of the routeChanges
Saurav Das62ae6792017-05-15 15:34:25 -0700451
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700452 //deal with switchDown
453 if (switchDown != null) {
454 processHashGroupChange(routeChanges, true, switchDown);
455 // clear out routesChanges so a re-route is not attempted
456 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800457 hashGroupsChanged = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700458 }
sanghofb7c7292015-04-13 15:15:58 -0700459 } else {
Saurav Das62ae6792017-05-15 15:34:25 -0700460 // link has gone down
461 // Compare existing ECMP SPG only with the link that went down
462 routeChanges = computeDamagedRoutes(linkDown);
463 if (routeChanges != null) {
464 processHashGroupChange(routeChanges, true, null);
465 // clear out routesChanges so a re-route is not attempted
466 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800467 hashGroupsChanged = true;
Saurav Das62ae6792017-05-15 15:34:25 -0700468 }
sanghofb7c7292015-04-13 15:15:58 -0700469 }
470
Saurav Das1b391d52016-11-29 14:27:25 -0800471 // do full re-routing if optimized routing returns null routeChanges
Saurav Dasb149be12016-06-07 10:08:06 -0700472 if (routeChanges == null) {
Saurav Das6430f412018-01-25 09:49:01 -0800473 log.warn("Optimized routing failed... opting for full reroute");
Saurav Das261c3002017-06-13 15:35:54 -0700474 populationStatus = Status.ABORTED;
Saurav Das62ae6792017-05-15 15:34:25 -0700475 populateAllRoutingRules();
476 return;
Saurav Dasb149be12016-06-07 10:08:06 -0700477 }
478
sanghofb7c7292015-04-13 15:15:58 -0700479 if (routeChanges.isEmpty()) {
Saurav Das6430f412018-01-25 09:49:01 -0800480 if (hashGroupsChanged) {
481 log.info("Hash-groups changed for link status change");
482 } else {
483 log.info("No re-route or re-hash attempted for the link"
484 + " status change");
485 updatedEcmpSpgMap.keySet().forEach(devId -> {
486 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
487 log.debug("Updating ECMPspg for remaining dev:{}", devId);
488 });
489 }
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700490 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700491 populationStatus = Status.SUCCEEDED;
Saurav Das62ae6792017-05-15 15:34:25 -0700492 return;
sanghofb7c7292015-04-13 15:15:58 -0700493 }
494
Saurav Das62ae6792017-05-15 15:34:25 -0700495 // reroute of routeChanges
Saurav Das261c3002017-06-13 15:35:54 -0700496 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700497 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700498 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700499 log.info("Completed repopulation of rules for link-status change."
500 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700501 return;
sanghofb7c7292015-04-13 15:15:58 -0700502 } else {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700503 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sanghofb7c7292015-04-13 15:15:58 -0700504 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700505 log.warn("Failed to repopulate the rules for link status change.");
Saurav Das62ae6792017-05-15 15:34:25 -0700506 return;
sanghofb7c7292015-04-13 15:15:58 -0700507 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900508 } finally {
509 statusLock.unlock();
sanghofb7c7292015-04-13 15:15:58 -0700510 }
511 }
512
Saurav Das62ae6792017-05-15 15:34:25 -0700513 /**
Saurav Das261c3002017-06-13 15:35:54 -0700514 * Processes a set a route-path changes by reprogramming routing rules and
515 * creating new hash-groups or editing them if necessary. This method also
516 * determines the next-hops for the route-path from the src-switch (target)
517 * of the path towards the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -0700518 *
Saurav Das261c3002017-06-13 15:35:54 -0700519 * @param routeChanges a set of route-path changes, where each route-path is
520 * a list with its first element the src-switch (target)
521 * of the path, and the second element the dst-switch of
522 * the path.
523 * @param edgePairs a set of edge-switches that are paired by configuration
524 * @param subnets a set of prefixes that need to be populated in the routing
525 * table of the target switch in the route-path. Can be null,
526 * in which case all the prefixes belonging to the dst-switch
527 * will be populated in the target switch
528 * @return true if successful in repopulating all routes
Saurav Das62ae6792017-05-15 15:34:25 -0700529 */
Saurav Das261c3002017-06-13 15:35:54 -0700530 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
531 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
532 // first make every entry two-elements
533 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
534 for (ArrayList<DeviceId> route : routeChanges) {
535 if (route.size() == 1) {
536 DeviceId dstSw = route.get(0);
537 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
538 if (ec == null) {
539 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
540 return false;
541 }
542 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
543 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
544 changedRoutes.add(Lists.newArrayList(target, dstSw));
545 });
546 });
547 } else {
548 DeviceId targetSw = route.get(0);
549 DeviceId dstSw = route.get(1);
550 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
551 }
552 }
553
554 // now process changedRoutes according to edgePairs
555 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
556 return false; //abort routing and fail fast
557 }
558
559 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Das6430f412018-01-25 09:49:01 -0800560 Set<DeviceId> updatedDevices = Sets.newHashSet();
561 if (!redoRoutingIndividualDests(subnets, changedRoutes,
562 updatedDevices)) {
Saurav Das261c3002017-06-13 15:35:54 -0700563 return false; //abort routing and fail fast
564 }
565
Saurav Das261c3002017-06-13 15:35:54 -0700566 // update ecmpSPG for all edge-pairs
567 for (EdgePair ep : edgePairs) {
568 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
569 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
570 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
571 }
Saurav Das6430f412018-01-25 09:49:01 -0800572
573 // here is where we update all devices not touched by this instance
574 updatedEcmpSpgMap.keySet().stream()
575 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
576 .filter(devId -> !updatedDevices.contains(devId))
577 .forEach(devId -> {
578 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
579 log.debug("Updating ECMPspg for remaining dev:{}", devId);
580 });
Saurav Das261c3002017-06-13 15:35:54 -0700581 return true;
582 }
583
584 /**
585 * Programs targetSw in the changedRoutes for given prefixes reachable by
586 * an edgePair. If no prefixes are given, the method will use configured
587 * subnets/prefixes. If some configured subnets belong only to a specific
588 * destination in the edgePair, then the target switch will be programmed
589 * only to that destination.
590 *
591 * @param edgePairs set of edge-pairs for which target will be programmed
592 * @param subnets a set of prefixes that need to be populated in the routing
593 * table of the target switch in the changedRoutes. Can be null,
594 * in which case all the configured prefixes belonging to the
595 * paired switches will be populated in the target switch
596 * @param changedRoutes a set of route-path changes, where each route-path is
597 * a list with its first element the src-switch (target)
598 * of the path, and the second element the dst-switch of
599 * the path.
600 * @return true if successful
601 */
602 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs,
603 Set<IpPrefix> subnets,
604 Set<ArrayList<DeviceId>> changedRoutes) {
605 for (EdgePair ep : edgePairs) {
606 // temp store for a target's changedRoutes to this edge-pair
607 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
608 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
609 while (i.hasNext()) {
610 ArrayList<DeviceId> route = i.next();
611 DeviceId dstSw = route.get(1);
612 if (ep.includes(dstSw)) {
613 // routeChange for edge pair found
614 // sort by target iff target is edge and remove from changedRoutes
615 DeviceId targetSw = route.get(0);
616 try {
617 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
618 continue;
619 }
620 } catch (DeviceConfigNotFoundException e) {
621 log.warn(e.getMessage() + "aborting redoRouting");
622 return false;
623 }
624 // route is from another edge to this edge-pair
625 if (targetRoutes.containsKey(targetSw)) {
626 targetRoutes.get(targetSw).add(route);
627 } else {
628 Set<ArrayList<DeviceId>> temp = new HashSet<>();
629 temp.add(route);
630 targetRoutes.put(targetSw, temp);
631 }
632 i.remove();
633 }
634 }
635 // so now for this edgepair we have a per target set of routechanges
636 // process target->edgePair route
637 for (Map.Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
638 targetRoutes.entrySet()) {
639 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
640 entry.getKey(), ep);
641 DeviceId targetSw = entry.getKey();
642 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
643 entry.getValue().forEach(route -> {
644 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
645 log.debug("route: target {} -> dst {} found with next-hops {}",
646 route.get(0), route.get(1), nhops);
647 perDstNextHops.put(route.get(1), nhops);
648 });
649 Set<IpPrefix> ipDev1 = (subnets == null) ? config.getSubnets(ep.dev1)
650 : subnets;
651 Set<IpPrefix> ipDev2 = (subnets == null) ? config.getSubnets(ep.dev2)
652 : subnets;
653 ipDev1 = (ipDev1 == null) ? Sets.newHashSet() : ipDev1;
654 ipDev2 = (ipDev2 == null) ? Sets.newHashSet() : ipDev2;
Saurav Das6430f412018-01-25 09:49:01 -0800655 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
656 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
Saurav Das261c3002017-06-13 15:35:54 -0700657 // handle routing to subnets common to edge-pair
Saurav Das6430f412018-01-25 09:49:01 -0800658 // only if the targetSw is not part of the edge-pair and there
659 // exists a next hop to at least one of the devices in the edge-pair
660 if (!ep.includes(targetSw)
661 && ((nhDev1 != null && !nhDev1.isEmpty())
662 || (nhDev2 != null && !nhDev2.isEmpty()))) {
Saurav Das261c3002017-06-13 15:35:54 -0700663 if (!populateEcmpRoutingRulePartial(
664 targetSw,
665 ep.dev1, ep.dev2,
666 perDstNextHops,
667 Sets.intersection(ipDev1, ipDev2))) {
668 return false; // abort everything and fail fast
669 }
670 }
Saurav Das6430f412018-01-25 09:49:01 -0800671 // handle routing to subnets that only belong to dev1 only if
672 // a next-hop exists from the target to dev1
Saurav Das261c3002017-06-13 15:35:54 -0700673 Set<IpPrefix> onlyDev1Subnets = Sets.difference(ipDev1, ipDev2);
Saurav Das6430f412018-01-25 09:49:01 -0800674 if (!onlyDev1Subnets.isEmpty()
675 && nhDev1 != null && !nhDev1.isEmpty()) {
Saurav Das261c3002017-06-13 15:35:54 -0700676 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
Saurav Das6430f412018-01-25 09:49:01 -0800677 onlyDev1NextHops.put(ep.dev1, nhDev1);
Saurav Das261c3002017-06-13 15:35:54 -0700678 if (!populateEcmpRoutingRulePartial(
679 targetSw,
680 ep.dev1, null,
681 onlyDev1NextHops,
682 onlyDev1Subnets)) {
683 return false; // abort everything and fail fast
684 }
685 }
Saurav Das6430f412018-01-25 09:49:01 -0800686 // handle routing to subnets that only belong to dev2 only if
687 // a next-hop exists from the target to dev2
Saurav Das261c3002017-06-13 15:35:54 -0700688 Set<IpPrefix> onlyDev2Subnets = Sets.difference(ipDev2, ipDev1);
Saurav Das6430f412018-01-25 09:49:01 -0800689 if (!onlyDev2Subnets.isEmpty()
690 && nhDev2 != null && !nhDev2.isEmpty()) {
Saurav Das261c3002017-06-13 15:35:54 -0700691 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
Saurav Das6430f412018-01-25 09:49:01 -0800692 onlyDev2NextHops.put(ep.dev2, nhDev2);
Saurav Das261c3002017-06-13 15:35:54 -0700693 if (!populateEcmpRoutingRulePartial(
694 targetSw,
695 ep.dev2, null,
696 onlyDev2NextHops,
697 onlyDev2Subnets)) {
698 return false; // abort everything and fail fast
699 }
700 }
701 }
702 // if it gets here it has succeeded for all targets to this edge-pair
703 }
704 return true;
705 }
706
707 /**
708 * Programs targetSw in the changedRoutes for given prefixes reachable by
709 * a destination switch that is not part of an edge-pair.
710 * If no prefixes are given, the method will use configured subnets/prefixes.
711 *
712 * @param subnets a set of prefixes that need to be populated in the routing
713 * table of the target switch in the changedRoutes. Can be null,
714 * in which case all the configured prefixes belonging to the
715 * paired switches will be populated in the target switch
716 * @param changedRoutes a set of route-path changes, where each route-path is
717 * a list with its first element the src-switch (target)
718 * of the path, and the second element the dst-switch of
719 * the path.
720 * @return true if successful
721 */
722 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets,
Saurav Das6430f412018-01-25 09:49:01 -0800723 Set<ArrayList<DeviceId>> changedRoutes,
724 Set<DeviceId> updatedDevices) {
Saurav Das261c3002017-06-13 15:35:54 -0700725 // aggregate route-path changes for each dst device
726 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
727 new HashMap<>();
728 for (ArrayList<DeviceId> route: changedRoutes) {
729 DeviceId dstSw = route.get(1);
730 ArrayList<ArrayList<DeviceId>> deviceRoutes =
731 routesBydevice.get(dstSw);
732 if (deviceRoutes == null) {
733 deviceRoutes = new ArrayList<>();
734 routesBydevice.put(dstSw, deviceRoutes);
735 }
736 deviceRoutes.add(route);
737 }
738 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
739 ArrayList<ArrayList<DeviceId>> deviceRoutes =
740 routesBydevice.get(impactedDstDevice);
741 for (ArrayList<DeviceId> route: deviceRoutes) {
742 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
743 route.get(0), route.get(1));
744 DeviceId targetSw = route.get(0);
745 DeviceId dstSw = route.get(1); // same as impactedDstDevice
746 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
Saurav Das8e46aa72018-01-09 17:38:44 -0800747 if (nextHops.isEmpty()) {
748 log.warn("Could not find next hop from target:{} --> dst {} "
749 + "skipping this route", targetSw, dstSw);
750 continue;
751 }
Saurav Das261c3002017-06-13 15:35:54 -0700752 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
753 nhops.put(dstSw, nextHops);
754 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
755 (subnets == null) ? Sets.newHashSet() : subnets)) {
756 return false; // abort routing and fail fast
757 }
758 log.debug("Populating flow rules from target: {} to dst: {}"
759 + " is successful", targetSw, dstSw);
760 }
761 //Only if all the flows for all impacted routes to a
762 //specific target are pushed successfully, update the
763 //ECMP graph for that target. Or else the next event
764 //would not see any changes in the ECMP graphs.
765 //In another case, the target switch has gone away, so
766 //routes can't be installed. In that case, the current map
767 //is updated here, without any flows being pushed.
768 currentEcmpSpgMap.put(impactedDstDevice,
769 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Das6430f412018-01-25 09:49:01 -0800770 updatedDevices.add(impactedDstDevice);
Saurav Das261c3002017-06-13 15:35:54 -0700771 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
772 }
773 return true;
774 }
775
776 /**
777 * Populate ECMP rules for subnets from target to destination via nexthops.
778 *
779 * @param targetSw Device ID of target switch in which rules will be programmed
780 * @param destSw1 Device ID of final destination switch to which the rules will forward
781 * @param destSw2 Device ID of paired destination switch to which the rules will forward
782 * A null deviceId indicates packets should only be sent to destSw1
Saurav Das97241862018-02-14 14:14:54 -0800783 * @param nextHops Map of a set of next hops per destSw
Saurav Das261c3002017-06-13 15:35:54 -0700784 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
785 * @return true if it succeeds in populating rules
786 */ // refactor
787 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw,
788 DeviceId destSw1,
789 DeviceId destSw2,
790 Map<DeviceId, Set<DeviceId>> nextHops,
791 Set<IpPrefix> subnets) {
792 boolean result;
793 // If both target switch and dest switch are edge routers, then set IP
794 // rule for both subnet and router IP.
795 boolean targetIsEdge;
796 boolean dest1IsEdge;
797 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
798 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
799
800 try {
801 targetIsEdge = config.isEdgeDevice(targetSw);
802 dest1IsEdge = config.isEdgeDevice(destSw1);
803 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
804 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
805 if (destSw2 != null) {
806 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
807 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
808 }
809 } catch (DeviceConfigNotFoundException e) {
810 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Das62ae6792017-05-15 15:34:25 -0700811 return false;
812 }
Saurav Das261c3002017-06-13 15:35:54 -0700813
814 if (targetIsEdge && dest1IsEdge) {
815 subnets = (subnets != null && !subnets.isEmpty())
816 ? Sets.newHashSet(subnets)
817 : Sets.newHashSet(config.getSubnets(destSw1));
Saurav Das97241862018-02-14 14:14:54 -0800818 // XXX - Rethink this - ignoring routerIPs in all other switches
819 // even edge to edge switches
Saurav Das261c3002017-06-13 15:35:54 -0700820 /*subnets.add(dest1RouterIpv4.toIpPrefix());
821 if (dest1RouterIpv6 != null) {
822 subnets.add(dest1RouterIpv6.toIpPrefix());
823 }
824 if (destSw2 != null && dest2RouterIpv4 != null) {
825 subnets.add(dest2RouterIpv4.toIpPrefix());
826 if (dest2RouterIpv6 != null) {
827 subnets.add(dest2RouterIpv6.toIpPrefix());
828 }
829 }*/
830 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
831 + "for subnets {}", targetSw, destSw1,
832 (destSw2 != null) ? ("& " + destSw2) : "",
833 subnets);
834 result = rulePopulator.populateIpRuleForSubnet(targetSw, subnets,
835 destSw1, destSw2,
836 nextHops);
837 if (!result) {
838 return false;
839 }
Saurav Das62ae6792017-05-15 15:34:25 -0700840 }
Saurav Das261c3002017-06-13 15:35:54 -0700841
842 if (!targetIsEdge && dest1IsEdge) {
843 // MPLS rules in all non-edge target devices. These rules are for
844 // individual destinations, even if the dsts are part of edge-pairs.
845 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
846 + "all MPLS rules", targetSw, destSw1);
847 result = rulePopulator.populateMplsRule(targetSw, destSw1,
848 nextHops.get(destSw1),
849 dest1RouterIpv4);
850 if (!result) {
851 return false;
852 }
853 if (dest1RouterIpv6 != null) {
Saurav Das97241862018-02-14 14:14:54 -0800854 int v4sid = 0, v6sid = 0;
855 try {
856 v4sid = config.getIPv4SegmentId(destSw1);
857 v6sid = config.getIPv6SegmentId(destSw1);
858 } catch (DeviceConfigNotFoundException e) {
859 log.warn(e.getMessage());
860 }
861 if (v4sid != v6sid) {
862 result = rulePopulator.populateMplsRule(targetSw, destSw1,
863 nextHops.get(destSw1),
864 dest1RouterIpv6);
865 if (!result) {
866 return false;
867 }
Saurav Das261c3002017-06-13 15:35:54 -0700868 }
869 }
870 }
871
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -0700872 if (!targetIsEdge && !dest1IsEdge) {
873 // MPLS rules for inter-connected spines
874 // can be merged with above if, left it here for clarity
875 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
876 + "all MPLS rules", targetSw, destSw1);
877
878 result = rulePopulator.populateMplsRule(targetSw, destSw1,
879 nextHops.get(destSw1),
880 dest1RouterIpv4);
881 if (!result) {
882 return false;
883 }
884
885 if (dest1RouterIpv6 != null) {
886 int v4sid = 0, v6sid = 0;
887 try {
888 v4sid = config.getIPv4SegmentId(destSw1);
889 v6sid = config.getIPv6SegmentId(destSw1);
890 } catch (DeviceConfigNotFoundException e) {
891 log.warn(e.getMessage());
892 }
893 if (v4sid != v6sid) {
894 result = rulePopulator.populateMplsRule(targetSw, destSw1,
895 nextHops.get(destSw1),
896 dest1RouterIpv6);
897 if (!result) {
898 return false;
899 }
900 }
901 }
902 }
903
904
Saurav Das261c3002017-06-13 15:35:54 -0700905 // To save on ECMP groups
906 // avoid MPLS rules in non-edge-devices to non-edge-devices
907 // avoid MPLS transit rules in edge-devices
908 // avoid loopback IP rules in edge-devices to non-edge-devices
909 return true;
Saurav Das62ae6792017-05-15 15:34:25 -0700910 }
911
912 /**
913 * Processes a set a route-path changes by editing hash groups.
914 *
915 * @param routeChanges a set of route-path changes, where each route-path is
916 * a list with its first element the src-switch of the path
917 * and the second element the dst-switch of the path.
918 * @param linkOrSwitchFailed true if the route changes are for a failed
919 * switch or linkDown event
920 * @param failedSwitch the switchId if the route changes are for a failed switch,
921 * otherwise null
922 */
923 private void processHashGroupChange(Set<ArrayList<DeviceId>> routeChanges,
924 boolean linkOrSwitchFailed,
925 DeviceId failedSwitch) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700926 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
927 // first, ensure each routeChanges entry has two elements
Saurav Das62ae6792017-05-15 15:34:25 -0700928 for (ArrayList<DeviceId> route : routeChanges) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700929 if (route.size() == 1) {
930 // route-path changes are from everyone else to this switch
931 DeviceId dstSw = route.get(0);
932 srManager.deviceService.getAvailableDevices().forEach(sw -> {
933 if (!sw.id().equals(dstSw)) {
934 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
935 }
936 });
937 } else {
938 changedRoutes.add(route);
Saurav Das62ae6792017-05-15 15:34:25 -0700939 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700940 }
Saurav Das6430f412018-01-25 09:49:01 -0800941 boolean someFailed = false;
942 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700943 for (ArrayList<DeviceId> route : changedRoutes) {
944 DeviceId targetSw = route.get(0);
945 DeviceId dstSw = route.get(1);
Saurav Das62ae6792017-05-15 15:34:25 -0700946 if (linkOrSwitchFailed) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700947 boolean success = fixHashGroupsForRoute(route, true);
Saurav Das62ae6792017-05-15 15:34:25 -0700948 // it's possible that we cannot fix hash groups for a route
949 // if the target switch has failed. Nevertheless the ecmp graph
950 // for the impacted switch must still be updated.
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700951 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
Saurav Das62ae6792017-05-15 15:34:25 -0700952 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
953 currentEcmpSpgMap.remove(targetSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700954 log.debug("Updating ECMPspg for dst:{} removing failed switch "
Saurav Das62ae6792017-05-15 15:34:25 -0700955 + "target:{}", dstSw, targetSw);
Saurav Das6430f412018-01-25 09:49:01 -0800956 updatedDevices.add(targetSw);
957 updatedDevices.add(dstSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700958 continue;
Saurav Das62ae6792017-05-15 15:34:25 -0700959 }
960 //linkfailed - update both sides
Saurav Das62ae6792017-05-15 15:34:25 -0700961 if (success) {
962 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700963 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
Saurav Das6430f412018-01-25 09:49:01 -0800964 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
965 + " or switchdown", dstSw, targetSw);
966 updatedDevices.add(targetSw);
967 updatedDevices.add(dstSw);
968 } else {
969 someFailed = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700970 }
971 } else {
972 //linkup of seen before link
973 boolean success = fixHashGroupsForRoute(route, false);
974 if (success) {
975 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
976 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
977 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
Saurav Das62ae6792017-05-15 15:34:25 -0700978 targetSw, dstSw);
Saurav Das6430f412018-01-25 09:49:01 -0800979 updatedDevices.add(targetSw);
980 updatedDevices.add(dstSw);
981 } else {
982 someFailed = true;
Saurav Das62ae6792017-05-15 15:34:25 -0700983 }
984 }
985 }
Saurav Das6430f412018-01-25 09:49:01 -0800986 if (!someFailed) {
987 // here is where we update all devices not touched by this instance
988 updatedEcmpSpgMap.keySet().stream()
989 .filter(devId -> !updatedDevices.contains(devId))
990 .forEach(devId -> {
991 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
992 log.debug("Updating ECMPspg for remaining dev:{}", devId);
993 });
994 }
Saurav Das62ae6792017-05-15 15:34:25 -0700995 }
996
997 /**
998 * Edits hash groups in the src-switch (targetSw) of a route-path by
999 * calling the groupHandler to either add or remove buckets in an existing
1000 * hash group.
1001 *
1002 * @param route a single list representing a route-path where the first element
1003 * is the src-switch (targetSw) of the route-path and the
1004 * second element is the dst-switch
1005 * @param revoke true if buckets in the hash-groups need to be removed;
1006 * false if buckets in the hash-groups need to be added
1007 * @return true if the hash group editing is successful
1008 */
1009 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1010 boolean revoke) {
1011 DeviceId targetSw = route.get(0);
1012 if (route.size() < 2) {
1013 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1014 return false;
1015 }
1016 DeviceId destSw = route.get(1);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001017 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001018 targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001019 // figure out the new next hops at the targetSw towards the destSw
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001020 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001021 // call group handler to change hash group at targetSw
1022 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1023 if (grpHandler == null) {
1024 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1025 + " {} hash group buckets for route:{} ", targetSw,
1026 (revoke) ? "revoke" : "repopulate", route);
1027 return false;
1028 }
1029 log.debug("{} hash-groups buckets For Route {} -> {} to next-hops {}",
1030 (revoke) ? "revoke" : "repopulating",
1031 targetSw, destSw, nextHops);
1032 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1033 destSw, true)
1034 : grpHandler.fixHashGroups(targetSw, nextHops,
1035 destSw, false);
1036 }
1037
1038 /**
Saurav Das261c3002017-06-13 15:35:54 -07001039 * Start the flow rule population process if it was never started. The
1040 * process finishes successfully when all flow rules are set and stops with
1041 * ABORTED status when any groups required for flows is not set yet.
Saurav Das62ae6792017-05-15 15:34:25 -07001042 */
Saurav Das261c3002017-06-13 15:35:54 -07001043 public void startPopulationProcess() {
1044 statusLock.lock();
1045 try {
1046 if (populationStatus == Status.IDLE
1047 || populationStatus == Status.SUCCEEDED
1048 || populationStatus == Status.ABORTED) {
1049 populateAllRoutingRules();
sangho28d0b6d2015-05-07 13:30:57 -07001050 } else {
Saurav Das261c3002017-06-13 15:35:54 -07001051 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1052 populationStatus);
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001053 }
Saurav Das261c3002017-06-13 15:35:54 -07001054 } finally {
1055 statusLock.unlock();
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001056 }
sanghofb7c7292015-04-13 15:15:58 -07001057 }
1058
Saurav Dasb149be12016-06-07 10:08:06 -07001059 /**
Saurav Das261c3002017-06-13 15:35:54 -07001060 * Revoke rules of given subnet in all edge switches.
1061 *
1062 * @param subnets subnet being removed
1063 * @return true if succeed
1064 */
1065 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
1066 statusLock.lock();
1067 try {
1068 return srManager.routingRulePopulator.revokeIpRuleForSubnet(subnets);
1069 } finally {
1070 statusLock.unlock();
1071 }
1072 }
1073
1074 /**
Charles Chan910be6a2017-08-23 14:46:43 -07001075 * Populates IP rules for a route that has direct connection to the switch
1076 * if the current instance is the master of the switch.
1077 *
1078 * @param deviceId device ID of the device that next hop attaches to
1079 * @param prefix IP prefix of the route
1080 * @param hostMac MAC address of the next hop
1081 * @param hostVlanId Vlan ID of the nexthop
1082 * @param outPort port where the next hop attaches to
1083 */
1084 void populateRoute(DeviceId deviceId, IpPrefix prefix,
1085 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
1086 if (srManager.mastershipService.isLocalMaster(deviceId)) {
1087 srManager.routingRulePopulator.populateRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1088 }
1089 }
1090
1091 /**
1092 * Removes IP rules for a route when the next hop is gone.
1093 * if the current instance is the master of the switch.
1094 *
1095 * @param deviceId device ID of the device that next hop attaches to
1096 * @param prefix IP prefix of the route
1097 * @param hostMac MAC address of the next hop
1098 * @param hostVlanId Vlan ID of the nexthop
1099 * @param outPort port that next hop attaches to
1100 */
1101 void revokeRoute(DeviceId deviceId, IpPrefix prefix,
1102 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
1103 if (srManager.mastershipService.isLocalMaster(deviceId)) {
1104 srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1105 }
1106 }
1107
1108 /**
Saurav Das261c3002017-06-13 15:35:54 -07001109 * Remove ECMP graph entry for the given device. Typically called when
1110 * device is no longer available.
1111 *
1112 * @param deviceId the device for which graphs need to be purged
1113 */
1114 protected void purgeEcmpGraph(DeviceId deviceId) {
Saurav Das6430f412018-01-25 09:49:01 -08001115 statusLock.lock();
1116 try {
1117
1118 if (populationStatus == Status.STARTED) {
1119 log.warn("Previous rule population is not finished. Cannot"
1120 + " proceeed with purgeEcmpGraph for {}", deviceId);
1121 return;
1122 }
1123 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1124 currentEcmpSpgMap.remove(deviceId);
1125 if (updatedEcmpSpgMap != null) {
1126 updatedEcmpSpgMap.remove(deviceId);
1127 }
1128 } finally {
1129 statusLock.unlock();
Saurav Das261c3002017-06-13 15:35:54 -07001130 }
1131 }
1132
1133 //////////////////////////////////////
1134 // Routing helper methods and classes
1135 //////////////////////////////////////
1136
1137 /**
Saurav Das1b391d52016-11-29 14:27:25 -08001138 * Computes set of affected routes due to failed link. Assumes
Saurav Dasb149be12016-06-07 10:08:06 -07001139 * previous ecmp shortest-path graph exists for a switch in order to compute
1140 * affected routes. If such a graph does not exist, the method returns null.
1141 *
1142 * @param linkFail the failed link
1143 * @return the set of affected routes which may be empty if no routes were
1144 * affected, or null if no previous ecmp spg was found for comparison
1145 */
sanghofb7c7292015-04-13 15:15:58 -07001146 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sanghofb7c7292015-04-13 15:15:58 -07001147 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1148
1149 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001150 log.debug("Computing the impacted routes for device {} due to link fail",
1151 sw.id());
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001152 DeviceId retId = shouldHandleRouting(sw.id());
1153 if (retId == null) {
sanghofb7c7292015-04-13 15:15:58 -07001154 continue;
1155 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001156 Set<DeviceId> devicesToProcess = Sets.newHashSet(retId, sw.id());
1157 for (DeviceId rootSw : devicesToProcess) {
1158 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1159 if (ecmpSpg == null) {
1160 log.warn("No existing ECMP graph for switch {}. Aborting optimized"
1161 + " rerouting and opting for full-reroute", rootSw);
1162 return null;
1163 }
1164 if (log.isDebugEnabled()) {
1165 log.debug("Root switch: {}", rootSw);
1166 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1167 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1168 }
1169 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1170 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1171 // figure out if the broken link affected any route-paths in this graph
1172 for (Integer itrIdx : switchVia.keySet()) {
1173 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1174 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1175 switchVia.get(itrIdx);
1176 for (DeviceId targetSw : swViaMap.keySet()) {
1177 log.trace("TargetSwitch {} --> RootSwitch {}",
1178 targetSw, rootSw);
Saurav Dasb149be12016-06-07 10:08:06 -07001179 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1180 log.trace(" Via:");
Pier Ventreadb4ae62016-11-23 09:57:42 -08001181 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb149be12016-06-07 10:08:06 -07001182 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001183 Set<ArrayList<DeviceId>> subLinks =
1184 computeLinks(targetSw, rootSw, swViaMap);
1185 for (ArrayList<DeviceId> alink: subLinks) {
1186 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1187 alink.get(1).equals(linkFail.dst().deviceId()))
1188 ||
1189 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1190 alink.get(1).equals(linkFail.src().deviceId()))) {
1191 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1192 ArrayList<DeviceId> aRoute = new ArrayList<>();
1193 aRoute.add(targetSw); // switch with rules to populate
1194 aRoute.add(rootSw); // towards this destination
1195 routes.add(aRoute);
1196 break;
1197 }
sanghofb7c7292015-04-13 15:15:58 -07001198 }
1199 }
1200 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001201
sanghofb7c7292015-04-13 15:15:58 -07001202 }
sangho28d0b6d2015-05-07 13:30:57 -07001203
sanghofb7c7292015-04-13 15:15:58 -07001204 }
sanghofb7c7292015-04-13 15:15:58 -07001205 return routes;
1206 }
1207
Saurav Das1b391d52016-11-29 14:27:25 -08001208 /**
1209 * Computes set of affected routes due to new links or failed switches.
1210 *
Saurav Dasdc7f2752018-03-18 21:28:15 -07001211 * @param failedSwitch deviceId of failed switch if any
Saurav Das1b391d52016-11-29 14:27:25 -08001212 * @return the set of affected routes which may be empty if no routes were
1213 * affected
1214 */
Saurav Dascea556f2018-03-05 14:37:16 -08001215 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das261c3002017-06-13 15:35:54 -07001216 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das1b391d52016-11-29 14:27:25 -08001217 ImmutableSet.builder();
sanghofb7c7292015-04-13 15:15:58 -07001218
1219 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das261c3002017-06-13 15:35:54 -07001220 log.debug("Computing the impacted routes for device {}", sw.id());
1221 DeviceId retId = shouldHandleRouting(sw.id());
1222 if (retId == null) {
sanghofb7c7292015-04-13 15:15:58 -07001223 continue;
1224 }
Saurav Das261c3002017-06-13 15:35:54 -07001225 Set<DeviceId> devicesToProcess = Sets.newHashSet(retId, sw.id());
1226 for (DeviceId rootSw : devicesToProcess) {
1227 if (log.isTraceEnabled()) {
1228 log.trace("Device links for dev: {}", rootSw);
1229 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1230 log.trace("{} -> {} ", link.src().deviceId(),
1231 link.dst().deviceId());
1232 }
Saurav Dasb149be12016-06-07 10:08:06 -07001233 }
Saurav Das261c3002017-06-13 15:35:54 -07001234 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1235 if (currEcmpSpg == null) {
1236 log.debug("No existing ECMP graph for device {}.. adding self as "
1237 + "changed route", rootSw);
1238 changedRtBldr.add(Lists.newArrayList(rootSw));
1239 continue;
1240 }
1241 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
1242 if (log.isDebugEnabled()) {
1243 log.debug("Root switch: {}", rootSw);
1244 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1245 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1246 }
1247 // first use the updated/new map to compare to current/existing map
1248 // as new links may have come up
1249 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1250 // then use the current/existing map to compare to updated/new map
1251 // as switch may have been removed
1252 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho28d0b6d2015-05-07 13:30:57 -07001253 }
Saurav Das1b391d52016-11-29 14:27:25 -08001254 }
sanghofb7c7292015-04-13 15:15:58 -07001255
Saurav Dascea556f2018-03-05 14:37:16 -08001256 // handle clearing state for a failed switch in case the switch does
1257 // not have a pair, or the pair is not available
1258 if (failedSwitch != null) {
1259 DeviceId pairDev = getPairDev(failedSwitch);
1260 if (pairDev == null || !srManager.deviceService.isAvailable(pairDev)) {
1261 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1262 srManager.deviceService.getDevices().forEach(dev -> {
1263 if (!dev.id().equals(failedSwitch) &&
1264 srManager.mastershipService.isLocalMaster(dev.id())) {
1265 log.debug(" : {}", dev.id());
1266 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1267 }
1268 });
1269 }
1270 }
1271
Saurav Das261c3002017-06-13 15:35:54 -07001272 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das1b391d52016-11-29 14:27:25 -08001273 for (ArrayList<DeviceId> route: changedRoutes) {
1274 log.debug("Route changes Target -> Root");
1275 if (route.size() == 1) {
1276 log.debug(" : all -> {}", route.get(0));
1277 } else {
1278 log.debug(" : {} -> {}", route.get(0), route.get(1));
1279 }
1280 }
1281 return changedRoutes;
1282 }
1283
1284 /**
1285 * For the root switch, searches all the target nodes reachable in the base
1286 * graph, and compares paths to the ones in the comp graph.
1287 *
1288 * @param base the graph that is indexed for all reachable target nodes
1289 * from the root node
1290 * @param comp the graph that the base graph is compared to
1291 * @param rootSw both ecmp graphs are calculated for the root node
1292 * @return all the routes that have changed in the base graph
1293 */
1294 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1295 EcmpShortestPathGraph comp,
1296 DeviceId rootSw) {
1297 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1298 ImmutableSet.builder();
1299 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1300 base.getAllLearnedSwitchesAndVia();
1301 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1302 comp.getAllLearnedSwitchesAndVia();
1303 for (Integer itrIdx : baseMap.keySet()) {
1304 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1305 baseMap.get(itrIdx);
1306 for (DeviceId targetSw : baseViaMap.keySet()) {
1307 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1308 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1309 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Das62ae6792017-05-15 15:34:25 -07001310 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das1b391d52016-11-29 14:27:25 -08001311 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das261c3002017-06-13 15:35:54 -07001312 route.add(targetSw); // switch with rules to populate
1313 route.add(rootSw); // towards this destination
Saurav Das1b391d52016-11-29 14:27:25 -08001314 changedRoutesBuilder.add(route);
sanghofb7c7292015-04-13 15:15:58 -07001315 }
1316 }
sangho28d0b6d2015-05-07 13:30:57 -07001317 }
Saurav Das1b391d52016-11-29 14:27:25 -08001318 return changedRoutesBuilder.build();
sanghofb7c7292015-04-13 15:15:58 -07001319 }
1320
Saurav Das261c3002017-06-13 15:35:54 -07001321 /**
1322 * Returns the ECMP paths traversed to reach the target switch.
1323 *
1324 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1325 * @param targetSw the switch to reach from the root switch
1326 * @return the nodes traversed on ECMP paths to the target switch
1327 */
sanghofb7c7292015-04-13 15:15:58 -07001328 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das1b391d52016-11-29 14:27:25 -08001329 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sanghofb7c7292015-04-13 15:15:58 -07001330 for (Integer itrIdx : switchVia.keySet()) {
1331 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1332 switchVia.get(itrIdx);
Saurav Das1b391d52016-11-29 14:27:25 -08001333 if (swViaMap.get(targetSw) == null) {
sanghofb7c7292015-04-13 15:15:58 -07001334 continue;
1335 } else {
Saurav Das1b391d52016-11-29 14:27:25 -08001336 return swViaMap.get(targetSw);
sanghofb7c7292015-04-13 15:15:58 -07001337 }
1338 }
1339
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001340 return null;
sanghofb7c7292015-04-13 15:15:58 -07001341 }
1342
Saurav Das261c3002017-06-13 15:35:54 -07001343 /**
1344 * Utility method to break down a path from src to dst device into a collection
1345 * of links.
1346 *
1347 * @param src src device of the path
1348 * @param dst dst device of the path
1349 * @param viaMap path taken from src to dst device
1350 * @return collection of links in the path
1351 */
sanghofb7c7292015-04-13 15:15:58 -07001352 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1353 DeviceId dst,
1354 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1355 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1356 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1357 DeviceId linkSrc = src;
1358 DeviceId linkDst = dst;
1359 for (DeviceId viaDevice: via) {
1360 ArrayList<DeviceId> link = new ArrayList<>();
1361 linkDst = viaDevice;
1362 link.add(linkSrc);
1363 link.add(linkDst);
1364 subLinks.add(link);
1365 linkSrc = viaDevice;
1366 }
1367 ArrayList<DeviceId> link = new ArrayList<>();
1368 link.add(linkSrc);
1369 link.add(dst);
1370 subLinks.add(link);
1371 }
1372
1373 return subLinks;
1374 }
1375
Charles Chanc22cef32016-04-29 14:38:22 -07001376 /**
Saurav Das261c3002017-06-13 15:35:54 -07001377 * Determines whether this controller instance should handle routing for the
1378 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
1379 * Returns null if this instance should not handle routing for given {@code deviceId}.
1380 * Otherwise the returned value could be the given deviceId itself, or the
1381 * deviceId for the paired edge device. In the latter case, this instance
1382 * should handle routing for both the given device and the paired device.
Charles Chanc22cef32016-04-29 14:38:22 -07001383 *
Saurav Das261c3002017-06-13 15:35:54 -07001384 * @param deviceId device identifier to consider for routing
1385 * @return null or deviceId which could be the same as the given deviceId
1386 * or the deviceId of a paired edge device
Charles Chanc22cef32016-04-29 14:38:22 -07001387 */
Saurav Das261c3002017-06-13 15:35:54 -07001388 private DeviceId shouldHandleRouting(DeviceId deviceId) {
1389 if (!srManager.mastershipService.isLocalMaster(deviceId)) {
1390 log.debug("Not master for dev:{} .. skipping routing, may get handled "
1391 + "elsewhere as part of paired devices", deviceId);
1392 return null;
1393 }
1394 NodeId myNode = srManager.mastershipService.getMasterFor(deviceId);
1395 DeviceId pairDev = getPairDev(deviceId);
sangho80f11cb2015-04-01 13:05:26 -07001396
Saurav Das261c3002017-06-13 15:35:54 -07001397 if (pairDev != null) {
1398 if (!srManager.deviceService.isAvailable(pairDev)) {
Saurav Dascea556f2018-03-05 14:37:16 -08001399 log.warn("pairedDev {} not available .. routing both this dev:{} "
1400 + "and pair without mastership check for pair",
Saurav Das261c3002017-06-13 15:35:54 -07001401 pairDev, deviceId);
1402 return pairDev; // handle both temporarily
1403 }
1404 NodeId pairMasterNode = srManager.mastershipService.getMasterFor(pairDev);
1405 if (myNode.compareTo(pairMasterNode) <= 0) {
1406 log.debug("Handling routing for both dev:{} pair-dev:{}; myNode: {}"
1407 + " pairMaster:{} compare:{}", deviceId, pairDev,
1408 myNode, pairMasterNode,
1409 myNode.compareTo(pairMasterNode));
1410 return pairDev; // handle both
1411 } else {
1412 log.debug("PairDev node: {} should handle routing for dev:{} and "
1413 + "pair-dev:{}", pairMasterNode, deviceId, pairDev);
1414 return null; // handle neither
sangho80f11cb2015-04-01 13:05:26 -07001415 }
1416 }
Saurav Das261c3002017-06-13 15:35:54 -07001417 return deviceId; // not paired, just handle given device
sangho80f11cb2015-04-01 13:05:26 -07001418 }
1419
Charles Chanc22cef32016-04-29 14:38:22 -07001420 /**
Saurav Das261c3002017-06-13 15:35:54 -07001421 * Returns the configured paired DeviceId for the given Device, or null
1422 * if no such paired device has been configured.
Charles Chanc22cef32016-04-29 14:38:22 -07001423 *
Saurav Das261c3002017-06-13 15:35:54 -07001424 * @param deviceId
1425 * @return configured pair deviceId or null
Charles Chanc22cef32016-04-29 14:38:22 -07001426 */
Saurav Das261c3002017-06-13 15:35:54 -07001427 private DeviceId getPairDev(DeviceId deviceId) {
1428 DeviceId pairDev;
Charles Chan319d1a22015-11-03 10:42:14 -08001429 try {
Saurav Das261c3002017-06-13 15:35:54 -07001430 pairDev = srManager.deviceConfiguration.getPairDeviceId(deviceId);
Charles Chan319d1a22015-11-03 10:42:14 -08001431 } catch (DeviceConfigNotFoundException e) {
Saurav Das261c3002017-06-13 15:35:54 -07001432 log.warn(e.getMessage() + " .. cannot continue routing for dev: {}");
1433 return null;
Charles Chan319d1a22015-11-03 10:42:14 -08001434 }
Saurav Das261c3002017-06-13 15:35:54 -07001435 return pairDev;
sangho80f11cb2015-04-01 13:05:26 -07001436 }
1437
1438 /**
Saurav Das261c3002017-06-13 15:35:54 -07001439 * Returns the set of deviceIds which are the next hops from the targetSw
1440 * to the dstSw according to the latest ECMP spg.
1441 *
1442 * @param targetSw the switch for which the next-hops are desired
1443 * @param dstSw the switch to which the next-hops lead to from the targetSw
1444 * @return set of next hop deviceIds, could be empty if no next hops are found
1445 */
1446 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
1447 boolean targetIsEdge = false;
1448 try {
1449 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
1450 } catch (DeviceConfigNotFoundException e) {
1451 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
1452 + "continuing to getNextHops", targetSw);
1453 }
1454
1455 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
1456 if (ecmpSpg == null) {
1457 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
1458 return ImmutableSet.of();
1459 }
1460 HashMap<Integer,
1461 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
1462 ecmpSpg.getAllLearnedSwitchesAndVia();
1463 for (Integer itrIdx : switchVia.keySet()) {
1464 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1465 switchVia.get(itrIdx);
1466 for (DeviceId target : swViaMap.keySet()) {
1467 if (!target.equals(targetSw)) {
1468 continue;
1469 }
1470 if (!targetIsEdge && itrIdx > 1) {
Saurav Das97241862018-02-14 14:14:54 -08001471 // optimization for spines to not use leaves to get
1472 // to a spine or other leaves
1473 boolean pathdevIsEdge = false;
1474 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1475 for (DeviceId pathdev : via) {
1476 try {
1477 pathdevIsEdge = srManager.deviceConfiguration
1478 .isEdgeDevice(pathdev);
1479 } catch (DeviceConfigNotFoundException e) {
1480 log.warn(e.getMessage());
1481 }
1482 if (pathdevIsEdge) {
1483 log.debug("Avoiding {} hop path for non-edge targetSw:{}"
1484 + " --> dstSw:{} which goes through an edge"
1485 + " device {} in path {}", itrIdx,
1486 targetSw, dstSw, pathdev, via);
1487 return ImmutableSet.of();
1488 }
1489 }
1490 }
Saurav Das261c3002017-06-13 15:35:54 -07001491 }
1492 Set<DeviceId> nextHops = new HashSet<>();
1493 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1494 if (via.isEmpty()) {
1495 // the dstSw is the next-hop from the targetSw
1496 nextHops.add(dstSw);
1497 } else {
1498 // first elem is next-hop in each ECMP path
1499 nextHops.add(via.get(0));
1500 }
1501 }
1502 return nextHops;
1503 }
1504 }
1505 return ImmutableSet.of(); //no next-hops found
1506 }
1507
1508 /**
1509 * Represents two devices that are paired by configuration. An EdgePair for
1510 * (dev1, dev2) is the same as as EdgePair for (dev2, dev1)
1511 */
1512 protected final class EdgePair {
1513 DeviceId dev1;
1514 DeviceId dev2;
1515
1516 EdgePair(DeviceId dev1, DeviceId dev2) {
1517 this.dev1 = dev1;
1518 this.dev2 = dev2;
1519 }
1520
1521 boolean includes(DeviceId dev) {
1522 return dev1.equals(dev) || dev2.equals(dev);
1523 }
1524
1525 @Override
1526 public boolean equals(Object o) {
1527 if (this == o) {
1528 return true;
1529 }
1530 if (!(o instanceof EdgePair)) {
1531 return false;
1532 }
1533 EdgePair that = (EdgePair) o;
1534 return ((this.dev1.equals(that.dev1) && this.dev2.equals(that.dev2)) ||
1535 (this.dev1.equals(that.dev2) && this.dev2.equals(that.dev1)));
1536 }
1537
1538 @Override
1539 public int hashCode() {
1540 if (dev1.toString().compareTo(dev2.toString()) <= 0) {
1541 return Objects.hash(dev1, dev2);
1542 } else {
1543 return Objects.hash(dev2, dev1);
1544 }
1545 }
1546
1547 @Override
1548 public String toString() {
1549 return toStringHelper(this)
1550 .add("Dev1", dev1)
1551 .add("Dev2", dev2)
1552 .toString();
1553 }
1554 }
1555
1556 //////////////////////////////////////
1557 // Filtering rule creation
1558 //////////////////////////////////////
1559
1560 /**
Saurav Dasf9332192017-02-18 14:05:44 -08001561 * Populates filtering rules for port, and punting rules
1562 * for gateway IPs, loopback IPs and arp/ndp traffic.
1563 * Should only be called by the master instance for this device/port.
sangho80f11cb2015-04-01 13:05:26 -07001564 *
1565 * @param deviceId Switch ID to set the rules
1566 */
Saurav Das9f1c42e2015-10-23 10:51:11 -07001567 public void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das07c74602016-04-27 18:35:50 -07001568 // Although device is added, sometimes device store does not have the
1569 // ports for this device yet. It results in missing filtering rules in the
1570 // switch. We will attempt it a few times. If it still does not work,
1571 // user can manually repopulate using CLI command sr-reroute-network
Charles Chan18fa4252017-02-08 16:10:40 -08001572 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd1872b02016-12-02 15:43:47 -08001573 if (firstRun == null) {
1574 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das07c74602016-04-27 18:35:50 -07001575 }
Saurav Dasd1872b02016-12-02 15:43:47 -08001576 executorService.schedule(new RetryFilters(deviceId, firstRun),
1577 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sangho80f11cb2015-04-01 13:05:26 -07001578 }
1579
1580 /**
Saurav Dasd1872b02016-12-02 15:43:47 -08001581 * Utility class used to temporarily store information about the ports on a
1582 * device processed for filtering objectives.
Saurav Dasd1872b02016-12-02 15:43:47 -08001583 */
1584 public final class PortFilterInfo {
Saurav Dasf9332192017-02-18 14:05:44 -08001585 int disabledPorts = 0, errorPorts = 0, filteredPorts = 0;
Saurav Das07c74602016-04-27 18:35:50 -07001586
Saurav Dasf9332192017-02-18 14:05:44 -08001587 public PortFilterInfo(int disabledPorts, int errorPorts,
Saurav Dasd1872b02016-12-02 15:43:47 -08001588 int filteredPorts) {
1589 this.disabledPorts = disabledPorts;
1590 this.filteredPorts = filteredPorts;
Saurav Dasf9332192017-02-18 14:05:44 -08001591 this.errorPorts = errorPorts;
Saurav Dasd1872b02016-12-02 15:43:47 -08001592 }
1593
1594 @Override
1595 public int hashCode() {
Saurav Dasf9332192017-02-18 14:05:44 -08001596 return Objects.hash(disabledPorts, filteredPorts, errorPorts);
Saurav Dasd1872b02016-12-02 15:43:47 -08001597 }
1598
1599 @Override
1600 public boolean equals(Object obj) {
1601 if (this == obj) {
1602 return true;
1603 }
1604 if ((obj == null) || (!(obj instanceof PortFilterInfo))) {
1605 return false;
1606 }
1607 PortFilterInfo other = (PortFilterInfo) obj;
1608 return ((disabledPorts == other.disabledPorts) &&
1609 (filteredPorts == other.filteredPorts) &&
Saurav Dasf9332192017-02-18 14:05:44 -08001610 (errorPorts == other.errorPorts));
Saurav Dasd1872b02016-12-02 15:43:47 -08001611 }
1612
1613 @Override
1614 public String toString() {
1615 MoreObjects.ToStringHelper helper = toStringHelper(this)
1616 .add("disabledPorts", disabledPorts)
Saurav Dasf9332192017-02-18 14:05:44 -08001617 .add("errorPorts", errorPorts)
Saurav Dasd1872b02016-12-02 15:43:47 -08001618 .add("filteredPorts", filteredPorts);
1619 return helper.toString();
1620 }
1621 }
1622
1623 /**
1624 * RetryFilters populates filtering objectives for a device and keeps retrying
1625 * till the number of ports filtered are constant for a predefined number
1626 * of attempts.
1627 */
1628 protected final class RetryFilters implements Runnable {
1629 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
1630 DeviceId devId;
1631 int counter;
1632 PortFilterInfo prevRun;
1633
1634 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das07c74602016-04-27 18:35:50 -07001635 devId = deviceId;
Saurav Dasd1872b02016-12-02 15:43:47 -08001636 prevRun = previousRun;
1637 counter = 0;
Saurav Das07c74602016-04-27 18:35:50 -07001638 }
1639
1640 @Override
1641 public void run() {
Charles Chan077314e2017-06-22 14:27:17 -07001642 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chan18fa4252017-02-08 16:10:40 -08001643 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd1872b02016-12-02 15:43:47 -08001644 boolean sameResult = prevRun.equals(thisRun);
1645 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
1646 thisRun, sameResult);
Ray Milkey614352e2018-02-26 09:36:31 -08001647 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Dasf9332192017-02-18 14:05:44 -08001648 // exponentially increasing intervals for retries
1649 executorService.schedule(this,
1650 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
1651 TimeUnit.MILLISECONDS);
Saurav Dasd1872b02016-12-02 15:43:47 -08001652 if (!sameResult) {
1653 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
1654 }
Saurav Das07c74602016-04-27 18:35:50 -07001655 }
Saurav Dasd1872b02016-12-02 15:43:47 -08001656 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das07c74602016-04-27 18:35:50 -07001657 }
Saurav Das07c74602016-04-27 18:35:50 -07001658 }
1659
sangho80f11cb2015-04-01 13:05:26 -07001660}