blob: ae20b3f9f0dcfb2c38c9862807612ad7c4fda8b5 [file] [log] [blame]
sanghob35a6192015-04-01 13:05:26 -07001/*
Brian O'Connora09fe5b2017-08-03 21:12:30 -07002 * Copyright 2015-present Open Networking Foundation
sanghob35a6192015-04-01 13:05:26 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package org.onosproject.segmentrouting;
17
Saurav Dasd2fded02016-12-02 15:43:47 -080018import com.google.common.base.MoreObjects;
Saurav Dasc88d4662017-05-15 15:34:25 -070019import com.google.common.collect.ImmutableMap;
20import com.google.common.collect.ImmutableMap.Builder;
Charles Chan93e71ba2016-04-29 14:38:22 -070021import com.google.common.collect.ImmutableSet;
Saurav Das4e3224f2016-11-29 14:27:25 -080022import com.google.common.collect.Lists;
sangho20eff1d2015-04-13 15:15:58 -070023import com.google.common.collect.Maps;
24import com.google.common.collect.Sets;
Saurav Dasceccf242017-08-03 18:30:35 -070025
sangho666cd6d2015-04-14 16:27:13 -070026import org.onlab.packet.Ip4Address;
Pier Ventree0ae7a32016-11-23 09:57:42 -080027import org.onlab.packet.Ip6Address;
sanghob35a6192015-04-01 13:05:26 -070028import org.onlab.packet.IpPrefix;
Charles Chan2fde6d42017-08-23 14:46:43 -070029import org.onlab.packet.MacAddress;
30import org.onlab.packet.VlanId;
Saurav Das7bcbe702017-06-13 15:35:54 -070031import org.onosproject.cluster.NodeId;
Charles Chan93e71ba2016-04-29 14:38:22 -070032import org.onosproject.net.ConnectPoint;
sanghob35a6192015-04-01 13:05:26 -070033import org.onosproject.net.Device;
34import org.onosproject.net.DeviceId;
sangho20eff1d2015-04-13 15:15:58 -070035import org.onosproject.net.Link;
Charles Chan2fde6d42017-08-23 14:46:43 -070036import org.onosproject.net.PortNumber;
Charles Chan0b4e6182015-11-03 10:42:14 -080037import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
38import org.onosproject.segmentrouting.config.DeviceConfiguration;
Saurav Dasc88d4662017-05-15 15:34:25 -070039import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
sanghob35a6192015-04-01 13:05:26 -070040import org.slf4j.Logger;
41import org.slf4j.LoggerFactory;
42
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -070043import java.time.Instant;
sanghob35a6192015-04-01 13:05:26 -070044import java.util.ArrayList;
45import java.util.HashMap;
46import java.util.HashSet;
Saurav Das7bcbe702017-06-13 15:35:54 -070047import java.util.Iterator;
48import java.util.Map;
Saurav Dasd2fded02016-12-02 15:43:47 -080049import java.util.Objects;
Charles Chanba6c5752018-04-02 11:46:38 -070050import java.util.Optional;
sanghob35a6192015-04-01 13:05:26 -070051import java.util.Set;
Saurav Das59232cf2016-04-27 18:35:50 -070052import java.util.concurrent.ScheduledExecutorService;
53import java.util.concurrent.TimeUnit;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090054import java.util.concurrent.locks.Lock;
55import java.util.concurrent.locks.ReentrantLock;
Saurav Das604ab3a2018-03-18 21:28:15 -070056import java.util.stream.Stream;
57
Saurav Dasd2fded02016-12-02 15:43:47 -080058import static com.google.common.base.MoreObjects.toStringHelper;
Pier Ventree0ae7a32016-11-23 09:57:42 -080059import static com.google.common.base.Preconditions.checkNotNull;
60import static java.util.concurrent.Executors.newScheduledThreadPool;
61import static org.onlab.util.Tools.groupedThreads;
sanghob35a6192015-04-01 13:05:26 -070062
Charles Chane849c192016-01-11 18:28:54 -080063/**
64 * Default routing handler that is responsible for route computing and
65 * routing rule population.
66 */
sanghob35a6192015-04-01 13:05:26 -070067public class DefaultRoutingHandler {
Saurav Das018605f2017-02-18 14:05:44 -080068 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey3717e602018-02-01 13:49:47 -080069 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Das018605f2017-02-18 14:05:44 -080070 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasceccf242017-08-03 18:30:35 -070071 private static final long STABLITY_THRESHOLD = 10; //secs
Charles Chan93e71ba2016-04-29 14:38:22 -070072 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sanghob35a6192015-04-01 13:05:26 -070073
74 private SegmentRoutingManager srManager;
75 private RoutingRulePopulator rulePopulator;
Shashikanth VH013a7bc2015-12-11 01:32:44 +053076 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
77 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho666cd6d2015-04-14 16:27:13 -070078 private DeviceConfiguration config;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090079 private final Lock statusLock = new ReentrantLock();
80 private volatile Status populationStatus;
Yuta HIGUCHI1624df12016-07-21 16:54:33 -070081 private ScheduledExecutorService executorService
Saurav Dasd2fded02016-12-02 15:43:47 -080082 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -070083 private Instant lastRoutingChange;
sanghob35a6192015-04-01 13:05:26 -070084
/**
 * Lifecycle states of a default-routing population run.
 */
public enum Status {
    /** Population process is not started yet. */
    IDLE,

    /** Population process started. */
    STARTED,

    /**
     * Population process was aborted due to errors, mostly for groups not
     * found.
     */
    ABORTED,

    /** Population process was finished successfully. */
    SUCCEEDED
}
102
/**
 * Builds a routing handler bound to the given segment-routing manager.
 * The rule populator and device configuration are read from the manager
 * and must both be non-null; the handler starts in {@link Status#IDLE}
 * with an empty current ECMP graph map.
 *
 * @param srManager SegmentRoutingManager object
 */
public DefaultRoutingHandler(SegmentRoutingManager srManager) {
    // plain state first; none of it depends on the manager
    this.populationStatus = Status.IDLE;
    this.currentEcmpSpgMap = new HashMap<>();

    // collaborators pulled out of the manager, validated in the same order
    // as before so the failing dereference/check is unchanged
    this.srManager = srManager;
    this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
    this.config = checkNotNull(srManager.deviceConfiguration);
}
115
116 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700117 * Returns an immutable copy of the current ECMP shortest-path graph as
118 * computed by this controller instance.
119 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700120 * @return immutable copy of the current ECMP graph
Saurav Dasc88d4662017-05-15 15:34:25 -0700121 */
122 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
123 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
124 currentEcmpSpgMap.entrySet().forEach(entry -> {
125 if (entry.getValue() != null) {
126 builder.put(entry.getKey(), entry.getValue());
127 }
128 });
129 return builder.build();
130 }
131
Saurav Dasceccf242017-08-03 18:30:35 -0700132 /**
133 * Acquires the lock used when making routing changes.
134 */
135 public void acquireRoutingLock() {
136 statusLock.lock();
137 }
138
139 /**
140 * Releases the lock used when making routing changes.
141 */
142 public void releaseRoutingLock() {
143 statusLock.unlock();
144 }
145
146 /**
147 * Determines if routing in the network has been stable in the last
148 * STABLITY_THRESHOLD seconds, by comparing the current time to the last
149 * routing change timestamp.
150 *
151 * @return true if stable
152 */
153 public boolean isRoutingStable() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700154 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
155 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700156 log.trace("Routing stable since {}s", now - last);
Saurav Dasceccf242017-08-03 18:30:35 -0700157 return (now - last) > STABLITY_THRESHOLD;
158 }
159
160
//////////////////////////////////////
// Route path handling
//////////////////////////////////////

/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network:
 *   a) due to a configuration change
 *   b) due to a route-added event
 *   c) due to a change in the topology
 */
171
Saurav Dasc88d4662017-05-15 15:34:25 -0700172 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700173 * Populates all routing rules to all switches. Typically triggered at
174 * startup or after a configuration event.
sanghob35a6192015-04-01 13:05:26 -0700175 */
Saurav Dasc88d4662017-05-15 15:34:25 -0700176 public void populateAllRoutingRules() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700177 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900178 statusLock.lock();
179 try {
Saurav Das7bcbe702017-06-13 15:35:54 -0700180 if (populationStatus == Status.STARTED) {
181 log.warn("Previous rule population is not finished. Cannot"
182 + " proceed with populateAllRoutingRules");
183 return;
184 }
185
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900186 populationStatus = Status.STARTED;
187 rulePopulator.resetCounter();
Saurav Das7bcbe702017-06-13 15:35:54 -0700188 log.info("Starting to populate all routing rules");
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900189 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sanghob35a6192015-04-01 13:05:26 -0700190
Saurav Das7bcbe702017-06-13 15:35:54 -0700191 // take a snapshot of the topology
192 updatedEcmpSpgMap = new HashMap<>();
193 Set<EdgePair> edgePairs = new HashSet<>();
194 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800195 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700196 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800197 new EcmpShortestPathGraph(dstSw, srManager);
198 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700199 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
200 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700201 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700202 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
203 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
204 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700205 }
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800206 DeviceId ret = shouldHandleRouting(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700207 if (ret == null) {
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900208 continue;
209 }
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800210 Set<DeviceId> devsToProcess = Sets.newHashSet(dstSw, ret);
Saurav Das7bcbe702017-06-13 15:35:54 -0700211 // To do a full reroute, assume all routes have changed
212 for (DeviceId dev : devsToProcess) {
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800213 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
214 if (targetSw.equals(dev)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700215 continue;
216 }
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800217 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das7bcbe702017-06-13 15:35:54 -0700218 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900219 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700220 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900221
Saurav Das7bcbe702017-06-13 15:35:54 -0700222 if (!redoRouting(routeChanges, edgePairs, null)) {
223 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
224 populationStatus = Status.ABORTED;
225 log.warn("Failed to repopulate all routing rules.");
226 return;
sanghob35a6192015-04-01 13:05:26 -0700227 }
228
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900229 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
230 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700231 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900232 rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700233 return;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900234 } finally {
235 statusLock.unlock();
sanghob35a6192015-04-01 13:05:26 -0700236 }
sanghob35a6192015-04-01 13:05:26 -0700237 }
238
sangho20eff1d2015-04-13 15:15:58 -0700239 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700240 * Populate rules from all other edge devices to the connect-point(s)
241 * specified for the given subnets.
242 *
243 * @param cpts connect point(s) of the subnets being added
244 * @param subnets subnets being added
Charles Chan2fde6d42017-08-23 14:46:43 -0700245 */
246 // XXX refactor
Saurav Das7bcbe702017-06-13 15:35:54 -0700247 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan71e64f12017-09-11 15:21:57 -0700248 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
249 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
250 return;
251 }
252
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700253 lastRoutingChange = Instant.now();
Saurav Das7bcbe702017-06-13 15:35:54 -0700254 statusLock.lock();
255 try {
256 if (populationStatus == Status.STARTED) {
257 log.warn("Previous rule population is not finished. Cannot"
258 + " proceed with routing rules for added routes");
259 return;
260 }
261 populationStatus = Status.STARTED;
262 rulePopulator.resetCounter();
Charles Chan2fde6d42017-08-23 14:46:43 -0700263 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
264 subnets, cpts);
Saurav Dasc568c342018-01-25 09:49:01 -0800265 // In principle an update to a subnet/prefix should not require a
266 // new ECMPspg calculation as it is not a topology event. As a
267 // result, we use the current/existing ECMPspg in the updated map
268 // used by the redoRouting method.
Saurav Das15a81782018-02-09 09:15:03 -0800269 if (updatedEcmpSpgMap == null) {
270 updatedEcmpSpgMap = new HashMap<>();
271 }
Saurav Dasc568c342018-01-25 09:49:01 -0800272 currentEcmpSpgMap.entrySet().forEach(entry -> {
273 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase7f51012018-02-09 17:26:45 -0800274 if (log.isTraceEnabled()) {
275 log.trace("Root switch: {}", entry.getKey());
276 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Dasc568c342018-01-25 09:49:01 -0800277 }
278 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700279 Set<EdgePair> edgePairs = new HashSet<>();
280 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
281 boolean handleRouting = false;
282
283 if (cpts.size() == 2) {
284 // ensure connect points are edge-pairs
285 Iterator<ConnectPoint> iter = cpts.iterator();
286 DeviceId dev1 = iter.next().deviceId();
Charles Chanba6c5752018-04-02 11:46:38 -0700287 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
288 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
289 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700290 } else {
291 log.warn("Connectpoints {} for subnets {} not on "
292 + "pair-devices.. aborting populateSubnet", cpts, subnets);
293 populationStatus = Status.ABORTED;
294 return;
295 }
296 for (ConnectPoint cp : cpts) {
Saurav Dasc568c342018-01-25 09:49:01 -0800297 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
298 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700299 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800300 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
301 log.warn("populateSubnet: no updated graph for dev:{}"
302 + " ... creating", cp.deviceId());
303 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700304 DeviceId retId = shouldHandleRouting(cp.deviceId());
305 if (retId == null) {
306 continue;
307 }
308 handleRouting = true;
309 }
310 } else {
311 // single connect point
312 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Dasc568c342018-01-25 09:49:01 -0800313 if (updatedEcmpSpgMap.get(dstSw) == null) {
314 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700315 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800316 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
317 log.warn("populateSubnet: no updated graph for dev:{}"
318 + " ... creating", dstSw);
319 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700320 if (srManager.mastershipService.isLocalMaster(dstSw)) {
321 handleRouting = true;
322 }
323 }
324
325 if (!handleRouting) {
326 log.debug("This instance is not handling ecmp routing to the "
327 + "connectPoint(s) {}", cpts);
328 populationStatus = Status.ABORTED;
329 return;
330 }
331
332 // if it gets here, this instance should handle routing for the
333 // connectpoint(s). Assume all route-paths have to be updated to
334 // the connectpoint(s) with the following exceptions
335 // 1. if target is non-edge no need for routing rules
336 // 2. if target is one of the connectpoints
337 for (ConnectPoint cp : cpts) {
338 DeviceId dstSw = cp.deviceId();
339 for (Device targetSw : srManager.deviceService.getDevices()) {
340 boolean isEdge = false;
341 try {
342 isEdge = config.isEdgeDevice(targetSw.id());
343 } catch (DeviceConfigNotFoundException e) {
Charles Chan92726132018-02-16 17:20:54 -0800344 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
345 continue;
Saurav Das7bcbe702017-06-13 15:35:54 -0700346 }
Charles Chanba6c5752018-04-02 11:46:38 -0700347 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700348 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chanba6c5752018-04-02 11:46:38 -0700349 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700350 continue;
351 }
352 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
353 }
354 }
355
356 if (!redoRouting(routeChanges, edgePairs, subnets)) {
357 log.debug("populateSubnet: populationStatus is ABORTED");
358 populationStatus = Status.ABORTED;
359 log.warn("Failed to repopulate the rules for subnet.");
360 return;
361 }
362
363 log.debug("populateSubnet: populationStatus is SUCCEEDED");
364 populationStatus = Status.SUCCEEDED;
365 log.info("Completed subnet population. Total # of rules pushed : {}",
366 rulePopulator.getCounter());
367 return;
368
369 } finally {
370 statusLock.unlock();
371 }
372 }
373
374 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700375 * Populates the routing rules or makes hash group changes according to the
376 * route-path changes due to link failure, switch failure or link up. This
377 * method should only be called for one of these three possible event-types.
Saurav Das604ab3a2018-03-18 21:28:15 -0700378 * Note that when a switch goes away, all of its links fail as well, but
379 * this is handled as a single switch removal event.
sangho20eff1d2015-04-13 15:15:58 -0700380 *
Saurav Das604ab3a2018-03-18 21:28:15 -0700381 * @param linkDown the single failed link, or null for other conditions such
382 * as link-up or a removed switch
Saurav Dasc88d4662017-05-15 15:34:25 -0700383 * @param linkUp the single link up, or null for other conditions such as
Saurav Das604ab3a2018-03-18 21:28:15 -0700384 * link-down or a removed switch
385 * @param switchDown the removed switch, or null for other conditions such
386 * as link-down or link-up
387 * @param seenBefore true if this event is for a linkUp or linkDown for a
388 * seen link
389 */
390 // TODO This method should be refactored into three separated methods
Saurav Dasc88d4662017-05-15 15:34:25 -0700391 public void populateRoutingRulesForLinkStatusChange(Link linkDown,
392 Link linkUp,
Saurav Das604ab3a2018-03-18 21:28:15 -0700393 DeviceId switchDown,
394 boolean seenBefore) {
395 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
396 .count() != 1) {
Saurav Dasc88d4662017-05-15 15:34:25 -0700397 log.warn("Only one event can be handled for link status change .. aborting");
398 return;
399 }
Saurav Das604ab3a2018-03-18 21:28:15 -0700400
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700401 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900402 statusLock.lock();
403 try {
sangho20eff1d2015-04-13 15:15:58 -0700404
405 if (populationStatus == Status.STARTED) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700406 log.warn("Previous rule population is not finished. Cannot"
Saurav Dasc568c342018-01-25 09:49:01 -0800407 + " proceeed with routingRules for Topology change");
Saurav Dasc88d4662017-05-15 15:34:25 -0700408 return;
sangho20eff1d2015-04-13 15:15:58 -0700409 }
410
Saurav Das7bcbe702017-06-13 15:35:54 -0700411 // Take snapshots of the topology
sangho45b009c2015-05-07 13:30:57 -0700412 updatedEcmpSpgMap = new HashMap<>();
Saurav Das7bcbe702017-06-13 15:35:54 -0700413 Set<EdgePair> edgePairs = new HashSet<>();
sangho45b009c2015-05-07 13:30:57 -0700414 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH013a7bc2015-12-11 01:32:44 +0530415 EcmpShortestPathGraph ecmpSpgUpdated =
416 new EcmpShortestPathGraph(sw.id(), srManager);
sangho45b009c2015-05-07 13:30:57 -0700417 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700418 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
419 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700420 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700421 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
422 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
423 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700424 }
sangho45b009c2015-05-07 13:30:57 -0700425 }
426
Saurav Dasc568c342018-01-25 09:49:01 -0800427 log.info("Starting to populate routing rules from Topology change");
sangho52abe3a2015-05-05 14:13:34 -0700428
sangho20eff1d2015-04-13 15:15:58 -0700429 Set<ArrayList<DeviceId>> routeChanges;
Saurav Dasc88d4662017-05-15 15:34:25 -0700430 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700431 + "populationStatus is STARTED");
sangho20eff1d2015-04-13 15:15:58 -0700432 populationStatus = Status.STARTED;
Saurav Dasc568c342018-01-25 09:49:01 -0800433 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
434 boolean hashGroupsChanged = false;
Saurav Das4e3224f2016-11-29 14:27:25 -0800435 // try optimized re-routing
Saurav Dasc88d4662017-05-15 15:34:25 -0700436 if (linkDown == null) {
437 // either a linkUp or a switchDown - compute all route changes by
438 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dase0d4c872018-03-05 14:37:16 -0800439 routeChanges = computeRouteChange(switchDown);
Saurav Dasc88d4662017-05-15 15:34:25 -0700440
Saurav Das9df5b7c2017-08-14 16:44:43 -0700441 // deal with linkUp of a seen-before link
Saurav Das604ab3a2018-03-18 21:28:15 -0700442 if (linkUp != null && seenBefore) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700443 // link previously seen before
444 // do hash-bucket changes instead of a re-route
445 processHashGroupChange(routeChanges, false, null);
446 // clear out routesChanges so a re-route is not attempted
447 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800448 hashGroupsChanged = true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700449 }
Saurav Das9df5b7c2017-08-14 16:44:43 -0700450 // for a linkUp of a never-seen-before link
451 // let it fall through to a reroute of the routeChanges
Saurav Dasc88d4662017-05-15 15:34:25 -0700452
Saurav Das9df5b7c2017-08-14 16:44:43 -0700453 //deal with switchDown
454 if (switchDown != null) {
455 processHashGroupChange(routeChanges, true, switchDown);
456 // clear out routesChanges so a re-route is not attempted
457 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800458 hashGroupsChanged = true;
Saurav Das9df5b7c2017-08-14 16:44:43 -0700459 }
sangho20eff1d2015-04-13 15:15:58 -0700460 } else {
Saurav Dasc88d4662017-05-15 15:34:25 -0700461 // link has gone down
462 // Compare existing ECMP SPG only with the link that went down
463 routeChanges = computeDamagedRoutes(linkDown);
464 if (routeChanges != null) {
465 processHashGroupChange(routeChanges, true, null);
466 // clear out routesChanges so a re-route is not attempted
467 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800468 hashGroupsChanged = true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700469 }
sangho20eff1d2015-04-13 15:15:58 -0700470 }
471
Saurav Das4e3224f2016-11-29 14:27:25 -0800472 // do full re-routing if optimized routing returns null routeChanges
Saurav Dasb5c236e2016-06-07 10:08:06 -0700473 if (routeChanges == null) {
Saurav Dasc568c342018-01-25 09:49:01 -0800474 log.warn("Optimized routing failed... opting for full reroute");
Saurav Das7bcbe702017-06-13 15:35:54 -0700475 populationStatus = Status.ABORTED;
Saurav Dasc88d4662017-05-15 15:34:25 -0700476 populateAllRoutingRules();
477 return;
Saurav Dasb5c236e2016-06-07 10:08:06 -0700478 }
479
sangho20eff1d2015-04-13 15:15:58 -0700480 if (routeChanges.isEmpty()) {
Saurav Dasc568c342018-01-25 09:49:01 -0800481 if (hashGroupsChanged) {
482 log.info("Hash-groups changed for link status change");
483 } else {
484 log.info("No re-route or re-hash attempted for the link"
485 + " status change");
486 updatedEcmpSpgMap.keySet().forEach(devId -> {
487 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
488 log.debug("Updating ECMPspg for remaining dev:{}", devId);
489 });
490 }
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700491 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700492 populationStatus = Status.SUCCEEDED;
Saurav Dasc88d4662017-05-15 15:34:25 -0700493 return;
sangho20eff1d2015-04-13 15:15:58 -0700494 }
495
Saurav Dasc88d4662017-05-15 15:34:25 -0700496 // reroute of routeChanges
Saurav Das7bcbe702017-06-13 15:35:54 -0700497 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700498 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700499 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700500 log.info("Completed repopulation of rules for link-status change."
501 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700502 return;
sangho20eff1d2015-04-13 15:15:58 -0700503 } else {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700504 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sangho20eff1d2015-04-13 15:15:58 -0700505 populationStatus = Status.ABORTED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700506 log.warn("Failed to repopulate the rules for link status change.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700507 return;
sangho20eff1d2015-04-13 15:15:58 -0700508 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900509 } finally {
510 statusLock.unlock();
sangho20eff1d2015-04-13 15:15:58 -0700511 }
512 }
513
Saurav Dasc88d4662017-05-15 15:34:25 -0700514 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700515 * Processes a set a route-path changes by reprogramming routing rules and
516 * creating new hash-groups or editing them if necessary. This method also
517 * determines the next-hops for the route-path from the src-switch (target)
518 * of the path towards the dst-switch of the path.
Saurav Dasc88d4662017-05-15 15:34:25 -0700519 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700520 * @param routeChanges a set of route-path changes, where each route-path is
521 * a list with its first element the src-switch (target)
522 * of the path, and the second element the dst-switch of
523 * the path.
524 * @param edgePairs a set of edge-switches that are paired by configuration
525 * @param subnets a set of prefixes that need to be populated in the routing
526 * table of the target switch in the route-path. Can be null,
527 * in which case all the prefixes belonging to the dst-switch
528 * will be populated in the target switch
529 * @return true if successful in repopulating all routes
Saurav Dasc88d4662017-05-15 15:34:25 -0700530 */
Saurav Das7bcbe702017-06-13 15:35:54 -0700531 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
532 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
533 // first make every entry two-elements
534 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
535 for (ArrayList<DeviceId> route : routeChanges) {
536 if (route.size() == 1) {
537 DeviceId dstSw = route.get(0);
538 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
539 if (ec == null) {
540 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
541 return false;
542 }
543 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
544 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
545 changedRoutes.add(Lists.newArrayList(target, dstSw));
546 });
547 });
548 } else {
549 DeviceId targetSw = route.get(0);
550 DeviceId dstSw = route.get(1);
551 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
552 }
553 }
554
555 // now process changedRoutes according to edgePairs
556 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
557 return false; //abort routing and fail fast
558 }
559
560 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Dasc568c342018-01-25 09:49:01 -0800561 Set<DeviceId> updatedDevices = Sets.newHashSet();
562 if (!redoRoutingIndividualDests(subnets, changedRoutes,
563 updatedDevices)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700564 return false; //abort routing and fail fast
565 }
566
Saurav Das7bcbe702017-06-13 15:35:54 -0700567 // update ecmpSPG for all edge-pairs
568 for (EdgePair ep : edgePairs) {
569 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
570 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
571 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
572 }
Saurav Dasc568c342018-01-25 09:49:01 -0800573
574 // here is where we update all devices not touched by this instance
575 updatedEcmpSpgMap.keySet().stream()
576 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
577 .filter(devId -> !updatedDevices.contains(devId))
578 .forEach(devId -> {
579 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
580 log.debug("Updating ECMPspg for remaining dev:{}", devId);
581 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700582 return true;
583 }
584
585 /**
586 * Programs targetSw in the changedRoutes for given prefixes reachable by
587 * an edgePair. If no prefixes are given, the method will use configured
588 * subnets/prefixes. If some configured subnets belong only to a specific
589 * destination in the edgePair, then the target switch will be programmed
590 * only to that destination.
591 *
592 * @param edgePairs set of edge-pairs for which target will be programmed
593 * @param subnets a set of prefixes that need to be populated in the routing
594 * table of the target switch in the changedRoutes. Can be null,
595 * in which case all the configured prefixes belonging to the
596 * paired switches will be populated in the target switch
597 * @param changedRoutes a set of route-path changes, where each route-path is
598 * a list with its first element the src-switch (target)
599 * of the path, and the second element the dst-switch of
600 * the path.
601 * @return true if successful
602 */
603 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs,
604 Set<IpPrefix> subnets,
605 Set<ArrayList<DeviceId>> changedRoutes) {
606 for (EdgePair ep : edgePairs) {
607 // temp store for a target's changedRoutes to this edge-pair
608 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
609 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
610 while (i.hasNext()) {
611 ArrayList<DeviceId> route = i.next();
612 DeviceId dstSw = route.get(1);
613 if (ep.includes(dstSw)) {
614 // routeChange for edge pair found
615 // sort by target iff target is edge and remove from changedRoutes
616 DeviceId targetSw = route.get(0);
617 try {
618 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
619 continue;
620 }
621 } catch (DeviceConfigNotFoundException e) {
622 log.warn(e.getMessage() + "aborting redoRouting");
623 return false;
624 }
625 // route is from another edge to this edge-pair
626 if (targetRoutes.containsKey(targetSw)) {
627 targetRoutes.get(targetSw).add(route);
628 } else {
629 Set<ArrayList<DeviceId>> temp = new HashSet<>();
630 temp.add(route);
631 targetRoutes.put(targetSw, temp);
632 }
633 i.remove();
634 }
635 }
636 // so now for this edgepair we have a per target set of routechanges
637 // process target->edgePair route
638 for (Map.Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
639 targetRoutes.entrySet()) {
640 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
641 entry.getKey(), ep);
642 DeviceId targetSw = entry.getKey();
643 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
644 entry.getValue().forEach(route -> {
645 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
646 log.debug("route: target {} -> dst {} found with next-hops {}",
647 route.get(0), route.get(1), nhops);
648 perDstNextHops.put(route.get(1), nhops);
649 });
650 Set<IpPrefix> ipDev1 = (subnets == null) ? config.getSubnets(ep.dev1)
651 : subnets;
652 Set<IpPrefix> ipDev2 = (subnets == null) ? config.getSubnets(ep.dev2)
653 : subnets;
654 ipDev1 = (ipDev1 == null) ? Sets.newHashSet() : ipDev1;
655 ipDev2 = (ipDev2 == null) ? Sets.newHashSet() : ipDev2;
Saurav Dasc568c342018-01-25 09:49:01 -0800656 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
657 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
Saurav Das7bcbe702017-06-13 15:35:54 -0700658 // handle routing to subnets common to edge-pair
Saurav Dasc568c342018-01-25 09:49:01 -0800659 // only if the targetSw is not part of the edge-pair and there
660 // exists a next hop to at least one of the devices in the edge-pair
661 if (!ep.includes(targetSw)
662 && ((nhDev1 != null && !nhDev1.isEmpty())
663 || (nhDev2 != null && !nhDev2.isEmpty()))) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700664 if (!populateEcmpRoutingRulePartial(
665 targetSw,
666 ep.dev1, ep.dev2,
667 perDstNextHops,
668 Sets.intersection(ipDev1, ipDev2))) {
669 return false; // abort everything and fail fast
670 }
671 }
Saurav Dasc568c342018-01-25 09:49:01 -0800672 // handle routing to subnets that only belong to dev1 only if
673 // a next-hop exists from the target to dev1
Saurav Das7bcbe702017-06-13 15:35:54 -0700674 Set<IpPrefix> onlyDev1Subnets = Sets.difference(ipDev1, ipDev2);
Saurav Dasc568c342018-01-25 09:49:01 -0800675 if (!onlyDev1Subnets.isEmpty()
676 && nhDev1 != null && !nhDev1.isEmpty()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700677 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
Saurav Dasc568c342018-01-25 09:49:01 -0800678 onlyDev1NextHops.put(ep.dev1, nhDev1);
Saurav Das7bcbe702017-06-13 15:35:54 -0700679 if (!populateEcmpRoutingRulePartial(
680 targetSw,
681 ep.dev1, null,
682 onlyDev1NextHops,
683 onlyDev1Subnets)) {
684 return false; // abort everything and fail fast
685 }
686 }
Saurav Dasc568c342018-01-25 09:49:01 -0800687 // handle routing to subnets that only belong to dev2 only if
688 // a next-hop exists from the target to dev2
Saurav Das7bcbe702017-06-13 15:35:54 -0700689 Set<IpPrefix> onlyDev2Subnets = Sets.difference(ipDev2, ipDev1);
Saurav Dasc568c342018-01-25 09:49:01 -0800690 if (!onlyDev2Subnets.isEmpty()
691 && nhDev2 != null && !nhDev2.isEmpty()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700692 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
Saurav Dasc568c342018-01-25 09:49:01 -0800693 onlyDev2NextHops.put(ep.dev2, nhDev2);
Saurav Das7bcbe702017-06-13 15:35:54 -0700694 if (!populateEcmpRoutingRulePartial(
695 targetSw,
696 ep.dev2, null,
697 onlyDev2NextHops,
698 onlyDev2Subnets)) {
699 return false; // abort everything and fail fast
700 }
701 }
702 }
703 // if it gets here it has succeeded for all targets to this edge-pair
704 }
705 return true;
706 }
707
708 /**
709 * Programs targetSw in the changedRoutes for given prefixes reachable by
710 * a destination switch that is not part of an edge-pair.
711 * If no prefixes are given, the method will use configured subnets/prefixes.
712 *
713 * @param subnets a set of prefixes that need to be populated in the routing
714 * table of the target switch in the changedRoutes. Can be null,
715 * in which case all the configured prefixes belonging to the
716 * paired switches will be populated in the target switch
717 * @param changedRoutes a set of route-path changes, where each route-path is
718 * a list with its first element the src-switch (target)
719 * of the path, and the second element the dst-switch of
720 * the path.
721 * @return true if successful
722 */
723 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets,
Saurav Dasc568c342018-01-25 09:49:01 -0800724 Set<ArrayList<DeviceId>> changedRoutes,
725 Set<DeviceId> updatedDevices) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700726 // aggregate route-path changes for each dst device
727 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
728 new HashMap<>();
729 for (ArrayList<DeviceId> route: changedRoutes) {
730 DeviceId dstSw = route.get(1);
731 ArrayList<ArrayList<DeviceId>> deviceRoutes =
732 routesBydevice.get(dstSw);
733 if (deviceRoutes == null) {
734 deviceRoutes = new ArrayList<>();
735 routesBydevice.put(dstSw, deviceRoutes);
736 }
737 deviceRoutes.add(route);
738 }
739 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
740 ArrayList<ArrayList<DeviceId>> deviceRoutes =
741 routesBydevice.get(impactedDstDevice);
742 for (ArrayList<DeviceId> route: deviceRoutes) {
743 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
744 route.get(0), route.get(1));
745 DeviceId targetSw = route.get(0);
746 DeviceId dstSw = route.get(1); // same as impactedDstDevice
747 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
Saurav Dasbd071d82018-01-09 17:38:44 -0800748 if (nextHops.isEmpty()) {
749 log.warn("Could not find next hop from target:{} --> dst {} "
750 + "skipping this route", targetSw, dstSw);
751 continue;
752 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700753 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
754 nhops.put(dstSw, nextHops);
755 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
756 (subnets == null) ? Sets.newHashSet() : subnets)) {
757 return false; // abort routing and fail fast
758 }
759 log.debug("Populating flow rules from target: {} to dst: {}"
760 + " is successful", targetSw, dstSw);
761 }
762 //Only if all the flows for all impacted routes to a
763 //specific target are pushed successfully, update the
764 //ECMP graph for that target. Or else the next event
765 //would not see any changes in the ECMP graphs.
766 //In another case, the target switch has gone away, so
767 //routes can't be installed. In that case, the current map
768 //is updated here, without any flows being pushed.
769 currentEcmpSpgMap.put(impactedDstDevice,
770 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Dasc568c342018-01-25 09:49:01 -0800771 updatedDevices.add(impactedDstDevice);
Saurav Das7bcbe702017-06-13 15:35:54 -0700772 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
773 }
774 return true;
775 }
776
777 /**
778 * Populate ECMP rules for subnets from target to destination via nexthops.
779 *
780 * @param targetSw Device ID of target switch in which rules will be programmed
781 * @param destSw1 Device ID of final destination switch to which the rules will forward
782 * @param destSw2 Device ID of paired destination switch to which the rules will forward
783 * A null deviceId indicates packets should only be sent to destSw1
Saurav Dasa4020382018-02-14 14:14:54 -0800784 * @param nextHops Map of a set of next hops per destSw
Saurav Das7bcbe702017-06-13 15:35:54 -0700785 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
786 * @return true if it succeeds in populating rules
787 */ // refactor
788 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw,
789 DeviceId destSw1,
790 DeviceId destSw2,
791 Map<DeviceId, Set<DeviceId>> nextHops,
792 Set<IpPrefix> subnets) {
793 boolean result;
794 // If both target switch and dest switch are edge routers, then set IP
795 // rule for both subnet and router IP.
796 boolean targetIsEdge;
797 boolean dest1IsEdge;
798 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
799 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
800
801 try {
802 targetIsEdge = config.isEdgeDevice(targetSw);
803 dest1IsEdge = config.isEdgeDevice(destSw1);
804 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
805 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
806 if (destSw2 != null) {
807 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
808 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
809 }
810 } catch (DeviceConfigNotFoundException e) {
811 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700812 return false;
813 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700814
815 if (targetIsEdge && dest1IsEdge) {
816 subnets = (subnets != null && !subnets.isEmpty())
817 ? Sets.newHashSet(subnets)
818 : Sets.newHashSet(config.getSubnets(destSw1));
Saurav Dasa4020382018-02-14 14:14:54 -0800819 // XXX - Rethink this - ignoring routerIPs in all other switches
820 // even edge to edge switches
Saurav Das7bcbe702017-06-13 15:35:54 -0700821 /*subnets.add(dest1RouterIpv4.toIpPrefix());
822 if (dest1RouterIpv6 != null) {
823 subnets.add(dest1RouterIpv6.toIpPrefix());
824 }
825 if (destSw2 != null && dest2RouterIpv4 != null) {
826 subnets.add(dest2RouterIpv4.toIpPrefix());
827 if (dest2RouterIpv6 != null) {
828 subnets.add(dest2RouterIpv6.toIpPrefix());
829 }
830 }*/
831 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
832 + "for subnets {}", targetSw, destSw1,
833 (destSw2 != null) ? ("& " + destSw2) : "",
834 subnets);
835 result = rulePopulator.populateIpRuleForSubnet(targetSw, subnets,
836 destSw1, destSw2,
837 nextHops);
838 if (!result) {
839 return false;
840 }
Saurav Dasc88d4662017-05-15 15:34:25 -0700841 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700842
843 if (!targetIsEdge && dest1IsEdge) {
844 // MPLS rules in all non-edge target devices. These rules are for
845 // individual destinations, even if the dsts are part of edge-pairs.
846 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
847 + "all MPLS rules", targetSw, destSw1);
848 result = rulePopulator.populateMplsRule(targetSw, destSw1,
849 nextHops.get(destSw1),
850 dest1RouterIpv4);
851 if (!result) {
852 return false;
853 }
854 if (dest1RouterIpv6 != null) {
Saurav Dasa4020382018-02-14 14:14:54 -0800855 int v4sid = 0, v6sid = 0;
856 try {
857 v4sid = config.getIPv4SegmentId(destSw1);
858 v6sid = config.getIPv6SegmentId(destSw1);
859 } catch (DeviceConfigNotFoundException e) {
860 log.warn(e.getMessage());
861 }
862 if (v4sid != v6sid) {
863 result = rulePopulator.populateMplsRule(targetSw, destSw1,
864 nextHops.get(destSw1),
865 dest1RouterIpv6);
866 if (!result) {
867 return false;
868 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700869 }
870 }
871 }
872
Andreas Pantelopoulosff691b72018-03-12 16:30:20 -0700873 if (!targetIsEdge && !dest1IsEdge) {
874 // MPLS rules for inter-connected spines
875 // can be merged with above if, left it here for clarity
876 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
877 + "all MPLS rules", targetSw, destSw1);
878
879 result = rulePopulator.populateMplsRule(targetSw, destSw1,
880 nextHops.get(destSw1),
881 dest1RouterIpv4);
882 if (!result) {
883 return false;
884 }
885
886 if (dest1RouterIpv6 != null) {
887 int v4sid = 0, v6sid = 0;
888 try {
889 v4sid = config.getIPv4SegmentId(destSw1);
890 v6sid = config.getIPv6SegmentId(destSw1);
891 } catch (DeviceConfigNotFoundException e) {
892 log.warn(e.getMessage());
893 }
894 if (v4sid != v6sid) {
895 result = rulePopulator.populateMplsRule(targetSw, destSw1,
896 nextHops.get(destSw1),
897 dest1RouterIpv6);
898 if (!result) {
899 return false;
900 }
901 }
902 }
903 }
904
905
Saurav Das7bcbe702017-06-13 15:35:54 -0700906 // To save on ECMP groups
907 // avoid MPLS rules in non-edge-devices to non-edge-devices
908 // avoid MPLS transit rules in edge-devices
909 // avoid loopback IP rules in edge-devices to non-edge-devices
910 return true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700911 }
912
913 /**
914 * Processes a set a route-path changes by editing hash groups.
915 *
916 * @param routeChanges a set of route-path changes, where each route-path is
917 * a list with its first element the src-switch of the path
918 * and the second element the dst-switch of the path.
919 * @param linkOrSwitchFailed true if the route changes are for a failed
920 * switch or linkDown event
921 * @param failedSwitch the switchId if the route changes are for a failed switch,
922 * otherwise null
923 */
924 private void processHashGroupChange(Set<ArrayList<DeviceId>> routeChanges,
925 boolean linkOrSwitchFailed,
926 DeviceId failedSwitch) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700927 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
928 // first, ensure each routeChanges entry has two elements
Saurav Dasc88d4662017-05-15 15:34:25 -0700929 for (ArrayList<DeviceId> route : routeChanges) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700930 if (route.size() == 1) {
931 // route-path changes are from everyone else to this switch
932 DeviceId dstSw = route.get(0);
933 srManager.deviceService.getAvailableDevices().forEach(sw -> {
934 if (!sw.id().equals(dstSw)) {
935 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
936 }
937 });
938 } else {
939 changedRoutes.add(route);
Saurav Dasc88d4662017-05-15 15:34:25 -0700940 }
Saurav Das9df5b7c2017-08-14 16:44:43 -0700941 }
Saurav Dasc568c342018-01-25 09:49:01 -0800942 boolean someFailed = false;
943 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Das9df5b7c2017-08-14 16:44:43 -0700944 for (ArrayList<DeviceId> route : changedRoutes) {
945 DeviceId targetSw = route.get(0);
946 DeviceId dstSw = route.get(1);
Saurav Dasc88d4662017-05-15 15:34:25 -0700947 if (linkOrSwitchFailed) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700948 boolean success = fixHashGroupsForRoute(route, true);
Saurav Dasc88d4662017-05-15 15:34:25 -0700949 // it's possible that we cannot fix hash groups for a route
950 // if the target switch has failed. Nevertheless the ecmp graph
951 // for the impacted switch must still be updated.
Saurav Das9df5b7c2017-08-14 16:44:43 -0700952 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
Saurav Dasc88d4662017-05-15 15:34:25 -0700953 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
954 currentEcmpSpgMap.remove(targetSw);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700955 log.debug("Updating ECMPspg for dst:{} removing failed switch "
Saurav Dasc88d4662017-05-15 15:34:25 -0700956 + "target:{}", dstSw, targetSw);
Saurav Dasc568c342018-01-25 09:49:01 -0800957 updatedDevices.add(targetSw);
958 updatedDevices.add(dstSw);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700959 continue;
Saurav Dasc88d4662017-05-15 15:34:25 -0700960 }
961 //linkfailed - update both sides
Saurav Dasc88d4662017-05-15 15:34:25 -0700962 if (success) {
963 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
Saurav Das9df5b7c2017-08-14 16:44:43 -0700964 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
Saurav Dasc568c342018-01-25 09:49:01 -0800965 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
966 + " or switchdown", dstSw, targetSw);
967 updatedDevices.add(targetSw);
968 updatedDevices.add(dstSw);
969 } else {
970 someFailed = true;
Saurav Das9df5b7c2017-08-14 16:44:43 -0700971 }
972 } else {
973 //linkup of seen before link
974 boolean success = fixHashGroupsForRoute(route, false);
975 if (success) {
976 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
977 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
978 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
Saurav Dasc88d4662017-05-15 15:34:25 -0700979 targetSw, dstSw);
Saurav Dasc568c342018-01-25 09:49:01 -0800980 updatedDevices.add(targetSw);
981 updatedDevices.add(dstSw);
982 } else {
983 someFailed = true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700984 }
985 }
986 }
Saurav Dasc568c342018-01-25 09:49:01 -0800987 if (!someFailed) {
988 // here is where we update all devices not touched by this instance
989 updatedEcmpSpgMap.keySet().stream()
990 .filter(devId -> !updatedDevices.contains(devId))
991 .forEach(devId -> {
992 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
993 log.debug("Updating ECMPspg for remaining dev:{}", devId);
994 });
995 }
Saurav Dasc88d4662017-05-15 15:34:25 -0700996 }
997
998 /**
999 * Edits hash groups in the src-switch (targetSw) of a route-path by
1000 * calling the groupHandler to either add or remove buckets in an existing
1001 * hash group.
1002 *
1003 * @param route a single list representing a route-path where the first element
1004 * is the src-switch (targetSw) of the route-path and the
1005 * second element is the dst-switch
1006 * @param revoke true if buckets in the hash-groups need to be removed;
1007 * false if buckets in the hash-groups need to be added
1008 * @return true if the hash group editing is successful
1009 */
1010 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1011 boolean revoke) {
1012 DeviceId targetSw = route.get(0);
1013 if (route.size() < 2) {
1014 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1015 return false;
1016 }
1017 DeviceId destSw = route.get(1);
Saurav Das9df5b7c2017-08-14 16:44:43 -07001018 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Dasc88d4662017-05-15 15:34:25 -07001019 targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001020 // figure out the new next hops at the targetSw towards the destSw
Saurav Das9df5b7c2017-08-14 16:44:43 -07001021 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001022 // call group handler to change hash group at targetSw
1023 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1024 if (grpHandler == null) {
1025 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1026 + " {} hash group buckets for route:{} ", targetSw,
1027 (revoke) ? "revoke" : "repopulate", route);
1028 return false;
1029 }
1030 log.debug("{} hash-groups buckets For Route {} -> {} to next-hops {}",
1031 (revoke) ? "revoke" : "repopulating",
1032 targetSw, destSw, nextHops);
1033 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1034 destSw, true)
1035 : grpHandler.fixHashGroups(targetSw, nextHops,
1036 destSw, false);
1037 }
1038
1039 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001040 * Start the flow rule population process if it was never started. The
1041 * process finishes successfully when all flow rules are set and stops with
1042 * ABORTED status when any groups required for flows is not set yet.
Saurav Dasc88d4662017-05-15 15:34:25 -07001043 */
Saurav Das7bcbe702017-06-13 15:35:54 -07001044 public void startPopulationProcess() {
1045 statusLock.lock();
1046 try {
1047 if (populationStatus == Status.IDLE
1048 || populationStatus == Status.SUCCEEDED
1049 || populationStatus == Status.ABORTED) {
1050 populateAllRoutingRules();
sangho45b009c2015-05-07 13:30:57 -07001051 } else {
Saurav Das7bcbe702017-06-13 15:35:54 -07001052 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1053 populationStatus);
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001054 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001055 } finally {
1056 statusLock.unlock();
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001057 }
sangho20eff1d2015-04-13 15:15:58 -07001058 }
1059
Saurav Dasb5c236e2016-06-07 10:08:06 -07001060 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001061 * Revoke rules of given subnet in all edge switches.
1062 *
1063 * @param subnets subnet being removed
1064 * @return true if succeed
1065 */
1066 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
1067 statusLock.lock();
1068 try {
1069 return srManager.routingRulePopulator.revokeIpRuleForSubnet(subnets);
1070 } finally {
1071 statusLock.unlock();
1072 }
1073 }
1074
1075 /**
Charles Chan2fde6d42017-08-23 14:46:43 -07001076 * Populates IP rules for a route that has direct connection to the switch
1077 * if the current instance is the master of the switch.
1078 *
1079 * @param deviceId device ID of the device that next hop attaches to
1080 * @param prefix IP prefix of the route
1081 * @param hostMac MAC address of the next hop
1082 * @param hostVlanId Vlan ID of the nexthop
1083 * @param outPort port where the next hop attaches to
1084 */
1085 void populateRoute(DeviceId deviceId, IpPrefix prefix,
1086 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
1087 if (srManager.mastershipService.isLocalMaster(deviceId)) {
1088 srManager.routingRulePopulator.populateRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1089 }
1090 }
1091
1092 /**
1093 * Removes IP rules for a route when the next hop is gone.
1094 * if the current instance is the master of the switch.
1095 *
1096 * @param deviceId device ID of the device that next hop attaches to
1097 * @param prefix IP prefix of the route
1098 * @param hostMac MAC address of the next hop
1099 * @param hostVlanId Vlan ID of the nexthop
1100 * @param outPort port that next hop attaches to
1101 */
1102 void revokeRoute(DeviceId deviceId, IpPrefix prefix,
1103 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
1104 if (srManager.mastershipService.isLocalMaster(deviceId)) {
1105 srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1106 }
1107 }
1108
1109 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001110 * Remove ECMP graph entry for the given device. Typically called when
1111 * device is no longer available.
1112 *
1113 * @param deviceId the device for which graphs need to be purged
1114 */
1115 protected void purgeEcmpGraph(DeviceId deviceId) {
Saurav Dasc568c342018-01-25 09:49:01 -08001116 statusLock.lock();
1117 try {
1118
1119 if (populationStatus == Status.STARTED) {
1120 log.warn("Previous rule population is not finished. Cannot"
1121 + " proceeed with purgeEcmpGraph for {}", deviceId);
1122 return;
1123 }
1124 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1125 currentEcmpSpgMap.remove(deviceId);
1126 if (updatedEcmpSpgMap != null) {
1127 updatedEcmpSpgMap.remove(deviceId);
1128 }
1129 } finally {
1130 statusLock.unlock();
Saurav Das7bcbe702017-06-13 15:35:54 -07001131 }
1132 }
1133
1134 //////////////////////////////////////
1135 // Routing helper methods and classes
1136 //////////////////////////////////////
1137
1138 /**
Saurav Das4e3224f2016-11-29 14:27:25 -08001139 * Computes set of affected routes due to failed link. Assumes
Saurav Dasb5c236e2016-06-07 10:08:06 -07001140 * previous ecmp shortest-path graph exists for a switch in order to compute
1141 * affected routes. If such a graph does not exist, the method returns null.
1142 *
1143 * @param linkFail the failed link
1144 * @return the set of affected routes which may be empty if no routes were
1145 * affected, or null if no previous ecmp spg was found for comparison
1146 */
sangho20eff1d2015-04-13 15:15:58 -07001147 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sangho20eff1d2015-04-13 15:15:58 -07001148 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1149
1150 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001151 log.debug("Computing the impacted routes for device {} due to link fail",
1152 sw.id());
Saurav Das9df5b7c2017-08-14 16:44:43 -07001153 DeviceId retId = shouldHandleRouting(sw.id());
1154 if (retId == null) {
sangho20eff1d2015-04-13 15:15:58 -07001155 continue;
1156 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001157 Set<DeviceId> devicesToProcess = Sets.newHashSet(retId, sw.id());
1158 for (DeviceId rootSw : devicesToProcess) {
1159 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1160 if (ecmpSpg == null) {
1161 log.warn("No existing ECMP graph for switch {}. Aborting optimized"
1162 + " rerouting and opting for full-reroute", rootSw);
1163 return null;
1164 }
1165 if (log.isDebugEnabled()) {
1166 log.debug("Root switch: {}", rootSw);
1167 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1168 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1169 }
1170 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1171 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1172 // figure out if the broken link affected any route-paths in this graph
1173 for (Integer itrIdx : switchVia.keySet()) {
1174 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1175 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1176 switchVia.get(itrIdx);
1177 for (DeviceId targetSw : swViaMap.keySet()) {
1178 log.trace("TargetSwitch {} --> RootSwitch {}",
1179 targetSw, rootSw);
Saurav Dasb5c236e2016-06-07 10:08:06 -07001180 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1181 log.trace(" Via:");
Pier Ventree0ae7a32016-11-23 09:57:42 -08001182 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb5c236e2016-06-07 10:08:06 -07001183 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001184 Set<ArrayList<DeviceId>> subLinks =
1185 computeLinks(targetSw, rootSw, swViaMap);
1186 for (ArrayList<DeviceId> alink: subLinks) {
1187 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1188 alink.get(1).equals(linkFail.dst().deviceId()))
1189 ||
1190 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1191 alink.get(1).equals(linkFail.src().deviceId()))) {
1192 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1193 ArrayList<DeviceId> aRoute = new ArrayList<>();
1194 aRoute.add(targetSw); // switch with rules to populate
1195 aRoute.add(rootSw); // towards this destination
1196 routes.add(aRoute);
1197 break;
1198 }
sangho20eff1d2015-04-13 15:15:58 -07001199 }
1200 }
1201 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001202
sangho20eff1d2015-04-13 15:15:58 -07001203 }
sangho45b009c2015-05-07 13:30:57 -07001204
sangho20eff1d2015-04-13 15:15:58 -07001205 }
sangho20eff1d2015-04-13 15:15:58 -07001206 return routes;
1207 }
1208
Saurav Das4e3224f2016-11-29 14:27:25 -08001209 /**
1210 * Computes set of affected routes due to new links or failed switches.
1211 *
Saurav Das604ab3a2018-03-18 21:28:15 -07001212 * @param failedSwitch deviceId of failed switch if any
Saurav Das4e3224f2016-11-29 14:27:25 -08001213 * @return the set of affected routes which may be empty if no routes were
1214 * affected
1215 */
Saurav Dase0d4c872018-03-05 14:37:16 -08001216 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001217 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das4e3224f2016-11-29 14:27:25 -08001218 ImmutableSet.builder();
sangho20eff1d2015-04-13 15:15:58 -07001219
1220 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001221 log.debug("Computing the impacted routes for device {}", sw.id());
1222 DeviceId retId = shouldHandleRouting(sw.id());
1223 if (retId == null) {
sangho20eff1d2015-04-13 15:15:58 -07001224 continue;
1225 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001226 Set<DeviceId> devicesToProcess = Sets.newHashSet(retId, sw.id());
1227 for (DeviceId rootSw : devicesToProcess) {
1228 if (log.isTraceEnabled()) {
1229 log.trace("Device links for dev: {}", rootSw);
1230 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1231 log.trace("{} -> {} ", link.src().deviceId(),
1232 link.dst().deviceId());
1233 }
Saurav Dasb5c236e2016-06-07 10:08:06 -07001234 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001235 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1236 if (currEcmpSpg == null) {
1237 log.debug("No existing ECMP graph for device {}.. adding self as "
1238 + "changed route", rootSw);
1239 changedRtBldr.add(Lists.newArrayList(rootSw));
1240 continue;
1241 }
1242 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
1243 if (log.isDebugEnabled()) {
1244 log.debug("Root switch: {}", rootSw);
1245 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1246 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1247 }
1248 // first use the updated/new map to compare to current/existing map
1249 // as new links may have come up
1250 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1251 // then use the current/existing map to compare to updated/new map
1252 // as switch may have been removed
1253 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho45b009c2015-05-07 13:30:57 -07001254 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001255 }
sangho20eff1d2015-04-13 15:15:58 -07001256
Saurav Dase0d4c872018-03-05 14:37:16 -08001257 // handle clearing state for a failed switch in case the switch does
1258 // not have a pair, or the pair is not available
1259 if (failedSwitch != null) {
Charles Chanba6c5752018-04-02 11:46:38 -07001260 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1261 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dase0d4c872018-03-05 14:37:16 -08001262 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1263 srManager.deviceService.getDevices().forEach(dev -> {
1264 if (!dev.id().equals(failedSwitch) &&
1265 srManager.mastershipService.isLocalMaster(dev.id())) {
1266 log.debug(" : {}", dev.id());
1267 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1268 }
1269 });
1270 }
1271 }
1272
Saurav Das7bcbe702017-06-13 15:35:54 -07001273 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das4e3224f2016-11-29 14:27:25 -08001274 for (ArrayList<DeviceId> route: changedRoutes) {
1275 log.debug("Route changes Target -> Root");
1276 if (route.size() == 1) {
1277 log.debug(" : all -> {}", route.get(0));
1278 } else {
1279 log.debug(" : {} -> {}", route.get(0), route.get(1));
1280 }
1281 }
1282 return changedRoutes;
1283 }
1284
1285 /**
1286 * For the root switch, searches all the target nodes reachable in the base
1287 * graph, and compares paths to the ones in the comp graph.
1288 *
1289 * @param base the graph that is indexed for all reachable target nodes
1290 * from the root node
1291 * @param comp the graph that the base graph is compared to
1292 * @param rootSw both ecmp graphs are calculated for the root node
1293 * @return all the routes that have changed in the base graph
1294 */
1295 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1296 EcmpShortestPathGraph comp,
1297 DeviceId rootSw) {
1298 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1299 ImmutableSet.builder();
1300 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1301 base.getAllLearnedSwitchesAndVia();
1302 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1303 comp.getAllLearnedSwitchesAndVia();
1304 for (Integer itrIdx : baseMap.keySet()) {
1305 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1306 baseMap.get(itrIdx);
1307 for (DeviceId targetSw : baseViaMap.keySet()) {
1308 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1309 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1310 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Dasc88d4662017-05-15 15:34:25 -07001311 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das4e3224f2016-11-29 14:27:25 -08001312 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das7bcbe702017-06-13 15:35:54 -07001313 route.add(targetSw); // switch with rules to populate
1314 route.add(rootSw); // towards this destination
Saurav Das4e3224f2016-11-29 14:27:25 -08001315 changedRoutesBuilder.add(route);
sangho20eff1d2015-04-13 15:15:58 -07001316 }
1317 }
sangho45b009c2015-05-07 13:30:57 -07001318 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001319 return changedRoutesBuilder.build();
sangho20eff1d2015-04-13 15:15:58 -07001320 }
1321
Saurav Das7bcbe702017-06-13 15:35:54 -07001322 /**
1323 * Returns the ECMP paths traversed to reach the target switch.
1324 *
1325 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1326 * @param targetSw the switch to reach from the root switch
1327 * @return the nodes traversed on ECMP paths to the target switch
1328 */
sangho20eff1d2015-04-13 15:15:58 -07001329 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das4e3224f2016-11-29 14:27:25 -08001330 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sangho20eff1d2015-04-13 15:15:58 -07001331 for (Integer itrIdx : switchVia.keySet()) {
1332 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1333 switchVia.get(itrIdx);
Saurav Das4e3224f2016-11-29 14:27:25 -08001334 if (swViaMap.get(targetSw) == null) {
sangho20eff1d2015-04-13 15:15:58 -07001335 continue;
1336 } else {
Saurav Das4e3224f2016-11-29 14:27:25 -08001337 return swViaMap.get(targetSw);
sangho20eff1d2015-04-13 15:15:58 -07001338 }
1339 }
1340
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001341 return null;
sangho20eff1d2015-04-13 15:15:58 -07001342 }
1343
Saurav Das7bcbe702017-06-13 15:35:54 -07001344 /**
1345 * Utility method to break down a path from src to dst device into a collection
1346 * of links.
1347 *
1348 * @param src src device of the path
1349 * @param dst dst device of the path
1350 * @param viaMap path taken from src to dst device
1351 * @return collection of links in the path
1352 */
sangho20eff1d2015-04-13 15:15:58 -07001353 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1354 DeviceId dst,
1355 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1356 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1357 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1358 DeviceId linkSrc = src;
1359 DeviceId linkDst = dst;
1360 for (DeviceId viaDevice: via) {
1361 ArrayList<DeviceId> link = new ArrayList<>();
1362 linkDst = viaDevice;
1363 link.add(linkSrc);
1364 link.add(linkDst);
1365 subLinks.add(link);
1366 linkSrc = viaDevice;
1367 }
1368 ArrayList<DeviceId> link = new ArrayList<>();
1369 link.add(linkSrc);
1370 link.add(dst);
1371 subLinks.add(link);
1372 }
1373
1374 return subLinks;
1375 }
1376
Charles Chan93e71ba2016-04-29 14:38:22 -07001377 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001378 * Determines whether this controller instance should handle routing for the
1379 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
1380 * Returns null if this instance should not handle routing for given {@code deviceId}.
1381 * Otherwise the returned value could be the given deviceId itself, or the
1382 * deviceId for the paired edge device. In the latter case, this instance
1383 * should handle routing for both the given device and the paired device.
Charles Chan93e71ba2016-04-29 14:38:22 -07001384 *
Saurav Das7bcbe702017-06-13 15:35:54 -07001385 * @param deviceId device identifier to consider for routing
1386 * @return null or deviceId which could be the same as the given deviceId
1387 * or the deviceId of a paired edge device
Charles Chan93e71ba2016-04-29 14:38:22 -07001388 */
Saurav Das7bcbe702017-06-13 15:35:54 -07001389 private DeviceId shouldHandleRouting(DeviceId deviceId) {
1390 if (!srManager.mastershipService.isLocalMaster(deviceId)) {
1391 log.debug("Not master for dev:{} .. skipping routing, may get handled "
1392 + "elsewhere as part of paired devices", deviceId);
1393 return null;
1394 }
1395 NodeId myNode = srManager.mastershipService.getMasterFor(deviceId);
Charles Chanba6c5752018-04-02 11:46:38 -07001396 Optional<DeviceId> pairDev = srManager.getPairDeviceId(deviceId);
sanghob35a6192015-04-01 13:05:26 -07001397
Charles Chanba6c5752018-04-02 11:46:38 -07001398 if (pairDev.isPresent()) {
1399 if (!srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dase0d4c872018-03-05 14:37:16 -08001400 log.warn("pairedDev {} not available .. routing both this dev:{} "
1401 + "and pair without mastership check for pair",
Saurav Das7bcbe702017-06-13 15:35:54 -07001402 pairDev, deviceId);
Charles Chanba6c5752018-04-02 11:46:38 -07001403 return pairDev.get(); // handle both temporarily
Saurav Das7bcbe702017-06-13 15:35:54 -07001404 }
Charles Chanba6c5752018-04-02 11:46:38 -07001405 NodeId pairMasterNode = srManager.mastershipService.getMasterFor(pairDev.get());
Saurav Das7bcbe702017-06-13 15:35:54 -07001406 if (myNode.compareTo(pairMasterNode) <= 0) {
1407 log.debug("Handling routing for both dev:{} pair-dev:{}; myNode: {}"
1408 + " pairMaster:{} compare:{}", deviceId, pairDev,
1409 myNode, pairMasterNode,
1410 myNode.compareTo(pairMasterNode));
Charles Chanba6c5752018-04-02 11:46:38 -07001411 return pairDev.get(); // handle both
Saurav Das7bcbe702017-06-13 15:35:54 -07001412 } else {
1413 log.debug("PairDev node: {} should handle routing for dev:{} and "
1414 + "pair-dev:{}", pairMasterNode, deviceId, pairDev);
1415 return null; // handle neither
sanghob35a6192015-04-01 13:05:26 -07001416 }
1417 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001418 return deviceId; // not paired, just handle given device
sanghob35a6192015-04-01 13:05:26 -07001419 }
1420
Charles Chan93e71ba2016-04-29 14:38:22 -07001421 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001422 * Returns the set of deviceIds which are the next hops from the targetSw
1423 * to the dstSw according to the latest ECMP spg.
1424 *
1425 * @param targetSw the switch for which the next-hops are desired
1426 * @param dstSw the switch to which the next-hops lead to from the targetSw
1427 * @return set of next hop deviceIds, could be empty if no next hops are found
1428 */
1429 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
1430 boolean targetIsEdge = false;
1431 try {
1432 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
1433 } catch (DeviceConfigNotFoundException e) {
1434 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
1435 + "continuing to getNextHops", targetSw);
1436 }
1437
1438 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
1439 if (ecmpSpg == null) {
1440 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
1441 return ImmutableSet.of();
1442 }
1443 HashMap<Integer,
1444 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
1445 ecmpSpg.getAllLearnedSwitchesAndVia();
1446 for (Integer itrIdx : switchVia.keySet()) {
1447 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1448 switchVia.get(itrIdx);
1449 for (DeviceId target : swViaMap.keySet()) {
1450 if (!target.equals(targetSw)) {
1451 continue;
1452 }
1453 if (!targetIsEdge && itrIdx > 1) {
Saurav Dasa4020382018-02-14 14:14:54 -08001454 // optimization for spines to not use leaves to get
1455 // to a spine or other leaves
1456 boolean pathdevIsEdge = false;
1457 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1458 for (DeviceId pathdev : via) {
1459 try {
1460 pathdevIsEdge = srManager.deviceConfiguration
1461 .isEdgeDevice(pathdev);
1462 } catch (DeviceConfigNotFoundException e) {
1463 log.warn(e.getMessage());
1464 }
1465 if (pathdevIsEdge) {
1466 log.debug("Avoiding {} hop path for non-edge targetSw:{}"
1467 + " --> dstSw:{} which goes through an edge"
1468 + " device {} in path {}", itrIdx,
1469 targetSw, dstSw, pathdev, via);
1470 return ImmutableSet.of();
1471 }
1472 }
1473 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001474 }
1475 Set<DeviceId> nextHops = new HashSet<>();
1476 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1477 if (via.isEmpty()) {
1478 // the dstSw is the next-hop from the targetSw
1479 nextHops.add(dstSw);
1480 } else {
1481 // first elem is next-hop in each ECMP path
1482 nextHops.add(via.get(0));
1483 }
1484 }
1485 return nextHops;
1486 }
1487 }
1488 return ImmutableSet.of(); //no next-hops found
1489 }
1490
1491 /**
1492 * Represents two devices that are paired by configuration. An EdgePair for
1493 * (dev1, dev2) is the same as as EdgePair for (dev2, dev1)
1494 */
1495 protected final class EdgePair {
1496 DeviceId dev1;
1497 DeviceId dev2;
1498
1499 EdgePair(DeviceId dev1, DeviceId dev2) {
1500 this.dev1 = dev1;
1501 this.dev2 = dev2;
1502 }
1503
1504 boolean includes(DeviceId dev) {
1505 return dev1.equals(dev) || dev2.equals(dev);
1506 }
1507
1508 @Override
1509 public boolean equals(Object o) {
1510 if (this == o) {
1511 return true;
1512 }
1513 if (!(o instanceof EdgePair)) {
1514 return false;
1515 }
1516 EdgePair that = (EdgePair) o;
1517 return ((this.dev1.equals(that.dev1) && this.dev2.equals(that.dev2)) ||
1518 (this.dev1.equals(that.dev2) && this.dev2.equals(that.dev1)));
1519 }
1520
1521 @Override
1522 public int hashCode() {
1523 if (dev1.toString().compareTo(dev2.toString()) <= 0) {
1524 return Objects.hash(dev1, dev2);
1525 } else {
1526 return Objects.hash(dev2, dev1);
1527 }
1528 }
1529
1530 @Override
1531 public String toString() {
1532 return toStringHelper(this)
1533 .add("Dev1", dev1)
1534 .add("Dev2", dev2)
1535 .toString();
1536 }
1537 }
1538
1539 //////////////////////////////////////
1540 // Filtering rule creation
1541 //////////////////////////////////////
1542
1543 /**
Saurav Das018605f2017-02-18 14:05:44 -08001544 * Populates filtering rules for port, and punting rules
1545 * for gateway IPs, loopback IPs and arp/ndp traffic.
1546 * Should only be called by the master instance for this device/port.
sanghob35a6192015-04-01 13:05:26 -07001547 *
1548 * @param deviceId Switch ID to set the rules
1549 */
Saurav Das822c4e22015-10-23 10:51:11 -07001550 public void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das59232cf2016-04-27 18:35:50 -07001551 // Although device is added, sometimes device store does not have the
1552 // ports for this device yet. It results in missing filtering rules in the
1553 // switch. We will attempt it a few times. If it still does not work,
1554 // user can manually repopulate using CLI command sr-reroute-network
Charles Chanf6ec1532017-02-08 16:10:40 -08001555 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd2fded02016-12-02 15:43:47 -08001556 if (firstRun == null) {
1557 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das59232cf2016-04-27 18:35:50 -07001558 }
Saurav Dasd2fded02016-12-02 15:43:47 -08001559 executorService.schedule(new RetryFilters(deviceId, firstRun),
1560 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sanghob35a6192015-04-01 13:05:26 -07001561 }
1562
1563 /**
Saurav Dasd2fded02016-12-02 15:43:47 -08001564 * Utility class used to temporarily store information about the ports on a
1565 * device processed for filtering objectives.
Saurav Dasd2fded02016-12-02 15:43:47 -08001566 */
1567 public final class PortFilterInfo {
Saurav Das018605f2017-02-18 14:05:44 -08001568 int disabledPorts = 0, errorPorts = 0, filteredPorts = 0;
Saurav Das59232cf2016-04-27 18:35:50 -07001569
Saurav Das018605f2017-02-18 14:05:44 -08001570 public PortFilterInfo(int disabledPorts, int errorPorts,
Saurav Dasd2fded02016-12-02 15:43:47 -08001571 int filteredPorts) {
1572 this.disabledPorts = disabledPorts;
1573 this.filteredPorts = filteredPorts;
Saurav Das018605f2017-02-18 14:05:44 -08001574 this.errorPorts = errorPorts;
Saurav Dasd2fded02016-12-02 15:43:47 -08001575 }
1576
1577 @Override
1578 public int hashCode() {
Saurav Das018605f2017-02-18 14:05:44 -08001579 return Objects.hash(disabledPorts, filteredPorts, errorPorts);
Saurav Dasd2fded02016-12-02 15:43:47 -08001580 }
1581
1582 @Override
1583 public boolean equals(Object obj) {
1584 if (this == obj) {
1585 return true;
1586 }
1587 if ((obj == null) || (!(obj instanceof PortFilterInfo))) {
1588 return false;
1589 }
1590 PortFilterInfo other = (PortFilterInfo) obj;
1591 return ((disabledPorts == other.disabledPorts) &&
1592 (filteredPorts == other.filteredPorts) &&
Saurav Das018605f2017-02-18 14:05:44 -08001593 (errorPorts == other.errorPorts));
Saurav Dasd2fded02016-12-02 15:43:47 -08001594 }
1595
1596 @Override
1597 public String toString() {
1598 MoreObjects.ToStringHelper helper = toStringHelper(this)
1599 .add("disabledPorts", disabledPorts)
Saurav Das018605f2017-02-18 14:05:44 -08001600 .add("errorPorts", errorPorts)
Saurav Dasd2fded02016-12-02 15:43:47 -08001601 .add("filteredPorts", filteredPorts);
1602 return helper.toString();
1603 }
1604 }
1605
1606 /**
1607 * RetryFilters populates filtering objectives for a device and keeps retrying
1608 * till the number of ports filtered are constant for a predefined number
1609 * of attempts.
1610 */
1611 protected final class RetryFilters implements Runnable {
1612 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
1613 DeviceId devId;
1614 int counter;
1615 PortFilterInfo prevRun;
1616
1617 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das59232cf2016-04-27 18:35:50 -07001618 devId = deviceId;
Saurav Dasd2fded02016-12-02 15:43:47 -08001619 prevRun = previousRun;
1620 counter = 0;
Saurav Das59232cf2016-04-27 18:35:50 -07001621 }
1622
1623 @Override
1624 public void run() {
Charles Chan7f9737b2017-06-22 14:27:17 -07001625 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chanf6ec1532017-02-08 16:10:40 -08001626 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd2fded02016-12-02 15:43:47 -08001627 boolean sameResult = prevRun.equals(thisRun);
1628 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
1629 thisRun, sameResult);
Ray Milkeyc6c9b172018-02-26 09:36:31 -08001630 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Das018605f2017-02-18 14:05:44 -08001631 // exponentially increasing intervals for retries
1632 executorService.schedule(this,
1633 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
1634 TimeUnit.MILLISECONDS);
Saurav Dasd2fded02016-12-02 15:43:47 -08001635 if (!sameResult) {
1636 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
1637 }
Saurav Das59232cf2016-04-27 18:35:50 -07001638 }
Saurav Dasd2fded02016-12-02 15:43:47 -08001639 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das59232cf2016-04-27 18:35:50 -07001640 }
Saurav Das59232cf2016-04-27 18:35:50 -07001641 }
1642
sanghob35a6192015-04-01 13:05:26 -07001643}