blob: c64d4521e95b8f93fb87f1e17b43e5e197ae017b [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.segmentrouting;
17
Saurav Dasd2fded02016-12-02 15:43:47 -080018import com.google.common.base.MoreObjects;
Saurav Dasc88d4662017-05-15 15:34:25 -070019import com.google.common.collect.ImmutableMap;
20import com.google.common.collect.ImmutableMap.Builder;
Charles Chan93e71ba2016-04-29 14:38:22 -070021import com.google.common.collect.ImmutableSet;
Saurav Das4e3224f2016-11-29 14:27:25 -080022import com.google.common.collect.Lists;
sangho20eff1d2015-04-13 15:15:58 -070023import com.google.common.collect.Maps;
24import com.google.common.collect.Sets;
Saurav Dasceccf242017-08-03 18:30:35 -070025
sangho666cd6d2015-04-14 16:27:13 -070026import org.onlab.packet.Ip4Address;
Pier Ventree0ae7a32016-11-23 09:57:42 -080027import org.onlab.packet.Ip6Address;
sanghob35a6192015-04-01 13:05:26 -070028import org.onlab.packet.IpPrefix;
Charles Chan2fde6d42017-08-23 14:46:43 -070029import org.onlab.packet.MacAddress;
30import org.onlab.packet.VlanId;
Saurav Das7bcbe702017-06-13 15:35:54 -070031import org.onosproject.cluster.NodeId;
Saurav Das201762d2018-04-21 17:19:48 -070032import org.onosproject.mastership.MastershipEvent;
Charles Chan93e71ba2016-04-29 14:38:22 -070033import org.onosproject.net.ConnectPoint;
sanghob35a6192015-04-01 13:05:26 -070034import org.onosproject.net.Device;
35import org.onosproject.net.DeviceId;
sangho20eff1d2015-04-13 15:15:58 -070036import org.onosproject.net.Link;
Charles Chan2fde6d42017-08-23 14:46:43 -070037import org.onosproject.net.PortNumber;
Charles Chan0b4e6182015-11-03 10:42:14 -080038import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
39import org.onosproject.segmentrouting.config.DeviceConfiguration;
Saurav Dasc88d4662017-05-15 15:34:25 -070040import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Charles Chan2ff1bac2018-03-29 16:03:41 -070041import org.onosproject.store.serializers.KryoNamespaces;
42import org.onosproject.store.service.Serializer;
sanghob35a6192015-04-01 13:05:26 -070043import org.slf4j.Logger;
44import org.slf4j.LoggerFactory;
45
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -070046import java.time.Instant;
sanghob35a6192015-04-01 13:05:26 -070047import java.util.ArrayList;
Charles Chan2ff1bac2018-03-29 16:03:41 -070048import java.util.Collections;
sanghob35a6192015-04-01 13:05:26 -070049import java.util.HashMap;
50import java.util.HashSet;
Saurav Das7bcbe702017-06-13 15:35:54 -070051import java.util.Iterator;
Charles Chan2ff1bac2018-03-29 16:03:41 -070052import java.util.List;
Saurav Das7bcbe702017-06-13 15:35:54 -070053import java.util.Map;
Saurav Dasd2fded02016-12-02 15:43:47 -080054import java.util.Objects;
Charles Chanba6c5752018-04-02 11:46:38 -070055import java.util.Optional;
sanghob35a6192015-04-01 13:05:26 -070056import java.util.Set;
Saurav Das59232cf2016-04-27 18:35:50 -070057import java.util.concurrent.ScheduledExecutorService;
58import java.util.concurrent.TimeUnit;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090059import java.util.concurrent.locks.Lock;
60import java.util.concurrent.locks.ReentrantLock;
Saurav Das604ab3a2018-03-18 21:28:15 -070061import java.util.stream.Stream;
62
Saurav Dasd2fded02016-12-02 15:43:47 -080063import static com.google.common.base.MoreObjects.toStringHelper;
Pier Ventree0ae7a32016-11-23 09:57:42 -080064import static com.google.common.base.Preconditions.checkNotNull;
65import static java.util.concurrent.Executors.newScheduledThreadPool;
66import static org.onlab.util.Tools.groupedThreads;
sanghob35a6192015-04-01 13:05:26 -070067
/**
 * Default routing handler that is responsible for route computing and
 * routing rule population.
 */
sanghob35a6192015-04-01 13:05:26 -070072public class DefaultRoutingHandler {
Saurav Das018605f2017-02-18 14:05:44 -080073 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey3717e602018-02-01 13:49:47 -080074 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Das018605f2017-02-18 14:05:44 -080075 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasceccf242017-08-03 18:30:35 -070076 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Das201762d2018-04-21 17:19:48 -070077 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Charles Chan93e71ba2016-04-29 14:38:22 -070078 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sanghob35a6192015-04-01 13:05:26 -070079
80 private SegmentRoutingManager srManager;
81 private RoutingRulePopulator rulePopulator;
Shashikanth VH013a7bc2015-12-11 01:32:44 +053082 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
83 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho666cd6d2015-04-14 16:27:13 -070084 private DeviceConfiguration config;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +090085 private final Lock statusLock = new ReentrantLock();
86 private volatile Status populationStatus;
Yuta HIGUCHI1624df12016-07-21 16:54:33 -070087 private ScheduledExecutorService executorService
Saurav Dasd2fded02016-12-02 15:43:47 -080088 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das201762d2018-04-21 17:19:48 -070089 private Instant lastRoutingChange = Instant.EPOCH;
sanghob35a6192015-04-01 13:05:26 -070090
Saurav Das201762d2018-04-21 17:19:48 -070091 // Distributed store to keep track of ONOS instance that should program the
92 // device pair. There should be only one instance (the king) that programs the same pair.
Charles Chan2ff1bac2018-03-29 16:03:41 -070093 Map<Set<DeviceId>, NodeId> shouldProgram;
Charles Chan50bb6ef2018-04-18 18:41:05 -070094 Map<DeviceId, Boolean> shouldProgramCache;
Charles Chan2ff1bac2018-03-29 16:03:41 -070095
Saurav Das201762d2018-04-21 17:19:48 -070096 // Local store to keep track of all devices that this instance was responsible
97 // for programming in the last run. Helps to determine if mastership changed
98 // during a run - only relevant for programming as a result of topo change.
99 Set<DeviceId> lastProgrammed;
100
/**
 * Represents the default routing population status.
 */
public enum Status {
    /** The population process has not started yet. */
    IDLE,

    /** The population process has started. */
    STARTED,

    /**
     * The population process was aborted due to errors, mostly for groups
     * not found.
     */
    ABORTED,

    /** The population process finished successfully. */
    SUCCEEDED
}
118
/**
 * Creates a DefaultRoutingHandler object.
 * <p>
 * Sets up the local should-program cache and the "sr-should-program"
 * consistent map obtained from the manager's storage service, then hands
 * the manager to {@link #update(SegmentRoutingManager)} for the remaining
 * initialization.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.shouldProgramCache = Maps.newConcurrentMap();
    this.shouldProgram = srManager.storageService
            .<Set<DeviceId>, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build()
            .asJavaMap();
    update(srManager);
}
133
134 /**
135 * Updates a DefaultRoutingHandler object.
136 *
137 * @param srManager SegmentRoutingManager object
138 */
139 void update(SegmentRoutingManager srManager) {
sanghob35a6192015-04-01 13:05:26 -0700140 this.srManager = srManager;
141 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho666cd6d2015-04-14 16:27:13 -0700142 this.config = checkNotNull(srManager.deviceConfiguration);
sanghob35a6192015-04-01 13:05:26 -0700143 this.populationStatus = Status.IDLE;
sangho20eff1d2015-04-13 15:15:58 -0700144 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Das201762d2018-04-21 17:19:48 -0700145 this.lastProgrammed = Sets.newConcurrentHashSet();
sanghob35a6192015-04-01 13:05:26 -0700146 }
147
148 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700149 * Returns an immutable copy of the current ECMP shortest-path graph as
150 * computed by this controller instance.
151 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700152 * @return immutable copy of the current ECMP graph
Saurav Dasc88d4662017-05-15 15:34:25 -0700153 */
154 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
155 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
156 currentEcmpSpgMap.entrySet().forEach(entry -> {
157 if (entry.getValue() != null) {
158 builder.put(entry.getKey(), entry.getValue());
159 }
160 });
161 return builder.build();
162 }
163
Saurav Dasceccf242017-08-03 18:30:35 -0700164 /**
165 * Acquires the lock used when making routing changes.
166 */
167 public void acquireRoutingLock() {
168 statusLock.lock();
169 }
170
171 /**
172 * Releases the lock used when making routing changes.
173 */
174 public void releaseRoutingLock() {
175 statusLock.unlock();
176 }
177
178 /**
179 * Determines if routing in the network has been stable in the last
180 * STABLITY_THRESHOLD seconds, by comparing the current time to the last
181 * routing change timestamp.
182 *
183 * @return true if stable
184 */
185 public boolean isRoutingStable() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700186 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
187 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700188 log.trace("Routing stable since {}s", now - last);
Saurav Dasceccf242017-08-03 18:30:35 -0700189 return (now - last) > STABLITY_THRESHOLD;
190 }
191
192
//////////////////////////////////////
//  Route path handling
//////////////////////////////////////

/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network
 *      a) due to configuration change
 *      b) due to route-added event
 *      c) due to change in the topology
 */
203
Saurav Dasc88d4662017-05-15 15:34:25 -0700204 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700205 * Populates all routing rules to all switches. Typically triggered at
206 * startup or after a configuration event.
sanghob35a6192015-04-01 13:05:26 -0700207 */
Saurav Dasc88d4662017-05-15 15:34:25 -0700208 public void populateAllRoutingRules() {
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700209 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900210 statusLock.lock();
211 try {
Saurav Das7bcbe702017-06-13 15:35:54 -0700212 if (populationStatus == Status.STARTED) {
213 log.warn("Previous rule population is not finished. Cannot"
214 + " proceed with populateAllRoutingRules");
215 return;
216 }
217
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900218 populationStatus = Status.STARTED;
219 rulePopulator.resetCounter();
Saurav Das7bcbe702017-06-13 15:35:54 -0700220 log.info("Starting to populate all routing rules");
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900221 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sanghob35a6192015-04-01 13:05:26 -0700222
Saurav Das7bcbe702017-06-13 15:35:54 -0700223 // take a snapshot of the topology
224 updatedEcmpSpgMap = new HashMap<>();
225 Set<EdgePair> edgePairs = new HashSet<>();
226 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800227 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700228 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800229 new EcmpShortestPathGraph(dstSw, srManager);
230 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700231 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
232 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700233 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700234 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
235 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
236 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700237 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700238
239 if (!shouldProgram(dstSw)) {
Saurav Das201762d2018-04-21 17:19:48 -0700240 lastProgrammed.remove(dstSw);
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900241 continue;
Saurav Das201762d2018-04-21 17:19:48 -0700242 } else {
243 lastProgrammed.add(dstSw);
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900244 }
Saurav Das201762d2018-04-21 17:19:48 -0700245 // To do a full reroute, assume all route-paths have changed
Charles Chan2ff1bac2018-03-29 16:03:41 -0700246 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800247 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
248 if (targetSw.equals(dev)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700249 continue;
250 }
Jonathan Hart8ca2bc02017-11-30 18:23:42 -0800251 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das7bcbe702017-06-13 15:35:54 -0700252 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900253 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700254 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900255
Saurav Das7bcbe702017-06-13 15:35:54 -0700256 if (!redoRouting(routeChanges, edgePairs, null)) {
257 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
258 populationStatus = Status.ABORTED;
259 log.warn("Failed to repopulate all routing rules.");
260 return;
sanghob35a6192015-04-01 13:05:26 -0700261 }
262
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900263 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
264 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700265 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900266 rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700267 return;
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900268 } finally {
269 statusLock.unlock();
sanghob35a6192015-04-01 13:05:26 -0700270 }
sanghob35a6192015-04-01 13:05:26 -0700271 }
272
sangho20eff1d2015-04-13 15:15:58 -0700273 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700274 * Populate rules from all other edge devices to the connect-point(s)
275 * specified for the given subnets.
276 *
277 * @param cpts connect point(s) of the subnets being added
278 * @param subnets subnets being added
Charles Chan2fde6d42017-08-23 14:46:43 -0700279 */
280 // XXX refactor
Saurav Das7bcbe702017-06-13 15:35:54 -0700281 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan71e64f12017-09-11 15:21:57 -0700282 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
283 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
284 return;
285 }
286
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700287 lastRoutingChange = Instant.now();
Saurav Das7bcbe702017-06-13 15:35:54 -0700288 statusLock.lock();
289 try {
290 if (populationStatus == Status.STARTED) {
291 log.warn("Previous rule population is not finished. Cannot"
292 + " proceed with routing rules for added routes");
293 return;
294 }
295 populationStatus = Status.STARTED;
296 rulePopulator.resetCounter();
Charles Chan2fde6d42017-08-23 14:46:43 -0700297 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
298 subnets, cpts);
Saurav Dasc568c342018-01-25 09:49:01 -0800299 // In principle an update to a subnet/prefix should not require a
300 // new ECMPspg calculation as it is not a topology event. As a
301 // result, we use the current/existing ECMPspg in the updated map
302 // used by the redoRouting method.
Saurav Das15a81782018-02-09 09:15:03 -0800303 if (updatedEcmpSpgMap == null) {
304 updatedEcmpSpgMap = new HashMap<>();
305 }
Saurav Dasc568c342018-01-25 09:49:01 -0800306 currentEcmpSpgMap.entrySet().forEach(entry -> {
307 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase7f51012018-02-09 17:26:45 -0800308 if (log.isTraceEnabled()) {
309 log.trace("Root switch: {}", entry.getKey());
310 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Dasc568c342018-01-25 09:49:01 -0800311 }
312 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700313 Set<EdgePair> edgePairs = new HashSet<>();
314 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
315 boolean handleRouting = false;
316
317 if (cpts.size() == 2) {
318 // ensure connect points are edge-pairs
319 Iterator<ConnectPoint> iter = cpts.iterator();
320 DeviceId dev1 = iter.next().deviceId();
Charles Chanba6c5752018-04-02 11:46:38 -0700321 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
322 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
323 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700324 } else {
325 log.warn("Connectpoints {} for subnets {} not on "
326 + "pair-devices.. aborting populateSubnet", cpts, subnets);
327 populationStatus = Status.ABORTED;
328 return;
329 }
330 for (ConnectPoint cp : cpts) {
Saurav Dasc568c342018-01-25 09:49:01 -0800331 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
332 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700333 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800334 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
335 log.warn("populateSubnet: no updated graph for dev:{}"
336 + " ... creating", cp.deviceId());
337 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700338 if (!shouldProgram(cp.deviceId())) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700339 continue;
340 }
341 handleRouting = true;
342 }
343 } else {
344 // single connect point
345 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Dasc568c342018-01-25 09:49:01 -0800346 if (updatedEcmpSpgMap.get(dstSw) == null) {
347 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das7bcbe702017-06-13 15:35:54 -0700348 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Dasc568c342018-01-25 09:49:01 -0800349 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
350 log.warn("populateSubnet: no updated graph for dev:{}"
351 + " ... creating", dstSw);
352 }
Charles Chan2ff1bac2018-03-29 16:03:41 -0700353 handleRouting = shouldProgram(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700354 }
355
356 if (!handleRouting) {
357 log.debug("This instance is not handling ecmp routing to the "
358 + "connectPoint(s) {}", cpts);
359 populationStatus = Status.ABORTED;
360 return;
361 }
362
363 // if it gets here, this instance should handle routing for the
364 // connectpoint(s). Assume all route-paths have to be updated to
365 // the connectpoint(s) with the following exceptions
366 // 1. if target is non-edge no need for routing rules
367 // 2. if target is one of the connectpoints
368 for (ConnectPoint cp : cpts) {
369 DeviceId dstSw = cp.deviceId();
370 for (Device targetSw : srManager.deviceService.getDevices()) {
371 boolean isEdge = false;
372 try {
373 isEdge = config.isEdgeDevice(targetSw.id());
374 } catch (DeviceConfigNotFoundException e) {
Charles Chan92726132018-02-16 17:20:54 -0800375 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
376 continue;
Saurav Das7bcbe702017-06-13 15:35:54 -0700377 }
Charles Chanba6c5752018-04-02 11:46:38 -0700378 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das7bcbe702017-06-13 15:35:54 -0700379 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chanba6c5752018-04-02 11:46:38 -0700380 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700381 continue;
382 }
383 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
384 }
385 }
386
387 if (!redoRouting(routeChanges, edgePairs, subnets)) {
388 log.debug("populateSubnet: populationStatus is ABORTED");
389 populationStatus = Status.ABORTED;
390 log.warn("Failed to repopulate the rules for subnet.");
391 return;
392 }
393
394 log.debug("populateSubnet: populationStatus is SUCCEEDED");
395 populationStatus = Status.SUCCEEDED;
396 log.info("Completed subnet population. Total # of rules pushed : {}",
397 rulePopulator.getCounter());
398 return;
399
400 } finally {
401 statusLock.unlock();
402 }
403 }
404
405 /**
Saurav Dasc88d4662017-05-15 15:34:25 -0700406 * Populates the routing rules or makes hash group changes according to the
407 * route-path changes due to link failure, switch failure or link up. This
408 * method should only be called for one of these three possible event-types.
Saurav Das604ab3a2018-03-18 21:28:15 -0700409 * Note that when a switch goes away, all of its links fail as well, but
410 * this is handled as a single switch removal event.
sangho20eff1d2015-04-13 15:15:58 -0700411 *
Saurav Das604ab3a2018-03-18 21:28:15 -0700412 * @param linkDown the single failed link, or null for other conditions such
413 * as link-up or a removed switch
Saurav Dasc88d4662017-05-15 15:34:25 -0700414 * @param linkUp the single link up, or null for other conditions such as
Saurav Das604ab3a2018-03-18 21:28:15 -0700415 * link-down or a removed switch
416 * @param switchDown the removed switch, or null for other conditions such
417 * as link-down or link-up
418 * @param seenBefore true if this event is for a linkUp or linkDown for a
419 * seen link
420 */
421 // TODO This method should be refactored into three separated methods
Saurav Dasc88d4662017-05-15 15:34:25 -0700422 public void populateRoutingRulesForLinkStatusChange(Link linkDown,
423 Link linkUp,
Saurav Das604ab3a2018-03-18 21:28:15 -0700424 DeviceId switchDown,
425 boolean seenBefore) {
426 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
427 .count() != 1) {
Saurav Dasc88d4662017-05-15 15:34:25 -0700428 log.warn("Only one event can be handled for link status change .. aborting");
429 return;
430 }
Saurav Das604ab3a2018-03-18 21:28:15 -0700431
Yuta HIGUCHI0c47d532017-08-18 23:16:35 -0700432 lastRoutingChange = Instant.now();
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900433 statusLock.lock();
434 try {
sangho20eff1d2015-04-13 15:15:58 -0700435
436 if (populationStatus == Status.STARTED) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700437 log.warn("Previous rule population is not finished. Cannot"
Saurav Dasc568c342018-01-25 09:49:01 -0800438 + " proceeed with routingRules for Topology change");
Saurav Dasc88d4662017-05-15 15:34:25 -0700439 return;
sangho20eff1d2015-04-13 15:15:58 -0700440 }
441
Saurav Das7bcbe702017-06-13 15:35:54 -0700442 // Take snapshots of the topology
sangho45b009c2015-05-07 13:30:57 -0700443 updatedEcmpSpgMap = new HashMap<>();
Saurav Das7bcbe702017-06-13 15:35:54 -0700444 Set<EdgePair> edgePairs = new HashSet<>();
sangho45b009c2015-05-07 13:30:57 -0700445 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH013a7bc2015-12-11 01:32:44 +0530446 EcmpShortestPathGraph ecmpSpgUpdated =
447 new EcmpShortestPathGraph(sw.id(), srManager);
sangho45b009c2015-05-07 13:30:57 -0700448 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chanba6c5752018-04-02 11:46:38 -0700449 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
450 if (pairDev.isPresent()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700451 // pairDev may not be available yet, but we still need to add
Charles Chanba6c5752018-04-02 11:46:38 -0700452 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
453 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
454 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das7bcbe702017-06-13 15:35:54 -0700455 }
sangho45b009c2015-05-07 13:30:57 -0700456 }
457
Saurav Dasc568c342018-01-25 09:49:01 -0800458 log.info("Starting to populate routing rules from Topology change");
sangho52abe3a2015-05-05 14:13:34 -0700459
sangho20eff1d2015-04-13 15:15:58 -0700460 Set<ArrayList<DeviceId>> routeChanges;
Saurav Dasc88d4662017-05-15 15:34:25 -0700461 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700462 + "populationStatus is STARTED");
sangho20eff1d2015-04-13 15:15:58 -0700463 populationStatus = Status.STARTED;
Saurav Dasc568c342018-01-25 09:49:01 -0800464 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
465 boolean hashGroupsChanged = false;
Saurav Das4e3224f2016-11-29 14:27:25 -0800466 // try optimized re-routing
Saurav Dasc88d4662017-05-15 15:34:25 -0700467 if (linkDown == null) {
468 // either a linkUp or a switchDown - compute all route changes by
469 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dase0d4c872018-03-05 14:37:16 -0800470 routeChanges = computeRouteChange(switchDown);
Saurav Dasc88d4662017-05-15 15:34:25 -0700471
Saurav Das9df5b7c2017-08-14 16:44:43 -0700472 // deal with linkUp of a seen-before link
Saurav Das604ab3a2018-03-18 21:28:15 -0700473 if (linkUp != null && seenBefore) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700474 // link previously seen before
475 // do hash-bucket changes instead of a re-route
476 processHashGroupChange(routeChanges, false, null);
477 // clear out routesChanges so a re-route is not attempted
478 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800479 hashGroupsChanged = true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700480 }
Saurav Das9df5b7c2017-08-14 16:44:43 -0700481 // for a linkUp of a never-seen-before link
482 // let it fall through to a reroute of the routeChanges
Saurav Dasc88d4662017-05-15 15:34:25 -0700483
Saurav Das9df5b7c2017-08-14 16:44:43 -0700484 //deal with switchDown
485 if (switchDown != null) {
486 processHashGroupChange(routeChanges, true, switchDown);
487 // clear out routesChanges so a re-route is not attempted
488 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800489 hashGroupsChanged = true;
Saurav Das9df5b7c2017-08-14 16:44:43 -0700490 }
sangho20eff1d2015-04-13 15:15:58 -0700491 } else {
Saurav Dasc88d4662017-05-15 15:34:25 -0700492 // link has gone down
493 // Compare existing ECMP SPG only with the link that went down
494 routeChanges = computeDamagedRoutes(linkDown);
495 if (routeChanges != null) {
496 processHashGroupChange(routeChanges, true, null);
497 // clear out routesChanges so a re-route is not attempted
498 routeChanges = ImmutableSet.of();
Saurav Dasc568c342018-01-25 09:49:01 -0800499 hashGroupsChanged = true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700500 }
sangho20eff1d2015-04-13 15:15:58 -0700501 }
502
Saurav Das4e3224f2016-11-29 14:27:25 -0800503 // do full re-routing if optimized routing returns null routeChanges
Saurav Dasb5c236e2016-06-07 10:08:06 -0700504 if (routeChanges == null) {
Saurav Dasc568c342018-01-25 09:49:01 -0800505 log.warn("Optimized routing failed... opting for full reroute");
Saurav Das7bcbe702017-06-13 15:35:54 -0700506 populationStatus = Status.ABORTED;
Saurav Dasc88d4662017-05-15 15:34:25 -0700507 populateAllRoutingRules();
508 return;
Saurav Dasb5c236e2016-06-07 10:08:06 -0700509 }
510
sangho20eff1d2015-04-13 15:15:58 -0700511 if (routeChanges.isEmpty()) {
Saurav Dasc568c342018-01-25 09:49:01 -0800512 if (hashGroupsChanged) {
513 log.info("Hash-groups changed for link status change");
514 } else {
515 log.info("No re-route or re-hash attempted for the link"
516 + " status change");
517 updatedEcmpSpgMap.keySet().forEach(devId -> {
518 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
519 log.debug("Updating ECMPspg for remaining dev:{}", devId);
520 });
521 }
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700522 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700523 populationStatus = Status.SUCCEEDED;
Saurav Dasc88d4662017-05-15 15:34:25 -0700524 return;
sangho20eff1d2015-04-13 15:15:58 -0700525 }
526
Saurav Dasc88d4662017-05-15 15:34:25 -0700527 // reroute of routeChanges
Saurav Das7bcbe702017-06-13 15:35:54 -0700528 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700529 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sangho20eff1d2015-04-13 15:15:58 -0700530 populationStatus = Status.SUCCEEDED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700531 log.info("Completed repopulation of rules for link-status change."
532 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Dasc88d4662017-05-15 15:34:25 -0700533 return;
sangho20eff1d2015-04-13 15:15:58 -0700534 } else {
Srikanth Vavilapalli23181912015-05-04 09:48:09 -0700535 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sangho20eff1d2015-04-13 15:15:58 -0700536 populationStatus = Status.ABORTED;
Saurav Das7bcbe702017-06-13 15:35:54 -0700537 log.warn("Failed to repopulate the rules for link status change.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700538 return;
sangho20eff1d2015-04-13 15:15:58 -0700539 }
HIGUCHI Yuta84a25fc2015-09-08 16:16:31 +0900540 } finally {
541 statusLock.unlock();
sangho20eff1d2015-04-13 15:15:58 -0700542 }
543 }
544
Saurav Dasc88d4662017-05-15 15:34:25 -0700545 /**
Saurav Das7bcbe702017-06-13 15:35:54 -0700546 * Processes a set a route-path changes by reprogramming routing rules and
547 * creating new hash-groups or editing them if necessary. This method also
548 * determines the next-hops for the route-path from the src-switch (target)
549 * of the path towards the dst-switch of the path.
Saurav Dasc88d4662017-05-15 15:34:25 -0700550 *
Saurav Das7bcbe702017-06-13 15:35:54 -0700551 * @param routeChanges a set of route-path changes, where each route-path is
552 * a list with its first element the src-switch (target)
553 * of the path, and the second element the dst-switch of
554 * the path.
555 * @param edgePairs a set of edge-switches that are paired by configuration
556 * @param subnets a set of prefixes that need to be populated in the routing
557 * table of the target switch in the route-path. Can be null,
558 * in which case all the prefixes belonging to the dst-switch
559 * will be populated in the target switch
560 * @return true if successful in repopulating all routes
Saurav Dasc88d4662017-05-15 15:34:25 -0700561 */
Saurav Das7bcbe702017-06-13 15:35:54 -0700562 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
563 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
564 // first make every entry two-elements
565 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
566 for (ArrayList<DeviceId> route : routeChanges) {
567 if (route.size() == 1) {
568 DeviceId dstSw = route.get(0);
569 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
570 if (ec == null) {
571 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
572 return false;
573 }
574 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
575 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
576 changedRoutes.add(Lists.newArrayList(target, dstSw));
577 });
578 });
579 } else {
580 DeviceId targetSw = route.get(0);
581 DeviceId dstSw = route.get(1);
582 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
583 }
584 }
585
586 // now process changedRoutes according to edgePairs
587 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
588 return false; //abort routing and fail fast
589 }
590
591 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Dasc568c342018-01-25 09:49:01 -0800592 Set<DeviceId> updatedDevices = Sets.newHashSet();
593 if (!redoRoutingIndividualDests(subnets, changedRoutes,
594 updatedDevices)) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700595 return false; //abort routing and fail fast
596 }
597
Saurav Das7bcbe702017-06-13 15:35:54 -0700598 // update ecmpSPG for all edge-pairs
599 for (EdgePair ep : edgePairs) {
600 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
601 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
602 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
603 }
Saurav Dasc568c342018-01-25 09:49:01 -0800604
605 // here is where we update all devices not touched by this instance
606 updatedEcmpSpgMap.keySet().stream()
607 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
608 .filter(devId -> !updatedDevices.contains(devId))
609 .forEach(devId -> {
610 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
611 log.debug("Updating ECMPspg for remaining dev:{}", devId);
612 });
Saurav Das7bcbe702017-06-13 15:35:54 -0700613 return true;
614 }
615
616 /**
617 * Programs targetSw in the changedRoutes for given prefixes reachable by
618 * an edgePair. If no prefixes are given, the method will use configured
619 * subnets/prefixes. If some configured subnets belong only to a specific
620 * destination in the edgePair, then the target switch will be programmed
621 * only to that destination.
622 *
623 * @param edgePairs set of edge-pairs for which target will be programmed
624 * @param subnets a set of prefixes that need to be populated in the routing
625 * table of the target switch in the changedRoutes. Can be null,
626 * in which case all the configured prefixes belonging to the
627 * paired switches will be populated in the target switch
628 * @param changedRoutes a set of route-path changes, where each route-path is
629 * a list with its first element the src-switch (target)
630 * of the path, and the second element the dst-switch of
631 * the path.
632 * @return true if successful
633 */
634 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs,
635 Set<IpPrefix> subnets,
636 Set<ArrayList<DeviceId>> changedRoutes) {
637 for (EdgePair ep : edgePairs) {
638 // temp store for a target's changedRoutes to this edge-pair
639 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
640 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
641 while (i.hasNext()) {
642 ArrayList<DeviceId> route = i.next();
643 DeviceId dstSw = route.get(1);
644 if (ep.includes(dstSw)) {
645 // routeChange for edge pair found
646 // sort by target iff target is edge and remove from changedRoutes
647 DeviceId targetSw = route.get(0);
648 try {
649 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
650 continue;
651 }
652 } catch (DeviceConfigNotFoundException e) {
653 log.warn(e.getMessage() + "aborting redoRouting");
654 return false;
655 }
656 // route is from another edge to this edge-pair
657 if (targetRoutes.containsKey(targetSw)) {
658 targetRoutes.get(targetSw).add(route);
659 } else {
660 Set<ArrayList<DeviceId>> temp = new HashSet<>();
661 temp.add(route);
662 targetRoutes.put(targetSw, temp);
663 }
664 i.remove();
665 }
666 }
667 // so now for this edgepair we have a per target set of routechanges
668 // process target->edgePair route
669 for (Map.Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
670 targetRoutes.entrySet()) {
671 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
672 entry.getKey(), ep);
673 DeviceId targetSw = entry.getKey();
674 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
675 entry.getValue().forEach(route -> {
676 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
677 log.debug("route: target {} -> dst {} found with next-hops {}",
678 route.get(0), route.get(1), nhops);
679 perDstNextHops.put(route.get(1), nhops);
680 });
681 Set<IpPrefix> ipDev1 = (subnets == null) ? config.getSubnets(ep.dev1)
682 : subnets;
683 Set<IpPrefix> ipDev2 = (subnets == null) ? config.getSubnets(ep.dev2)
684 : subnets;
685 ipDev1 = (ipDev1 == null) ? Sets.newHashSet() : ipDev1;
686 ipDev2 = (ipDev2 == null) ? Sets.newHashSet() : ipDev2;
Saurav Dasc568c342018-01-25 09:49:01 -0800687 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
688 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
Saurav Das7bcbe702017-06-13 15:35:54 -0700689 // handle routing to subnets common to edge-pair
Saurav Dasc568c342018-01-25 09:49:01 -0800690 // only if the targetSw is not part of the edge-pair and there
691 // exists a next hop to at least one of the devices in the edge-pair
692 if (!ep.includes(targetSw)
693 && ((nhDev1 != null && !nhDev1.isEmpty())
694 || (nhDev2 != null && !nhDev2.isEmpty()))) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700695 if (!populateEcmpRoutingRulePartial(
696 targetSw,
697 ep.dev1, ep.dev2,
698 perDstNextHops,
699 Sets.intersection(ipDev1, ipDev2))) {
700 return false; // abort everything and fail fast
701 }
702 }
Saurav Dasc568c342018-01-25 09:49:01 -0800703 // handle routing to subnets that only belong to dev1 only if
704 // a next-hop exists from the target to dev1
Saurav Das7bcbe702017-06-13 15:35:54 -0700705 Set<IpPrefix> onlyDev1Subnets = Sets.difference(ipDev1, ipDev2);
Saurav Dasc568c342018-01-25 09:49:01 -0800706 if (!onlyDev1Subnets.isEmpty()
707 && nhDev1 != null && !nhDev1.isEmpty()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700708 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
Saurav Dasc568c342018-01-25 09:49:01 -0800709 onlyDev1NextHops.put(ep.dev1, nhDev1);
Saurav Das7bcbe702017-06-13 15:35:54 -0700710 if (!populateEcmpRoutingRulePartial(
711 targetSw,
712 ep.dev1, null,
713 onlyDev1NextHops,
714 onlyDev1Subnets)) {
715 return false; // abort everything and fail fast
716 }
717 }
Saurav Dasc568c342018-01-25 09:49:01 -0800718 // handle routing to subnets that only belong to dev2 only if
719 // a next-hop exists from the target to dev2
Saurav Das7bcbe702017-06-13 15:35:54 -0700720 Set<IpPrefix> onlyDev2Subnets = Sets.difference(ipDev2, ipDev1);
Saurav Dasc568c342018-01-25 09:49:01 -0800721 if (!onlyDev2Subnets.isEmpty()
722 && nhDev2 != null && !nhDev2.isEmpty()) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700723 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
Saurav Dasc568c342018-01-25 09:49:01 -0800724 onlyDev2NextHops.put(ep.dev2, nhDev2);
Saurav Das7bcbe702017-06-13 15:35:54 -0700725 if (!populateEcmpRoutingRulePartial(
726 targetSw,
727 ep.dev2, null,
728 onlyDev2NextHops,
729 onlyDev2Subnets)) {
730 return false; // abort everything and fail fast
731 }
732 }
733 }
734 // if it gets here it has succeeded for all targets to this edge-pair
735 }
736 return true;
737 }
738
739 /**
740 * Programs targetSw in the changedRoutes for given prefixes reachable by
741 * a destination switch that is not part of an edge-pair.
742 * If no prefixes are given, the method will use configured subnets/prefixes.
743 *
744 * @param subnets a set of prefixes that need to be populated in the routing
745 * table of the target switch in the changedRoutes. Can be null,
746 * in which case all the configured prefixes belonging to the
747 * paired switches will be populated in the target switch
748 * @param changedRoutes a set of route-path changes, where each route-path is
749 * a list with its first element the src-switch (target)
750 * of the path, and the second element the dst-switch of
751 * the path.
752 * @return true if successful
753 */
754 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets,
Saurav Dasc568c342018-01-25 09:49:01 -0800755 Set<ArrayList<DeviceId>> changedRoutes,
756 Set<DeviceId> updatedDevices) {
Saurav Das7bcbe702017-06-13 15:35:54 -0700757 // aggregate route-path changes for each dst device
758 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
759 new HashMap<>();
760 for (ArrayList<DeviceId> route: changedRoutes) {
761 DeviceId dstSw = route.get(1);
762 ArrayList<ArrayList<DeviceId>> deviceRoutes =
763 routesBydevice.get(dstSw);
764 if (deviceRoutes == null) {
765 deviceRoutes = new ArrayList<>();
766 routesBydevice.put(dstSw, deviceRoutes);
767 }
768 deviceRoutes.add(route);
769 }
770 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
771 ArrayList<ArrayList<DeviceId>> deviceRoutes =
772 routesBydevice.get(impactedDstDevice);
773 for (ArrayList<DeviceId> route: deviceRoutes) {
774 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
775 route.get(0), route.get(1));
776 DeviceId targetSw = route.get(0);
777 DeviceId dstSw = route.get(1); // same as impactedDstDevice
778 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
Saurav Dasbd071d82018-01-09 17:38:44 -0800779 if (nextHops.isEmpty()) {
780 log.warn("Could not find next hop from target:{} --> dst {} "
781 + "skipping this route", targetSw, dstSw);
782 continue;
783 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700784 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
785 nhops.put(dstSw, nextHops);
786 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
787 (subnets == null) ? Sets.newHashSet() : subnets)) {
788 return false; // abort routing and fail fast
789 }
790 log.debug("Populating flow rules from target: {} to dst: {}"
791 + " is successful", targetSw, dstSw);
792 }
793 //Only if all the flows for all impacted routes to a
794 //specific target are pushed successfully, update the
795 //ECMP graph for that target. Or else the next event
796 //would not see any changes in the ECMP graphs.
797 //In another case, the target switch has gone away, so
798 //routes can't be installed. In that case, the current map
799 //is updated here, without any flows being pushed.
800 currentEcmpSpgMap.put(impactedDstDevice,
801 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Dasc568c342018-01-25 09:49:01 -0800802 updatedDevices.add(impactedDstDevice);
Saurav Das7bcbe702017-06-13 15:35:54 -0700803 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
804 }
805 return true;
806 }
807
808 /**
809 * Populate ECMP rules for subnets from target to destination via nexthops.
810 *
811 * @param targetSw Device ID of target switch in which rules will be programmed
812 * @param destSw1 Device ID of final destination switch to which the rules will forward
813 * @param destSw2 Device ID of paired destination switch to which the rules will forward
814 * A null deviceId indicates packets should only be sent to destSw1
Saurav Dasa4020382018-02-14 14:14:54 -0800815 * @param nextHops Map of a set of next hops per destSw
Saurav Das7bcbe702017-06-13 15:35:54 -0700816 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
817 * @return true if it succeeds in populating rules
818 */ // refactor
819 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw,
820 DeviceId destSw1,
821 DeviceId destSw2,
822 Map<DeviceId, Set<DeviceId>> nextHops,
823 Set<IpPrefix> subnets) {
824 boolean result;
825 // If both target switch and dest switch are edge routers, then set IP
826 // rule for both subnet and router IP.
827 boolean targetIsEdge;
828 boolean dest1IsEdge;
829 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
830 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
831
832 try {
833 targetIsEdge = config.isEdgeDevice(targetSw);
834 dest1IsEdge = config.isEdgeDevice(destSw1);
835 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
836 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
837 if (destSw2 != null) {
838 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
839 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
840 }
841 } catch (DeviceConfigNotFoundException e) {
842 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Dasc88d4662017-05-15 15:34:25 -0700843 return false;
844 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700845
846 if (targetIsEdge && dest1IsEdge) {
847 subnets = (subnets != null && !subnets.isEmpty())
848 ? Sets.newHashSet(subnets)
849 : Sets.newHashSet(config.getSubnets(destSw1));
Saurav Dasa4020382018-02-14 14:14:54 -0800850 // XXX - Rethink this - ignoring routerIPs in all other switches
851 // even edge to edge switches
Saurav Das7bcbe702017-06-13 15:35:54 -0700852 /*subnets.add(dest1RouterIpv4.toIpPrefix());
853 if (dest1RouterIpv6 != null) {
854 subnets.add(dest1RouterIpv6.toIpPrefix());
855 }
856 if (destSw2 != null && dest2RouterIpv4 != null) {
857 subnets.add(dest2RouterIpv4.toIpPrefix());
858 if (dest2RouterIpv6 != null) {
859 subnets.add(dest2RouterIpv6.toIpPrefix());
860 }
861 }*/
862 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
863 + "for subnets {}", targetSw, destSw1,
864 (destSw2 != null) ? ("& " + destSw2) : "",
865 subnets);
866 result = rulePopulator.populateIpRuleForSubnet(targetSw, subnets,
867 destSw1, destSw2,
868 nextHops);
869 if (!result) {
870 return false;
871 }
Saurav Dasc88d4662017-05-15 15:34:25 -0700872 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700873
874 if (!targetIsEdge && dest1IsEdge) {
875 // MPLS rules in all non-edge target devices. These rules are for
876 // individual destinations, even if the dsts are part of edge-pairs.
877 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
878 + "all MPLS rules", targetSw, destSw1);
879 result = rulePopulator.populateMplsRule(targetSw, destSw1,
880 nextHops.get(destSw1),
881 dest1RouterIpv4);
882 if (!result) {
883 return false;
884 }
885 if (dest1RouterIpv6 != null) {
Saurav Dasa4020382018-02-14 14:14:54 -0800886 int v4sid = 0, v6sid = 0;
887 try {
888 v4sid = config.getIPv4SegmentId(destSw1);
889 v6sid = config.getIPv6SegmentId(destSw1);
890 } catch (DeviceConfigNotFoundException e) {
891 log.warn(e.getMessage());
892 }
893 if (v4sid != v6sid) {
894 result = rulePopulator.populateMplsRule(targetSw, destSw1,
895 nextHops.get(destSw1),
896 dest1RouterIpv6);
897 if (!result) {
898 return false;
899 }
Saurav Das7bcbe702017-06-13 15:35:54 -0700900 }
901 }
902 }
903
Andreas Pantelopoulosff691b72018-03-12 16:30:20 -0700904 if (!targetIsEdge && !dest1IsEdge) {
905 // MPLS rules for inter-connected spines
906 // can be merged with above if, left it here for clarity
907 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
908 + "all MPLS rules", targetSw, destSw1);
909
910 result = rulePopulator.populateMplsRule(targetSw, destSw1,
911 nextHops.get(destSw1),
912 dest1RouterIpv4);
913 if (!result) {
914 return false;
915 }
916
917 if (dest1RouterIpv6 != null) {
918 int v4sid = 0, v6sid = 0;
919 try {
920 v4sid = config.getIPv4SegmentId(destSw1);
921 v6sid = config.getIPv6SegmentId(destSw1);
922 } catch (DeviceConfigNotFoundException e) {
923 log.warn(e.getMessage());
924 }
925 if (v4sid != v6sid) {
926 result = rulePopulator.populateMplsRule(targetSw, destSw1,
927 nextHops.get(destSw1),
928 dest1RouterIpv6);
929 if (!result) {
930 return false;
931 }
932 }
933 }
934 }
935
936
Saurav Das7bcbe702017-06-13 15:35:54 -0700937 // To save on ECMP groups
938 // avoid MPLS rules in non-edge-devices to non-edge-devices
939 // avoid MPLS transit rules in edge-devices
940 // avoid loopback IP rules in edge-devices to non-edge-devices
941 return true;
Saurav Dasc88d4662017-05-15 15:34:25 -0700942 }
943
944 /**
945 * Processes a set a route-path changes by editing hash groups.
946 *
947 * @param routeChanges a set of route-path changes, where each route-path is
948 * a list with its first element the src-switch of the path
949 * and the second element the dst-switch of the path.
950 * @param linkOrSwitchFailed true if the route changes are for a failed
951 * switch or linkDown event
952 * @param failedSwitch the switchId if the route changes are for a failed switch,
953 * otherwise null
954 */
955 private void processHashGroupChange(Set<ArrayList<DeviceId>> routeChanges,
956 boolean linkOrSwitchFailed,
957 DeviceId failedSwitch) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700958 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
959 // first, ensure each routeChanges entry has two elements
Saurav Dasc88d4662017-05-15 15:34:25 -0700960 for (ArrayList<DeviceId> route : routeChanges) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700961 if (route.size() == 1) {
962 // route-path changes are from everyone else to this switch
963 DeviceId dstSw = route.get(0);
964 srManager.deviceService.getAvailableDevices().forEach(sw -> {
965 if (!sw.id().equals(dstSw)) {
966 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
967 }
968 });
969 } else {
970 changedRoutes.add(route);
Saurav Dasc88d4662017-05-15 15:34:25 -0700971 }
Saurav Das9df5b7c2017-08-14 16:44:43 -0700972 }
Saurav Dasc568c342018-01-25 09:49:01 -0800973 boolean someFailed = false;
974 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Das9df5b7c2017-08-14 16:44:43 -0700975 for (ArrayList<DeviceId> route : changedRoutes) {
976 DeviceId targetSw = route.get(0);
977 DeviceId dstSw = route.get(1);
Saurav Dasc88d4662017-05-15 15:34:25 -0700978 if (linkOrSwitchFailed) {
Saurav Das9df5b7c2017-08-14 16:44:43 -0700979 boolean success = fixHashGroupsForRoute(route, true);
Saurav Dasc88d4662017-05-15 15:34:25 -0700980 // it's possible that we cannot fix hash groups for a route
981 // if the target switch has failed. Nevertheless the ecmp graph
982 // for the impacted switch must still be updated.
Saurav Das9df5b7c2017-08-14 16:44:43 -0700983 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
Saurav Dasc88d4662017-05-15 15:34:25 -0700984 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
985 currentEcmpSpgMap.remove(targetSw);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700986 log.debug("Updating ECMPspg for dst:{} removing failed switch "
Saurav Dasc88d4662017-05-15 15:34:25 -0700987 + "target:{}", dstSw, targetSw);
Saurav Dasc568c342018-01-25 09:49:01 -0800988 updatedDevices.add(targetSw);
989 updatedDevices.add(dstSw);
Saurav Das9df5b7c2017-08-14 16:44:43 -0700990 continue;
Saurav Dasc88d4662017-05-15 15:34:25 -0700991 }
992 //linkfailed - update both sides
Saurav Dasc88d4662017-05-15 15:34:25 -0700993 if (success) {
994 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
Saurav Das9df5b7c2017-08-14 16:44:43 -0700995 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
Saurav Dasc568c342018-01-25 09:49:01 -0800996 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
997 + " or switchdown", dstSw, targetSw);
998 updatedDevices.add(targetSw);
999 updatedDevices.add(dstSw);
1000 } else {
1001 someFailed = true;
Saurav Das9df5b7c2017-08-14 16:44:43 -07001002 }
1003 } else {
1004 //linkup of seen before link
1005 boolean success = fixHashGroupsForRoute(route, false);
1006 if (success) {
1007 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1008 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1009 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
Saurav Dasc88d4662017-05-15 15:34:25 -07001010 targetSw, dstSw);
Saurav Dasc568c342018-01-25 09:49:01 -08001011 updatedDevices.add(targetSw);
1012 updatedDevices.add(dstSw);
1013 } else {
1014 someFailed = true;
Saurav Dasc88d4662017-05-15 15:34:25 -07001015 }
1016 }
1017 }
Saurav Dasc568c342018-01-25 09:49:01 -08001018 if (!someFailed) {
1019 // here is where we update all devices not touched by this instance
1020 updatedEcmpSpgMap.keySet().stream()
1021 .filter(devId -> !updatedDevices.contains(devId))
1022 .forEach(devId -> {
1023 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1024 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1025 });
1026 }
Saurav Dasc88d4662017-05-15 15:34:25 -07001027 }
1028
1029 /**
1030 * Edits hash groups in the src-switch (targetSw) of a route-path by
1031 * calling the groupHandler to either add or remove buckets in an existing
1032 * hash group.
1033 *
1034 * @param route a single list representing a route-path where the first element
1035 * is the src-switch (targetSw) of the route-path and the
1036 * second element is the dst-switch
1037 * @param revoke true if buckets in the hash-groups need to be removed;
1038 * false if buckets in the hash-groups need to be added
1039 * @return true if the hash group editing is successful
1040 */
1041 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1042 boolean revoke) {
1043 DeviceId targetSw = route.get(0);
1044 if (route.size() < 2) {
1045 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1046 return false;
1047 }
1048 DeviceId destSw = route.get(1);
Saurav Das9df5b7c2017-08-14 16:44:43 -07001049 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Dasc88d4662017-05-15 15:34:25 -07001050 targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001051 // figure out the new next hops at the targetSw towards the destSw
Saurav Das9df5b7c2017-08-14 16:44:43 -07001052 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Dasc88d4662017-05-15 15:34:25 -07001053 // call group handler to change hash group at targetSw
1054 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1055 if (grpHandler == null) {
1056 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1057 + " {} hash group buckets for route:{} ", targetSw,
1058 (revoke) ? "revoke" : "repopulate", route);
1059 return false;
1060 }
1061 log.debug("{} hash-groups buckets For Route {} -> {} to next-hops {}",
1062 (revoke) ? "revoke" : "repopulating",
1063 targetSw, destSw, nextHops);
1064 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1065 destSw, true)
1066 : grpHandler.fixHashGroups(targetSw, nextHops,
1067 destSw, false);
1068 }
1069
1070 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001071 * Start the flow rule population process if it was never started. The
1072 * process finishes successfully when all flow rules are set and stops with
1073 * ABORTED status when any groups required for flows is not set yet.
Saurav Dasc88d4662017-05-15 15:34:25 -07001074 */
Saurav Das7bcbe702017-06-13 15:35:54 -07001075 public void startPopulationProcess() {
1076 statusLock.lock();
1077 try {
1078 if (populationStatus == Status.IDLE
1079 || populationStatus == Status.SUCCEEDED
1080 || populationStatus == Status.ABORTED) {
1081 populateAllRoutingRules();
sangho45b009c2015-05-07 13:30:57 -07001082 } else {
Saurav Das7bcbe702017-06-13 15:35:54 -07001083 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1084 populationStatus);
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001085 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001086 } finally {
1087 statusLock.unlock();
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001088 }
sangho20eff1d2015-04-13 15:15:58 -07001089 }
1090
Saurav Dasb5c236e2016-06-07 10:08:06 -07001091 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001092 * Revoke rules of given subnet in all edge switches.
1093 *
1094 * @param subnets subnet being removed
1095 * @return true if succeed
1096 */
1097 protected boolean revokeSubnet(Set<IpPrefix> subnets) {
1098 statusLock.lock();
1099 try {
Charles Chan2ff1bac2018-03-29 16:03:41 -07001100 return Sets.newHashSet(srManager.deviceService.getAvailableDevices()).stream()
1101 .map(Device::id)
1102 .filter(this::shouldProgram)
1103 .allMatch(targetSw -> srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets));
Saurav Das7bcbe702017-06-13 15:35:54 -07001104 } finally {
1105 statusLock.unlock();
1106 }
1107 }
1108
1109 /**
Charles Chan2fde6d42017-08-23 14:46:43 -07001110 * Populates IP rules for a route that has direct connection to the switch
1111 * if the current instance is the master of the switch.
1112 *
1113 * @param deviceId device ID of the device that next hop attaches to
1114 * @param prefix IP prefix of the route
1115 * @param hostMac MAC address of the next hop
1116 * @param hostVlanId Vlan ID of the nexthop
1117 * @param outPort port where the next hop attaches to
1118 */
1119 void populateRoute(DeviceId deviceId, IpPrefix prefix,
1120 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
Charles Chan2ff1bac2018-03-29 16:03:41 -07001121 if (shouldProgram(deviceId)) {
Charles Chan2fde6d42017-08-23 14:46:43 -07001122 srManager.routingRulePopulator.populateRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1123 }
1124 }
1125
1126 /**
1127 * Removes IP rules for a route when the next hop is gone.
1128 * if the current instance is the master of the switch.
1129 *
1130 * @param deviceId device ID of the device that next hop attaches to
1131 * @param prefix IP prefix of the route
1132 * @param hostMac MAC address of the next hop
1133 * @param hostVlanId Vlan ID of the nexthop
1134 * @param outPort port that next hop attaches to
1135 */
1136 void revokeRoute(DeviceId deviceId, IpPrefix prefix,
1137 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort) {
Charles Chan2ff1bac2018-03-29 16:03:41 -07001138 if (shouldProgram(deviceId)) {
Charles Chan2fde6d42017-08-23 14:46:43 -07001139 srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId, outPort);
1140 }
1141 }
1142
Charles Chan2ff1bac2018-03-29 16:03:41 -07001143 void populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1144 if (shouldProgram(deviceId)) {
1145 srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
1146 }
1147 }
1148
1149 void revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
1150 if (shouldProgram(deviceId)) {
1151 srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
1152 }
1153 }
1154
1155 void updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1156 VlanId vlanId, boolean popVlan, boolean install) {
1157 if (shouldProgram(deviceId)) {
1158 srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
1159 }
1160 }
1161
1162 void updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix, MacAddress hostMac,
1163 VlanId vlanId, boolean popVlan, boolean install) {
1164 if (shouldProgram(deviceId)) {
1165 srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
1166 vlanId, popVlan, install);
1167 }
1168 }
1169
Charles Chan2fde6d42017-08-23 14:46:43 -07001170 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001171 * Remove ECMP graph entry for the given device. Typically called when
1172 * device is no longer available.
1173 *
1174 * @param deviceId the device for which graphs need to be purged
1175 */
Charles Chan50bb6ef2018-04-18 18:41:05 -07001176 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Dasc568c342018-01-25 09:49:01 -08001177 statusLock.lock();
1178 try {
1179
1180 if (populationStatus == Status.STARTED) {
1181 log.warn("Previous rule population is not finished. Cannot"
1182 + " proceeed with purgeEcmpGraph for {}", deviceId);
1183 return;
1184 }
1185 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1186 currentEcmpSpgMap.remove(deviceId);
1187 if (updatedEcmpSpgMap != null) {
1188 updatedEcmpSpgMap.remove(deviceId);
1189 }
1190 } finally {
1191 statusLock.unlock();
Saurav Das7bcbe702017-06-13 15:35:54 -07001192 }
1193 }
1194
Saurav Das201762d2018-04-21 17:19:48 -07001195 /**
1196 * Attempts a full reroute of route-paths if topology has changed relatively
1197 * close to a mastership change event. Does not do a reroute if mastership
1198 * change is due to reasons other than a ONOS cluster event - for example a
1199 * call to balance-masters, or a switch up/down event.
1200 *
1201 * @param devId the device identifier for which mastership has changed
1202 * @param me the mastership event
1203 */
1204 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1205 // give small delay to absorb mastership events that are caused by
1206 // device that has disconnected from cluster
1207 executorService.schedule(new MasterChange(devId, me),
1208 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
1209 }
1210
1211 protected final class MasterChange implements Runnable {
1212 private DeviceId devId;
1213 private MastershipEvent me;
1214 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1215 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
1216
1217 MasterChange(DeviceId devId, MastershipEvent me) {
1218 this.devId = devId;
1219 this.me = me;
1220 }
1221
1222 @Override
1223 public void run() {
1224 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1225 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1226
1227 // ignore event for lost switch if cluster event hasn't happened -
1228 // device down event will handle it
1229 if ((me.roleInfo().master() == null
1230 || !srManager.deviceService.isAvailable(devId))
1231 && !clusterEvent) {
1232 log.debug("Full reroute not required for lost device: {}/{} "
1233 + "clusterEvent/timeSince: {}/{}",
1234 devId, me.roleInfo(), clusterEvent, lce);
1235 return;
1236 }
1237
1238 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1239 long lde = Instant.now().toEpochMilli() - update;
1240 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1241
1242 // ignore event for recently connected switch if cluster event hasn't
1243 // happened - link up events will handle it
1244 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1245 && !clusterEvent) {
1246 log.debug("Full reroute not required for recently available"
1247 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1248 + "clusterEvent/timeSince: {}/{}",
1249 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1250 return;
1251 }
1252
1253 // if it gets here, then mastership change is likely due to onos
1254 // instance failure, or network partition in onos cluster
1255 // normally a mastership change like this does not require re-programming
1256 // but if topology changes happen at the same time then we may miss events
1257 if (!isRoutingStable() && clusterEvent) {
1258 log.warn("Mastership changed for dev: {}/{} while programming "
1259 + "due to clusterEvent {} ms ago .. attempting full reroute",
1260 devId, me.roleInfo(), lce);
1261 if (srManager.mastershipService.isLocalMaster(devId)) {
1262 // old master could have died when populating filters
1263 populatePortAddressingRules(devId);
1264 }
1265 // old master could have died when creating groups
1266 srManager.purgeHashedNextObjectiveStore(devId);
1267 // XXX right now we have no fine-grained way to only make changes
1268 // for the route paths affected by this device.
1269 populateAllRoutingRules();
1270 } else {
1271 log.debug("Stable route-paths .. full reroute not attempted for "
1272 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1273 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1274 deviceEvent, lde, clusterEvent, lce);
1275 }
1276 }
1277 }
1278
Saurav Das7bcbe702017-06-13 15:35:54 -07001279 //////////////////////////////////////
1280 // Routing helper methods and classes
1281 //////////////////////////////////////
1282
1283 /**
Saurav Das4e3224f2016-11-29 14:27:25 -08001284 * Computes set of affected routes due to failed link. Assumes
Saurav Dasb5c236e2016-06-07 10:08:06 -07001285 * previous ecmp shortest-path graph exists for a switch in order to compute
1286 * affected routes. If such a graph does not exist, the method returns null.
1287 *
1288 * @param linkFail the failed link
1289 * @return the set of affected routes which may be empty if no routes were
1290 * affected, or null if no previous ecmp spg was found for comparison
1291 */
sangho20eff1d2015-04-13 15:15:58 -07001292 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sangho20eff1d2015-04-13 15:15:58 -07001293 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1294
1295 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001296 log.debug("Computing the impacted routes for device {} due to link fail",
1297 sw.id());
Charles Chan2ff1bac2018-03-29 16:03:41 -07001298 if (!shouldProgram(sw.id())) {
Saurav Das201762d2018-04-21 17:19:48 -07001299 lastProgrammed.remove(sw.id());
sangho20eff1d2015-04-13 15:15:58 -07001300 continue;
1301 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001302 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das201762d2018-04-21 17:19:48 -07001303 // check for mastership change since last run
1304 if (!lastProgrammed.contains(sw.id())) {
1305 lastProgrammed.add(sw.id());
1306 log.warn("New reponsibility for this node to program dev:{}"
1307 + " ... nuking current ECMPspg", sw.id());
1308 currentEcmpSpgMap.remove(sw.id());
1309 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001310 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1311 if (ecmpSpg == null) {
1312 log.warn("No existing ECMP graph for switch {}. Aborting optimized"
1313 + " rerouting and opting for full-reroute", rootSw);
1314 return null;
1315 }
1316 if (log.isDebugEnabled()) {
1317 log.debug("Root switch: {}", rootSw);
1318 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1319 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1320 }
1321 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1322 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1323 // figure out if the broken link affected any route-paths in this graph
1324 for (Integer itrIdx : switchVia.keySet()) {
1325 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1326 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1327 switchVia.get(itrIdx);
1328 for (DeviceId targetSw : swViaMap.keySet()) {
1329 log.trace("TargetSwitch {} --> RootSwitch {}",
1330 targetSw, rootSw);
Saurav Dasb5c236e2016-06-07 10:08:06 -07001331 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1332 log.trace(" Via:");
Pier Ventree0ae7a32016-11-23 09:57:42 -08001333 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb5c236e2016-06-07 10:08:06 -07001334 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001335 Set<ArrayList<DeviceId>> subLinks =
1336 computeLinks(targetSw, rootSw, swViaMap);
1337 for (ArrayList<DeviceId> alink: subLinks) {
1338 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1339 alink.get(1).equals(linkFail.dst().deviceId()))
1340 ||
1341 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1342 alink.get(1).equals(linkFail.src().deviceId()))) {
1343 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1344 ArrayList<DeviceId> aRoute = new ArrayList<>();
1345 aRoute.add(targetSw); // switch with rules to populate
1346 aRoute.add(rootSw); // towards this destination
1347 routes.add(aRoute);
1348 break;
1349 }
sangho20eff1d2015-04-13 15:15:58 -07001350 }
1351 }
1352 }
Saurav Das9df5b7c2017-08-14 16:44:43 -07001353
sangho20eff1d2015-04-13 15:15:58 -07001354 }
sangho45b009c2015-05-07 13:30:57 -07001355
sangho20eff1d2015-04-13 15:15:58 -07001356 }
sangho20eff1d2015-04-13 15:15:58 -07001357 return routes;
1358 }
1359
Saurav Das4e3224f2016-11-29 14:27:25 -08001360 /**
1361 * Computes set of affected routes due to new links or failed switches.
1362 *
Saurav Das604ab3a2018-03-18 21:28:15 -07001363 * @param failedSwitch deviceId of failed switch if any
Saurav Das4e3224f2016-11-29 14:27:25 -08001364 * @return the set of affected routes which may be empty if no routes were
1365 * affected
1366 */
Saurav Dase0d4c872018-03-05 14:37:16 -08001367 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001368 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das4e3224f2016-11-29 14:27:25 -08001369 ImmutableSet.builder();
sangho20eff1d2015-04-13 15:15:58 -07001370
1371 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001372 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chan2ff1bac2018-03-29 16:03:41 -07001373 if (!shouldProgram(sw.id())) {
Saurav Das201762d2018-04-21 17:19:48 -07001374 lastProgrammed.remove(sw.id());
sangho20eff1d2015-04-13 15:15:58 -07001375 continue;
1376 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001377 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das7bcbe702017-06-13 15:35:54 -07001378 if (log.isTraceEnabled()) {
1379 log.trace("Device links for dev: {}", rootSw);
1380 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1381 log.trace("{} -> {} ", link.src().deviceId(),
1382 link.dst().deviceId());
1383 }
Saurav Dasb5c236e2016-06-07 10:08:06 -07001384 }
Saurav Das201762d2018-04-21 17:19:48 -07001385 // check for mastership change since last run
1386 if (!lastProgrammed.contains(sw.id())) {
1387 lastProgrammed.add(sw.id());
1388 log.warn("New reponsibility for this node to program dev:{}"
1389 + " ... nuking current ECMPspg", sw.id());
1390 currentEcmpSpgMap.remove(sw.id());
1391 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001392 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1393 if (currEcmpSpg == null) {
1394 log.debug("No existing ECMP graph for device {}.. adding self as "
1395 + "changed route", rootSw);
1396 changedRtBldr.add(Lists.newArrayList(rootSw));
1397 continue;
1398 }
1399 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Das5a356042018-04-06 20:16:01 -07001400 if (newEcmpSpg == null) {
1401 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1402 continue;
1403 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001404 if (log.isDebugEnabled()) {
1405 log.debug("Root switch: {}", rootSw);
1406 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1407 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1408 }
1409 // first use the updated/new map to compare to current/existing map
1410 // as new links may have come up
1411 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1412 // then use the current/existing map to compare to updated/new map
1413 // as switch may have been removed
1414 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho45b009c2015-05-07 13:30:57 -07001415 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001416 }
sangho20eff1d2015-04-13 15:15:58 -07001417
Saurav Dase0d4c872018-03-05 14:37:16 -08001418 // handle clearing state for a failed switch in case the switch does
1419 // not have a pair, or the pair is not available
1420 if (failedSwitch != null) {
Charles Chanba6c5752018-04-02 11:46:38 -07001421 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1422 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dase0d4c872018-03-05 14:37:16 -08001423 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1424 srManager.deviceService.getDevices().forEach(dev -> {
1425 if (!dev.id().equals(failedSwitch) &&
1426 srManager.mastershipService.isLocalMaster(dev.id())) {
1427 log.debug(" : {}", dev.id());
1428 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1429 }
1430 });
1431 }
1432 }
1433
Saurav Das7bcbe702017-06-13 15:35:54 -07001434 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das4e3224f2016-11-29 14:27:25 -08001435 for (ArrayList<DeviceId> route: changedRoutes) {
1436 log.debug("Route changes Target -> Root");
1437 if (route.size() == 1) {
1438 log.debug(" : all -> {}", route.get(0));
1439 } else {
1440 log.debug(" : {} -> {}", route.get(0), route.get(1));
1441 }
1442 }
1443 return changedRoutes;
1444 }
1445
1446 /**
1447 * For the root switch, searches all the target nodes reachable in the base
1448 * graph, and compares paths to the ones in the comp graph.
1449 *
1450 * @param base the graph that is indexed for all reachable target nodes
1451 * from the root node
1452 * @param comp the graph that the base graph is compared to
1453 * @param rootSw both ecmp graphs are calculated for the root node
1454 * @return all the routes that have changed in the base graph
1455 */
1456 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1457 EcmpShortestPathGraph comp,
1458 DeviceId rootSw) {
1459 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1460 ImmutableSet.builder();
1461 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1462 base.getAllLearnedSwitchesAndVia();
1463 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1464 comp.getAllLearnedSwitchesAndVia();
1465 for (Integer itrIdx : baseMap.keySet()) {
1466 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1467 baseMap.get(itrIdx);
1468 for (DeviceId targetSw : baseViaMap.keySet()) {
1469 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1470 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1471 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Dasc88d4662017-05-15 15:34:25 -07001472 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das4e3224f2016-11-29 14:27:25 -08001473 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das7bcbe702017-06-13 15:35:54 -07001474 route.add(targetSw); // switch with rules to populate
1475 route.add(rootSw); // towards this destination
Saurav Das4e3224f2016-11-29 14:27:25 -08001476 changedRoutesBuilder.add(route);
sangho20eff1d2015-04-13 15:15:58 -07001477 }
1478 }
sangho45b009c2015-05-07 13:30:57 -07001479 }
Saurav Das4e3224f2016-11-29 14:27:25 -08001480 return changedRoutesBuilder.build();
sangho20eff1d2015-04-13 15:15:58 -07001481 }
1482
Saurav Das7bcbe702017-06-13 15:35:54 -07001483 /**
1484 * Returns the ECMP paths traversed to reach the target switch.
1485 *
1486 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1487 * @param targetSw the switch to reach from the root switch
1488 * @return the nodes traversed on ECMP paths to the target switch
1489 */
sangho20eff1d2015-04-13 15:15:58 -07001490 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das4e3224f2016-11-29 14:27:25 -08001491 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sangho20eff1d2015-04-13 15:15:58 -07001492 for (Integer itrIdx : switchVia.keySet()) {
1493 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1494 switchVia.get(itrIdx);
Saurav Das4e3224f2016-11-29 14:27:25 -08001495 if (swViaMap.get(targetSw) == null) {
sangho20eff1d2015-04-13 15:15:58 -07001496 continue;
1497 } else {
Saurav Das4e3224f2016-11-29 14:27:25 -08001498 return swViaMap.get(targetSw);
sangho20eff1d2015-04-13 15:15:58 -07001499 }
1500 }
1501
Srikanth Vavilapalli5428b6c2015-05-14 20:22:47 -07001502 return null;
sangho20eff1d2015-04-13 15:15:58 -07001503 }
1504
Saurav Das7bcbe702017-06-13 15:35:54 -07001505 /**
1506 * Utility method to break down a path from src to dst device into a collection
1507 * of links.
1508 *
1509 * @param src src device of the path
1510 * @param dst dst device of the path
1511 * @param viaMap path taken from src to dst device
1512 * @return collection of links in the path
1513 */
sangho20eff1d2015-04-13 15:15:58 -07001514 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1515 DeviceId dst,
1516 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1517 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1518 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1519 DeviceId linkSrc = src;
1520 DeviceId linkDst = dst;
1521 for (DeviceId viaDevice: via) {
1522 ArrayList<DeviceId> link = new ArrayList<>();
1523 linkDst = viaDevice;
1524 link.add(linkSrc);
1525 link.add(linkDst);
1526 subLinks.add(link);
1527 linkSrc = viaDevice;
1528 }
1529 ArrayList<DeviceId> link = new ArrayList<>();
1530 link.add(linkSrc);
1531 link.add(dst);
1532 subLinks.add(link);
1533 }
1534
1535 return subLinks;
1536 }
1537
Charles Chan93e71ba2016-04-29 14:38:22 -07001538 /**
Charles Chan2ff1bac2018-03-29 16:03:41 -07001539 * Determines whether this controller instance should program the
Saurav Das7bcbe702017-06-13 15:35:54 -07001540 * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
Charles Chan2ff1bac2018-03-29 16:03:41 -07001541 * <p>
1542 * Once an instance is elected, it will be the only instance responsible for programming
1543 * both devices in the pair until it goes down.
Charles Chan93e71ba2016-04-29 14:38:22 -07001544 *
Saurav Das7bcbe702017-06-13 15:35:54 -07001545 * @param deviceId device identifier to consider for routing
Charles Chan2ff1bac2018-03-29 16:03:41 -07001546 * @return true if current instance should handle the routing for given device
Charles Chan93e71ba2016-04-29 14:38:22 -07001547 */
Charles Chan2ff1bac2018-03-29 16:03:41 -07001548 boolean shouldProgram(DeviceId deviceId) {
Charles Chan50bb6ef2018-04-18 18:41:05 -07001549 Boolean cached = shouldProgramCache.get(deviceId);
1550 if (cached != null) {
Saurav Das201762d2018-04-21 17:19:48 -07001551 log.debug("shouldProgram dev:{} cached:{}", deviceId, cached);
Charles Chan50bb6ef2018-04-18 18:41:05 -07001552 return cached;
1553 }
1554
Charles Chan2ff1bac2018-03-29 16:03:41 -07001555 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
sanghob35a6192015-04-01 13:05:26 -07001556
Charles Chan2ff1bac2018-03-29 16:03:41 -07001557 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
1558 NodeId masterNodeId = srManager.mastershipService.getMasterFor(deviceId);
1559 Optional<NodeId> pairMasterNodeId = pairDeviceId.map(srManager.mastershipService::getMasterFor);
1560 log.debug("Evaluate shouldProgram {}/pair={}. current={}, master={}, pairMaster={}",
1561 deviceId, pairDeviceId, currentNodeId, masterNodeId, pairMasterNodeId);
1562
1563 // No pair device configured. Only handle when current instance is the master of the device
1564 if (!pairDeviceId.isPresent()) {
1565 log.debug("No pair device. current={}, master={}", currentNodeId, masterNodeId);
1566 return currentNodeId.equals(masterNodeId);
sanghob35a6192015-04-01 13:05:26 -07001567 }
Charles Chan2ff1bac2018-03-29 16:03:41 -07001568
1569 // Should not handle if current instance is not the master of either switch
1570 if (!currentNodeId.equals(masterNodeId) &&
1571 !(pairMasterNodeId.isPresent() && currentNodeId.equals(pairMasterNodeId.get()))) {
1572 log.debug("Current node {} is neither the master of target device {} nor pair device {}",
1573 currentNodeId, deviceId, pairDeviceId);
1574 return false;
1575 }
1576
1577 Set<DeviceId> key = Sets.newHashSet(deviceId, pairDeviceId.get());
1578
1579 NodeId king = shouldProgram.compute(key, ((k, v) -> {
1580 if (v == null) {
1581 // There is no value in the map. Elect a node
1582 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1583 } else {
1584 if (v.equals(masterNodeId) || v.equals(pairMasterNodeId.orElse(null))) {
1585 // Use the node in the map if it is still alive and is a master of any of the two switches
1586 return v;
1587 } else {
1588 // Previously elected node is no longer the master of either switch. Re-elect a node.
1589 return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
1590 }
1591 }
1592 }));
1593
1594 if (king != null) {
1595 log.debug("{} should handle routing for {}/pair={}", king, deviceId, pairDeviceId);
Charles Chan50bb6ef2018-04-18 18:41:05 -07001596 shouldProgramCache.put(deviceId, king.equals(currentNodeId));
Charles Chan2ff1bac2018-03-29 16:03:41 -07001597 return king.equals(currentNodeId);
1598 } else {
1599 log.error("Fail to elect a king for {}/pair={}. Abort.", deviceId, pairDeviceId);
Charles Chan50bb6ef2018-04-18 18:41:05 -07001600 shouldProgramCache.remove(deviceId);
Charles Chan2ff1bac2018-03-29 16:03:41 -07001601 return false;
1602 }
1603 }
1604
1605 /**
1606 * Elects a node who should take responsibility of programming devices.
1607 * @param nodeIds list of candidate node ID
1608 *
1609 * @return NodeId of the node that gets elected, or null if none of the node can be elected
1610 */
1611 private NodeId elect(List<NodeId> nodeIds) {
1612 // Remove all null elements. This could happen when some device has no master
1613 nodeIds.removeAll(Collections.singleton(null));
1614 nodeIds.sort(null);
1615 return nodeIds.size() == 0 ? null : nodeIds.get(0);
1616 }
1617
Charles Chan50bb6ef2018-04-18 18:41:05 -07001618 void invalidateShouldProgramCache(DeviceId deviceId) {
1619 shouldProgramCache.remove(deviceId);
1620 }
1621
Charles Chan2ff1bac2018-03-29 16:03:41 -07001622 /**
1623 * Returns a set of device ID, containing given device and its pair device if exist.
1624 *
1625 * @param deviceId Device ID
1626 * @return a set of device ID, containing given device and its pair device if exist.
1627 */
1628 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
1629 Set<DeviceId> ret = Sets.newHashSet(deviceId);
1630 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
1631 return ret;
sanghob35a6192015-04-01 13:05:26 -07001632 }
1633
Charles Chan93e71ba2016-04-29 14:38:22 -07001634 /**
Saurav Das7bcbe702017-06-13 15:35:54 -07001635 * Returns the set of deviceIds which are the next hops from the targetSw
1636 * to the dstSw according to the latest ECMP spg.
1637 *
1638 * @param targetSw the switch for which the next-hops are desired
1639 * @param dstSw the switch to which the next-hops lead to from the targetSw
1640 * @return set of next hop deviceIds, could be empty if no next hops are found
1641 */
1642 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
1643 boolean targetIsEdge = false;
1644 try {
1645 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
1646 } catch (DeviceConfigNotFoundException e) {
1647 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
1648 + "continuing to getNextHops", targetSw);
1649 }
1650
1651 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
1652 if (ecmpSpg == null) {
1653 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
1654 return ImmutableSet.of();
1655 }
1656 HashMap<Integer,
1657 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
1658 ecmpSpg.getAllLearnedSwitchesAndVia();
1659 for (Integer itrIdx : switchVia.keySet()) {
1660 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1661 switchVia.get(itrIdx);
1662 for (DeviceId target : swViaMap.keySet()) {
1663 if (!target.equals(targetSw)) {
1664 continue;
1665 }
1666 if (!targetIsEdge && itrIdx > 1) {
Saurav Dasa4020382018-02-14 14:14:54 -08001667 // optimization for spines to not use leaves to get
1668 // to a spine or other leaves
1669 boolean pathdevIsEdge = false;
1670 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1671 for (DeviceId pathdev : via) {
1672 try {
1673 pathdevIsEdge = srManager.deviceConfiguration
1674 .isEdgeDevice(pathdev);
1675 } catch (DeviceConfigNotFoundException e) {
1676 log.warn(e.getMessage());
1677 }
1678 if (pathdevIsEdge) {
1679 log.debug("Avoiding {} hop path for non-edge targetSw:{}"
1680 + " --> dstSw:{} which goes through an edge"
1681 + " device {} in path {}", itrIdx,
1682 targetSw, dstSw, pathdev, via);
1683 return ImmutableSet.of();
1684 }
1685 }
1686 }
Saurav Das7bcbe702017-06-13 15:35:54 -07001687 }
1688 Set<DeviceId> nextHops = new HashSet<>();
1689 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1690 if (via.isEmpty()) {
1691 // the dstSw is the next-hop from the targetSw
1692 nextHops.add(dstSw);
1693 } else {
1694 // first elem is next-hop in each ECMP path
1695 nextHops.add(via.get(0));
1696 }
1697 }
1698 return nextHops;
1699 }
1700 }
1701 return ImmutableSet.of(); //no next-hops found
1702 }
1703
1704 /**
1705 * Represents two devices that are paired by configuration. An EdgePair for
1706 * (dev1, dev2) is the same as an EdgePair for (dev2, dev1)
1707 */
1708 protected final class EdgePair {
1709 DeviceId dev1;
1710 DeviceId dev2;
1711
1712 EdgePair(DeviceId dev1, DeviceId dev2) {
1713 this.dev1 = dev1;
1714 this.dev2 = dev2;
1715 }
1716
1717 boolean includes(DeviceId dev) {
1718 return dev1.equals(dev) || dev2.equals(dev);
1719 }
1720
1721 @Override
1722 public boolean equals(Object o) {
1723 if (this == o) {
1724 return true;
1725 }
1726 if (!(o instanceof EdgePair)) {
1727 return false;
1728 }
1729 EdgePair that = (EdgePair) o;
1730 return ((this.dev1.equals(that.dev1) && this.dev2.equals(that.dev2)) ||
1731 (this.dev1.equals(that.dev2) && this.dev2.equals(that.dev1)));
1732 }
1733
1734 @Override
1735 public int hashCode() {
1736 if (dev1.toString().compareTo(dev2.toString()) <= 0) {
1737 return Objects.hash(dev1, dev2);
1738 } else {
1739 return Objects.hash(dev2, dev1);
1740 }
1741 }
1742
1743 @Override
1744 public String toString() {
1745 return toStringHelper(this)
1746 .add("Dev1", dev1)
1747 .add("Dev2", dev2)
1748 .toString();
1749 }
1750 }
1751
1752 //////////////////////////////////////
1753 // Filtering rule creation
1754 //////////////////////////////////////
1755
1756 /**
Saurav Das018605f2017-02-18 14:05:44 -08001757 * Populates filtering rules for port, and punting rules
1758 * for gateway IPs, loopback IPs and arp/ndp traffic.
1759 * Should only be called by the master instance for this device/port.
sanghob35a6192015-04-01 13:05:26 -07001760 *
1761 * @param deviceId Switch ID to set the rules
1762 */
Charles Chan50bb6ef2018-04-18 18:41:05 -07001763 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das59232cf2016-04-27 18:35:50 -07001764 // Although device is added, sometimes device store does not have the
1765 // ports for this device yet. It results in missing filtering rules in the
1766 // switch. We will attempt it a few times. If it still does not work,
1767 // user can manually repopulate using CLI command sr-reroute-network
Charles Chanf6ec1532017-02-08 16:10:40 -08001768 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd2fded02016-12-02 15:43:47 -08001769 if (firstRun == null) {
1770 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das59232cf2016-04-27 18:35:50 -07001771 }
Saurav Dasd2fded02016-12-02 15:43:47 -08001772 executorService.schedule(new RetryFilters(deviceId, firstRun),
1773 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sanghob35a6192015-04-01 13:05:26 -07001774 }
1775
1776 /**
Saurav Dasd2fded02016-12-02 15:43:47 -08001777 * Utility class used to temporarily store information about the ports on a
1778 * device processed for filtering objectives.
Saurav Dasd2fded02016-12-02 15:43:47 -08001779 */
1780 public final class PortFilterInfo {
Saurav Das018605f2017-02-18 14:05:44 -08001781 int disabledPorts = 0, errorPorts = 0, filteredPorts = 0;
Saurav Das59232cf2016-04-27 18:35:50 -07001782
Saurav Das018605f2017-02-18 14:05:44 -08001783 public PortFilterInfo(int disabledPorts, int errorPorts,
Saurav Dasd2fded02016-12-02 15:43:47 -08001784 int filteredPorts) {
1785 this.disabledPorts = disabledPorts;
1786 this.filteredPorts = filteredPorts;
Saurav Das018605f2017-02-18 14:05:44 -08001787 this.errorPorts = errorPorts;
Saurav Dasd2fded02016-12-02 15:43:47 -08001788 }
1789
1790 @Override
1791 public int hashCode() {
Saurav Das018605f2017-02-18 14:05:44 -08001792 return Objects.hash(disabledPorts, filteredPorts, errorPorts);
Saurav Dasd2fded02016-12-02 15:43:47 -08001793 }
1794
1795 @Override
1796 public boolean equals(Object obj) {
1797 if (this == obj) {
1798 return true;
1799 }
1800 if ((obj == null) || (!(obj instanceof PortFilterInfo))) {
1801 return false;
1802 }
1803 PortFilterInfo other = (PortFilterInfo) obj;
1804 return ((disabledPorts == other.disabledPorts) &&
1805 (filteredPorts == other.filteredPorts) &&
Saurav Das018605f2017-02-18 14:05:44 -08001806 (errorPorts == other.errorPorts));
Saurav Dasd2fded02016-12-02 15:43:47 -08001807 }
1808
1809 @Override
1810 public String toString() {
1811 MoreObjects.ToStringHelper helper = toStringHelper(this)
1812 .add("disabledPorts", disabledPorts)
Saurav Das018605f2017-02-18 14:05:44 -08001813 .add("errorPorts", errorPorts)
Saurav Dasd2fded02016-12-02 15:43:47 -08001814 .add("filteredPorts", filteredPorts);
1815 return helper.toString();
1816 }
1817 }
1818
1819 /**
1820 * RetryFilters populates filtering objectives for a device and keeps retrying
1821 * until the number of ports filtered stays constant for a predefined number
1822 * of attempts.
1823 */
1824 protected final class RetryFilters implements Runnable {
1825 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
1826 DeviceId devId;
1827 int counter;
1828 PortFilterInfo prevRun;
1829
1830 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das59232cf2016-04-27 18:35:50 -07001831 devId = deviceId;
Saurav Dasd2fded02016-12-02 15:43:47 -08001832 prevRun = previousRun;
1833 counter = 0;
Saurav Das59232cf2016-04-27 18:35:50 -07001834 }
1835
1836 @Override
1837 public void run() {
Charles Chan7f9737b2017-06-22 14:27:17 -07001838 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chanf6ec1532017-02-08 16:10:40 -08001839 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd2fded02016-12-02 15:43:47 -08001840 boolean sameResult = prevRun.equals(thisRun);
1841 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
1842 thisRun, sameResult);
Ray Milkeyc6c9b172018-02-26 09:36:31 -08001843 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Das018605f2017-02-18 14:05:44 -08001844 // exponentially increasing intervals for retries
1845 executorService.schedule(this,
1846 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
1847 TimeUnit.MILLISECONDS);
Saurav Dasd2fded02016-12-02 15:43:47 -08001848 if (!sameResult) {
1849 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
1850 }
Saurav Das59232cf2016-04-27 18:35:50 -07001851 }
Saurav Dasd2fded02016-12-02 15:43:47 -08001852 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das59232cf2016-04-27 18:35:50 -07001853 }
Saurav Das59232cf2016-04-27 18:35:50 -07001854 }
1855
sanghob35a6192015-04-01 13:05:26 -07001856}