blob: 84cae2d4728e33f7d1558bdd687840353dac8543 [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.segmentrouting;
17
Saurav Das62ae6792017-05-15 15:34:25 -070018import com.google.common.collect.ImmutableMap;
19import com.google.common.collect.ImmutableMap.Builder;
Charles Chanc22cef32016-04-29 14:38:22 -070020import com.google.common.collect.ImmutableSet;
Saurav Das1b391d52016-11-29 14:27:25 -080021import com.google.common.collect.Lists;
sanghofb7c7292015-04-13 15:15:58 -070022import com.google.common.collect.Maps;
23import com.google.common.collect.Sets;
Saurav Dasfbe74572017-08-03 18:30:35 -070024
pierventre37dcf4c2021-09-16 18:43:06 +020025import com.google.common.hash.Hasher;
26import com.google.common.hash.Hashing;
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -070027import org.onlab.packet.EthType;
Charles Chan19b70032019-04-17 14:20:26 -070028import com.google.common.collect.Streams;
sangho9b169e32015-04-14 16:27:13 -070029import org.onlab.packet.Ip4Address;
Pier Ventreadb4ae62016-11-23 09:57:42 -080030import org.onlab.packet.Ip6Address;
sangho80f11cb2015-04-01 13:05:26 -070031import org.onlab.packet.IpPrefix;
Charles Chan910be6a2017-08-23 14:46:43 -070032import org.onlab.packet.MacAddress;
33import org.onlab.packet.VlanId;
piera9941192019-04-24 16:12:47 +020034import org.onlab.util.PredictableExecutor;
35import org.onlab.util.PredictableExecutor.PickyCallable;
Saurav Das261c3002017-06-13 15:35:54 -070036import org.onosproject.cluster.NodeId;
Saurav Das00e553b2018-04-21 17:19:48 -070037import org.onosproject.mastership.MastershipEvent;
Charles Chanc22cef32016-04-29 14:38:22 -070038import org.onosproject.net.ConnectPoint;
sangho80f11cb2015-04-01 13:05:26 -070039import org.onosproject.net.Device;
40import org.onosproject.net.DeviceId;
sanghofb7c7292015-04-13 15:15:58 -070041import org.onosproject.net.Link;
Charles Chan910be6a2017-08-23 14:46:43 -070042import org.onosproject.net.PortNumber;
Charles Chan12a8a842020-02-14 13:23:57 -080043import org.onosproject.net.flowobjective.Objective;
Charles Chan319d1a22015-11-03 10:42:14 -080044import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
Saurav Das62ae6792017-05-15 15:34:25 -070045import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Charles Chand66d6712018-03-29 16:03:41 -070046import org.onosproject.store.serializers.KryoNamespaces;
pierf331a492020-01-07 15:39:39 +010047import org.onosproject.store.service.ConsistentMultimap;
Charles Chand66d6712018-03-29 16:03:41 -070048import org.onosproject.store.service.Serializer;
sangho80f11cb2015-04-01 13:05:26 -070049import org.slf4j.Logger;
50import org.slf4j.LoggerFactory;
51
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070052import java.time.Instant;
sangho80f11cb2015-04-01 13:05:26 -070053import java.util.ArrayList;
Charles Chand66d6712018-03-29 16:03:41 -070054import java.util.Collections;
sangho80f11cb2015-04-01 13:05:26 -070055import java.util.HashMap;
56import java.util.HashSet;
Saurav Das261c3002017-06-13 15:35:54 -070057import java.util.Iterator;
Charles Chand66d6712018-03-29 16:03:41 -070058import java.util.List;
Saurav Das261c3002017-06-13 15:35:54 -070059import java.util.Map;
pierf331a492020-01-07 15:39:39 +010060import java.util.Map.Entry;
Saurav Dasd1872b02016-12-02 15:43:47 -080061import java.util.Objects;
Charles Chan6dbcd252018-04-02 11:46:38 -070062import java.util.Optional;
sangho80f11cb2015-04-01 13:05:26 -070063import java.util.Set;
piera9941192019-04-24 16:12:47 +020064import java.util.concurrent.CompletableFuture;
65import java.util.concurrent.ExecutionException;
66import java.util.concurrent.ExecutorService;
67import java.util.concurrent.Future;
Saurav Das07c74602016-04-27 18:35:50 -070068import java.util.concurrent.ScheduledExecutorService;
69import java.util.concurrent.TimeUnit;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090070import java.util.concurrent.locks.Lock;
71import java.util.concurrent.locks.ReentrantLock;
pierventre37dcf4c2021-09-16 18:43:06 +020072import java.util.function.Function;
Charles Chan19b70032019-04-17 14:20:26 -070073import java.util.stream.Collectors;
Saurav Dasdc7f2752018-03-18 21:28:15 -070074import java.util.stream.Stream;
75
Pier Ventreadb4ae62016-11-23 09:57:42 -080076import static com.google.common.base.Preconditions.checkNotNull;
77import static java.util.concurrent.Executors.newScheduledThreadPool;
78import static org.onlab.util.Tools.groupedThreads;
sangho80f11cb2015-04-01 13:05:26 -070079
/**
 * Default routing handler that is responsible for route computing and
 * routing rule population.
 */
sangho80f11cb2015-04-01 13:05:26 -070084public class DefaultRoutingHandler {
Saurav Dasf9332192017-02-18 14:05:44 -080085 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey092e9e22018-02-01 13:49:47 -080086 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Dasf9332192017-02-18 14:05:44 -080087 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasfbe74572017-08-03 18:30:35 -070088 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Das00e553b2018-04-21 17:19:48 -070089 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Saurav Das68e1b6a2018-06-11 17:02:31 -070090 private static final long PURGE_DELAY = 1000; // ms
Charles Chanc22cef32016-04-29 14:38:22 -070091 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sangho80f11cb2015-04-01 13:05:26 -070092
93 private SegmentRoutingManager srManager;
94 private RoutingRulePopulator rulePopulator;
Shashikanth VH0637b162015-12-11 01:32:44 +053095 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
96 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho9b169e32015-04-14 16:27:13 -070097 private DeviceConfiguration config;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090098 private final Lock statusLock = new ReentrantLock();
99 private volatile Status populationStatus;
Yuta HIGUCHIebee2f12016-07-21 16:54:33 -0700100 private ScheduledExecutorService executorService
Saurav Dasd1872b02016-12-02 15:43:47 -0800101 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das49368392018-04-23 18:42:12 -0700102 private ScheduledExecutorService executorServiceMstChg
103 = newScheduledThreadPool(1, groupedThreads("masterChg", "mstch-%d", log));
Saurav Das68e1b6a2018-06-11 17:02:31 -0700104 private ScheduledExecutorService executorServiceFRR
105 = newScheduledThreadPool(1, groupedThreads("fullRR", "fullRR-%d", log));
piera9941192019-04-24 16:12:47 +0200106 // Route populators - 0 will leverage available processors
107 private static final int DEFAULT_THREADS = 0;
108 private ExecutorService routePopulators;
Saurav Das49368392018-04-23 18:42:12 -0700109
Saurav Das00e553b2018-04-21 17:19:48 -0700110 private Instant lastRoutingChange = Instant.EPOCH;
Saurav Das68e1b6a2018-06-11 17:02:31 -0700111 private Instant lastFullReroute = Instant.EPOCH;
sangho80f11cb2015-04-01 13:05:26 -0700112
pierventre37dcf4c2021-09-16 18:43:06 +0200113 /*
114 * Store to keep track of ONOS instance that should program the device pair.
115 * There should be only one instance (the leader) that programs the same pair.
116 * This EC map is used as first source of truth. WorkPartitionService is used
117 * to elect a leader when shouldProgram is empty.
118 */
119 Map<DeviceId, NodeId> shouldProgram;
Charles Chand66d6712018-03-29 16:03:41 -0700120
pierf331a492020-01-07 15:39:39 +0100121 // Distributed routes store to keep track of the routes already seen
122 // destination device is the key and target sw is the value
123 ConsistentMultimap<DeviceId, DeviceId> seenBeforeRoutes;
124
Saurav Das00e553b2018-04-21 17:19:48 -0700125 // Local store to keep track of all devices that this instance was responsible
126 // for programming in the last run. Helps to determine if mastership changed
127 // during a run - only relevant for programming as a result of topo change.
128 Set<DeviceId> lastProgrammed;
129
/**
 * Represents the default routing population status.
 */
public enum Status {
    /** Population process is not started yet. */
    IDLE,

    /** Population process started. */
    STARTED,

    /** Population process was aborted due to errors, mostly for groups not found. */
    ABORTED,

    /** Population process was finished successfully. */
    SUCCEEDED
}
143
144 /**
pierventre37dcf4c2021-09-16 18:43:06 +0200145 * Deterministic hashing for the shouldProgram logic.
146 */
147 private static Long consistentHasher(EdgePair pair) {
148 Hasher hasher = Hashing.md5().newHasher();
149 long dev1Hash = hasher.putUnencodedChars(pair.dev1.toString())
150 .hash()
151 .asLong();
152 hasher = Hashing.md5().newHasher();
153 long dev2Hash = hasher.putUnencodedChars(pair.dev2.toString())
154 .hash()
155 .asLong();
156 return dev1Hash + dev2Hash;
157 }
158
159 /**
160 * Implements the hash function for the shouldProgram logic.
161 */
162 protected static final Function<EdgePair, Long> HASH_FUNCTION = DefaultRoutingHandler::consistentHasher;
163
/**
 * Creates a DefaultRoutingHandler object.
 * <p>
 * Builds the two distributed stores used by this handler (the
 * "sr-should-program" map and the "programmed-routes" multimap) from the
 * manager's storage service, binds the handler to the manager through
 * {@link #update(SegmentRoutingManager)} and creates the route-populator
 * executor.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.shouldProgram = srManager.storageService.<DeviceId, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build().asJavaMap();
    this.seenBeforeRoutes = srManager.storageService.<DeviceId, DeviceId>consistentMultimapBuilder()
            .withName("programmed-routes")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build();
    update(srManager);
    this.routePopulators = new PredictableExecutor(DEFAULT_THREADS,
                                                   groupedThreads("onos/sr", "r-populator-%d", log));
}
184
185 /**
186 * Updates a DefaultRoutingHandler object.
187 *
188 * @param srManager SegmentRoutingManager object
189 */
190 void update(SegmentRoutingManager srManager) {
sangho80f11cb2015-04-01 13:05:26 -0700191 this.srManager = srManager;
192 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho9b169e32015-04-14 16:27:13 -0700193 this.config = checkNotNull(srManager.deviceConfiguration);
sangho80f11cb2015-04-01 13:05:26 -0700194 this.populationStatus = Status.IDLE;
sanghofb7c7292015-04-13 15:15:58 -0700195 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Das00e553b2018-04-21 17:19:48 -0700196 this.lastProgrammed = Sets.newConcurrentHashSet();
sangho80f11cb2015-04-01 13:05:26 -0700197 }
198
199 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700200 * Returns an immutable copy of the current ECMP shortest-path graph as
201 * computed by this controller instance.
202 *
Saurav Das261c3002017-06-13 15:35:54 -0700203 * @return immutable copy of the current ECMP graph
Saurav Das62ae6792017-05-15 15:34:25 -0700204 */
205 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
206 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
207 currentEcmpSpgMap.entrySet().forEach(entry -> {
208 if (entry.getValue() != null) {
209 builder.put(entry.getKey(), entry.getValue());
210 }
211 });
212 return builder.build();
213 }
214
Saurav Dasfbe74572017-08-03 18:30:35 -0700215 /**
216 * Acquires the lock used when making routing changes.
217 */
218 public void acquireRoutingLock() {
219 statusLock.lock();
220 }
221
222 /**
223 * Releases the lock used when making routing changes.
224 */
225 public void releaseRoutingLock() {
226 statusLock.unlock();
227 }
228
229 /**
230 * Determines if routing in the network has been stable in the last
Charles Chan12a8a842020-02-14 13:23:57 -0800231 * STABILITY_THRESHOLD seconds, by comparing the current time to the last
Saurav Dasfbe74572017-08-03 18:30:35 -0700232 * routing change timestamp.
233 *
234 * @return true if stable
235 */
236 public boolean isRoutingStable() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700237 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
238 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700239 log.trace("Routing stable since {}s", now - last);
Saurav Dasfbe74572017-08-03 18:30:35 -0700240 return (now - last) > STABLITY_THRESHOLD;
241 }
242
Saurav Das49368392018-04-23 18:42:12 -0700243 /**
244 * Gracefully shuts down the defaultRoutingHandler. Typically called when
245 * the app is deactivated
246 */
247 public void shutdown() {
248 executorService.shutdown();
249 executorServiceMstChg.shutdown();
Saurav Das68e1b6a2018-06-11 17:02:31 -0700250 executorServiceFRR.shutdown();
piera9941192019-04-24 16:12:47 +0200251 routePopulators.shutdown();
Saurav Das49368392018-04-23 18:42:12 -0700252 }
Saurav Dasfbe74572017-08-03 18:30:35 -0700253
//////////////////////////////////////
//  Route path handling
//////////////////////////////////////

/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network
 *      a) due to configuration change
 *      b) due to route-added event
 *      c) due to change in the topology
 */
264
Saurav Das62ae6792017-05-15 15:34:25 -0700265 /**
Saurav Das261c3002017-06-13 15:35:54 -0700266 * Populates all routing rules to all switches. Typically triggered at
267 * startup or after a configuration event.
sangho80f11cb2015-04-01 13:05:26 -0700268 */
Saurav Das62ae6792017-05-15 15:34:25 -0700269 public void populateAllRoutingRules() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700270 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900271 statusLock.lock();
272 try {
Saurav Das261c3002017-06-13 15:35:54 -0700273 if (populationStatus == Status.STARTED) {
274 log.warn("Previous rule population is not finished. Cannot"
275 + " proceed with populateAllRoutingRules");
276 return;
277 }
278
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900279 populationStatus = Status.STARTED;
280 rulePopulator.resetCounter();
Saurav Das261c3002017-06-13 15:35:54 -0700281 log.info("Starting to populate all routing rules");
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900282 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sangho80f11cb2015-04-01 13:05:26 -0700283
Saurav Das261c3002017-06-13 15:35:54 -0700284 // take a snapshot of the topology
285 updatedEcmpSpgMap = new HashMap<>();
286 Set<EdgePair> edgePairs = new HashSet<>();
287 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart61e24e12017-11-30 18:23:42 -0800288 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das261c3002017-06-13 15:35:54 -0700289 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart61e24e12017-11-30 18:23:42 -0800290 new EcmpShortestPathGraph(dstSw, srManager);
291 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700292 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
293 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700294 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700295 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
296 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
297 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700298 }
Charles Chand66d6712018-03-29 16:03:41 -0700299
300 if (!shouldProgram(dstSw)) {
Saurav Das00e553b2018-04-21 17:19:48 -0700301 lastProgrammed.remove(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900302 continue;
Saurav Das00e553b2018-04-21 17:19:48 -0700303 } else {
304 lastProgrammed.add(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900305 }
Saurav Das00e553b2018-04-21 17:19:48 -0700306 // To do a full reroute, assume all route-paths have changed
Charles Chand66d6712018-03-29 16:03:41 -0700307 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart61e24e12017-11-30 18:23:42 -0800308 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
309 if (targetSw.equals(dev)) {
Saurav Das261c3002017-06-13 15:35:54 -0700310 continue;
311 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800312 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das261c3002017-06-13 15:35:54 -0700313 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900314 }
Saurav Das261c3002017-06-13 15:35:54 -0700315 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900316
pierf331a492020-01-07 15:39:39 +0100317 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200318 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
319
Saurav Das261c3002017-06-13 15:35:54 -0700320 if (!redoRouting(routeChanges, edgePairs, null)) {
321 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
322 populationStatus = Status.ABORTED;
323 log.warn("Failed to repopulate all routing rules.");
324 return;
sangho80f11cb2015-04-01 13:05:26 -0700325 }
326
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900327 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
328 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700329 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900330 rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700331 return;
pierdebd15c2019-04-19 20:55:53 +0200332 } catch (Exception e) {
333 log.error("populateAllRoutingRules thrown an exception: {}",
334 e.getMessage(), e);
335 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900336 } finally {
337 statusLock.unlock();
sangho80f11cb2015-04-01 13:05:26 -0700338 }
sangho80f11cb2015-04-01 13:05:26 -0700339 }
340
sanghofb7c7292015-04-13 15:15:58 -0700341 /**
Saurav Das261c3002017-06-13 15:35:54 -0700342 * Populate rules from all other edge devices to the connect-point(s)
343 * specified for the given subnets.
344 *
345 * @param cpts connect point(s) of the subnets being added
346 * @param subnets subnets being added
Charles Chan910be6a2017-08-23 14:46:43 -0700347 */
348 // XXX refactor
Saurav Das261c3002017-06-13 15:35:54 -0700349 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan6db55b92017-09-11 15:21:57 -0700350 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
351 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
352 return;
353 }
354
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700355 lastRoutingChange = Instant.now();
Saurav Das261c3002017-06-13 15:35:54 -0700356 statusLock.lock();
357 try {
358 if (populationStatus == Status.STARTED) {
359 log.warn("Previous rule population is not finished. Cannot"
360 + " proceed with routing rules for added routes");
361 return;
362 }
363 populationStatus = Status.STARTED;
364 rulePopulator.resetCounter();
Charles Chan910be6a2017-08-23 14:46:43 -0700365 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
366 subnets, cpts);
Saurav Das6430f412018-01-25 09:49:01 -0800367 // In principle an update to a subnet/prefix should not require a
368 // new ECMPspg calculation as it is not a topology event. As a
369 // result, we use the current/existing ECMPspg in the updated map
370 // used by the redoRouting method.
Saurav Das6de6ffd2018-02-09 09:15:03 -0800371 if (updatedEcmpSpgMap == null) {
372 updatedEcmpSpgMap = new HashMap<>();
373 }
Saurav Das6430f412018-01-25 09:49:01 -0800374 currentEcmpSpgMap.entrySet().forEach(entry -> {
375 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase321cff2018-02-09 17:26:45 -0800376 if (log.isTraceEnabled()) {
377 log.trace("Root switch: {}", entry.getKey());
378 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Das6430f412018-01-25 09:49:01 -0800379 }
380 });
pierventre37dcf4c2021-09-16 18:43:06 +0200381
pierf331a492020-01-07 15:39:39 +0100382 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200383 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
384
Saurav Das261c3002017-06-13 15:35:54 -0700385 Set<EdgePair> edgePairs = new HashSet<>();
386 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
387 boolean handleRouting = false;
388
389 if (cpts.size() == 2) {
390 // ensure connect points are edge-pairs
391 Iterator<ConnectPoint> iter = cpts.iterator();
392 DeviceId dev1 = iter.next().deviceId();
Charles Chan6dbcd252018-04-02 11:46:38 -0700393 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
394 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
395 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700396 } else {
397 log.warn("Connectpoints {} for subnets {} not on "
398 + "pair-devices.. aborting populateSubnet", cpts, subnets);
399 populationStatus = Status.ABORTED;
400 return;
401 }
402 for (ConnectPoint cp : cpts) {
Saurav Das6430f412018-01-25 09:49:01 -0800403 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
404 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700405 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800406 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
407 log.warn("populateSubnet: no updated graph for dev:{}"
408 + " ... creating", cp.deviceId());
409 }
Charles Chand66d6712018-03-29 16:03:41 -0700410 if (!shouldProgram(cp.deviceId())) {
Saurav Das261c3002017-06-13 15:35:54 -0700411 continue;
412 }
413 handleRouting = true;
414 }
415 } else {
416 // single connect point
417 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Das6430f412018-01-25 09:49:01 -0800418 if (updatedEcmpSpgMap.get(dstSw) == null) {
419 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700420 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800421 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
422 log.warn("populateSubnet: no updated graph for dev:{}"
423 + " ... creating", dstSw);
424 }
Charles Chand66d6712018-03-29 16:03:41 -0700425 handleRouting = shouldProgram(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700426 }
427
428 if (!handleRouting) {
429 log.debug("This instance is not handling ecmp routing to the "
430 + "connectPoint(s) {}", cpts);
431 populationStatus = Status.ABORTED;
432 return;
433 }
434
435 // if it gets here, this instance should handle routing for the
436 // connectpoint(s). Assume all route-paths have to be updated to
437 // the connectpoint(s) with the following exceptions
438 // 1. if target is non-edge no need for routing rules
439 // 2. if target is one of the connectpoints
440 for (ConnectPoint cp : cpts) {
441 DeviceId dstSw = cp.deviceId();
442 for (Device targetSw : srManager.deviceService.getDevices()) {
443 boolean isEdge = false;
444 try {
445 isEdge = config.isEdgeDevice(targetSw.id());
446 } catch (DeviceConfigNotFoundException e) {
Charles Chaneaf3c9b2018-02-16 17:20:54 -0800447 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
448 continue;
Saurav Das261c3002017-06-13 15:35:54 -0700449 }
Charles Chan6dbcd252018-04-02 11:46:38 -0700450 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Daniele Moroa2aabe22021-06-07 16:28:41 +0200451// if (dstSw.equals(targetSw.id()) || !isEdge ||
452// (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
453 if (dstSw.equals(targetSw.id()) ||
Charles Chan6dbcd252018-04-02 11:46:38 -0700454 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das261c3002017-06-13 15:35:54 -0700455 continue;
456 }
457 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
458 }
459 }
460
461 if (!redoRouting(routeChanges, edgePairs, subnets)) {
462 log.debug("populateSubnet: populationStatus is ABORTED");
463 populationStatus = Status.ABORTED;
464 log.warn("Failed to repopulate the rules for subnet.");
465 return;
466 }
467
468 log.debug("populateSubnet: populationStatus is SUCCEEDED");
469 populationStatus = Status.SUCCEEDED;
470 log.info("Completed subnet population. Total # of rules pushed : {}",
471 rulePopulator.getCounter());
472 return;
473
pierdebd15c2019-04-19 20:55:53 +0200474 } catch (Exception e) {
475 log.error("populateSubnet thrown an exception: {}",
476 e.getMessage(), e);
477 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700478 } finally {
479 statusLock.unlock();
480 }
481 }
482
483 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700484 * Populates the routing rules or makes hash group changes according to the
485 * route-path changes due to link failure, switch failure or link up. This
486 * method should only be called for one of these three possible event-types.
Saurav Dasdc7f2752018-03-18 21:28:15 -0700487 * Note that when a switch goes away, all of its links fail as well, but
488 * this is handled as a single switch removal event.
sanghofb7c7292015-04-13 15:15:58 -0700489 *
Saurav Dasdc7f2752018-03-18 21:28:15 -0700490 * @param linkDown the single failed link, or null for other conditions such
491 * as link-up or a removed switch
Saurav Das62ae6792017-05-15 15:34:25 -0700492 * @param linkUp the single link up, or null for other conditions such as
Saurav Dasdc7f2752018-03-18 21:28:15 -0700493 * link-down or a removed switch
494 * @param switchDown the removed switch, or null for other conditions such
495 * as link-down or link-up
496 * @param seenBefore true if this event is for a linkUp or linkDown for a
497 * seen link
498 */
499 // TODO This method should be refactored into three separated methods
Charles Chan9d2dd552018-06-19 20:56:33 -0700500 public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
501 DeviceId switchDown, boolean seenBefore) {
Saurav Dasdc7f2752018-03-18 21:28:15 -0700502 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
503 .count() != 1) {
Saurav Das62ae6792017-05-15 15:34:25 -0700504 log.warn("Only one event can be handled for link status change .. aborting");
505 return;
506 }
Saurav Dasdc7f2752018-03-18 21:28:15 -0700507
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700508 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900509 statusLock.lock();
510 try {
sanghofb7c7292015-04-13 15:15:58 -0700511
512 if (populationStatus == Status.STARTED) {
Saurav Das261c3002017-06-13 15:35:54 -0700513 log.warn("Previous rule population is not finished. Cannot"
Saurav Das6430f412018-01-25 09:49:01 -0800514 + " proceeed with routingRules for Topology change");
Saurav Das62ae6792017-05-15 15:34:25 -0700515 return;
sanghofb7c7292015-04-13 15:15:58 -0700516 }
517
Saurav Das261c3002017-06-13 15:35:54 -0700518 // Take snapshots of the topology
sangho28d0b6d2015-05-07 13:30:57 -0700519 updatedEcmpSpgMap = new HashMap<>();
Saurav Das261c3002017-06-13 15:35:54 -0700520 Set<EdgePair> edgePairs = new HashSet<>();
sangho28d0b6d2015-05-07 13:30:57 -0700521 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH0637b162015-12-11 01:32:44 +0530522 EcmpShortestPathGraph ecmpSpgUpdated =
523 new EcmpShortestPathGraph(sw.id(), srManager);
sangho28d0b6d2015-05-07 13:30:57 -0700524 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700525 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
526 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700527 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700528 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
529 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
530 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700531 }
sangho28d0b6d2015-05-07 13:30:57 -0700532 }
533
Saurav Das6430f412018-01-25 09:49:01 -0800534 log.info("Starting to populate routing rules from Topology change");
sanghodf0153f2015-05-05 14:13:34 -0700535
sanghofb7c7292015-04-13 15:15:58 -0700536 Set<ArrayList<DeviceId>> routeChanges;
Saurav Das62ae6792017-05-15 15:34:25 -0700537 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700538 + "populationStatus is STARTED");
pierf331a492020-01-07 15:39:39 +0100539 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200540 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
sanghofb7c7292015-04-13 15:15:58 -0700541 populationStatus = Status.STARTED;
Saurav Das6430f412018-01-25 09:49:01 -0800542 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
543 boolean hashGroupsChanged = false;
Saurav Das1b391d52016-11-29 14:27:25 -0800544 // try optimized re-routing
Saurav Das62ae6792017-05-15 15:34:25 -0700545 if (linkDown == null) {
546 // either a linkUp or a switchDown - compute all route changes by
547 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dascea556f2018-03-05 14:37:16 -0800548 routeChanges = computeRouteChange(switchDown);
Saurav Das62ae6792017-05-15 15:34:25 -0700549
pier572d4a92019-04-25 18:51:51 +0200550 // deal with linkUp
551 if (linkUp != null) {
552 // deal with linkUp of a seen-before link
553 if (seenBefore) {
554 // link previously seen before
555 // do hash-bucket changes instead of a re-route
556 processHashGroupChangeForLinkUp(routeChanges);
557 // clear out routesChanges so a re-route is not attempted
558 routeChanges = ImmutableSet.of();
559 hashGroupsChanged = true;
560 } else {
561 // do hash-bucket changes first, method will return changed routes;
562 // for each route not changed it will perform a reroute
563 Set<ArrayList<DeviceId>> changedRoutes = processHashGroupChangeForLinkUp(routeChanges);
564 Set<ArrayList<DeviceId>> routeChangesTemp = getExpandedRoutes(routeChanges);
565 changedRoutes.forEach(routeChangesTemp::remove);
566 // if routesChanges is empty a re-route is not attempted
567 routeChanges = routeChangesTemp;
568 for (ArrayList<DeviceId> route : routeChanges) {
569 log.debug("remaining routes Target -> Root");
570 if (route.size() == 1) {
571 log.debug(" : all -> {}", route.get(0));
572 } else {
573 log.debug(" : {} -> {}", route.get(0), route.get(1));
574 }
575 }
576 // Mark hash groups as changed
577 if (!changedRoutes.isEmpty()) {
578 hashGroupsChanged = true;
579 }
580 }
581
Saurav Das62ae6792017-05-15 15:34:25 -0700582 }
583
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700584 //deal with switchDown
585 if (switchDown != null) {
pier572d4a92019-04-25 18:51:51 +0200586 processHashGroupChangeForFailure(routeChanges, switchDown);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700587 // clear out routesChanges so a re-route is not attempted
588 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800589 hashGroupsChanged = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700590 }
sanghofb7c7292015-04-13 15:15:58 -0700591 } else {
Saurav Das62ae6792017-05-15 15:34:25 -0700592 // link has gone down
593 // Compare existing ECMP SPG only with the link that went down
594 routeChanges = computeDamagedRoutes(linkDown);
pier572d4a92019-04-25 18:51:51 +0200595 processHashGroupChangeForFailure(routeChanges, null);
Saurav Das68e1b6a2018-06-11 17:02:31 -0700596 // clear out routesChanges so a re-route is not attempted
597 routeChanges = ImmutableSet.of();
598 hashGroupsChanged = true;
Saurav Dasb149be12016-06-07 10:08:06 -0700599 }
600
sanghofb7c7292015-04-13 15:15:58 -0700601 if (routeChanges.isEmpty()) {
Saurav Das6430f412018-01-25 09:49:01 -0800602 if (hashGroupsChanged) {
603 log.info("Hash-groups changed for link status change");
604 } else {
605 log.info("No re-route or re-hash attempted for the link"
606 + " status change");
607 updatedEcmpSpgMap.keySet().forEach(devId -> {
608 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
609 log.debug("Updating ECMPspg for remaining dev:{}", devId);
610 });
611 }
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700612 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700613 populationStatus = Status.SUCCEEDED;
Saurav Das62ae6792017-05-15 15:34:25 -0700614 return;
sanghofb7c7292015-04-13 15:15:58 -0700615 }
616
pier572d4a92019-04-25 18:51:51 +0200617 if (hashGroupsChanged) {
618 log.debug("Hash-groups changed for link status change");
619 }
620
Saurav Das62ae6792017-05-15 15:34:25 -0700621 // reroute of routeChanges
Saurav Das261c3002017-06-13 15:35:54 -0700622 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700623 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700624 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700625 log.info("Completed repopulation of rules for link-status change."
626 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700627 return;
sanghofb7c7292015-04-13 15:15:58 -0700628 } else {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700629 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sanghofb7c7292015-04-13 15:15:58 -0700630 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700631 log.warn("Failed to repopulate the rules for link status change.");
Saurav Das62ae6792017-05-15 15:34:25 -0700632 return;
sanghofb7c7292015-04-13 15:15:58 -0700633 }
pierdebd15c2019-04-19 20:55:53 +0200634 } catch (Exception e) {
635 log.error("populateRoutingRulesForLinkStatusChange thrown an exception: {}",
636 e.getMessage(), e);
637 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900638 } finally {
639 statusLock.unlock();
sanghofb7c7292015-04-13 15:15:58 -0700640 }
641 }
642
Saurav Das62ae6792017-05-15 15:34:25 -0700643 /**
Saurav Das261c3002017-06-13 15:35:54 -0700644 * Processes a set a route-path changes by reprogramming routing rules and
645 * creating new hash-groups or editing them if necessary. This method also
646 * determines the next-hops for the route-path from the src-switch (target)
647 * of the path towards the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -0700648 *
Saurav Das261c3002017-06-13 15:35:54 -0700649 * @param routeChanges a set of route-path changes, where each route-path is
650 * a list with its first element the src-switch (target)
651 * of the path, and the second element the dst-switch of
652 * the path.
653 * @param edgePairs a set of edge-switches that are paired by configuration
654 * @param subnets a set of prefixes that need to be populated in the routing
655 * table of the target switch in the route-path. Can be null,
656 * in which case all the prefixes belonging to the dst-switch
657 * will be populated in the target switch
658 * @return true if successful in repopulating all routes
Saurav Das62ae6792017-05-15 15:34:25 -0700659 */
Saurav Das261c3002017-06-13 15:35:54 -0700660 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
661 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
662 // first make every entry two-elements
pier572d4a92019-04-25 18:51:51 +0200663 Set<ArrayList<DeviceId>> changedRoutes = getExpandedRoutes(routeChanges);
664 // no valid routes - fail fast
665 if (changedRoutes.isEmpty()) {
666 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700667 }
668
pierf331a492020-01-07 15:39:39 +0100669 // Temporary stores the changed routes
670 Set<ArrayList<DeviceId>> tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700671 // now process changedRoutes according to edgePairs
672 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
673 return false; //abort routing and fail fast
674 }
pierf331a492020-01-07 15:39:39 +0100675 // Calculate the programmed routes pointing to the pairs
676 Set<ArrayList<DeviceId>> programmedPairRoutes = Sets.difference(tempRoutes, changedRoutes);
677 log.debug("Evaluating programmed pair routes");
678 storeSeenBeforeRoutes(programmedPairRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700679
pierf331a492020-01-07 15:39:39 +0100680 // Temporary stores the left routes
681 tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700682 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Das6430f412018-01-25 09:49:01 -0800683 Set<DeviceId> updatedDevices = Sets.newHashSet();
684 if (!redoRoutingIndividualDests(subnets, changedRoutes,
685 updatedDevices)) {
Saurav Das261c3002017-06-13 15:35:54 -0700686 return false; //abort routing and fail fast
687 }
pierf331a492020-01-07 15:39:39 +0100688 // Calculate the individual programmed routes
689 Set<ArrayList<DeviceId>> programmedIndividualRoutes = Sets.difference(tempRoutes, changedRoutes);
690 log.debug("Evaluating individual programmed routes");
691 storeSeenBeforeRoutes(programmedIndividualRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700692
Saurav Das261c3002017-06-13 15:35:54 -0700693 // update ecmpSPG for all edge-pairs
694 for (EdgePair ep : edgePairs) {
695 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
696 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
697 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
698 }
Saurav Das6430f412018-01-25 09:49:01 -0800699
700 // here is where we update all devices not touched by this instance
701 updatedEcmpSpgMap.keySet().stream()
702 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
703 .filter(devId -> !updatedDevices.contains(devId))
704 .forEach(devId -> {
705 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
706 log.debug("Updating ECMPspg for remaining dev:{}", devId);
707 });
Saurav Das261c3002017-06-13 15:35:54 -0700708 return true;
709 }
710
711 /**
pierf331a492020-01-07 15:39:39 +0100712 * Stores the routes seen before. Routes are two-elements arrays.
713 * @param seenRoutes seen before routes
714 */
715 private void storeSeenBeforeRoutes(Set<ArrayList<DeviceId>> seenRoutes) {
716 Set<DeviceId> nextHops;
717 for (ArrayList<DeviceId> route : seenRoutes) {
718 log.debug("Route {} -> {} has been programmed", route.get(0), route.get(1));
719 nextHops = getNextHops(route.get(0), route.get(1));
720 // No valid next hops - cannot be considered a programmed route
721 if (nextHops.isEmpty()) {
722 log.debug("Could not find next hop from target:{} --> dst {} "
723 + "skipping this route", route.get(0), route.get(1));
724 continue;
725 }
726 // Already present - do not add again
727 if (seenBeforeRoutes.containsEntry(route.get(1), route.get(0))) {
728 log.debug("Route from target:{} --> dst {} " +
729 "already present, skipping this route", route.get(0), route.get(1));
730 continue;
731 }
732 seenBeforeRoutes.put(route.get(1), route.get(0));
733 }
734 }
735
736 /**
Saurav Das261c3002017-06-13 15:35:54 -0700737 * Programs targetSw in the changedRoutes for given prefixes reachable by
738 * an edgePair. If no prefixes are given, the method will use configured
739 * subnets/prefixes. If some configured subnets belong only to a specific
740 * destination in the edgePair, then the target switch will be programmed
741 * only to that destination.
742 *
743 * @param edgePairs set of edge-pairs for which target will be programmed
744 * @param subnets a set of prefixes that need to be populated in the routing
745 * table of the target switch in the changedRoutes. Can be null,
746 * in which case all the configured prefixes belonging to the
747 * paired switches will be populated in the target switch
748 * @param changedRoutes a set of route-path changes, where each route-path is
749 * a list with its first element the src-switch (target)
750 * of the path, and the second element the dst-switch of
751 * the path.
752 * @return true if successful
753 */
piera9941192019-04-24 16:12:47 +0200754 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs, Set<IpPrefix> subnets,
755 Set<ArrayList<DeviceId>> changedRoutes) {
Saurav Das261c3002017-06-13 15:35:54 -0700756 for (EdgePair ep : edgePairs) {
757 // temp store for a target's changedRoutes to this edge-pair
758 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
759 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
760 while (i.hasNext()) {
761 ArrayList<DeviceId> route = i.next();
762 DeviceId dstSw = route.get(1);
763 if (ep.includes(dstSw)) {
764 // routeChange for edge pair found
765 // sort by target iff target is edge and remove from changedRoutes
766 DeviceId targetSw = route.get(0);
767 try {
768 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
769 continue;
770 }
771 } catch (DeviceConfigNotFoundException e) {
772 log.warn(e.getMessage() + "aborting redoRouting");
773 return false;
774 }
775 // route is from another edge to this edge-pair
776 if (targetRoutes.containsKey(targetSw)) {
777 targetRoutes.get(targetSw).add(route);
778 } else {
779 Set<ArrayList<DeviceId>> temp = new HashSet<>();
780 temp.add(route);
781 targetRoutes.put(targetSw, temp);
782 }
783 i.remove();
784 }
785 }
786 // so now for this edgepair we have a per target set of routechanges
787 // process target->edgePair route
piera9941192019-04-24 16:12:47 +0200788 List<Future<Boolean>> futures = Lists.newArrayList();
pierf331a492020-01-07 15:39:39 +0100789 for (Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
Saurav Das261c3002017-06-13 15:35:54 -0700790 targetRoutes.entrySet()) {
791 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
792 entry.getKey(), ep);
piera9941192019-04-24 16:12:47 +0200793 futures.add(routePopulators.submit(new RedoRoutingEdgePair(entry.getKey(), entry.getValue(),
794 subnets, ep)));
795 }
796 if (!checkJobs(futures)) {
797 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700798 }
799 // if it gets here it has succeeded for all targets to this edge-pair
800 }
801 return true;
802 }
803
piera9941192019-04-24 16:12:47 +0200804 private final class RedoRoutingEdgePair implements PickyCallable<Boolean> {
805 private DeviceId targetSw;
806 private Set<ArrayList<DeviceId>> routes;
807 private Set<IpPrefix> subnets;
808 private EdgePair ep;
809
810 /**
811 * Builds a RedoRoutingEdgePair task which provides a result.
812 *
813 * @param targetSw the target switch
814 * @param routes the changed routes
815 * @param subnets the subnets
816 * @param ep the edge pair
817 */
818 RedoRoutingEdgePair(DeviceId targetSw, Set<ArrayList<DeviceId>> routes,
819 Set<IpPrefix> subnets, EdgePair ep) {
820 this.targetSw = targetSw;
821 this.routes = routes;
822 this.subnets = subnets;
823 this.ep = ep;
824 }
825
826 @Override
827 public Boolean call() throws Exception {
828 return redoRoutingEdgePair();
829 }
830
831 @Override
832 public int hint() {
833 return targetSw.hashCode();
834 }
835
836 private boolean redoRoutingEdgePair() {
837 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
838 routes.forEach(route -> {
839 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
840 log.debug("route: target {} -> dst {} found with next-hops {}",
841 route.get(0), route.get(1), nhops);
842 perDstNextHops.put(route.get(1), nhops);
843 });
844
845 List<Set<IpPrefix>> batchedSubnetDev1, batchedSubnetDev2;
846 if (subnets != null) {
847 batchedSubnetDev1 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
848 batchedSubnetDev2 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
849 } else {
850 batchedSubnetDev1 = config.getBatchedSubnets(ep.dev1);
851 batchedSubnetDev2 = config.getBatchedSubnets(ep.dev2);
852 }
853 List<Set<IpPrefix>> batchedSubnetBoth = Streams
854 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.intersection(a, b))
855 .filter(set -> !set.isEmpty())
856 .collect(Collectors.toList());
857 List<Set<IpPrefix>> batchedSubnetDev1Only = Streams
858 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(a, b))
859 .filter(set -> !set.isEmpty())
860 .collect(Collectors.toList());
861 List<Set<IpPrefix>> batchedSubnetDev2Only = Streams
862 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(b, a))
863 .filter(set -> !set.isEmpty())
864 .collect(Collectors.toList());
865
866 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
867 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
868
869 // handle routing to subnets common to edge-pair
870 // only if the targetSw is not part of the edge-pair and there
871 // exists a next hop to at least one of the devices in the edge-pair
872 if (!ep.includes(targetSw)
873 && ((nhDev1 != null && !nhDev1.isEmpty()) || (nhDev2 != null && !nhDev2.isEmpty()))) {
874 log.trace("getSubnets on both {} and {}: {}", ep.dev1, ep.dev2, batchedSubnetBoth);
875 for (Set<IpPrefix> prefixes : batchedSubnetBoth) {
876 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, ep.dev2,
877 perDstNextHops, prefixes)) {
878 return false; // abort everything and fail fast
879 }
880 }
881
882 }
883 // handle routing to subnets that only belong to dev1 only if
884 // a next-hop exists from the target to dev1
885 if (!batchedSubnetDev1Only.isEmpty() &&
886 batchedSubnetDev1Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
887 nhDev1 != null && !nhDev1.isEmpty()) {
888 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
889 onlyDev1NextHops.put(ep.dev1, nhDev1);
890 log.trace("getSubnets on {} only: {}", ep.dev1, batchedSubnetDev1Only);
891 for (Set<IpPrefix> prefixes : batchedSubnetDev1Only) {
892 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, null,
893 onlyDev1NextHops, prefixes)) {
894 return false; // abort everything and fail fast
895 }
896 }
897 }
898 // handle routing to subnets that only belong to dev2 only if
899 // a next-hop exists from the target to dev2
900 if (!batchedSubnetDev2Only.isEmpty() &&
901 batchedSubnetDev2Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
902 nhDev2 != null && !nhDev2.isEmpty()) {
903 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
904 onlyDev2NextHops.put(ep.dev2, nhDev2);
905 log.trace("getSubnets on {} only: {}", ep.dev2, batchedSubnetDev2Only);
906 for (Set<IpPrefix> prefixes : batchedSubnetDev2Only) {
907 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev2, null,
908 onlyDev2NextHops, prefixes)) {
909 return false; // abort everything and fail fast
910 }
911 }
912 }
913 return true;
914 }
915 }
916
Saurav Das261c3002017-06-13 15:35:54 -0700917 /**
918 * Programs targetSw in the changedRoutes for given prefixes reachable by
919 * a destination switch that is not part of an edge-pair.
920 * If no prefixes are given, the method will use configured subnets/prefixes.
921 *
922 * @param subnets a set of prefixes that need to be populated in the routing
923 * table of the target switch in the changedRoutes. Can be null,
924 * in which case all the configured prefixes belonging to the
925 * paired switches will be populated in the target switch
926 * @param changedRoutes a set of route-path changes, where each route-path is
927 * a list with its first element the src-switch (target)
928 * of the path, and the second element the dst-switch of
929 * the path.
930 * @return true if successful
931 */
piera9941192019-04-24 16:12:47 +0200932 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets, Set<ArrayList<DeviceId>> changedRoutes,
Saurav Das6430f412018-01-25 09:49:01 -0800933 Set<DeviceId> updatedDevices) {
Saurav Das261c3002017-06-13 15:35:54 -0700934 // aggregate route-path changes for each dst device
935 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
936 new HashMap<>();
937 for (ArrayList<DeviceId> route: changedRoutes) {
938 DeviceId dstSw = route.get(1);
939 ArrayList<ArrayList<DeviceId>> deviceRoutes =
940 routesBydevice.get(dstSw);
941 if (deviceRoutes == null) {
942 deviceRoutes = new ArrayList<>();
943 routesBydevice.put(dstSw, deviceRoutes);
944 }
945 deviceRoutes.add(route);
946 }
piera9941192019-04-24 16:12:47 +0200947 // iterate over the impacted devices
Saurav Das261c3002017-06-13 15:35:54 -0700948 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
949 ArrayList<ArrayList<DeviceId>> deviceRoutes =
950 routesBydevice.get(impactedDstDevice);
piera9941192019-04-24 16:12:47 +0200951 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das261c3002017-06-13 15:35:54 -0700952 for (ArrayList<DeviceId> route: deviceRoutes) {
953 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
954 route.get(0), route.get(1));
piera9941192019-04-24 16:12:47 +0200955 futures.add(routePopulators.submit(new RedoRoutingIndividualDest(subnets, route)));
pierf331a492020-01-07 15:39:39 +0100956 changedRoutes.remove(route);
piera9941192019-04-24 16:12:47 +0200957 }
958 // check the execution of each job
959 if (!checkJobs(futures)) {
960 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700961 }
962 //Only if all the flows for all impacted routes to a
963 //specific target are pushed successfully, update the
964 //ECMP graph for that target. Or else the next event
965 //would not see any changes in the ECMP graphs.
966 //In another case, the target switch has gone away, so
967 //routes can't be installed. In that case, the current map
968 //is updated here, without any flows being pushed.
969 currentEcmpSpgMap.put(impactedDstDevice,
970 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Das6430f412018-01-25 09:49:01 -0800971 updatedDevices.add(impactedDstDevice);
Saurav Das261c3002017-06-13 15:35:54 -0700972 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
973 }
974 return true;
975 }
976
piera9941192019-04-24 16:12:47 +0200977 private final class RedoRoutingIndividualDest implements PickyCallable<Boolean> {
978 private DeviceId targetSw;
979 private ArrayList<DeviceId> route;
980 private Set<IpPrefix> subnets;
981
982 /**
983 * Builds a RedoRoutingIndividualDest task, which provides a result.
984 *
985 * @param subnets a set of prefixes
986 * @param route a route-path change
987 */
988 RedoRoutingIndividualDest(Set<IpPrefix> subnets, ArrayList<DeviceId> route) {
989 this.targetSw = route.get(0);
990 this.route = route;
991 this.subnets = subnets;
992 }
993
994 @Override
995 public Boolean call() throws Exception {
996 DeviceId dstSw = route.get(1); // same as impactedDstDevice
997 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
998 if (nextHops.isEmpty()) {
999 log.debug("Could not find next hop from target:{} --> dst {} "
1000 + "skipping this route", targetSw, dstSw);
1001 return true;
1002 }
1003 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
1004 nhops.put(dstSw, nextHops);
1005 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
1006 (subnets == null) ? Sets.newHashSet() : subnets)) {
1007 return false; // abort routing and fail fast
1008 }
1009 log.debug("Populating flow rules from target: {} to dst: {}"
1010 + " is successful", targetSw, dstSw);
1011 return true;
1012 }
1013
1014 @Override
1015 public int hint() {
1016 return targetSw.hashCode();
1017 }
1018 }
1019
Saurav Das261c3002017-06-13 15:35:54 -07001020 /**
1021 * Populate ECMP rules for subnets from target to destination via nexthops.
1022 *
1023 * @param targetSw Device ID of target switch in which rules will be programmed
1024 * @param destSw1 Device ID of final destination switch to which the rules will forward
1025 * @param destSw2 Device ID of paired destination switch to which the rules will forward
1026 * A null deviceId indicates packets should only be sent to destSw1
Saurav Das97241862018-02-14 14:14:54 -08001027 * @param nextHops Map of a set of next hops per destSw
Saurav Das261c3002017-06-13 15:35:54 -07001028 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
1029 * @return true if it succeeds in populating rules
1030 */ // refactor
piera9941192019-04-24 16:12:47 +02001031 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destSw1, DeviceId destSw2,
1032 Map<DeviceId, Set<DeviceId>> nextHops, Set<IpPrefix> subnets) {
Saurav Das261c3002017-06-13 15:35:54 -07001033 boolean result;
1034 // If both target switch and dest switch are edge routers, then set IP
1035 // rule for both subnet and router IP.
1036 boolean targetIsEdge;
1037 boolean dest1IsEdge;
1038 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
1039 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
1040
1041 try {
1042 targetIsEdge = config.isEdgeDevice(targetSw);
1043 dest1IsEdge = config.isEdgeDevice(destSw1);
1044 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
1045 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
1046 if (destSw2 != null) {
1047 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
1048 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
1049 }
1050 } catch (DeviceConfigNotFoundException e) {
1051 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Das62ae6792017-05-15 15:34:25 -07001052 return false;
1053 }
Saurav Das261c3002017-06-13 15:35:54 -07001054
Daniele Moroa2aabe22021-06-07 16:28:41 +02001055 //if (targetIsEdge && dest1IsEdge) {
Charles Chan19b70032019-04-17 14:20:26 -07001056 List<Set<IpPrefix>> batchedSubnets;
1057 if (subnets != null && !subnets.isEmpty()) {
1058 batchedSubnets = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
1059 } else {
1060 batchedSubnets = config.getBatchedSubnets(destSw1);
1061 }
Saurav Das97241862018-02-14 14:14:54 -08001062 // XXX - Rethink this - ignoring routerIPs in all other switches
1063 // even edge to edge switches
Saurav Das261c3002017-06-13 15:35:54 -07001064 /*subnets.add(dest1RouterIpv4.toIpPrefix());
1065 if (dest1RouterIpv6 != null) {
1066 subnets.add(dest1RouterIpv6.toIpPrefix());
1067 }
1068 if (destSw2 != null && dest2RouterIpv4 != null) {
1069 subnets.add(dest2RouterIpv4.toIpPrefix());
1070 if (dest2RouterIpv6 != null) {
1071 subnets.add(dest2RouterIpv6.toIpPrefix());
1072 }
1073 }*/
Charles Chan19b70032019-04-17 14:20:26 -07001074 log.trace("getSubnets on {}: {}", destSw1, batchedSubnets);
1075 for (Set<IpPrefix> prefixes : batchedSubnets) {
1076 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
1077 + "for subnets {}", targetSw, destSw1,
1078 (destSw2 != null) ? ("& " + destSw2) : "",
1079 prefixes);
1080 if (!rulePopulator.populateIpRuleForSubnet(targetSw, prefixes, destSw1, destSw2, nextHops)) {
1081 return false;
1082 }
Saurav Das261c3002017-06-13 15:35:54 -07001083 }
Daniele Moroa2aabe22021-06-07 16:28:41 +02001084 //}
Saurav Das261c3002017-06-13 15:35:54 -07001085
Daniele Moroa2aabe22021-06-07 16:28:41 +02001086// if (!targetIsEdge && dest1IsEdge) {
1087// // MPLS rules in all non-edge target devices. These rules are for
1088// // individual destinations, even if the dsts are part of edge-pairs.
1089// log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1090// + "all MPLS rules", targetSw, destSw1);
1091// result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
1092// if (!result) {
1093// return false;
1094// }
1095// if (dest1RouterIpv6 != null) {
1096// int v4sid = 0, v6sid = 0;
1097// try {
1098// v4sid = config.getIPv4SegmentId(destSw1);
1099// v6sid = config.getIPv6SegmentId(destSw1);
1100// } catch (DeviceConfigNotFoundException e) {
1101// log.warn(e.getMessage());
1102// }
1103// if (v4sid != v6sid) {
1104// result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
1105// dest1RouterIpv6);
1106// if (!result) {
1107// return false;
1108// }
1109// }
1110// }
1111// }
1112//
1113// if (!targetIsEdge && !dest1IsEdge) {
1114// // MPLS rules for inter-connected spines
1115// // can be merged with above if, left it here for clarity
1116// log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1117// + "all MPLS rules", targetSw, destSw1);
1118//
1119// result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
1120// if (!result) {
1121// return false;
1122// }
1123//
1124// if (dest1RouterIpv6 != null) {
1125// int v4sid = 0, v6sid = 0;
1126// try {
1127// v4sid = config.getIPv4SegmentId(destSw1);
1128// v6sid = config.getIPv6SegmentId(destSw1);
1129// } catch (DeviceConfigNotFoundException e) {
1130// log.warn(e.getMessage());
1131// }
1132// if (v4sid != v6sid) {
1133// result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
1134// dest1RouterIpv6);
1135// if (!result) {
1136// return false;
1137// }
1138// }
1139// }
1140// }
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001141
Saurav Das261c3002017-06-13 15:35:54 -07001142 // To save on ECMP groups
1143 // avoid MPLS rules in non-edge-devices to non-edge-devices
1144 // avoid MPLS transit rules in edge-devices
1145 // avoid loopback IP rules in edge-devices to non-edge-devices
1146 return true;
Saurav Das62ae6792017-05-15 15:34:25 -07001147 }
1148
1149 /**
pier572d4a92019-04-25 18:51:51 +02001150 * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
Saurav Das62ae6792017-05-15 15:34:25 -07001151 *
1152 * @param routeChanges a set of route-path changes, where each route-path is
1153 * a list with its first element the src-switch of the path
1154 * and the second element the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -07001155 * @param failedSwitch the switchId if the route changes are for a failed switch,
1156 * otherwise null
1157 */
pier572d4a92019-04-25 18:51:51 +02001158 private void processHashGroupChangeForFailure(Set<ArrayList<DeviceId>> routeChanges,
1159 DeviceId failedSwitch) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001160 // first, ensure each routeChanges entry has two elements
pier572d4a92019-04-25 18:51:51 +02001161 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
Saurav Das6430f412018-01-25 09:49:01 -08001162 boolean someFailed = false;
pier572d4a92019-04-25 18:51:51 +02001163 boolean success;
Saurav Das6430f412018-01-25 09:49:01 -08001164 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001165 for (ArrayList<DeviceId> route : changedRoutes) {
1166 DeviceId targetSw = route.get(0);
1167 DeviceId dstSw = route.get(1);
pier572d4a92019-04-25 18:51:51 +02001168 success = fixHashGroupsForRoute(route, true);
1169 // it's possible that we cannot fix hash groups for a route
1170 // if the target switch has failed. Nevertheless the ecmp graph
1171 // for the impacted switch must still be updated.
1172 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
1173 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1174 currentEcmpSpgMap.remove(targetSw);
1175 log.debug("Updating ECMPspg for dst:{} removing failed switch "
1176 + "target:{}", dstSw, targetSw);
1177 updatedDevices.add(targetSw);
1178 updatedDevices.add(dstSw);
1179 continue;
pierf331a492020-01-07 15:39:39 +01001180
pier572d4a92019-04-25 18:51:51 +02001181 }
1182 //linkfailed - update both sides
1183 if (success) {
1184 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1185 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1186 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
1187 + " or switchdown", dstSw, targetSw);
1188 updatedDevices.add(targetSw);
1189 updatedDevices.add(dstSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001190 } else {
pier572d4a92019-04-25 18:51:51 +02001191 someFailed = true;
Saurav Das62ae6792017-05-15 15:34:25 -07001192 }
1193 }
Saurav Das6430f412018-01-25 09:49:01 -08001194 if (!someFailed) {
1195 // here is where we update all devices not touched by this instance
1196 updatedEcmpSpgMap.keySet().stream()
1197 .filter(devId -> !updatedDevices.contains(devId))
1198 .forEach(devId -> {
1199 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1200 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1201 });
1202 }
Saurav Das62ae6792017-05-15 15:34:25 -07001203 }
1204
1205 /**
pier572d4a92019-04-25 18:51:51 +02001206 * Processes a set a route-path changes due to link up by editing hash groups.
1207 *
1208 * @param routeChanges a set of route-path changes, where each route-path is
1209 * a list with its first element the src-switch of the path
1210 * and the second element the dst-switch of the path.
1211 * @return set of changed routes
1212 */
1213 private Set<ArrayList<DeviceId>> processHashGroupChangeForLinkUp(Set<ArrayList<DeviceId>> routeChanges) {
1214 // Stores changed routes
1215 Set<ArrayList<DeviceId>> doneRoutes = new HashSet<>();
1216 // first, ensure each routeChanges entry has two elements
1217 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
1218 boolean someFailed = false;
1219 boolean success;
1220 Set<DeviceId> updatedDevices = Sets.newHashSet();
1221 for (ArrayList<DeviceId> route : changedRoutes) {
1222 DeviceId targetSw = route.get(0);
1223 DeviceId dstSw = route.get(1);
1224 // linkup - fix (if possible)
1225 success = fixHashGroupsForRoute(route, false);
1226 if (success) {
1227 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1228 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1229 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
1230 targetSw, dstSw);
1231 updatedDevices.add(targetSw);
1232 updatedDevices.add(dstSw);
1233 doneRoutes.add(route);
1234 } else {
1235 someFailed = true;
1236 }
1237
1238 }
1239 if (!someFailed) {
1240 // here is where we update all devices not touched by this instance
1241 updatedEcmpSpgMap.keySet().stream()
1242 .filter(devId -> !updatedDevices.contains(devId))
1243 .forEach(devId -> {
1244 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1245 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1246 });
1247 }
1248 return doneRoutes;
1249 }
1250
1251 /**
Saurav Das62ae6792017-05-15 15:34:25 -07001252 * Edits hash groups in the src-switch (targetSw) of a route-path by
1253 * calling the groupHandler to either add or remove buckets in an existing
1254 * hash group.
1255 *
1256 * @param route a single list representing a route-path where the first element
1257 * is the src-switch (targetSw) of the route-path and the
1258 * second element is the dst-switch
1259 * @param revoke true if buckets in the hash-groups need to be removed;
1260 * false if buckets in the hash-groups need to be added
1261 * @return true if the hash group editing is successful
1262 */
1263 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1264 boolean revoke) {
1265 DeviceId targetSw = route.get(0);
1266 if (route.size() < 2) {
1267 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1268 return false;
1269 }
1270 DeviceId destSw = route.get(1);
pierf331a492020-01-07 15:39:39 +01001271 if (!seenBeforeRoutes.containsEntry(destSw, targetSw)) {
1272 log.warn("Cannot fixHashGroupsForRoute {} -> {} has not been programmed before",
1273 targetSw, destSw);
1274 return false;
1275 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001276 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001277 targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001278 // figure out the new next hops at the targetSw towards the destSw
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001279 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001280 // call group handler to change hash group at targetSw
1281 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1282 if (grpHandler == null) {
1283 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1284 + " {} hash group buckets for route:{} ", targetSw,
1285 (revoke) ? "revoke" : "repopulate", route);
1286 return false;
1287 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001288 log.debug("{} hash-groups buckets For Route {} -> {} to new next-hops {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001289 (revoke) ? "revoke" : "repopulating",
1290 targetSw, destSw, nextHops);
1291 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1292 destSw, true)
1293 : grpHandler.fixHashGroups(targetSw, nextHops,
1294 destSw, false);
1295 }
1296
1297 /**
Saurav Das261c3002017-06-13 15:35:54 -07001298 * Start the flow rule population process if it was never started. The
1299 * process finishes successfully when all flow rules are set and stops with
1300 * ABORTED status when any groups required for flows is not set yet.
Saurav Das62ae6792017-05-15 15:34:25 -07001301 */
Saurav Das261c3002017-06-13 15:35:54 -07001302 public void startPopulationProcess() {
1303 statusLock.lock();
1304 try {
1305 if (populationStatus == Status.IDLE
1306 || populationStatus == Status.SUCCEEDED
1307 || populationStatus == Status.ABORTED) {
1308 populateAllRoutingRules();
sangho28d0b6d2015-05-07 13:30:57 -07001309 } else {
Saurav Das261c3002017-06-13 15:35:54 -07001310 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1311 populationStatus);
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001312 }
Saurav Das261c3002017-06-13 15:35:54 -07001313 } finally {
1314 statusLock.unlock();
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001315 }
sanghofb7c7292015-04-13 15:15:58 -07001316 }
1317
Saurav Dasb149be12016-06-07 10:08:06 -07001318 /**
pierventre0dcbf0e2021-10-11 13:07:09 +02001319 * Revoke rules of given subnet in all edge switches. Use the
1320 * destination switch (if it is provided) to provide coordination
1321 * among the instances. Otherwise, only the leader of the target
1322 * switch can remove this subnet.
Saurav Das261c3002017-06-13 15:35:54 -07001323 *
1324 * @param subnets subnet being removed
pierventre0dcbf0e2021-10-11 13:07:09 +02001325 * @param destSw destination switch. It is null when it is called from RouteHandler,
1326 * in this context we don't have a way to remember the old locations.
Saurav Das261c3002017-06-13 15:35:54 -07001327 * @return true if succeed
1328 */
pierventre0dcbf0e2021-10-11 13:07:09 +02001329 protected boolean revokeSubnet(Set<IpPrefix> subnets, DeviceId destSw) {
piera9941192019-04-24 16:12:47 +02001330 DeviceId targetSw;
1331 List<Future<Boolean>> futures = Lists.newArrayList();
1332 for (Device sw : srManager.deviceService.getAvailableDevices()) {
1333 targetSw = sw.id();
pierventre0dcbf0e2021-10-11 13:07:09 +02001334 // In some calls, we dont know anymore the destination switch
1335 if ((destSw != null && shouldProgram(destSw)) || shouldProgram(targetSw)) {
piera9941192019-04-24 16:12:47 +02001336 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1337 } else {
1338 futures.add(CompletableFuture.completedFuture(true));
1339 }
1340 }
1341 // check the execution of each job
1342 return checkJobs(futures);
1343 }
1344
Shibu Vijayakumar5e26f8c2020-01-07 11:45:09 +00001345 /**
1346 * Revoke rules of given subnets in the given switches.
1347 *
1348 * @param targetSwitches switched from which subnets to be removed
1349 * @param subnets subnet bring removed
1350 * @return true if succeed
1351 */
1352 protected boolean revokeSubnet(Set<DeviceId> targetSwitches, Set<IpPrefix> subnets) {
1353 List<Future<Boolean>> futures = Lists.newArrayList();
1354 for (DeviceId targetSw : targetSwitches) {
1355 if (shouldProgram(targetSw)) {
1356 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1357 } else {
1358 futures.add(CompletableFuture.completedFuture(true));
1359 }
1360 }
1361 // check the execution of each job
1362 return checkJobs(futures);
1363 }
1364
piera9941192019-04-24 16:12:47 +02001365 private final class RevokeSubnet implements PickyCallable<Boolean> {
1366 private DeviceId targetSw;
1367 private Set<IpPrefix> subnets;
1368
1369 /**
1370 * Builds a RevokeSubnet task, which provides a result.
1371 *
1372 * @param subnets a set of prefixes
1373 * @param targetSw target switch
1374 */
1375 RevokeSubnet(DeviceId targetSw, Set<IpPrefix> subnets) {
1376 this.targetSw = targetSw;
1377 this.subnets = subnets;
1378 }
1379
1380 @Override
1381 public Boolean call() throws Exception {
1382 return srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets);
1383 }
1384
1385 @Override
1386 public int hint() {
1387 return targetSw.hashCode();
Saurav Das261c3002017-06-13 15:35:54 -07001388 }
1389 }
1390
1391 /**
Charles Chan910be6a2017-08-23 14:46:43 -07001392 * Populates IP rules for a route that has direct connection to the switch
pierventre37dcf4c2021-09-16 18:43:06 +02001393 * if the current instance is leading the programming of the switch.
Charles Chan910be6a2017-08-23 14:46:43 -07001394 *
1395 * @param deviceId device ID of the device that next hop attaches to
1396 * @param prefix IP prefix of the route
1397 * @param hostMac MAC address of the next hop
1398 * @param hostVlanId Vlan ID of the nexthop
1399 * @param outPort port where the next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001400 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001401 * @return future that includes the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001402 */
Charles Chan12a8a842020-02-14 13:23:57 -08001403 CompletableFuture<Objective> populateRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac,
1404 VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001405 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001406 return srManager.routingRulePopulator.populateRoute(deviceId, prefix,
1407 hostMac, hostVlanId, outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001408 }
Charles Chan12a8a842020-02-14 13:23:57 -08001409 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001410 }
1411
1412 /**
1413 * Removes IP rules for a route when the next hop is gone.
pierventre37dcf4c2021-09-16 18:43:06 +02001414 * if the current instance is leading the programming of the switch.
Charles Chan910be6a2017-08-23 14:46:43 -07001415 *
1416 * @param deviceId device ID of the device that next hop attaches to
1417 * @param prefix IP prefix of the route
1418 * @param hostMac MAC address of the next hop
1419 * @param hostVlanId Vlan ID of the nexthop
1420 * @param outPort port that next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001421 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001422 * @return future that carries the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001423 */
Charles Chan12a8a842020-02-14 13:23:57 -08001424 CompletableFuture<Objective> revokeRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001425 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001426 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001427 return srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId,
1428 outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001429 }
Charles Chan12a8a842020-02-14 13:23:57 -08001430 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001431 }
1432
Charles Chan12a8a842020-02-14 13:23:57 -08001433 CompletableFuture<Objective> populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001434 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001435 return srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001436 }
Charles Chan12a8a842020-02-14 13:23:57 -08001437 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001438 }
1439
Charles Chan12a8a842020-02-14 13:23:57 -08001440 CompletableFuture<Objective> revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001441 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001442 return srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001443 }
Charles Chan12a8a842020-02-14 13:23:57 -08001444 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001445 }
1446
pierventrea3989be2021-01-08 16:43:17 +01001447 CompletableFuture<Objective> updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1448 VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001449 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001450 return srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
Charles Chand66d6712018-03-29 16:03:41 -07001451 }
pierventrea3989be2021-01-08 16:43:17 +01001452 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001453 }
1454
pierventrea3989be2021-01-08 16:43:17 +01001455 CompletableFuture<Objective> updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix,
1456 MacAddress hostMac, VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001457 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001458 return srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
Charles Chand66d6712018-03-29 16:03:41 -07001459 vlanId, popVlan, install);
1460 }
pierventrea3989be2021-01-08 16:43:17 +01001461 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001462 }
1463
Charles Chan910be6a2017-08-23 14:46:43 -07001464 /**
pierventre37dcf4c2021-09-16 18:43:06 +02001465 * Program IP rules for a route when the next hop is double-tagged.
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001466 *
1467 * @param deviceId device ID that next hop attaches to
1468 * @param prefix IP prefix of the route
1469 * @param hostMac MAC address of the next hop
1470 * @param innerVlan Inner Vlan ID of the next hop
1471 * @param outerVlan Outer Vlan ID of the next hop
1472 * @param outerTpid Outer TPID of the next hop
1473 * @param outPort port that the next hop attaches to
pierventre37dcf4c2021-09-16 18:43:06 +02001474 * @param install whether or not install the route
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001475 */
pierventre37dcf4c2021-09-16 18:43:06 +02001476 void programDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1477 VlanId outerVlan, EthType outerTpid, PortNumber outPort, boolean install) {
1478 if (shouldProgram(deviceId)) {
1479 if (install) {
1480 srManager.routingRulePopulator.populateDoubleTaggedRoute(
1481 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1482 } else {
1483 srManager.routingRulePopulator.revokeDoubleTaggedRoute(
1484 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1485 }
Charles Chan61c086d2019-07-26 17:46:15 -07001486 srManager.routingRulePopulator.processDoubleTaggedFilter(
pierventre37dcf4c2021-09-16 18:43:06 +02001487 deviceId, outPort, outerVlan, innerVlan, install);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001488 }
1489 }
1490
1491 /**
pierf331a492020-01-07 15:39:39 +01001492 * Purges seen before routes for a given device.
1493 * @param deviceId the device id
1494 */
1495 void purgeSeenBeforeRoutes(DeviceId deviceId) {
1496 log.debug("Purging seen before routes having as target {}", deviceId);
1497 Set<Entry<DeviceId, DeviceId>> routesToPurge = seenBeforeRoutes.stream()
1498 .filter(entry -> entry.getValue().equals(deviceId))
1499 .collect(Collectors.toSet());
1500 routesToPurge.forEach(entry -> seenBeforeRoutes.remove(entry.getKey(), entry.getValue()));
1501 }
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001502
1503 /**
Saurav Das261c3002017-06-13 15:35:54 -07001504 * Remove ECMP graph entry for the given device. Typically called when
1505 * device is no longer available.
1506 *
1507 * @param deviceId the device for which graphs need to be purged
1508 */
Charles Chanfbcb8812018-04-18 18:41:05 -07001509 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Das6430f412018-01-25 09:49:01 -08001510 statusLock.lock();
1511 try {
Saurav Das6430f412018-01-25 09:49:01 -08001512 if (populationStatus == Status.STARTED) {
1513 log.warn("Previous rule population is not finished. Cannot"
1514 + " proceeed with purgeEcmpGraph for {}", deviceId);
1515 return;
1516 }
1517 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1518 currentEcmpSpgMap.remove(deviceId);
1519 if (updatedEcmpSpgMap != null) {
1520 updatedEcmpSpgMap.remove(deviceId);
1521 }
1522 } finally {
1523 statusLock.unlock();
Saurav Das261c3002017-06-13 15:35:54 -07001524 }
1525 }
1526
Saurav Das00e553b2018-04-21 17:19:48 -07001527 /**
1528 * Attempts a full reroute of route-paths if topology has changed relatively
1529 * close to a mastership change event. Does not do a reroute if mastership
1530 * change is due to reasons other than a ONOS cluster event - for example a
1531 * call to balance-masters, or a switch up/down event.
1532 *
1533 * @param devId the device identifier for which mastership has changed
1534 * @param me the mastership event
1535 */
1536 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1537 // give small delay to absorb mastership events that are caused by
1538 // device that has disconnected from cluster
Saurav Das49368392018-04-23 18:42:12 -07001539 executorServiceMstChg.schedule(new MasterChange(devId, me),
1540 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
Saurav Das00e553b2018-04-21 17:19:48 -07001541 }
1542
pierventre37dcf4c2021-09-16 18:43:06 +02001543 /*
1544 * Even though the current implementation does not heavily rely
1545 * on mastership, we keep using the mastership and cluster events
1546 * as heuristic to perform full reroutes and to make sure we don't
1547 * lose any event when instances fail.
1548 */
Saurav Das00e553b2018-04-21 17:19:48 -07001549 protected final class MasterChange implements Runnable {
1550 private DeviceId devId;
1551 private MastershipEvent me;
1552 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1553 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
Saurav Dasec683dc2018-04-27 18:42:30 -07001554 private static final long EDGE_PORT_EVENT_THRESHOLD = 10000; //ms
Saurav Das68e1b6a2018-06-11 17:02:31 -07001555 private static final long FULL_REROUTE_THRESHOLD = 10000; // ms
Saurav Das00e553b2018-04-21 17:19:48 -07001556
1557 MasterChange(DeviceId devId, MastershipEvent me) {
1558 this.devId = devId;
1559 this.me = me;
1560 }
1561
1562 @Override
1563 public void run() {
1564 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1565 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1566
1567 // ignore event for lost switch if cluster event hasn't happened -
1568 // device down event will handle it
1569 if ((me.roleInfo().master() == null
1570 || !srManager.deviceService.isAvailable(devId))
1571 && !clusterEvent) {
1572 log.debug("Full reroute not required for lost device: {}/{} "
1573 + "clusterEvent/timeSince: {}/{}",
1574 devId, me.roleInfo(), clusterEvent, lce);
1575 return;
1576 }
1577
1578 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1579 long lde = Instant.now().toEpochMilli() - update;
1580 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1581
1582 // ignore event for recently connected switch if cluster event hasn't
1583 // happened - link up events will handle it
1584 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1585 && !clusterEvent) {
1586 log.debug("Full reroute not required for recently available"
1587 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1588 + "clusterEvent/timeSince: {}/{}",
1589 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1590 return;
1591 }
1592
Saurav Dasec683dc2018-04-27 18:42:30 -07001593 long lepe = Instant.now().toEpochMilli()
1594 - srManager.lastEdgePortEvent.toEpochMilli();
1595 boolean edgePortEvent = lepe < EDGE_PORT_EVENT_THRESHOLD;
1596
Saurav Das00e553b2018-04-21 17:19:48 -07001597 // if it gets here, then mastership change is likely due to onos
1598 // instance failure, or network partition in onos cluster
1599 // normally a mastership change like this does not require re-programming
1600 // but if topology changes happen at the same time then we may miss events
1601 if (!isRoutingStable() && clusterEvent) {
Saurav Dasec683dc2018-04-27 18:42:30 -07001602 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
Saurav Das00e553b2018-04-21 17:19:48 -07001603 + "due to clusterEvent {} ms ago .. attempting full reroute",
1604 devId, me.roleInfo(), lce);
pierventre37dcf4c2021-09-16 18:43:06 +02001605 if (shouldProgram(devId)) {
1606 // old leader could have died when populating filters
Saurav Das00e553b2018-04-21 17:19:48 -07001607 populatePortAddressingRules(devId);
1608 }
pierventre37dcf4c2021-09-16 18:43:06 +02001609 // old leader could have died when creating groups
Saurav Das00e553b2018-04-21 17:19:48 -07001610 // XXX right now we have no fine-grained way to only make changes
Saurav Das68e1b6a2018-06-11 17:02:31 -07001611 // for the route paths affected by this device. Thus we do a
1612 // full reroute after purging all hash groups. We also try to do
1613 // it only once, irrespective of the number of devices
pierventre37dcf4c2021-09-16 18:43:06 +02001614 // that changed mastership when their leader instance died.
Saurav Das68e1b6a2018-06-11 17:02:31 -07001615 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
1616 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
1617 if (doFullReroute) {
1618 lastFullReroute = Instant.now();
1619 for (Device dev : srManager.deviceService.getDevices()) {
1620 if (shouldProgram(dev.id())) {
1621 srManager.purgeHashedNextObjectiveStore(dev.id());
pierf331a492020-01-07 15:39:39 +01001622 seenBeforeRoutes.removeAll(dev.id());
Saurav Das68e1b6a2018-06-11 17:02:31 -07001623 }
1624 }
1625 // give small delay to ensure entire store is purged
1626 executorServiceFRR.schedule(new FullRerouteAfterPurge(),
1627 PURGE_DELAY,
1628 TimeUnit.MILLISECONDS);
1629 } else {
1630 log.warn("Full reroute attempted {} ms ago .. skipping", lfrr);
1631 }
Saurav Dasec683dc2018-04-27 18:42:30 -07001632
1633 } else if (edgePortEvent && clusterEvent) {
1634 log.warn("Mastership changed for dev: {}/{} due to clusterEvent {} ms ago "
1635 + "while edge-port event happened {} ms ago "
1636 + " .. reprogramming all edge-ports",
1637 devId, me.roleInfo(), lce, lepe);
1638 if (shouldProgram(devId)) {
1639 srManager.deviceService.getPorts(devId).stream()
1640 .filter(p -> srManager.interfaceService
1641 .isConfigured(new ConnectPoint(devId, p.number())))
1642 .forEach(p -> srManager.processPortUpdated(devId, p));
1643 }
1644
Saurav Das00e553b2018-04-21 17:19:48 -07001645 } else {
1646 log.debug("Stable route-paths .. full reroute not attempted for "
1647 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1648 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1649 deviceEvent, lde, clusterEvent, lce);
1650 }
1651 }
1652 }
1653
Saurav Das68e1b6a2018-06-11 17:02:31 -07001654 /**
1655 * Performs a full reroute of routing rules in all the switches. Assumes
1656 * caller has purged hash groups from the nextObjective store, otherwise
1657 * re-uses ones available in the store.
1658 */
1659 protected final class FullRerouteAfterPurge implements Runnable {
1660 @Override
1661 public void run() {
1662 populateAllRoutingRules();
1663 }
1664 }
1665
1666
Saurav Das261c3002017-06-13 15:35:54 -07001667 //////////////////////////////////////
1668 // Routing helper methods and classes
1669 //////////////////////////////////////
1670
1671 /**
Saurav Das68e1b6a2018-06-11 17:02:31 -07001672 * Computes set of affected routes due to failed link. Assumes previous ecmp
1673 * shortest-path graph exists for a switch in order to compute affected
1674 * routes. If such a graph does not exist, the method returns null.
Saurav Dasb149be12016-06-07 10:08:06 -07001675 *
1676 * @param linkFail the failed link
1677 * @return the set of affected routes which may be empty if no routes were
Saurav Das68e1b6a2018-06-11 17:02:31 -07001678 * affected
Saurav Dasb149be12016-06-07 10:08:06 -07001679 */
sanghofb7c7292015-04-13 15:15:58 -07001680 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sanghofb7c7292015-04-13 15:15:58 -07001681 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1682
1683 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001684 log.debug("Computing the impacted routes for device {} due to link fail",
1685 sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001686 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001687 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001688 continue;
1689 }
Charles Chand66d6712018-03-29 16:03:41 -07001690 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
pierventre37dcf4c2021-09-16 18:43:06 +02001691 // check for leadership change since last run
Saurav Das00e553b2018-04-21 17:19:48 -07001692 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001693 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001694 + " ... nuking current ECMPspg", sw.id());
1695 currentEcmpSpgMap.remove(sw.id());
1696 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001697 lastProgrammed.add(sw.id());
1698
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001699 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1700 if (ecmpSpg == null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001701 log.warn("No existing ECMP graph for switch {}. Assuming "
1702 + "all route-paths have changed towards it.", rootSw);
1703 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
1704 if (targetSw.equals(rootSw)) {
1705 continue;
1706 }
1707 routes.add(Lists.newArrayList(targetSw, rootSw));
1708 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1709 }
1710 continue;
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001711 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001712
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001713 if (log.isDebugEnabled()) {
1714 log.debug("Root switch: {}", rootSw);
1715 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1716 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1717 }
1718 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1719 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1720 // figure out if the broken link affected any route-paths in this graph
1721 for (Integer itrIdx : switchVia.keySet()) {
1722 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1723 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1724 switchVia.get(itrIdx);
1725 for (DeviceId targetSw : swViaMap.keySet()) {
1726 log.trace("TargetSwitch {} --> RootSwitch {}",
1727 targetSw, rootSw);
Saurav Dasb149be12016-06-07 10:08:06 -07001728 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1729 log.trace(" Via:");
Pier Ventreadb4ae62016-11-23 09:57:42 -08001730 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb149be12016-06-07 10:08:06 -07001731 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001732 Set<ArrayList<DeviceId>> subLinks =
1733 computeLinks(targetSw, rootSw, swViaMap);
1734 for (ArrayList<DeviceId> alink: subLinks) {
1735 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1736 alink.get(1).equals(linkFail.dst().deviceId()))
1737 ||
1738 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1739 alink.get(1).equals(linkFail.src().deviceId()))) {
1740 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1741 ArrayList<DeviceId> aRoute = new ArrayList<>();
1742 aRoute.add(targetSw); // switch with rules to populate
1743 aRoute.add(rootSw); // towards this destination
1744 routes.add(aRoute);
1745 break;
1746 }
sanghofb7c7292015-04-13 15:15:58 -07001747 }
1748 }
1749 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001750
sanghofb7c7292015-04-13 15:15:58 -07001751 }
sangho28d0b6d2015-05-07 13:30:57 -07001752
sanghofb7c7292015-04-13 15:15:58 -07001753 }
sanghofb7c7292015-04-13 15:15:58 -07001754 return routes;
1755 }
1756
Saurav Das1b391d52016-11-29 14:27:25 -08001757 /**
1758 * Computes set of affected routes due to new links or failed switches.
1759 *
Saurav Dasdc7f2752018-03-18 21:28:15 -07001760 * @param failedSwitch deviceId of failed switch if any
Saurav Das1b391d52016-11-29 14:27:25 -08001761 * @return the set of affected routes which may be empty if no routes were
1762 * affected
1763 */
Saurav Dascea556f2018-03-05 14:37:16 -08001764 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das261c3002017-06-13 15:35:54 -07001765 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das1b391d52016-11-29 14:27:25 -08001766 ImmutableSet.builder();
sanghofb7c7292015-04-13 15:15:58 -07001767
1768 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das261c3002017-06-13 15:35:54 -07001769 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001770 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001771 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001772 continue;
1773 }
Charles Chand66d6712018-03-29 16:03:41 -07001774 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das261c3002017-06-13 15:35:54 -07001775 if (log.isTraceEnabled()) {
1776 log.trace("Device links for dev: {}", rootSw);
1777 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1778 log.trace("{} -> {} ", link.src().deviceId(),
1779 link.dst().deviceId());
1780 }
Saurav Dasb149be12016-06-07 10:08:06 -07001781 }
pierventre37dcf4c2021-09-16 18:43:06 +02001782 // check for leadership change since last run
Saurav Das00e553b2018-04-21 17:19:48 -07001783 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001784 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001785 + " ... nuking current ECMPspg", sw.id());
1786 currentEcmpSpgMap.remove(sw.id());
1787 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001788 lastProgrammed.add(sw.id());
Saurav Das261c3002017-06-13 15:35:54 -07001789 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1790 if (currEcmpSpg == null) {
1791 log.debug("No existing ECMP graph for device {}.. adding self as "
1792 + "changed route", rootSw);
1793 changedRtBldr.add(Lists.newArrayList(rootSw));
1794 continue;
1795 }
1796 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Dasdebcf882018-04-06 20:16:01 -07001797 if (newEcmpSpg == null) {
1798 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1799 continue;
1800 }
Saurav Das261c3002017-06-13 15:35:54 -07001801 if (log.isDebugEnabled()) {
1802 log.debug("Root switch: {}", rootSw);
1803 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1804 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1805 }
1806 // first use the updated/new map to compare to current/existing map
1807 // as new links may have come up
1808 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1809 // then use the current/existing map to compare to updated/new map
1810 // as switch may have been removed
1811 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho28d0b6d2015-05-07 13:30:57 -07001812 }
Saurav Das1b391d52016-11-29 14:27:25 -08001813 }
sanghofb7c7292015-04-13 15:15:58 -07001814
Saurav Dascea556f2018-03-05 14:37:16 -08001815 // handle clearing state for a failed switch in case the switch does
1816 // not have a pair, or the pair is not available
1817 if (failedSwitch != null) {
Charles Chan6dbcd252018-04-02 11:46:38 -07001818 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1819 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001820 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1821 srManager.deviceService.getDevices().forEach(dev -> {
pierventre37dcf4c2021-09-16 18:43:06 +02001822 if (!dev.id().equals(failedSwitch) && shouldProgram(dev.id())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001823 log.debug(" : {}", dev.id());
1824 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1825 }
1826 });
1827 }
1828 }
1829
Saurav Das261c3002017-06-13 15:35:54 -07001830 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das1b391d52016-11-29 14:27:25 -08001831 for (ArrayList<DeviceId> route: changedRoutes) {
1832 log.debug("Route changes Target -> Root");
1833 if (route.size() == 1) {
1834 log.debug(" : all -> {}", route.get(0));
1835 } else {
1836 log.debug(" : {} -> {}", route.get(0), route.get(1));
1837 }
1838 }
1839 return changedRoutes;
1840 }
1841
pier572d4a92019-04-25 18:51:51 +02001842 // Utility method to expands the route changes in two elements array using
1843 // the ECMP graph. Caller represents all to dst switch routes with an
1844 // array containing only the dst switch.
1845 private Set<ArrayList<DeviceId>> getExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1846 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1847 // Ensure each routeChanges entry has two elements
1848 for (ArrayList<DeviceId> route : routeChanges) {
1849 if (route.size() == 1) {
1850 DeviceId dstSw = route.get(0);
1851 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
1852 if (ec == null) {
1853 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
1854 return Collections.emptySet();
1855 }
1856 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
1857 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
1858 changedRoutes.add(Lists.newArrayList(target, dstSw));
1859 });
1860 });
1861 } else {
1862 DeviceId targetSw = route.get(0);
1863 DeviceId dstSw = route.get(1);
1864 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
1865 }
1866 }
1867 return changedRoutes;
1868 }
1869
1870 // Utility method to expands the route changes in two elements array using
1871 // the available devices. Caller represents all to dst switch routes with an
1872 // array containing only the dst switch.
1873 private Set<ArrayList<DeviceId>> getAllExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1874 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1875 // Ensure each routeChanges entry has two elements
1876 for (ArrayList<DeviceId> route : routeChanges) {
1877 if (route.size() == 1) {
1878 // route-path changes are from everyone else to this switch
1879 DeviceId dstSw = route.get(0);
1880 srManager.deviceService.getAvailableDevices().forEach(sw -> {
1881 if (!sw.id().equals(dstSw)) {
1882 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
1883 }
1884 });
1885 } else {
1886 changedRoutes.add(route);
1887 }
1888 }
1889 return changedRoutes;
1890 }
1891
Saurav Das1b391d52016-11-29 14:27:25 -08001892 /**
1893 * For the root switch, searches all the target nodes reachable in the base
1894 * graph, and compares paths to the ones in the comp graph.
1895 *
1896 * @param base the graph that is indexed for all reachable target nodes
1897 * from the root node
1898 * @param comp the graph that the base graph is compared to
1899 * @param rootSw both ecmp graphs are calculated for the root node
1900 * @return all the routes that have changed in the base graph
1901 */
1902 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1903 EcmpShortestPathGraph comp,
1904 DeviceId rootSw) {
1905 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1906 ImmutableSet.builder();
1907 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1908 base.getAllLearnedSwitchesAndVia();
1909 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1910 comp.getAllLearnedSwitchesAndVia();
1911 for (Integer itrIdx : baseMap.keySet()) {
1912 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1913 baseMap.get(itrIdx);
1914 for (DeviceId targetSw : baseViaMap.keySet()) {
1915 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1916 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1917 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Das62ae6792017-05-15 15:34:25 -07001918 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das1b391d52016-11-29 14:27:25 -08001919 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das261c3002017-06-13 15:35:54 -07001920 route.add(targetSw); // switch with rules to populate
1921 route.add(rootSw); // towards this destination
Saurav Das1b391d52016-11-29 14:27:25 -08001922 changedRoutesBuilder.add(route);
sanghofb7c7292015-04-13 15:15:58 -07001923 }
1924 }
sangho28d0b6d2015-05-07 13:30:57 -07001925 }
Saurav Das1b391d52016-11-29 14:27:25 -08001926 return changedRoutesBuilder.build();
sanghofb7c7292015-04-13 15:15:58 -07001927 }
1928
Saurav Das261c3002017-06-13 15:35:54 -07001929 /**
1930 * Returns the ECMP paths traversed to reach the target switch.
1931 *
1932 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1933 * @param targetSw the switch to reach from the root switch
1934 * @return the nodes traversed on ECMP paths to the target switch
1935 */
sanghofb7c7292015-04-13 15:15:58 -07001936 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das1b391d52016-11-29 14:27:25 -08001937 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sanghofb7c7292015-04-13 15:15:58 -07001938 for (Integer itrIdx : switchVia.keySet()) {
1939 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1940 switchVia.get(itrIdx);
Saurav Das1b391d52016-11-29 14:27:25 -08001941 if (swViaMap.get(targetSw) == null) {
sanghofb7c7292015-04-13 15:15:58 -07001942 continue;
1943 } else {
Saurav Das1b391d52016-11-29 14:27:25 -08001944 return swViaMap.get(targetSw);
sanghofb7c7292015-04-13 15:15:58 -07001945 }
1946 }
1947
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001948 return null;
sanghofb7c7292015-04-13 15:15:58 -07001949 }
1950
Saurav Das261c3002017-06-13 15:35:54 -07001951 /**
1952 * Utility method to break down a path from src to dst device into a collection
1953 * of links.
1954 *
1955 * @param src src device of the path
1956 * @param dst dst device of the path
1957 * @param viaMap path taken from src to dst device
1958 * @return collection of links in the path
1959 */
sanghofb7c7292015-04-13 15:15:58 -07001960 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1961 DeviceId dst,
1962 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1963 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1964 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1965 DeviceId linkSrc = src;
1966 DeviceId linkDst = dst;
1967 for (DeviceId viaDevice: via) {
1968 ArrayList<DeviceId> link = new ArrayList<>();
1969 linkDst = viaDevice;
1970 link.add(linkSrc);
1971 link.add(linkDst);
1972 subLinks.add(link);
1973 linkSrc = viaDevice;
1974 }
1975 ArrayList<DeviceId> link = new ArrayList<>();
1976 link.add(linkSrc);
1977 link.add(dst);
1978 subLinks.add(link);
1979 }
1980
1981 return subLinks;
1982 }
1983
Charles Chanc22cef32016-04-29 14:38:22 -07001984 /**
pierventre37dcf4c2021-09-16 18:43:06 +02001985 * Determines whether this controller instance should program the given deviceId, based on
1986 * workPartitionService and pairDeviceId if one exists. Once an instance is elected, it will
1987 * be the only instance responsible for programming both devices in the pair until it goes down.
Charles Chanc22cef32016-04-29 14:38:22 -07001988 *
pierventre37dcf4c2021-09-16 18:43:06 +02001989 * @param deviceId the device id
1990 * @return true if this instance leads the programming, false otherwise
Charles Chanc22cef32016-04-29 14:38:22 -07001991 */
pierventre37dcf4c2021-09-16 18:43:06 +02001992 public boolean shouldProgram(DeviceId deviceId) {
1993 NodeId leader = shouldProgram.get(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07001994 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
pierventre37dcf4c2021-09-16 18:43:06 +02001995 if (leader != null) {
1996 log.trace("shouldProgram dev:{} leader:{}", deviceId, leader);
1997 return currentNodeId.equals(leader);
sangho80f11cb2015-04-01 13:05:26 -07001998 }
Charles Chand66d6712018-03-29 16:03:41 -07001999
pierventre37dcf4c2021-09-16 18:43:06 +02002000 // hash function is independent from the order of the devices in the edge pair
2001 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
2002 EdgePair edgePair = new EdgePair(deviceId, pairDeviceId.orElse(DeviceId.NONE));
Charles Chand66d6712018-03-29 16:03:41 -07002003
pierventre37dcf4c2021-09-16 18:43:06 +02002004 leader = srManager.workPartitionService.getLeader(edgePair, HASH_FUNCTION);
2005 if (leader != null) {
2006 log.debug("{} is the leader, should handle routing for {}/pair={}", leader, deviceId,
2007 pairDeviceId);
2008 shouldProgram.put(deviceId, leader);
2009 return leader.equals(currentNodeId);
Charles Chand66d6712018-03-29 16:03:41 -07002010 } else {
pierventre37dcf4c2021-09-16 18:43:06 +02002011 log.error("Fail to elect a leader for {}/pair={}. Abort.", deviceId, pairDeviceId);
2012 shouldProgram.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07002013 return false;
2014 }
2015 }
2016
pierventre37dcf4c2021-09-16 18:43:06 +02002017 void invalidateShouldProgram(DeviceId deviceId) {
2018 shouldProgram.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07002019 }
2020
pierventre37dcf4c2021-09-16 18:43:06 +02002021 void invalidateShouldProgram() {
2022 shouldProgram.clear();
Charles Chanfbcb8812018-04-18 18:41:05 -07002023 }
2024
pierventre37dcf4c2021-09-16 18:43:06 +02002025
Charles Chand66d6712018-03-29 16:03:41 -07002026 /**
2027 * Returns a set of device ID, containing given device and its pair device if exist.
2028 *
2029 * @param deviceId Device ID
2030 * @return a set of device ID, containing given device and its pair device if exist.
2031 */
2032 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
2033 Set<DeviceId> ret = Sets.newHashSet(deviceId);
2034 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
2035 return ret;
sangho80f11cb2015-04-01 13:05:26 -07002036 }
2037
Charles Chanc22cef32016-04-29 14:38:22 -07002038 /**
Saurav Das261c3002017-06-13 15:35:54 -07002039 * Returns the set of deviceIds which are the next hops from the targetSw
2040 * to the dstSw according to the latest ECMP spg.
2041 *
2042 * @param targetSw the switch for which the next-hops are desired
2043 * @param dstSw the switch to which the next-hops lead to from the targetSw
2044 * @return set of next hop deviceIds, could be empty if no next hops are found
2045 */
2046 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
2047 boolean targetIsEdge = false;
2048 try {
2049 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
2050 } catch (DeviceConfigNotFoundException e) {
2051 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
2052 + "continuing to getNextHops", targetSw);
2053 }
2054
2055 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
2056 if (ecmpSpg == null) {
2057 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
2058 return ImmutableSet.of();
2059 }
2060 HashMap<Integer,
2061 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
2062 ecmpSpg.getAllLearnedSwitchesAndVia();
2063 for (Integer itrIdx : switchVia.keySet()) {
2064 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
2065 switchVia.get(itrIdx);
2066 for (DeviceId target : swViaMap.keySet()) {
2067 if (!target.equals(targetSw)) {
2068 continue;
2069 }
Saurav Das49368392018-04-23 18:42:12 -07002070 // optimization for spines to not use leaves to get
2071 // to a spine or other leaves. Also leaves should not use other
2072 // leaves to get to the destination
2073 if ((!targetIsEdge && itrIdx > 1) || targetIsEdge) {
Saurav Das97241862018-02-14 14:14:54 -08002074 boolean pathdevIsEdge = false;
2075 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
Saurav Das49368392018-04-23 18:42:12 -07002076 log.debug("Evaluating next-hop in path: {}", via);
Saurav Das97241862018-02-14 14:14:54 -08002077 for (DeviceId pathdev : via) {
2078 try {
2079 pathdevIsEdge = srManager.deviceConfiguration
2080 .isEdgeDevice(pathdev);
2081 } catch (DeviceConfigNotFoundException e) {
2082 log.warn(e.getMessage());
2083 }
2084 if (pathdevIsEdge) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002085 log.debug("Avoiding {} hop path for targetSw:{}"
Saurav Das97241862018-02-14 14:14:54 -08002086 + " --> dstSw:{} which goes through an edge"
2087 + " device {} in path {}", itrIdx,
2088 targetSw, dstSw, pathdev, via);
2089 return ImmutableSet.of();
2090 }
2091 }
2092 }
Saurav Das261c3002017-06-13 15:35:54 -07002093 }
2094 Set<DeviceId> nextHops = new HashSet<>();
2095 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
2096 if (via.isEmpty()) {
2097 // the dstSw is the next-hop from the targetSw
2098 nextHops.add(dstSw);
2099 } else {
2100 // first elem is next-hop in each ECMP path
2101 nextHops.add(via.get(0));
2102 }
2103 }
Saurav Das49368392018-04-23 18:42:12 -07002104 log.debug("target {} --> dst: {} has next-hops:{}", targetSw,
2105 dstSw, nextHops);
Saurav Das261c3002017-06-13 15:35:54 -07002106 return nextHops;
2107 }
2108 }
Saurav Das49368392018-04-23 18:42:12 -07002109 log.debug("No next hops found for target:{} --> dst: {}", targetSw, dstSw);
Saurav Das261c3002017-06-13 15:35:54 -07002110 return ImmutableSet.of(); //no next-hops found
2111 }
2112
Saurav Das261c3002017-06-13 15:35:54 -07002113 //////////////////////////////////////
2114 // Filtering rule creation
2115 //////////////////////////////////////
2116
2117 /**
Saurav Dasf9332192017-02-18 14:05:44 -08002118 * Populates filtering rules for port, and punting rules
2119 * for gateway IPs, loopback IPs and arp/ndp traffic.
pierventre37dcf4c2021-09-16 18:43:06 +02002120 * Should only be called by the instance leading the programming
2121 * for this device/port.
sangho80f11cb2015-04-01 13:05:26 -07002122 *
2123 * @param deviceId Switch ID to set the rules
2124 */
Charles Chanfbcb8812018-04-18 18:41:05 -07002125 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das07c74602016-04-27 18:35:50 -07002126 // Although device is added, sometimes device store does not have the
2127 // ports for this device yet. It results in missing filtering rules in the
2128 // switch. We will attempt it a few times. If it still does not work,
2129 // user can manually repopulate using CLI command sr-reroute-network
Charles Chan18fa4252017-02-08 16:10:40 -08002130 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002131 if (firstRun == null) {
2132 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das07c74602016-04-27 18:35:50 -07002133 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002134 executorService.schedule(new RetryFilters(deviceId, firstRun),
2135 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sangho80f11cb2015-04-01 13:05:26 -07002136 }
2137
2138 /**
Saurav Dasd1872b02016-12-02 15:43:47 -08002139 * RetryFilters populates filtering objectives for a device and keeps retrying
2140 * till the number of ports filtered are constant for a predefined number
2141 * of attempts.
2142 */
2143 protected final class RetryFilters implements Runnable {
2144 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
2145 DeviceId devId;
2146 int counter;
2147 PortFilterInfo prevRun;
2148
2149 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das07c74602016-04-27 18:35:50 -07002150 devId = deviceId;
Saurav Dasd1872b02016-12-02 15:43:47 -08002151 prevRun = previousRun;
2152 counter = 0;
Saurav Das07c74602016-04-27 18:35:50 -07002153 }
2154
2155 @Override
2156 public void run() {
Charles Chan077314e2017-06-22 14:27:17 -07002157 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chan18fa4252017-02-08 16:10:40 -08002158 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002159 boolean sameResult = prevRun.equals(thisRun);
2160 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
2161 thisRun, sameResult);
Ray Milkey614352e2018-02-26 09:36:31 -08002162 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Dasf9332192017-02-18 14:05:44 -08002163 // exponentially increasing intervals for retries
2164 executorService.schedule(this,
2165 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
2166 TimeUnit.MILLISECONDS);
Saurav Dasd1872b02016-12-02 15:43:47 -08002167 if (!sameResult) {
2168 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
2169 }
Saurav Das07c74602016-04-27 18:35:50 -07002170 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002171 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das07c74602016-04-27 18:35:50 -07002172 }
Saurav Das07c74602016-04-27 18:35:50 -07002173 }
piera9941192019-04-24 16:12:47 +02002174
2175 // Check jobs completion. It returns false if one of the job fails
2176 // and cancel the remaining
2177 private boolean checkJobs(List<Future<Boolean>> futures) {
2178 boolean completed = true;
2179 for (Future<Boolean> future : futures) {
2180 try {
2181 if (completed) {
2182 if (!future.get()) {
2183 completed = false;
2184 }
2185 } else {
2186 future.cancel(true);
2187 }
2188 } catch (InterruptedException | ExecutionException e) {
2189 completed = false;
2190 }
2191 }
2192 return completed;
2193 }
sangho80f11cb2015-04-01 13:05:26 -07002194}