blob: 1dbc70163f7963a4a2d1987295c36f915fca1816 [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.segmentrouting;
17
Saurav Das62ae6792017-05-15 15:34:25 -070018import com.google.common.collect.ImmutableMap;
19import com.google.common.collect.ImmutableMap.Builder;
Charles Chanc22cef32016-04-29 14:38:22 -070020import com.google.common.collect.ImmutableSet;
Saurav Das1b391d52016-11-29 14:27:25 -080021import com.google.common.collect.Lists;
sanghofb7c7292015-04-13 15:15:58 -070022import com.google.common.collect.Maps;
23import com.google.common.collect.Sets;
Saurav Dasfbe74572017-08-03 18:30:35 -070024
pierventre37dcf4c2021-09-16 18:43:06 +020025import com.google.common.hash.Hasher;
26import com.google.common.hash.Hashing;
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -070027import org.onlab.packet.EthType;
Charles Chan19b70032019-04-17 14:20:26 -070028import com.google.common.collect.Streams;
sangho9b169e32015-04-14 16:27:13 -070029import org.onlab.packet.Ip4Address;
Pier Ventreadb4ae62016-11-23 09:57:42 -080030import org.onlab.packet.Ip6Address;
sangho80f11cb2015-04-01 13:05:26 -070031import org.onlab.packet.IpPrefix;
Charles Chan910be6a2017-08-23 14:46:43 -070032import org.onlab.packet.MacAddress;
33import org.onlab.packet.VlanId;
piera9941192019-04-24 16:12:47 +020034import org.onlab.util.PredictableExecutor;
35import org.onlab.util.PredictableExecutor.PickyCallable;
Saurav Das261c3002017-06-13 15:35:54 -070036import org.onosproject.cluster.NodeId;
Saurav Das00e553b2018-04-21 17:19:48 -070037import org.onosproject.mastership.MastershipEvent;
Charles Chanc22cef32016-04-29 14:38:22 -070038import org.onosproject.net.ConnectPoint;
sangho80f11cb2015-04-01 13:05:26 -070039import org.onosproject.net.Device;
40import org.onosproject.net.DeviceId;
sanghofb7c7292015-04-13 15:15:58 -070041import org.onosproject.net.Link;
Charles Chan910be6a2017-08-23 14:46:43 -070042import org.onosproject.net.PortNumber;
Charles Chan12a8a842020-02-14 13:23:57 -080043import org.onosproject.net.flowobjective.Objective;
Charles Chan319d1a22015-11-03 10:42:14 -080044import org.onosproject.segmentrouting.config.DeviceConfigNotFoundException;
Saurav Das62ae6792017-05-15 15:34:25 -070045import org.onosproject.segmentrouting.grouphandler.DefaultGroupHandler;
Charles Chand66d6712018-03-29 16:03:41 -070046import org.onosproject.store.serializers.KryoNamespaces;
pierf331a492020-01-07 15:39:39 +010047import org.onosproject.store.service.ConsistentMultimap;
Charles Chand66d6712018-03-29 16:03:41 -070048import org.onosproject.store.service.Serializer;
sangho80f11cb2015-04-01 13:05:26 -070049import org.slf4j.Logger;
50import org.slf4j.LoggerFactory;
51
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -070052import java.time.Instant;
sangho80f11cb2015-04-01 13:05:26 -070053import java.util.ArrayList;
Charles Chand66d6712018-03-29 16:03:41 -070054import java.util.Collections;
sangho80f11cb2015-04-01 13:05:26 -070055import java.util.HashMap;
56import java.util.HashSet;
Saurav Das261c3002017-06-13 15:35:54 -070057import java.util.Iterator;
Charles Chand66d6712018-03-29 16:03:41 -070058import java.util.List;
Saurav Das261c3002017-06-13 15:35:54 -070059import java.util.Map;
pierf331a492020-01-07 15:39:39 +010060import java.util.Map.Entry;
Saurav Dasd1872b02016-12-02 15:43:47 -080061import java.util.Objects;
Charles Chan6dbcd252018-04-02 11:46:38 -070062import java.util.Optional;
sangho80f11cb2015-04-01 13:05:26 -070063import java.util.Set;
piera9941192019-04-24 16:12:47 +020064import java.util.concurrent.CompletableFuture;
65import java.util.concurrent.ExecutionException;
66import java.util.concurrent.ExecutorService;
67import java.util.concurrent.Future;
Saurav Das07c74602016-04-27 18:35:50 -070068import java.util.concurrent.ScheduledExecutorService;
69import java.util.concurrent.TimeUnit;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090070import java.util.concurrent.locks.Lock;
71import java.util.concurrent.locks.ReentrantLock;
pierventre37dcf4c2021-09-16 18:43:06 +020072import java.util.function.Function;
Charles Chan19b70032019-04-17 14:20:26 -070073import java.util.stream.Collectors;
Saurav Dasdc7f2752018-03-18 21:28:15 -070074import java.util.stream.Stream;
75
Pier Ventreadb4ae62016-11-23 09:57:42 -080076import static com.google.common.base.Preconditions.checkNotNull;
77import static java.util.concurrent.Executors.newScheduledThreadPool;
78import static org.onlab.util.Tools.groupedThreads;
sangho80f11cb2015-04-01 13:05:26 -070079
/**
 * Default routing handler that is responsible for route computing and
 * routing rule population.
 */
sangho80f11cb2015-04-01 13:05:26 -070084public class DefaultRoutingHandler {
Saurav Dasf9332192017-02-18 14:05:44 -080085 private static final int MAX_CONSTANT_RETRY_ATTEMPTS = 5;
Ray Milkey092e9e22018-02-01 13:49:47 -080086 private static final long RETRY_INTERVAL_MS = 250L;
Saurav Dasf9332192017-02-18 14:05:44 -080087 private static final int RETRY_INTERVAL_SCALE = 1;
Saurav Dasfbe74572017-08-03 18:30:35 -070088 private static final long STABLITY_THRESHOLD = 10; //secs
Saurav Das00e553b2018-04-21 17:19:48 -070089 private static final long MASTER_CHANGE_DELAY = 1000; // ms
Saurav Das68e1b6a2018-06-11 17:02:31 -070090 private static final long PURGE_DELAY = 1000; // ms
Charles Chanc22cef32016-04-29 14:38:22 -070091 private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
sangho80f11cb2015-04-01 13:05:26 -070092
93 private SegmentRoutingManager srManager;
94 private RoutingRulePopulator rulePopulator;
Shashikanth VH0637b162015-12-11 01:32:44 +053095 private HashMap<DeviceId, EcmpShortestPathGraph> currentEcmpSpgMap;
96 private HashMap<DeviceId, EcmpShortestPathGraph> updatedEcmpSpgMap;
sangho9b169e32015-04-14 16:27:13 -070097 private DeviceConfiguration config;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +090098 private final Lock statusLock = new ReentrantLock();
99 private volatile Status populationStatus;
Yuta HIGUCHIebee2f12016-07-21 16:54:33 -0700100 private ScheduledExecutorService executorService
Saurav Dasd1872b02016-12-02 15:43:47 -0800101 = newScheduledThreadPool(1, groupedThreads("retryftr", "retry-%d", log));
Saurav Das49368392018-04-23 18:42:12 -0700102 private ScheduledExecutorService executorServiceMstChg
103 = newScheduledThreadPool(1, groupedThreads("masterChg", "mstch-%d", log));
Saurav Das68e1b6a2018-06-11 17:02:31 -0700104 private ScheduledExecutorService executorServiceFRR
105 = newScheduledThreadPool(1, groupedThreads("fullRR", "fullRR-%d", log));
piera9941192019-04-24 16:12:47 +0200106 // Route populators - 0 will leverage available processors
107 private static final int DEFAULT_THREADS = 0;
108 private ExecutorService routePopulators;
Saurav Das49368392018-04-23 18:42:12 -0700109
Saurav Das00e553b2018-04-21 17:19:48 -0700110 private Instant lastRoutingChange = Instant.EPOCH;
Saurav Das68e1b6a2018-06-11 17:02:31 -0700111 private Instant lastFullReroute = Instant.EPOCH;
sangho80f11cb2015-04-01 13:05:26 -0700112
pierventre37dcf4c2021-09-16 18:43:06 +0200113 /*
114 * Store to keep track of ONOS instance that should program the device pair.
115 * There should be only one instance (the leader) that programs the same pair.
116 * This EC map is used as first source of truth. WorkPartitionService is used
117 * to elect a leader when shouldProgram is empty.
118 */
119 Map<DeviceId, NodeId> shouldProgram;
Charles Chand66d6712018-03-29 16:03:41 -0700120
pierf331a492020-01-07 15:39:39 +0100121 // Distributed routes store to keep track of the routes already seen
122 // destination device is the key and target sw is the value
123 ConsistentMultimap<DeviceId, DeviceId> seenBeforeRoutes;
124
Saurav Das00e553b2018-04-21 17:19:48 -0700125 // Local store to keep track of all devices that this instance was responsible
126 // for programming in the last run. Helps to determine if mastership changed
127 // during a run - only relevant for programming as a result of topo change.
128 Set<DeviceId> lastProgrammed;
129
/**
 * Represents the default routing population status.
 */
public enum Status {
    /** Population process is not started yet. */
    IDLE,
    /** Population process started. */
    STARTED,
    /** Population process was aborted due to errors, mostly for groups not found. */
    ABORTED,
    /** Population process was finished successfully. */
    SUCCEEDED
}
143
144 /**
pierventre37dcf4c2021-09-16 18:43:06 +0200145 * Deterministic hashing for the shouldProgram logic.
146 */
147 private static Long consistentHasher(EdgePair pair) {
148 Hasher hasher = Hashing.md5().newHasher();
149 long dev1Hash = hasher.putUnencodedChars(pair.dev1.toString())
150 .hash()
151 .asLong();
152 hasher = Hashing.md5().newHasher();
153 long dev2Hash = hasher.putUnencodedChars(pair.dev2.toString())
154 .hash()
155 .asLong();
156 return dev1Hash + dev2Hash;
157 }
158
159 /**
160 * Implements the hash function for the shouldProgram logic.
161 */
162 protected static final Function<EdgePair, Long> HASH_FUNCTION = DefaultRoutingHandler::consistentHasher;
163
/**
 * Creates a DefaultRoutingHandler object.
 * <p>
 * Builds the two distributed stores ("sr-should-program" and
 * "programmed-routes"), initialises the handler state through
 * {@code update(SegmentRoutingManager)} and finally creates the executor
 * used by the route populators.
 *
 * @param srManager SegmentRoutingManager object
 */
DefaultRoutingHandler(SegmentRoutingManager srManager) {
    this.shouldProgram = srManager.storageService.<DeviceId, NodeId>consistentMapBuilder()
            .withName("sr-should-program")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build().asJavaMap();
    this.seenBeforeRoutes = srManager.storageService.<DeviceId, DeviceId>consistentMultimapBuilder()
            .withName("programmed-routes")
            .withSerializer(Serializer.using(KryoNamespaces.API))
            .withRelaxedReadConsistency()
            .build();
    // Sets srManager, rulePopulator, config and the local (non distributed) state
    update(srManager);
    this.routePopulators = new PredictableExecutor(DEFAULT_THREADS,
            groupedThreads("onos/sr", "r-populator-%d", log));
}
184
185 /**
186 * Updates a DefaultRoutingHandler object.
187 *
188 * @param srManager SegmentRoutingManager object
189 */
190 void update(SegmentRoutingManager srManager) {
sangho80f11cb2015-04-01 13:05:26 -0700191 this.srManager = srManager;
192 this.rulePopulator = checkNotNull(srManager.routingRulePopulator);
sangho9b169e32015-04-14 16:27:13 -0700193 this.config = checkNotNull(srManager.deviceConfiguration);
sangho80f11cb2015-04-01 13:05:26 -0700194 this.populationStatus = Status.IDLE;
sanghofb7c7292015-04-13 15:15:58 -0700195 this.currentEcmpSpgMap = Maps.newHashMap();
Saurav Das00e553b2018-04-21 17:19:48 -0700196 this.lastProgrammed = Sets.newConcurrentHashSet();
sangho80f11cb2015-04-01 13:05:26 -0700197 }
198
199 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700200 * Returns an immutable copy of the current ECMP shortest-path graph as
201 * computed by this controller instance.
202 *
Saurav Das261c3002017-06-13 15:35:54 -0700203 * @return immutable copy of the current ECMP graph
Saurav Das62ae6792017-05-15 15:34:25 -0700204 */
205 public ImmutableMap<DeviceId, EcmpShortestPathGraph> getCurrentEmcpSpgMap() {
206 Builder<DeviceId, EcmpShortestPathGraph> builder = ImmutableMap.builder();
207 currentEcmpSpgMap.entrySet().forEach(entry -> {
208 if (entry.getValue() != null) {
209 builder.put(entry.getKey(), entry.getValue());
210 }
211 });
212 return builder.build();
213 }
214
Saurav Dasfbe74572017-08-03 18:30:35 -0700215 /**
216 * Acquires the lock used when making routing changes.
217 */
218 public void acquireRoutingLock() {
219 statusLock.lock();
220 }
221
222 /**
223 * Releases the lock used when making routing changes.
224 */
225 public void releaseRoutingLock() {
226 statusLock.unlock();
227 }
228
229 /**
230 * Determines if routing in the network has been stable in the last
Charles Chan12a8a842020-02-14 13:23:57 -0800231 * STABILITY_THRESHOLD seconds, by comparing the current time to the last
Saurav Dasfbe74572017-08-03 18:30:35 -0700232 * routing change timestamp.
233 *
234 * @return true if stable
235 */
236 public boolean isRoutingStable() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700237 long last = (long) (lastRoutingChange.toEpochMilli() / 1000.0);
238 long now = (long) (Instant.now().toEpochMilli() / 1000.0);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700239 log.trace("Routing stable since {}s", now - last);
Saurav Dasfbe74572017-08-03 18:30:35 -0700240 return (now - last) > STABLITY_THRESHOLD;
241 }
242
Saurav Das49368392018-04-23 18:42:12 -0700243 /**
244 * Gracefully shuts down the defaultRoutingHandler. Typically called when
245 * the app is deactivated
246 */
247 public void shutdown() {
248 executorService.shutdown();
249 executorServiceMstChg.shutdown();
Saurav Das68e1b6a2018-06-11 17:02:31 -0700250 executorServiceFRR.shutdown();
piera9941192019-04-24 16:12:47 +0200251 routePopulators.shutdown();
Saurav Das49368392018-04-23 18:42:12 -0700252 }
Saurav Dasfbe74572017-08-03 18:30:35 -0700253
//////////////////////////////////////
// Route path handling
//////////////////////////////////////
257
/* The following three methods represent the three major ways in which
 * route-path handling is triggered in the network
 *   a) due to configuration change
 *   b) due to route-added event
 *   c) due to change in the topology
 */
264
Saurav Das62ae6792017-05-15 15:34:25 -0700265 /**
Saurav Das261c3002017-06-13 15:35:54 -0700266 * Populates all routing rules to all switches. Typically triggered at
267 * startup or after a configuration event.
sangho80f11cb2015-04-01 13:05:26 -0700268 */
Saurav Das62ae6792017-05-15 15:34:25 -0700269 public void populateAllRoutingRules() {
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700270 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900271 statusLock.lock();
272 try {
Saurav Das261c3002017-06-13 15:35:54 -0700273 if (populationStatus == Status.STARTED) {
274 log.warn("Previous rule population is not finished. Cannot"
275 + " proceed with populateAllRoutingRules");
276 return;
277 }
278
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900279 populationStatus = Status.STARTED;
280 rulePopulator.resetCounter();
Saurav Das261c3002017-06-13 15:35:54 -0700281 log.info("Starting to populate all routing rules");
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900282 log.debug("populateAllRoutingRules: populationStatus is STARTED");
sangho80f11cb2015-04-01 13:05:26 -0700283
Saurav Das261c3002017-06-13 15:35:54 -0700284 // take a snapshot of the topology
285 updatedEcmpSpgMap = new HashMap<>();
286 Set<EdgePair> edgePairs = new HashSet<>();
287 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
Jonathan Hart61e24e12017-11-30 18:23:42 -0800288 for (DeviceId dstSw : srManager.deviceConfiguration.getRouters()) {
Saurav Das261c3002017-06-13 15:35:54 -0700289 EcmpShortestPathGraph ecmpSpgUpdated =
Jonathan Hart61e24e12017-11-30 18:23:42 -0800290 new EcmpShortestPathGraph(dstSw, srManager);
291 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700292 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
293 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700294 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700295 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
296 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
297 edgePairs.add(new EdgePair(dstSw, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700298 }
Charles Chand66d6712018-03-29 16:03:41 -0700299
300 if (!shouldProgram(dstSw)) {
Saurav Das00e553b2018-04-21 17:19:48 -0700301 lastProgrammed.remove(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900302 continue;
Saurav Das00e553b2018-04-21 17:19:48 -0700303 } else {
304 lastProgrammed.add(dstSw);
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900305 }
Saurav Das00e553b2018-04-21 17:19:48 -0700306 // To do a full reroute, assume all route-paths have changed
Charles Chand66d6712018-03-29 16:03:41 -0700307 for (DeviceId dev : deviceAndItsPair(dstSw)) {
Jonathan Hart61e24e12017-11-30 18:23:42 -0800308 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
309 if (targetSw.equals(dev)) {
Saurav Das261c3002017-06-13 15:35:54 -0700310 continue;
311 }
Jonathan Hart61e24e12017-11-30 18:23:42 -0800312 routeChanges.add(Lists.newArrayList(targetSw, dev));
Saurav Das261c3002017-06-13 15:35:54 -0700313 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900314 }
Saurav Das261c3002017-06-13 15:35:54 -0700315 }
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900316
pierf331a492020-01-07 15:39:39 +0100317 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200318 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
319
Saurav Das261c3002017-06-13 15:35:54 -0700320 if (!redoRouting(routeChanges, edgePairs, null)) {
321 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
322 populationStatus = Status.ABORTED;
323 log.warn("Failed to repopulate all routing rules.");
324 return;
sangho80f11cb2015-04-01 13:05:26 -0700325 }
326
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900327 log.debug("populateAllRoutingRules: populationStatus is SUCCEEDED");
328 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700329 log.info("Completed all routing rule population. Total # of rules pushed : {}",
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900330 rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700331 return;
pierdebd15c2019-04-19 20:55:53 +0200332 } catch (Exception e) {
333 log.error("populateAllRoutingRules thrown an exception: {}",
334 e.getMessage(), e);
335 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900336 } finally {
337 statusLock.unlock();
sangho80f11cb2015-04-01 13:05:26 -0700338 }
sangho80f11cb2015-04-01 13:05:26 -0700339 }
340
sanghofb7c7292015-04-13 15:15:58 -0700341 /**
Saurav Das261c3002017-06-13 15:35:54 -0700342 * Populate rules from all other edge devices to the connect-point(s)
343 * specified for the given subnets.
344 *
345 * @param cpts connect point(s) of the subnets being added
346 * @param subnets subnets being added
Charles Chan910be6a2017-08-23 14:46:43 -0700347 */
348 // XXX refactor
Saurav Das261c3002017-06-13 15:35:54 -0700349 protected void populateSubnet(Set<ConnectPoint> cpts, Set<IpPrefix> subnets) {
Charles Chan6db55b92017-09-11 15:21:57 -0700350 if (cpts == null || cpts.size() < 1 || cpts.size() > 2) {
351 log.warn("Skipping populateSubnet due to illegal size of connect points. {}", cpts);
352 return;
353 }
354
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700355 lastRoutingChange = Instant.now();
Saurav Das261c3002017-06-13 15:35:54 -0700356 statusLock.lock();
357 try {
358 if (populationStatus == Status.STARTED) {
359 log.warn("Previous rule population is not finished. Cannot"
360 + " proceed with routing rules for added routes");
361 return;
362 }
363 populationStatus = Status.STARTED;
364 rulePopulator.resetCounter();
Charles Chan910be6a2017-08-23 14:46:43 -0700365 log.info("Starting to populate routing rules for added routes, subnets={}, cpts={}",
366 subnets, cpts);
Saurav Das6430f412018-01-25 09:49:01 -0800367 // In principle an update to a subnet/prefix should not require a
368 // new ECMPspg calculation as it is not a topology event. As a
369 // result, we use the current/existing ECMPspg in the updated map
370 // used by the redoRouting method.
Saurav Das6de6ffd2018-02-09 09:15:03 -0800371 if (updatedEcmpSpgMap == null) {
372 updatedEcmpSpgMap = new HashMap<>();
373 }
Saurav Das6430f412018-01-25 09:49:01 -0800374 currentEcmpSpgMap.entrySet().forEach(entry -> {
375 updatedEcmpSpgMap.put(entry.getKey(), entry.getValue());
Saurav Dase321cff2018-02-09 17:26:45 -0800376 if (log.isTraceEnabled()) {
377 log.trace("Root switch: {}", entry.getKey());
378 log.trace(" Current/Existing SPG: {}", entry.getValue());
Saurav Das6430f412018-01-25 09:49:01 -0800379 }
380 });
pierventre37dcf4c2021-09-16 18:43:06 +0200381
pierf331a492020-01-07 15:39:39 +0100382 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200383 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
384
Saurav Das261c3002017-06-13 15:35:54 -0700385 Set<EdgePair> edgePairs = new HashSet<>();
386 Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
387 boolean handleRouting = false;
388
389 if (cpts.size() == 2) {
390 // ensure connect points are edge-pairs
391 Iterator<ConnectPoint> iter = cpts.iterator();
392 DeviceId dev1 = iter.next().deviceId();
Charles Chan6dbcd252018-04-02 11:46:38 -0700393 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dev1);
394 if (pairDev.isPresent() && iter.next().deviceId().equals(pairDev.get())) {
395 edgePairs.add(new EdgePair(dev1, pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700396 } else {
397 log.warn("Connectpoints {} for subnets {} not on "
398 + "pair-devices.. aborting populateSubnet", cpts, subnets);
399 populationStatus = Status.ABORTED;
400 return;
401 }
402 for (ConnectPoint cp : cpts) {
Saurav Das6430f412018-01-25 09:49:01 -0800403 if (updatedEcmpSpgMap.get(cp.deviceId()) == null) {
404 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700405 new EcmpShortestPathGraph(cp.deviceId(), srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800406 updatedEcmpSpgMap.put(cp.deviceId(), ecmpSpgUpdated);
407 log.warn("populateSubnet: no updated graph for dev:{}"
408 + " ... creating", cp.deviceId());
409 }
Charles Chand66d6712018-03-29 16:03:41 -0700410 if (!shouldProgram(cp.deviceId())) {
Saurav Das261c3002017-06-13 15:35:54 -0700411 continue;
412 }
413 handleRouting = true;
414 }
415 } else {
416 // single connect point
417 DeviceId dstSw = cpts.iterator().next().deviceId();
Saurav Das6430f412018-01-25 09:49:01 -0800418 if (updatedEcmpSpgMap.get(dstSw) == null) {
419 EcmpShortestPathGraph ecmpSpgUpdated =
Saurav Das261c3002017-06-13 15:35:54 -0700420 new EcmpShortestPathGraph(dstSw, srManager);
Saurav Das6430f412018-01-25 09:49:01 -0800421 updatedEcmpSpgMap.put(dstSw, ecmpSpgUpdated);
422 log.warn("populateSubnet: no updated graph for dev:{}"
423 + " ... creating", dstSw);
424 }
Charles Chand66d6712018-03-29 16:03:41 -0700425 handleRouting = shouldProgram(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700426 }
427
428 if (!handleRouting) {
429 log.debug("This instance is not handling ecmp routing to the "
430 + "connectPoint(s) {}", cpts);
431 populationStatus = Status.ABORTED;
432 return;
433 }
434
435 // if it gets here, this instance should handle routing for the
436 // connectpoint(s). Assume all route-paths have to be updated to
437 // the connectpoint(s) with the following exceptions
438 // 1. if target is non-edge no need for routing rules
439 // 2. if target is one of the connectpoints
440 for (ConnectPoint cp : cpts) {
441 DeviceId dstSw = cp.deviceId();
442 for (Device targetSw : srManager.deviceService.getDevices()) {
443 boolean isEdge = false;
444 try {
445 isEdge = config.isEdgeDevice(targetSw.id());
446 } catch (DeviceConfigNotFoundException e) {
Charles Chaneaf3c9b2018-02-16 17:20:54 -0800447 log.warn(e.getMessage() + "aborting populateSubnet on targetSw {}", targetSw.id());
448 continue;
Saurav Das261c3002017-06-13 15:35:54 -0700449 }
Charles Chan6dbcd252018-04-02 11:46:38 -0700450 Optional<DeviceId> pairDev = srManager.getPairDeviceId(dstSw);
Saurav Das261c3002017-06-13 15:35:54 -0700451 if (dstSw.equals(targetSw.id()) || !isEdge ||
Charles Chan6dbcd252018-04-02 11:46:38 -0700452 (cpts.size() == 2 && pairDev.isPresent() && targetSw.id().equals(pairDev.get()))) {
Saurav Das261c3002017-06-13 15:35:54 -0700453 continue;
454 }
455 routeChanges.add(Lists.newArrayList(targetSw.id(), dstSw));
456 }
457 }
458
459 if (!redoRouting(routeChanges, edgePairs, subnets)) {
460 log.debug("populateSubnet: populationStatus is ABORTED");
461 populationStatus = Status.ABORTED;
462 log.warn("Failed to repopulate the rules for subnet.");
463 return;
464 }
465
466 log.debug("populateSubnet: populationStatus is SUCCEEDED");
467 populationStatus = Status.SUCCEEDED;
468 log.info("Completed subnet population. Total # of rules pushed : {}",
469 rulePopulator.getCounter());
470 return;
471
pierdebd15c2019-04-19 20:55:53 +0200472 } catch (Exception e) {
473 log.error("populateSubnet thrown an exception: {}",
474 e.getMessage(), e);
475 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700476 } finally {
477 statusLock.unlock();
478 }
479 }
480
481 /**
Saurav Das62ae6792017-05-15 15:34:25 -0700482 * Populates the routing rules or makes hash group changes according to the
483 * route-path changes due to link failure, switch failure or link up. This
484 * method should only be called for one of these three possible event-types.
Saurav Dasdc7f2752018-03-18 21:28:15 -0700485 * Note that when a switch goes away, all of its links fail as well, but
486 * this is handled as a single switch removal event.
sanghofb7c7292015-04-13 15:15:58 -0700487 *
Saurav Dasdc7f2752018-03-18 21:28:15 -0700488 * @param linkDown the single failed link, or null for other conditions such
489 * as link-up or a removed switch
Saurav Das62ae6792017-05-15 15:34:25 -0700490 * @param linkUp the single link up, or null for other conditions such as
Saurav Dasdc7f2752018-03-18 21:28:15 -0700491 * link-down or a removed switch
492 * @param switchDown the removed switch, or null for other conditions such
493 * as link-down or link-up
494 * @param seenBefore true if this event is for a linkUp or linkDown for a
495 * seen link
496 */
497 // TODO This method should be refactored into three separated methods
Charles Chan9d2dd552018-06-19 20:56:33 -0700498 public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
499 DeviceId switchDown, boolean seenBefore) {
Saurav Dasdc7f2752018-03-18 21:28:15 -0700500 if (Stream.of(linkDown, linkUp, switchDown).filter(Objects::nonNull)
501 .count() != 1) {
Saurav Das62ae6792017-05-15 15:34:25 -0700502 log.warn("Only one event can be handled for link status change .. aborting");
503 return;
504 }
Saurav Dasdc7f2752018-03-18 21:28:15 -0700505
Yuta HIGUCHIc9d93472017-08-18 23:16:35 -0700506 lastRoutingChange = Instant.now();
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900507 statusLock.lock();
508 try {
sanghofb7c7292015-04-13 15:15:58 -0700509
510 if (populationStatus == Status.STARTED) {
Saurav Das261c3002017-06-13 15:35:54 -0700511 log.warn("Previous rule population is not finished. Cannot"
Saurav Das6430f412018-01-25 09:49:01 -0800512 + " proceeed with routingRules for Topology change");
Saurav Das62ae6792017-05-15 15:34:25 -0700513 return;
sanghofb7c7292015-04-13 15:15:58 -0700514 }
515
Saurav Das261c3002017-06-13 15:35:54 -0700516 // Take snapshots of the topology
sangho28d0b6d2015-05-07 13:30:57 -0700517 updatedEcmpSpgMap = new HashMap<>();
Saurav Das261c3002017-06-13 15:35:54 -0700518 Set<EdgePair> edgePairs = new HashSet<>();
sangho28d0b6d2015-05-07 13:30:57 -0700519 for (Device sw : srManager.deviceService.getDevices()) {
Shashikanth VH0637b162015-12-11 01:32:44 +0530520 EcmpShortestPathGraph ecmpSpgUpdated =
521 new EcmpShortestPathGraph(sw.id(), srManager);
sangho28d0b6d2015-05-07 13:30:57 -0700522 updatedEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
Charles Chan6dbcd252018-04-02 11:46:38 -0700523 Optional<DeviceId> pairDev = srManager.getPairDeviceId(sw.id());
524 if (pairDev.isPresent()) {
Saurav Das261c3002017-06-13 15:35:54 -0700525 // pairDev may not be available yet, but we still need to add
Charles Chan6dbcd252018-04-02 11:46:38 -0700526 ecmpSpgUpdated = new EcmpShortestPathGraph(pairDev.get(), srManager);
527 updatedEcmpSpgMap.put(pairDev.get(), ecmpSpgUpdated);
528 edgePairs.add(new EdgePair(sw.id(), pairDev.get()));
Saurav Das261c3002017-06-13 15:35:54 -0700529 }
sangho28d0b6d2015-05-07 13:30:57 -0700530 }
531
Saurav Das6430f412018-01-25 09:49:01 -0800532 log.info("Starting to populate routing rules from Topology change");
sanghodf0153f2015-05-05 14:13:34 -0700533
sanghofb7c7292015-04-13 15:15:58 -0700534 Set<ArrayList<DeviceId>> routeChanges;
Saurav Das62ae6792017-05-15 15:34:25 -0700535 log.debug("populateRoutingRulesForLinkStatusChange: "
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700536 + "populationStatus is STARTED");
pierf331a492020-01-07 15:39:39 +0100537 log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
pierventre37dcf4c2021-09-16 18:43:06 +0200538 seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
sanghofb7c7292015-04-13 15:15:58 -0700539 populationStatus = Status.STARTED;
Saurav Das6430f412018-01-25 09:49:01 -0800540 rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
541 boolean hashGroupsChanged = false;
Saurav Das1b391d52016-11-29 14:27:25 -0800542 // try optimized re-routing
Saurav Das62ae6792017-05-15 15:34:25 -0700543 if (linkDown == null) {
544 // either a linkUp or a switchDown - compute all route changes by
545 // comparing all routes of existing ECMP SPG to new ECMP SPG
Saurav Dascea556f2018-03-05 14:37:16 -0800546 routeChanges = computeRouteChange(switchDown);
Saurav Das62ae6792017-05-15 15:34:25 -0700547
pier572d4a92019-04-25 18:51:51 +0200548 // deal with linkUp
549 if (linkUp != null) {
550 // deal with linkUp of a seen-before link
551 if (seenBefore) {
552 // link previously seen before
553 // do hash-bucket changes instead of a re-route
554 processHashGroupChangeForLinkUp(routeChanges);
555 // clear out routesChanges so a re-route is not attempted
556 routeChanges = ImmutableSet.of();
557 hashGroupsChanged = true;
558 } else {
559 // do hash-bucket changes first, method will return changed routes;
560 // for each route not changed it will perform a reroute
561 Set<ArrayList<DeviceId>> changedRoutes = processHashGroupChangeForLinkUp(routeChanges);
562 Set<ArrayList<DeviceId>> routeChangesTemp = getExpandedRoutes(routeChanges);
563 changedRoutes.forEach(routeChangesTemp::remove);
564 // if routesChanges is empty a re-route is not attempted
565 routeChanges = routeChangesTemp;
566 for (ArrayList<DeviceId> route : routeChanges) {
567 log.debug("remaining routes Target -> Root");
568 if (route.size() == 1) {
569 log.debug(" : all -> {}", route.get(0));
570 } else {
571 log.debug(" : {} -> {}", route.get(0), route.get(1));
572 }
573 }
574 // Mark hash groups as changed
575 if (!changedRoutes.isEmpty()) {
576 hashGroupsChanged = true;
577 }
578 }
579
Saurav Das62ae6792017-05-15 15:34:25 -0700580 }
581
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700582 //deal with switchDown
583 if (switchDown != null) {
pier572d4a92019-04-25 18:51:51 +0200584 processHashGroupChangeForFailure(routeChanges, switchDown);
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700585 // clear out routesChanges so a re-route is not attempted
586 routeChanges = ImmutableSet.of();
Saurav Das6430f412018-01-25 09:49:01 -0800587 hashGroupsChanged = true;
Saurav Dasfe0b05e2017-08-14 16:44:43 -0700588 }
sanghofb7c7292015-04-13 15:15:58 -0700589 } else {
Saurav Das62ae6792017-05-15 15:34:25 -0700590 // link has gone down
591 // Compare existing ECMP SPG only with the link that went down
592 routeChanges = computeDamagedRoutes(linkDown);
pier572d4a92019-04-25 18:51:51 +0200593 processHashGroupChangeForFailure(routeChanges, null);
Saurav Das68e1b6a2018-06-11 17:02:31 -0700594 // clear out routesChanges so a re-route is not attempted
595 routeChanges = ImmutableSet.of();
596 hashGroupsChanged = true;
Saurav Dasb149be12016-06-07 10:08:06 -0700597 }
598
sanghofb7c7292015-04-13 15:15:58 -0700599 if (routeChanges.isEmpty()) {
Saurav Das6430f412018-01-25 09:49:01 -0800600 if (hashGroupsChanged) {
601 log.info("Hash-groups changed for link status change");
602 } else {
603 log.info("No re-route or re-hash attempted for the link"
604 + " status change");
605 updatedEcmpSpgMap.keySet().forEach(devId -> {
606 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
607 log.debug("Updating ECMPspg for remaining dev:{}", devId);
608 });
609 }
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700610 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700611 populationStatus = Status.SUCCEEDED;
Saurav Das62ae6792017-05-15 15:34:25 -0700612 return;
sanghofb7c7292015-04-13 15:15:58 -0700613 }
614
pier572d4a92019-04-25 18:51:51 +0200615 if (hashGroupsChanged) {
616 log.debug("Hash-groups changed for link status change");
617 }
618
Saurav Das62ae6792017-05-15 15:34:25 -0700619 // reroute of routeChanges
Saurav Das261c3002017-06-13 15:35:54 -0700620 if (redoRouting(routeChanges, edgePairs, null)) {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700621 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED");
sanghofb7c7292015-04-13 15:15:58 -0700622 populationStatus = Status.SUCCEEDED;
Saurav Das261c3002017-06-13 15:35:54 -0700623 log.info("Completed repopulation of rules for link-status change."
624 + " # of rules populated : {}", rulePopulator.getCounter());
Saurav Das62ae6792017-05-15 15:34:25 -0700625 return;
sanghofb7c7292015-04-13 15:15:58 -0700626 } else {
Srikanth Vavilapalli7cd16712015-05-04 09:48:09 -0700627 log.debug("populateRoutingRulesForLinkStatusChange: populationStatus is ABORTED");
sanghofb7c7292015-04-13 15:15:58 -0700628 populationStatus = Status.ABORTED;
Saurav Das261c3002017-06-13 15:35:54 -0700629 log.warn("Failed to repopulate the rules for link status change.");
Saurav Das62ae6792017-05-15 15:34:25 -0700630 return;
sanghofb7c7292015-04-13 15:15:58 -0700631 }
pierdebd15c2019-04-19 20:55:53 +0200632 } catch (Exception e) {
633 log.error("populateRoutingRulesForLinkStatusChange thrown an exception: {}",
634 e.getMessage(), e);
635 populationStatus = Status.ABORTED;
HIGUCHI Yuta16d8fd52015-09-08 16:16:31 +0900636 } finally {
637 statusLock.unlock();
sanghofb7c7292015-04-13 15:15:58 -0700638 }
639 }
640
Saurav Das62ae6792017-05-15 15:34:25 -0700641 /**
Saurav Das261c3002017-06-13 15:35:54 -0700642 * Processes a set a route-path changes by reprogramming routing rules and
643 * creating new hash-groups or editing them if necessary. This method also
644 * determines the next-hops for the route-path from the src-switch (target)
645 * of the path towards the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -0700646 *
Saurav Das261c3002017-06-13 15:35:54 -0700647 * @param routeChanges a set of route-path changes, where each route-path is
648 * a list with its first element the src-switch (target)
649 * of the path, and the second element the dst-switch of
650 * the path.
651 * @param edgePairs a set of edge-switches that are paired by configuration
652 * @param subnets a set of prefixes that need to be populated in the routing
653 * table of the target switch in the route-path. Can be null,
654 * in which case all the prefixes belonging to the dst-switch
655 * will be populated in the target switch
656 * @return true if successful in repopulating all routes
Saurav Das62ae6792017-05-15 15:34:25 -0700657 */
Saurav Das261c3002017-06-13 15:35:54 -0700658 private boolean redoRouting(Set<ArrayList<DeviceId>> routeChanges,
659 Set<EdgePair> edgePairs, Set<IpPrefix> subnets) {
660 // first make every entry two-elements
pier572d4a92019-04-25 18:51:51 +0200661 Set<ArrayList<DeviceId>> changedRoutes = getExpandedRoutes(routeChanges);
662 // no valid routes - fail fast
663 if (changedRoutes.isEmpty()) {
664 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700665 }
666
pierf331a492020-01-07 15:39:39 +0100667 // Temporary stores the changed routes
668 Set<ArrayList<DeviceId>> tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700669 // now process changedRoutes according to edgePairs
670 if (!redoRoutingEdgePairs(edgePairs, subnets, changedRoutes)) {
671 return false; //abort routing and fail fast
672 }
pierf331a492020-01-07 15:39:39 +0100673 // Calculate the programmed routes pointing to the pairs
674 Set<ArrayList<DeviceId>> programmedPairRoutes = Sets.difference(tempRoutes, changedRoutes);
675 log.debug("Evaluating programmed pair routes");
676 storeSeenBeforeRoutes(programmedPairRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700677
pierf331a492020-01-07 15:39:39 +0100678 // Temporary stores the left routes
679 tempRoutes = ImmutableSet.copyOf(changedRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700680 // whatever is left in changedRoutes is now processed for individual dsts.
Saurav Das6430f412018-01-25 09:49:01 -0800681 Set<DeviceId> updatedDevices = Sets.newHashSet();
682 if (!redoRoutingIndividualDests(subnets, changedRoutes,
683 updatedDevices)) {
Saurav Das261c3002017-06-13 15:35:54 -0700684 return false; //abort routing and fail fast
685 }
pierf331a492020-01-07 15:39:39 +0100686 // Calculate the individual programmed routes
687 Set<ArrayList<DeviceId>> programmedIndividualRoutes = Sets.difference(tempRoutes, changedRoutes);
688 log.debug("Evaluating individual programmed routes");
689 storeSeenBeforeRoutes(programmedIndividualRoutes);
Saurav Das261c3002017-06-13 15:35:54 -0700690
Saurav Das261c3002017-06-13 15:35:54 -0700691 // update ecmpSPG for all edge-pairs
692 for (EdgePair ep : edgePairs) {
693 currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
694 currentEcmpSpgMap.put(ep.dev2, updatedEcmpSpgMap.get(ep.dev2));
695 log.debug("Updating ECMPspg for edge-pair:{}-{}", ep.dev1, ep.dev2);
696 }
Saurav Das6430f412018-01-25 09:49:01 -0800697
698 // here is where we update all devices not touched by this instance
699 updatedEcmpSpgMap.keySet().stream()
700 .filter(devId -> !edgePairs.stream().anyMatch(ep -> ep.includes(devId)))
701 .filter(devId -> !updatedDevices.contains(devId))
702 .forEach(devId -> {
703 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
704 log.debug("Updating ECMPspg for remaining dev:{}", devId);
705 });
Saurav Das261c3002017-06-13 15:35:54 -0700706 return true;
707 }
708
709 /**
pierf331a492020-01-07 15:39:39 +0100710 * Stores the routes seen before. Routes are two-elements arrays.
711 * @param seenRoutes seen before routes
712 */
713 private void storeSeenBeforeRoutes(Set<ArrayList<DeviceId>> seenRoutes) {
714 Set<DeviceId> nextHops;
715 for (ArrayList<DeviceId> route : seenRoutes) {
716 log.debug("Route {} -> {} has been programmed", route.get(0), route.get(1));
717 nextHops = getNextHops(route.get(0), route.get(1));
718 // No valid next hops - cannot be considered a programmed route
719 if (nextHops.isEmpty()) {
720 log.debug("Could not find next hop from target:{} --> dst {} "
721 + "skipping this route", route.get(0), route.get(1));
722 continue;
723 }
724 // Already present - do not add again
725 if (seenBeforeRoutes.containsEntry(route.get(1), route.get(0))) {
726 log.debug("Route from target:{} --> dst {} " +
727 "already present, skipping this route", route.get(0), route.get(1));
728 continue;
729 }
730 seenBeforeRoutes.put(route.get(1), route.get(0));
731 }
732 }
733
734 /**
Saurav Das261c3002017-06-13 15:35:54 -0700735 * Programs targetSw in the changedRoutes for given prefixes reachable by
736 * an edgePair. If no prefixes are given, the method will use configured
737 * subnets/prefixes. If some configured subnets belong only to a specific
738 * destination in the edgePair, then the target switch will be programmed
739 * only to that destination.
740 *
741 * @param edgePairs set of edge-pairs for which target will be programmed
742 * @param subnets a set of prefixes that need to be populated in the routing
743 * table of the target switch in the changedRoutes. Can be null,
744 * in which case all the configured prefixes belonging to the
745 * paired switches will be populated in the target switch
746 * @param changedRoutes a set of route-path changes, where each route-path is
747 * a list with its first element the src-switch (target)
748 * of the path, and the second element the dst-switch of
749 * the path.
750 * @return true if successful
751 */
piera9941192019-04-24 16:12:47 +0200752 private boolean redoRoutingEdgePairs(Set<EdgePair> edgePairs, Set<IpPrefix> subnets,
753 Set<ArrayList<DeviceId>> changedRoutes) {
Saurav Das261c3002017-06-13 15:35:54 -0700754 for (EdgePair ep : edgePairs) {
755 // temp store for a target's changedRoutes to this edge-pair
756 Map<DeviceId, Set<ArrayList<DeviceId>>> targetRoutes = new HashMap<>();
757 Iterator<ArrayList<DeviceId>> i = changedRoutes.iterator();
758 while (i.hasNext()) {
759 ArrayList<DeviceId> route = i.next();
760 DeviceId dstSw = route.get(1);
761 if (ep.includes(dstSw)) {
762 // routeChange for edge pair found
763 // sort by target iff target is edge and remove from changedRoutes
764 DeviceId targetSw = route.get(0);
765 try {
766 if (!srManager.deviceConfiguration.isEdgeDevice(targetSw)) {
767 continue;
768 }
769 } catch (DeviceConfigNotFoundException e) {
770 log.warn(e.getMessage() + "aborting redoRouting");
771 return false;
772 }
773 // route is from another edge to this edge-pair
774 if (targetRoutes.containsKey(targetSw)) {
775 targetRoutes.get(targetSw).add(route);
776 } else {
777 Set<ArrayList<DeviceId>> temp = new HashSet<>();
778 temp.add(route);
779 targetRoutes.put(targetSw, temp);
780 }
781 i.remove();
782 }
783 }
784 // so now for this edgepair we have a per target set of routechanges
785 // process target->edgePair route
piera9941192019-04-24 16:12:47 +0200786 List<Future<Boolean>> futures = Lists.newArrayList();
pierf331a492020-01-07 15:39:39 +0100787 for (Entry<DeviceId, Set<ArrayList<DeviceId>>> entry :
Saurav Das261c3002017-06-13 15:35:54 -0700788 targetRoutes.entrySet()) {
789 log.debug("* redoRoutingDstPair Target:{} -> edge-pair {}",
790 entry.getKey(), ep);
piera9941192019-04-24 16:12:47 +0200791 futures.add(routePopulators.submit(new RedoRoutingEdgePair(entry.getKey(), entry.getValue(),
792 subnets, ep)));
793 }
794 if (!checkJobs(futures)) {
795 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700796 }
797 // if it gets here it has succeeded for all targets to this edge-pair
798 }
799 return true;
800 }
801
piera9941192019-04-24 16:12:47 +0200802 private final class RedoRoutingEdgePair implements PickyCallable<Boolean> {
803 private DeviceId targetSw;
804 private Set<ArrayList<DeviceId>> routes;
805 private Set<IpPrefix> subnets;
806 private EdgePair ep;
807
808 /**
809 * Builds a RedoRoutingEdgePair task which provides a result.
810 *
811 * @param targetSw the target switch
812 * @param routes the changed routes
813 * @param subnets the subnets
814 * @param ep the edge pair
815 */
816 RedoRoutingEdgePair(DeviceId targetSw, Set<ArrayList<DeviceId>> routes,
817 Set<IpPrefix> subnets, EdgePair ep) {
818 this.targetSw = targetSw;
819 this.routes = routes;
820 this.subnets = subnets;
821 this.ep = ep;
822 }
823
824 @Override
825 public Boolean call() throws Exception {
826 return redoRoutingEdgePair();
827 }
828
829 @Override
830 public int hint() {
831 return targetSw.hashCode();
832 }
833
834 private boolean redoRoutingEdgePair() {
835 Map<DeviceId, Set<DeviceId>> perDstNextHops = new HashMap<>();
836 routes.forEach(route -> {
837 Set<DeviceId> nhops = getNextHops(route.get(0), route.get(1));
838 log.debug("route: target {} -> dst {} found with next-hops {}",
839 route.get(0), route.get(1), nhops);
840 perDstNextHops.put(route.get(1), nhops);
841 });
842
843 List<Set<IpPrefix>> batchedSubnetDev1, batchedSubnetDev2;
844 if (subnets != null) {
845 batchedSubnetDev1 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
846 batchedSubnetDev2 = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
847 } else {
848 batchedSubnetDev1 = config.getBatchedSubnets(ep.dev1);
849 batchedSubnetDev2 = config.getBatchedSubnets(ep.dev2);
850 }
851 List<Set<IpPrefix>> batchedSubnetBoth = Streams
852 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.intersection(a, b))
853 .filter(set -> !set.isEmpty())
854 .collect(Collectors.toList());
855 List<Set<IpPrefix>> batchedSubnetDev1Only = Streams
856 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(a, b))
857 .filter(set -> !set.isEmpty())
858 .collect(Collectors.toList());
859 List<Set<IpPrefix>> batchedSubnetDev2Only = Streams
860 .zip(batchedSubnetDev1.stream(), batchedSubnetDev2.stream(), (a, b) -> Sets.difference(b, a))
861 .filter(set -> !set.isEmpty())
862 .collect(Collectors.toList());
863
864 Set<DeviceId> nhDev1 = perDstNextHops.get(ep.dev1);
865 Set<DeviceId> nhDev2 = perDstNextHops.get(ep.dev2);
866
867 // handle routing to subnets common to edge-pair
868 // only if the targetSw is not part of the edge-pair and there
869 // exists a next hop to at least one of the devices in the edge-pair
870 if (!ep.includes(targetSw)
871 && ((nhDev1 != null && !nhDev1.isEmpty()) || (nhDev2 != null && !nhDev2.isEmpty()))) {
872 log.trace("getSubnets on both {} and {}: {}", ep.dev1, ep.dev2, batchedSubnetBoth);
873 for (Set<IpPrefix> prefixes : batchedSubnetBoth) {
874 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, ep.dev2,
875 perDstNextHops, prefixes)) {
876 return false; // abort everything and fail fast
877 }
878 }
879
880 }
881 // handle routing to subnets that only belong to dev1 only if
882 // a next-hop exists from the target to dev1
883 if (!batchedSubnetDev1Only.isEmpty() &&
884 batchedSubnetDev1Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
885 nhDev1 != null && !nhDev1.isEmpty()) {
886 Map<DeviceId, Set<DeviceId>> onlyDev1NextHops = new HashMap<>();
887 onlyDev1NextHops.put(ep.dev1, nhDev1);
888 log.trace("getSubnets on {} only: {}", ep.dev1, batchedSubnetDev1Only);
889 for (Set<IpPrefix> prefixes : batchedSubnetDev1Only) {
890 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev1, null,
891 onlyDev1NextHops, prefixes)) {
892 return false; // abort everything and fail fast
893 }
894 }
895 }
896 // handle routing to subnets that only belong to dev2 only if
897 // a next-hop exists from the target to dev2
898 if (!batchedSubnetDev2Only.isEmpty() &&
899 batchedSubnetDev2Only.stream().anyMatch(subnet -> !subnet.isEmpty()) &&
900 nhDev2 != null && !nhDev2.isEmpty()) {
901 Map<DeviceId, Set<DeviceId>> onlyDev2NextHops = new HashMap<>();
902 onlyDev2NextHops.put(ep.dev2, nhDev2);
903 log.trace("getSubnets on {} only: {}", ep.dev2, batchedSubnetDev2Only);
904 for (Set<IpPrefix> prefixes : batchedSubnetDev2Only) {
905 if (!populateEcmpRoutingRulePartial(targetSw, ep.dev2, null,
906 onlyDev2NextHops, prefixes)) {
907 return false; // abort everything and fail fast
908 }
909 }
910 }
911 return true;
912 }
913 }
914
Saurav Das261c3002017-06-13 15:35:54 -0700915 /**
916 * Programs targetSw in the changedRoutes for given prefixes reachable by
917 * a destination switch that is not part of an edge-pair.
918 * If no prefixes are given, the method will use configured subnets/prefixes.
919 *
920 * @param subnets a set of prefixes that need to be populated in the routing
921 * table of the target switch in the changedRoutes. Can be null,
922 * in which case all the configured prefixes belonging to the
923 * paired switches will be populated in the target switch
924 * @param changedRoutes a set of route-path changes, where each route-path is
925 * a list with its first element the src-switch (target)
926 * of the path, and the second element the dst-switch of
927 * the path.
928 * @return true if successful
929 */
piera9941192019-04-24 16:12:47 +0200930 private boolean redoRoutingIndividualDests(Set<IpPrefix> subnets, Set<ArrayList<DeviceId>> changedRoutes,
Saurav Das6430f412018-01-25 09:49:01 -0800931 Set<DeviceId> updatedDevices) {
Saurav Das261c3002017-06-13 15:35:54 -0700932 // aggregate route-path changes for each dst device
933 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> routesBydevice =
934 new HashMap<>();
935 for (ArrayList<DeviceId> route: changedRoutes) {
936 DeviceId dstSw = route.get(1);
937 ArrayList<ArrayList<DeviceId>> deviceRoutes =
938 routesBydevice.get(dstSw);
939 if (deviceRoutes == null) {
940 deviceRoutes = new ArrayList<>();
941 routesBydevice.put(dstSw, deviceRoutes);
942 }
943 deviceRoutes.add(route);
944 }
piera9941192019-04-24 16:12:47 +0200945 // iterate over the impacted devices
Saurav Das261c3002017-06-13 15:35:54 -0700946 for (DeviceId impactedDstDevice : routesBydevice.keySet()) {
947 ArrayList<ArrayList<DeviceId>> deviceRoutes =
948 routesBydevice.get(impactedDstDevice);
piera9941192019-04-24 16:12:47 +0200949 List<Future<Boolean>> futures = Lists.newArrayList();
Saurav Das261c3002017-06-13 15:35:54 -0700950 for (ArrayList<DeviceId> route: deviceRoutes) {
951 log.debug("* redoRoutingIndiDst Target: {} -> dst: {}",
952 route.get(0), route.get(1));
piera9941192019-04-24 16:12:47 +0200953 futures.add(routePopulators.submit(new RedoRoutingIndividualDest(subnets, route)));
pierf331a492020-01-07 15:39:39 +0100954 changedRoutes.remove(route);
piera9941192019-04-24 16:12:47 +0200955 }
956 // check the execution of each job
957 if (!checkJobs(futures)) {
958 return false;
Saurav Das261c3002017-06-13 15:35:54 -0700959 }
960 //Only if all the flows for all impacted routes to a
961 //specific target are pushed successfully, update the
962 //ECMP graph for that target. Or else the next event
963 //would not see any changes in the ECMP graphs.
964 //In another case, the target switch has gone away, so
965 //routes can't be installed. In that case, the current map
966 //is updated here, without any flows being pushed.
967 currentEcmpSpgMap.put(impactedDstDevice,
968 updatedEcmpSpgMap.get(impactedDstDevice));
Saurav Das6430f412018-01-25 09:49:01 -0800969 updatedDevices.add(impactedDstDevice);
Saurav Das261c3002017-06-13 15:35:54 -0700970 log.debug("Updating ECMPspg for impacted dev:{}", impactedDstDevice);
971 }
972 return true;
973 }
974
piera9941192019-04-24 16:12:47 +0200975 private final class RedoRoutingIndividualDest implements PickyCallable<Boolean> {
976 private DeviceId targetSw;
977 private ArrayList<DeviceId> route;
978 private Set<IpPrefix> subnets;
979
980 /**
981 * Builds a RedoRoutingIndividualDest task, which provides a result.
982 *
983 * @param subnets a set of prefixes
984 * @param route a route-path change
985 */
986 RedoRoutingIndividualDest(Set<IpPrefix> subnets, ArrayList<DeviceId> route) {
987 this.targetSw = route.get(0);
988 this.route = route;
989 this.subnets = subnets;
990 }
991
992 @Override
993 public Boolean call() throws Exception {
994 DeviceId dstSw = route.get(1); // same as impactedDstDevice
995 Set<DeviceId> nextHops = getNextHops(targetSw, dstSw);
996 if (nextHops.isEmpty()) {
997 log.debug("Could not find next hop from target:{} --> dst {} "
998 + "skipping this route", targetSw, dstSw);
999 return true;
1000 }
1001 Map<DeviceId, Set<DeviceId>> nhops = new HashMap<>();
1002 nhops.put(dstSw, nextHops);
1003 if (!populateEcmpRoutingRulePartial(targetSw, dstSw, null, nhops,
1004 (subnets == null) ? Sets.newHashSet() : subnets)) {
1005 return false; // abort routing and fail fast
1006 }
1007 log.debug("Populating flow rules from target: {} to dst: {}"
1008 + " is successful", targetSw, dstSw);
1009 return true;
1010 }
1011
1012 @Override
1013 public int hint() {
1014 return targetSw.hashCode();
1015 }
1016 }
1017
Saurav Das261c3002017-06-13 15:35:54 -07001018 /**
1019 * Populate ECMP rules for subnets from target to destination via nexthops.
1020 *
1021 * @param targetSw Device ID of target switch in which rules will be programmed
1022 * @param destSw1 Device ID of final destination switch to which the rules will forward
1023 * @param destSw2 Device ID of paired destination switch to which the rules will forward
1024 * A null deviceId indicates packets should only be sent to destSw1
Saurav Das97241862018-02-14 14:14:54 -08001025 * @param nextHops Map of a set of next hops per destSw
Saurav Das261c3002017-06-13 15:35:54 -07001026 * @param subnets Subnets to be populated. If empty, populate all configured subnets.
1027 * @return true if it succeeds in populating rules
1028 */ // refactor
piera9941192019-04-24 16:12:47 +02001029 private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destSw1, DeviceId destSw2,
1030 Map<DeviceId, Set<DeviceId>> nextHops, Set<IpPrefix> subnets) {
Saurav Das261c3002017-06-13 15:35:54 -07001031 boolean result;
1032 // If both target switch and dest switch are edge routers, then set IP
1033 // rule for both subnet and router IP.
1034 boolean targetIsEdge;
1035 boolean dest1IsEdge;
1036 Ip4Address dest1RouterIpv4, dest2RouterIpv4 = null;
1037 Ip6Address dest1RouterIpv6, dest2RouterIpv6 = null;
1038
1039 try {
1040 targetIsEdge = config.isEdgeDevice(targetSw);
1041 dest1IsEdge = config.isEdgeDevice(destSw1);
1042 dest1RouterIpv4 = config.getRouterIpv4(destSw1);
1043 dest1RouterIpv6 = config.getRouterIpv6(destSw1);
1044 if (destSw2 != null) {
1045 dest2RouterIpv4 = config.getRouterIpv4(destSw2);
1046 dest2RouterIpv6 = config.getRouterIpv6(destSw2);
1047 }
1048 } catch (DeviceConfigNotFoundException e) {
1049 log.warn(e.getMessage() + " Aborting populateEcmpRoutingRulePartial.");
Saurav Das62ae6792017-05-15 15:34:25 -07001050 return false;
1051 }
Saurav Das261c3002017-06-13 15:35:54 -07001052
1053 if (targetIsEdge && dest1IsEdge) {
Charles Chan19b70032019-04-17 14:20:26 -07001054 List<Set<IpPrefix>> batchedSubnets;
1055 if (subnets != null && !subnets.isEmpty()) {
1056 batchedSubnets = Lists.<Set<IpPrefix>>newArrayList(Sets.newHashSet(subnets));
1057 } else {
1058 batchedSubnets = config.getBatchedSubnets(destSw1);
1059 }
Saurav Das97241862018-02-14 14:14:54 -08001060 // XXX - Rethink this - ignoring routerIPs in all other switches
1061 // even edge to edge switches
Saurav Das261c3002017-06-13 15:35:54 -07001062 /*subnets.add(dest1RouterIpv4.toIpPrefix());
1063 if (dest1RouterIpv6 != null) {
1064 subnets.add(dest1RouterIpv6.toIpPrefix());
1065 }
1066 if (destSw2 != null && dest2RouterIpv4 != null) {
1067 subnets.add(dest2RouterIpv4.toIpPrefix());
1068 if (dest2RouterIpv6 != null) {
1069 subnets.add(dest2RouterIpv6.toIpPrefix());
1070 }
1071 }*/
Charles Chan19b70032019-04-17 14:20:26 -07001072 log.trace("getSubnets on {}: {}", destSw1, batchedSubnets);
1073 for (Set<IpPrefix> prefixes : batchedSubnets) {
1074 log.debug(". populateEcmpRoutingRulePartial in device {} towards {} {} "
1075 + "for subnets {}", targetSw, destSw1,
1076 (destSw2 != null) ? ("& " + destSw2) : "",
1077 prefixes);
1078 if (!rulePopulator.populateIpRuleForSubnet(targetSw, prefixes, destSw1, destSw2, nextHops)) {
1079 return false;
1080 }
Saurav Das261c3002017-06-13 15:35:54 -07001081 }
Saurav Das62ae6792017-05-15 15:34:25 -07001082 }
Saurav Das261c3002017-06-13 15:35:54 -07001083
1084 if (!targetIsEdge && dest1IsEdge) {
1085 // MPLS rules in all non-edge target devices. These rules are for
1086 // individual destinations, even if the dsts are part of edge-pairs.
1087 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1088 + "all MPLS rules", targetSw, destSw1);
piera9941192019-04-24 16:12:47 +02001089 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Saurav Das261c3002017-06-13 15:35:54 -07001090 if (!result) {
1091 return false;
1092 }
1093 if (dest1RouterIpv6 != null) {
Saurav Das97241862018-02-14 14:14:54 -08001094 int v4sid = 0, v6sid = 0;
1095 try {
1096 v4sid = config.getIPv4SegmentId(destSw1);
1097 v6sid = config.getIPv6SegmentId(destSw1);
1098 } catch (DeviceConfigNotFoundException e) {
1099 log.warn(e.getMessage());
1100 }
1101 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001102 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Saurav Das97241862018-02-14 14:14:54 -08001103 dest1RouterIpv6);
1104 if (!result) {
1105 return false;
1106 }
Saurav Das261c3002017-06-13 15:35:54 -07001107 }
1108 }
1109 }
1110
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001111 if (!targetIsEdge && !dest1IsEdge) {
1112 // MPLS rules for inter-connected spines
1113 // can be merged with above if, left it here for clarity
1114 log.debug(". populateEcmpRoutingRulePartial in device{} towards {} for "
1115 + "all MPLS rules", targetSw, destSw1);
1116
piera9941192019-04-24 16:12:47 +02001117 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1), dest1RouterIpv4);
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001118 if (!result) {
1119 return false;
1120 }
1121
1122 if (dest1RouterIpv6 != null) {
1123 int v4sid = 0, v6sid = 0;
1124 try {
1125 v4sid = config.getIPv4SegmentId(destSw1);
1126 v6sid = config.getIPv6SegmentId(destSw1);
1127 } catch (DeviceConfigNotFoundException e) {
1128 log.warn(e.getMessage());
1129 }
1130 if (v4sid != v6sid) {
piera9941192019-04-24 16:12:47 +02001131 result = rulePopulator.populateMplsRule(targetSw, destSw1, nextHops.get(destSw1),
Andreas Pantelopoulosfc4bc2a2018-03-12 16:30:20 -07001132 dest1RouterIpv6);
1133 if (!result) {
1134 return false;
1135 }
1136 }
1137 }
1138 }
1139
Saurav Das261c3002017-06-13 15:35:54 -07001140 // To save on ECMP groups
1141 // avoid MPLS rules in non-edge-devices to non-edge-devices
1142 // avoid MPLS transit rules in edge-devices
1143 // avoid loopback IP rules in edge-devices to non-edge-devices
1144 return true;
Saurav Das62ae6792017-05-15 15:34:25 -07001145 }
1146
1147 /**
pier572d4a92019-04-25 18:51:51 +02001148 * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
Saurav Das62ae6792017-05-15 15:34:25 -07001149 *
1150 * @param routeChanges a set of route-path changes, where each route-path is
1151 * a list with its first element the src-switch of the path
1152 * and the second element the dst-switch of the path.
Saurav Das62ae6792017-05-15 15:34:25 -07001153 * @param failedSwitch the switchId if the route changes are for a failed switch,
1154 * otherwise null
1155 */
pier572d4a92019-04-25 18:51:51 +02001156 private void processHashGroupChangeForFailure(Set<ArrayList<DeviceId>> routeChanges,
1157 DeviceId failedSwitch) {
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001158 // first, ensure each routeChanges entry has two elements
pier572d4a92019-04-25 18:51:51 +02001159 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
Saurav Das6430f412018-01-25 09:49:01 -08001160 boolean someFailed = false;
pier572d4a92019-04-25 18:51:51 +02001161 boolean success;
Saurav Das6430f412018-01-25 09:49:01 -08001162 Set<DeviceId> updatedDevices = Sets.newHashSet();
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001163 for (ArrayList<DeviceId> route : changedRoutes) {
1164 DeviceId targetSw = route.get(0);
1165 DeviceId dstSw = route.get(1);
pier572d4a92019-04-25 18:51:51 +02001166 success = fixHashGroupsForRoute(route, true);
1167 // it's possible that we cannot fix hash groups for a route
1168 // if the target switch has failed. Nevertheless the ecmp graph
1169 // for the impacted switch must still be updated.
1170 if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
1171 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1172 currentEcmpSpgMap.remove(targetSw);
1173 log.debug("Updating ECMPspg for dst:{} removing failed switch "
1174 + "target:{}", dstSw, targetSw);
1175 updatedDevices.add(targetSw);
1176 updatedDevices.add(dstSw);
1177 continue;
pierf331a492020-01-07 15:39:39 +01001178
pier572d4a92019-04-25 18:51:51 +02001179 }
1180 //linkfailed - update both sides
1181 if (success) {
1182 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1183 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1184 log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown"
1185 + " or switchdown", dstSw, targetSw);
1186 updatedDevices.add(targetSw);
1187 updatedDevices.add(dstSw);
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001188 } else {
pier572d4a92019-04-25 18:51:51 +02001189 someFailed = true;
Saurav Das62ae6792017-05-15 15:34:25 -07001190 }
1191 }
Saurav Das6430f412018-01-25 09:49:01 -08001192 if (!someFailed) {
1193 // here is where we update all devices not touched by this instance
1194 updatedEcmpSpgMap.keySet().stream()
1195 .filter(devId -> !updatedDevices.contains(devId))
1196 .forEach(devId -> {
1197 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1198 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1199 });
1200 }
Saurav Das62ae6792017-05-15 15:34:25 -07001201 }
1202
1203 /**
pier572d4a92019-04-25 18:51:51 +02001204 * Processes a set a route-path changes due to link up by editing hash groups.
1205 *
1206 * @param routeChanges a set of route-path changes, where each route-path is
1207 * a list with its first element the src-switch of the path
1208 * and the second element the dst-switch of the path.
1209 * @return set of changed routes
1210 */
1211 private Set<ArrayList<DeviceId>> processHashGroupChangeForLinkUp(Set<ArrayList<DeviceId>> routeChanges) {
1212 // Stores changed routes
1213 Set<ArrayList<DeviceId>> doneRoutes = new HashSet<>();
1214 // first, ensure each routeChanges entry has two elements
1215 Set<ArrayList<DeviceId>> changedRoutes = getAllExpandedRoutes(routeChanges);
1216 boolean someFailed = false;
1217 boolean success;
1218 Set<DeviceId> updatedDevices = Sets.newHashSet();
1219 for (ArrayList<DeviceId> route : changedRoutes) {
1220 DeviceId targetSw = route.get(0);
1221 DeviceId dstSw = route.get(1);
1222 // linkup - fix (if possible)
1223 success = fixHashGroupsForRoute(route, false);
1224 if (success) {
1225 currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
1226 currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
1227 log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
1228 targetSw, dstSw);
1229 updatedDevices.add(targetSw);
1230 updatedDevices.add(dstSw);
1231 doneRoutes.add(route);
1232 } else {
1233 someFailed = true;
1234 }
1235
1236 }
1237 if (!someFailed) {
1238 // here is where we update all devices not touched by this instance
1239 updatedEcmpSpgMap.keySet().stream()
1240 .filter(devId -> !updatedDevices.contains(devId))
1241 .forEach(devId -> {
1242 currentEcmpSpgMap.put(devId, updatedEcmpSpgMap.get(devId));
1243 log.debug("Updating ECMPspg for remaining dev:{}", devId);
1244 });
1245 }
1246 return doneRoutes;
1247 }
1248
1249 /**
Saurav Das62ae6792017-05-15 15:34:25 -07001250 * Edits hash groups in the src-switch (targetSw) of a route-path by
1251 * calling the groupHandler to either add or remove buckets in an existing
1252 * hash group.
1253 *
1254 * @param route a single list representing a route-path where the first element
1255 * is the src-switch (targetSw) of the route-path and the
1256 * second element is the dst-switch
1257 * @param revoke true if buckets in the hash-groups need to be removed;
1258 * false if buckets in the hash-groups need to be added
1259 * @return true if the hash group editing is successful
1260 */
1261 private boolean fixHashGroupsForRoute(ArrayList<DeviceId> route,
1262 boolean revoke) {
1263 DeviceId targetSw = route.get(0);
1264 if (route.size() < 2) {
1265 log.warn("Cannot fixHashGroupsForRoute - no dstSw in route {}", route);
1266 return false;
1267 }
1268 DeviceId destSw = route.get(1);
pierf331a492020-01-07 15:39:39 +01001269 if (!seenBeforeRoutes.containsEntry(destSw, targetSw)) {
1270 log.warn("Cannot fixHashGroupsForRoute {} -> {} has not been programmed before",
1271 targetSw, destSw);
1272 return false;
1273 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001274 log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001275 targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001276 // figure out the new next hops at the targetSw towards the destSw
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001277 Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
Saurav Das62ae6792017-05-15 15:34:25 -07001278 // call group handler to change hash group at targetSw
1279 DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
1280 if (grpHandler == null) {
1281 log.warn("Cannot find grouphandler for dev:{} .. aborting"
1282 + " {} hash group buckets for route:{} ", targetSw,
1283 (revoke) ? "revoke" : "repopulate", route);
1284 return false;
1285 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001286 log.debug("{} hash-groups buckets For Route {} -> {} to new next-hops {}",
Saurav Das62ae6792017-05-15 15:34:25 -07001287 (revoke) ? "revoke" : "repopulating",
1288 targetSw, destSw, nextHops);
1289 return (revoke) ? grpHandler.fixHashGroups(targetSw, nextHops,
1290 destSw, true)
1291 : grpHandler.fixHashGroups(targetSw, nextHops,
1292 destSw, false);
1293 }
1294
1295 /**
Saurav Das261c3002017-06-13 15:35:54 -07001296 * Start the flow rule population process if it was never started. The
1297 * process finishes successfully when all flow rules are set and stops with
1298 * ABORTED status when any groups required for flows is not set yet.
Saurav Das62ae6792017-05-15 15:34:25 -07001299 */
Saurav Das261c3002017-06-13 15:35:54 -07001300 public void startPopulationProcess() {
1301 statusLock.lock();
1302 try {
1303 if (populationStatus == Status.IDLE
1304 || populationStatus == Status.SUCCEEDED
1305 || populationStatus == Status.ABORTED) {
1306 populateAllRoutingRules();
sangho28d0b6d2015-05-07 13:30:57 -07001307 } else {
Saurav Das261c3002017-06-13 15:35:54 -07001308 log.warn("Not initiating startPopulationProcess as populationStatus is {}",
1309 populationStatus);
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001310 }
Saurav Das261c3002017-06-13 15:35:54 -07001311 } finally {
1312 statusLock.unlock();
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001313 }
sanghofb7c7292015-04-13 15:15:58 -07001314 }
1315
Saurav Dasb149be12016-06-07 10:08:06 -07001316 /**
pierventre0dcbf0e2021-10-11 13:07:09 +02001317 * Revoke rules of given subnet in all edge switches. Use the
1318 * destination switch (if it is provided) to provide coordination
1319 * among the instances. Otherwise, only the leader of the target
1320 * switch can remove this subnet.
Saurav Das261c3002017-06-13 15:35:54 -07001321 *
1322 * @param subnets subnet being removed
pierventre0dcbf0e2021-10-11 13:07:09 +02001323 * @param destSw destination switch. It is null when it is called from RouteHandler,
1324 * in this context we don't have a way to remember the old locations.
Saurav Das261c3002017-06-13 15:35:54 -07001325 * @return true if succeed
1326 */
pierventre0dcbf0e2021-10-11 13:07:09 +02001327 protected boolean revokeSubnet(Set<IpPrefix> subnets, DeviceId destSw) {
piera9941192019-04-24 16:12:47 +02001328 DeviceId targetSw;
1329 List<Future<Boolean>> futures = Lists.newArrayList();
1330 for (Device sw : srManager.deviceService.getAvailableDevices()) {
1331 targetSw = sw.id();
pierventre0dcbf0e2021-10-11 13:07:09 +02001332 // In some calls, we dont know anymore the destination switch
1333 if ((destSw != null && shouldProgram(destSw)) || shouldProgram(targetSw)) {
piera9941192019-04-24 16:12:47 +02001334 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1335 } else {
1336 futures.add(CompletableFuture.completedFuture(true));
1337 }
1338 }
1339 // check the execution of each job
1340 return checkJobs(futures);
1341 }
1342
Shibu Vijayakumar5e26f8c2020-01-07 11:45:09 +00001343 /**
1344 * Revoke rules of given subnets in the given switches.
1345 *
1346 * @param targetSwitches switched from which subnets to be removed
1347 * @param subnets subnet bring removed
1348 * @return true if succeed
1349 */
1350 protected boolean revokeSubnet(Set<DeviceId> targetSwitches, Set<IpPrefix> subnets) {
1351 List<Future<Boolean>> futures = Lists.newArrayList();
1352 for (DeviceId targetSw : targetSwitches) {
1353 if (shouldProgram(targetSw)) {
1354 futures.add(routePopulators.submit(new RevokeSubnet(targetSw, subnets)));
1355 } else {
1356 futures.add(CompletableFuture.completedFuture(true));
1357 }
1358 }
1359 // check the execution of each job
1360 return checkJobs(futures);
1361 }
1362
piera9941192019-04-24 16:12:47 +02001363 private final class RevokeSubnet implements PickyCallable<Boolean> {
1364 private DeviceId targetSw;
1365 private Set<IpPrefix> subnets;
1366
1367 /**
1368 * Builds a RevokeSubnet task, which provides a result.
1369 *
1370 * @param subnets a set of prefixes
1371 * @param targetSw target switch
1372 */
1373 RevokeSubnet(DeviceId targetSw, Set<IpPrefix> subnets) {
1374 this.targetSw = targetSw;
1375 this.subnets = subnets;
1376 }
1377
1378 @Override
1379 public Boolean call() throws Exception {
1380 return srManager.routingRulePopulator.revokeIpRuleForSubnet(targetSw, subnets);
1381 }
1382
1383 @Override
1384 public int hint() {
1385 return targetSw.hashCode();
Saurav Das261c3002017-06-13 15:35:54 -07001386 }
1387 }
1388
1389 /**
Charles Chan910be6a2017-08-23 14:46:43 -07001390 * Populates IP rules for a route that has direct connection to the switch
pierventre37dcf4c2021-09-16 18:43:06 +02001391 * if the current instance is leading the programming of the switch.
Charles Chan910be6a2017-08-23 14:46:43 -07001392 *
1393 * @param deviceId device ID of the device that next hop attaches to
1394 * @param prefix IP prefix of the route
1395 * @param hostMac MAC address of the next hop
1396 * @param hostVlanId Vlan ID of the nexthop
1397 * @param outPort port where the next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001398 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001399 * @return future that includes the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001400 */
Charles Chan12a8a842020-02-14 13:23:57 -08001401 CompletableFuture<Objective> populateRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac,
1402 VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001403 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001404 return srManager.routingRulePopulator.populateRoute(deviceId, prefix,
1405 hostMac, hostVlanId, outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001406 }
Charles Chan12a8a842020-02-14 13:23:57 -08001407 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001408 }
1409
1410 /**
1411 * Removes IP rules for a route when the next hop is gone.
pierventre37dcf4c2021-09-16 18:43:06 +02001412 * if the current instance is leading the programming of the switch.
Charles Chan910be6a2017-08-23 14:46:43 -07001413 *
1414 * @param deviceId device ID of the device that next hop attaches to
1415 * @param prefix IP prefix of the route
1416 * @param hostMac MAC address of the next hop
1417 * @param hostVlanId Vlan ID of the nexthop
1418 * @param outPort port that next hop attaches to
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001419 * @param directHost host is of type direct or indirect
Charles Chan12a8a842020-02-14 13:23:57 -08001420 * @return future that carries the flow objective if succeeded, null if otherwise
Charles Chan910be6a2017-08-23 14:46:43 -07001421 */
Charles Chan12a8a842020-02-14 13:23:57 -08001422 CompletableFuture<Objective> revokeRoute(DeviceId deviceId, IpPrefix prefix,
Ruchi Sahota71bcb4e2019-01-28 01:08:18 +00001423 MacAddress hostMac, VlanId hostVlanId, PortNumber outPort, boolean directHost) {
Charles Chand66d6712018-03-29 16:03:41 -07001424 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001425 return srManager.routingRulePopulator.revokeRoute(deviceId, prefix, hostMac, hostVlanId,
1426 outPort, directHost);
Charles Chan910be6a2017-08-23 14:46:43 -07001427 }
Charles Chan12a8a842020-02-14 13:23:57 -08001428 return CompletableFuture.completedFuture(null);
Charles Chan910be6a2017-08-23 14:46:43 -07001429 }
1430
Charles Chan12a8a842020-02-14 13:23:57 -08001431 CompletableFuture<Objective> populateBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001432 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001433 return srManager.routingRulePopulator.populateBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001434 }
Charles Chan12a8a842020-02-14 13:23:57 -08001435 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001436 }
1437
Charles Chan12a8a842020-02-14 13:23:57 -08001438 CompletableFuture<Objective> revokeBridging(DeviceId deviceId, PortNumber port, MacAddress mac, VlanId vlanId) {
Charles Chand66d6712018-03-29 16:03:41 -07001439 if (shouldProgram(deviceId)) {
Charles Chan12a8a842020-02-14 13:23:57 -08001440 return srManager.routingRulePopulator.revokeBridging(deviceId, port, mac, vlanId);
Charles Chand66d6712018-03-29 16:03:41 -07001441 }
Charles Chan12a8a842020-02-14 13:23:57 -08001442 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001443 }
1444
pierventrea3989be2021-01-08 16:43:17 +01001445 CompletableFuture<Objective> updateBridging(DeviceId deviceId, PortNumber portNum, MacAddress hostMac,
1446 VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001447 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001448 return srManager.routingRulePopulator.updateBridging(deviceId, portNum, hostMac, vlanId, popVlan, install);
Charles Chand66d6712018-03-29 16:03:41 -07001449 }
pierventrea3989be2021-01-08 16:43:17 +01001450 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001451 }
1452
pierventrea3989be2021-01-08 16:43:17 +01001453 CompletableFuture<Objective> updateFwdObj(DeviceId deviceId, PortNumber portNumber, IpPrefix prefix,
1454 MacAddress hostMac, VlanId vlanId, boolean popVlan, boolean install) {
Charles Chand66d6712018-03-29 16:03:41 -07001455 if (shouldProgram(deviceId)) {
pierventrea3989be2021-01-08 16:43:17 +01001456 return srManager.routingRulePopulator.updateFwdObj(deviceId, portNumber, prefix, hostMac,
Charles Chand66d6712018-03-29 16:03:41 -07001457 vlanId, popVlan, install);
1458 }
pierventrea3989be2021-01-08 16:43:17 +01001459 return CompletableFuture.completedFuture(null);
Charles Chand66d6712018-03-29 16:03:41 -07001460 }
1461
Charles Chan910be6a2017-08-23 14:46:43 -07001462 /**
pierventre37dcf4c2021-09-16 18:43:06 +02001463 * Program IP rules for a route when the next hop is double-tagged.
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001464 *
1465 * @param deviceId device ID that next hop attaches to
1466 * @param prefix IP prefix of the route
1467 * @param hostMac MAC address of the next hop
1468 * @param innerVlan Inner Vlan ID of the next hop
1469 * @param outerVlan Outer Vlan ID of the next hop
1470 * @param outerTpid Outer TPID of the next hop
1471 * @param outPort port that the next hop attaches to
pierventre37dcf4c2021-09-16 18:43:06 +02001472 * @param install whether or not install the route
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001473 */
pierventre37dcf4c2021-09-16 18:43:06 +02001474 void programDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
1475 VlanId outerVlan, EthType outerTpid, PortNumber outPort, boolean install) {
1476 if (shouldProgram(deviceId)) {
1477 if (install) {
1478 srManager.routingRulePopulator.populateDoubleTaggedRoute(
1479 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1480 } else {
1481 srManager.routingRulePopulator.revokeDoubleTaggedRoute(
1482 deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
1483 }
Charles Chan61c086d2019-07-26 17:46:15 -07001484 srManager.routingRulePopulator.processDoubleTaggedFilter(
pierventre37dcf4c2021-09-16 18:43:06 +02001485 deviceId, outPort, outerVlan, innerVlan, install);
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001486 }
1487 }
1488
1489 /**
pierf331a492020-01-07 15:39:39 +01001490 * Purges seen before routes for a given device.
1491 * @param deviceId the device id
1492 */
1493 void purgeSeenBeforeRoutes(DeviceId deviceId) {
1494 log.debug("Purging seen before routes having as target {}", deviceId);
1495 Set<Entry<DeviceId, DeviceId>> routesToPurge = seenBeforeRoutes.stream()
1496 .filter(entry -> entry.getValue().equals(deviceId))
1497 .collect(Collectors.toSet());
1498 routesToPurge.forEach(entry -> seenBeforeRoutes.remove(entry.getKey(), entry.getValue()));
1499 }
Jonghwan Hyun9aaa34f2018-04-09 09:40:50 -07001500
1501 /**
Saurav Das261c3002017-06-13 15:35:54 -07001502 * Remove ECMP graph entry for the given device. Typically called when
1503 * device is no longer available.
1504 *
1505 * @param deviceId the device for which graphs need to be purged
1506 */
Charles Chanfbcb8812018-04-18 18:41:05 -07001507 void purgeEcmpGraph(DeviceId deviceId) {
Saurav Das6430f412018-01-25 09:49:01 -08001508 statusLock.lock();
1509 try {
Saurav Das6430f412018-01-25 09:49:01 -08001510 if (populationStatus == Status.STARTED) {
1511 log.warn("Previous rule population is not finished. Cannot"
1512 + " proceeed with purgeEcmpGraph for {}", deviceId);
1513 return;
1514 }
1515 log.debug("Updating ECMPspg for unavailable dev:{}", deviceId);
1516 currentEcmpSpgMap.remove(deviceId);
1517 if (updatedEcmpSpgMap != null) {
1518 updatedEcmpSpgMap.remove(deviceId);
1519 }
1520 } finally {
1521 statusLock.unlock();
Saurav Das261c3002017-06-13 15:35:54 -07001522 }
1523 }
1524
Saurav Das00e553b2018-04-21 17:19:48 -07001525 /**
1526 * Attempts a full reroute of route-paths if topology has changed relatively
1527 * close to a mastership change event. Does not do a reroute if mastership
1528 * change is due to reasons other than a ONOS cluster event - for example a
1529 * call to balance-masters, or a switch up/down event.
1530 *
1531 * @param devId the device identifier for which mastership has changed
1532 * @param me the mastership event
1533 */
1534 void checkFullRerouteForMasterChange(DeviceId devId, MastershipEvent me) {
1535 // give small delay to absorb mastership events that are caused by
1536 // device that has disconnected from cluster
Saurav Das49368392018-04-23 18:42:12 -07001537 executorServiceMstChg.schedule(new MasterChange(devId, me),
1538 MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
Saurav Das00e553b2018-04-21 17:19:48 -07001539 }
1540
pierventre37dcf4c2021-09-16 18:43:06 +02001541 /*
1542 * Even though the current implementation does not heavily rely
1543 * on mastership, we keep using the mastership and cluster events
1544 * as heuristic to perform full reroutes and to make sure we don't
1545 * lose any event when instances fail.
1546 */
Saurav Das00e553b2018-04-21 17:19:48 -07001547 protected final class MasterChange implements Runnable {
1548 private DeviceId devId;
1549 private MastershipEvent me;
1550 private static final long CLUSTER_EVENT_THRESHOLD = 4500; // ms
1551 private static final long DEVICE_EVENT_THRESHOLD = 2000; // ms
Saurav Dasec683dc2018-04-27 18:42:30 -07001552 private static final long EDGE_PORT_EVENT_THRESHOLD = 10000; //ms
Saurav Das68e1b6a2018-06-11 17:02:31 -07001553 private static final long FULL_REROUTE_THRESHOLD = 10000; // ms
Saurav Das00e553b2018-04-21 17:19:48 -07001554
1555 MasterChange(DeviceId devId, MastershipEvent me) {
1556 this.devId = devId;
1557 this.me = me;
1558 }
1559
1560 @Override
1561 public void run() {
1562 long lce = srManager.clusterListener.timeSinceLastClusterEvent();
1563 boolean clusterEvent = lce < CLUSTER_EVENT_THRESHOLD;
1564
1565 // ignore event for lost switch if cluster event hasn't happened -
1566 // device down event will handle it
1567 if ((me.roleInfo().master() == null
1568 || !srManager.deviceService.isAvailable(devId))
1569 && !clusterEvent) {
1570 log.debug("Full reroute not required for lost device: {}/{} "
1571 + "clusterEvent/timeSince: {}/{}",
1572 devId, me.roleInfo(), clusterEvent, lce);
1573 return;
1574 }
1575
1576 long update = srManager.deviceService.getLastUpdatedInstant(devId);
1577 long lde = Instant.now().toEpochMilli() - update;
1578 boolean deviceEvent = lde < DEVICE_EVENT_THRESHOLD;
1579
1580 // ignore event for recently connected switch if cluster event hasn't
1581 // happened - link up events will handle it
1582 if (srManager.deviceService.isAvailable(devId) && deviceEvent
1583 && !clusterEvent) {
1584 log.debug("Full reroute not required for recently available"
1585 + " device: {}/{} deviceEvent/timeSince: {}/{} "
1586 + "clusterEvent/timeSince: {}/{}",
1587 devId, me.roleInfo(), deviceEvent, lde, clusterEvent, lce);
1588 return;
1589 }
1590
Saurav Dasec683dc2018-04-27 18:42:30 -07001591 long lepe = Instant.now().toEpochMilli()
1592 - srManager.lastEdgePortEvent.toEpochMilli();
1593 boolean edgePortEvent = lepe < EDGE_PORT_EVENT_THRESHOLD;
1594
Saurav Das00e553b2018-04-21 17:19:48 -07001595 // if it gets here, then mastership change is likely due to onos
1596 // instance failure, or network partition in onos cluster
1597 // normally a mastership change like this does not require re-programming
1598 // but if topology changes happen at the same time then we may miss events
1599 if (!isRoutingStable() && clusterEvent) {
Saurav Dasec683dc2018-04-27 18:42:30 -07001600 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
Saurav Das00e553b2018-04-21 17:19:48 -07001601 + "due to clusterEvent {} ms ago .. attempting full reroute",
1602 devId, me.roleInfo(), lce);
pierventre37dcf4c2021-09-16 18:43:06 +02001603 if (shouldProgram(devId)) {
1604 // old leader could have died when populating filters
Saurav Das00e553b2018-04-21 17:19:48 -07001605 populatePortAddressingRules(devId);
1606 }
pierventre37dcf4c2021-09-16 18:43:06 +02001607 // old leader could have died when creating groups
Saurav Das00e553b2018-04-21 17:19:48 -07001608 // XXX right now we have no fine-grained way to only make changes
Saurav Das68e1b6a2018-06-11 17:02:31 -07001609 // for the route paths affected by this device. Thus we do a
1610 // full reroute after purging all hash groups. We also try to do
1611 // it only once, irrespective of the number of devices
pierventre37dcf4c2021-09-16 18:43:06 +02001612 // that changed mastership when their leader instance died.
Saurav Das68e1b6a2018-06-11 17:02:31 -07001613 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
1614 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
1615 if (doFullReroute) {
1616 lastFullReroute = Instant.now();
1617 for (Device dev : srManager.deviceService.getDevices()) {
1618 if (shouldProgram(dev.id())) {
1619 srManager.purgeHashedNextObjectiveStore(dev.id());
pierf331a492020-01-07 15:39:39 +01001620 seenBeforeRoutes.removeAll(dev.id());
Saurav Das68e1b6a2018-06-11 17:02:31 -07001621 }
1622 }
1623 // give small delay to ensure entire store is purged
1624 executorServiceFRR.schedule(new FullRerouteAfterPurge(),
1625 PURGE_DELAY,
1626 TimeUnit.MILLISECONDS);
1627 } else {
1628 log.warn("Full reroute attempted {} ms ago .. skipping", lfrr);
1629 }
Saurav Dasec683dc2018-04-27 18:42:30 -07001630
1631 } else if (edgePortEvent && clusterEvent) {
1632 log.warn("Mastership changed for dev: {}/{} due to clusterEvent {} ms ago "
1633 + "while edge-port event happened {} ms ago "
1634 + " .. reprogramming all edge-ports",
1635 devId, me.roleInfo(), lce, lepe);
1636 if (shouldProgram(devId)) {
1637 srManager.deviceService.getPorts(devId).stream()
1638 .filter(p -> srManager.interfaceService
1639 .isConfigured(new ConnectPoint(devId, p.number())))
1640 .forEach(p -> srManager.processPortUpdated(devId, p));
1641 }
1642
Saurav Das00e553b2018-04-21 17:19:48 -07001643 } else {
1644 log.debug("Stable route-paths .. full reroute not attempted for "
1645 + "mastership change {}/{} deviceEvent/timeSince: {}/{} "
1646 + "clusterEvent/timeSince: {}/{}", devId, me.roleInfo(),
1647 deviceEvent, lde, clusterEvent, lce);
1648 }
1649 }
1650 }
1651
Saurav Das68e1b6a2018-06-11 17:02:31 -07001652 /**
1653 * Performs a full reroute of routing rules in all the switches. Assumes
1654 * caller has purged hash groups from the nextObjective store, otherwise
1655 * re-uses ones available in the store.
1656 */
1657 protected final class FullRerouteAfterPurge implements Runnable {
1658 @Override
1659 public void run() {
1660 populateAllRoutingRules();
1661 }
1662 }
1663
1664
Saurav Das261c3002017-06-13 15:35:54 -07001665 //////////////////////////////////////
1666 // Routing helper methods and classes
1667 //////////////////////////////////////
1668
1669 /**
Saurav Das68e1b6a2018-06-11 17:02:31 -07001670 * Computes set of affected routes due to failed link. Assumes previous ecmp
1671 * shortest-path graph exists for a switch in order to compute affected
1672 * routes. If such a graph does not exist, the method returns null.
Saurav Dasb149be12016-06-07 10:08:06 -07001673 *
1674 * @param linkFail the failed link
1675 * @return the set of affected routes which may be empty if no routes were
Saurav Das68e1b6a2018-06-11 17:02:31 -07001676 * affected
Saurav Dasb149be12016-06-07 10:08:06 -07001677 */
sanghofb7c7292015-04-13 15:15:58 -07001678 private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
sanghofb7c7292015-04-13 15:15:58 -07001679 Set<ArrayList<DeviceId>> routes = new HashSet<>();
1680
1681 for (Device sw : srManager.deviceService.getDevices()) {
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001682 log.debug("Computing the impacted routes for device {} due to link fail",
1683 sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001684 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001685 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001686 continue;
1687 }
Charles Chand66d6712018-03-29 16:03:41 -07001688 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
pierventre37dcf4c2021-09-16 18:43:06 +02001689 // check for leadership change since last run
Saurav Das00e553b2018-04-21 17:19:48 -07001690 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001691 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001692 + " ... nuking current ECMPspg", sw.id());
1693 currentEcmpSpgMap.remove(sw.id());
1694 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001695 lastProgrammed.add(sw.id());
1696
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001697 EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
1698 if (ecmpSpg == null) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001699 log.warn("No existing ECMP graph for switch {}. Assuming "
1700 + "all route-paths have changed towards it.", rootSw);
1701 for (DeviceId targetSw : srManager.deviceConfiguration.getRouters()) {
1702 if (targetSw.equals(rootSw)) {
1703 continue;
1704 }
1705 routes.add(Lists.newArrayList(targetSw, rootSw));
1706 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1707 }
1708 continue;
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001709 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001710
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001711 if (log.isDebugEnabled()) {
1712 log.debug("Root switch: {}", rootSw);
1713 log.debug(" Current/Existing SPG: {}", ecmpSpg);
1714 log.debug(" New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
1715 }
1716 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
1717 switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
1718 // figure out if the broken link affected any route-paths in this graph
1719 for (Integer itrIdx : switchVia.keySet()) {
1720 log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
1721 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1722 switchVia.get(itrIdx);
1723 for (DeviceId targetSw : swViaMap.keySet()) {
1724 log.trace("TargetSwitch {} --> RootSwitch {}",
1725 targetSw, rootSw);
Saurav Dasb149be12016-06-07 10:08:06 -07001726 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
1727 log.trace(" Via:");
Pier Ventreadb4ae62016-11-23 09:57:42 -08001728 via.forEach(e -> log.trace(" {}", e));
Saurav Dasb149be12016-06-07 10:08:06 -07001729 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001730 Set<ArrayList<DeviceId>> subLinks =
1731 computeLinks(targetSw, rootSw, swViaMap);
1732 for (ArrayList<DeviceId> alink: subLinks) {
1733 if ((alink.get(0).equals(linkFail.src().deviceId()) &&
1734 alink.get(1).equals(linkFail.dst().deviceId()))
1735 ||
1736 (alink.get(0).equals(linkFail.dst().deviceId()) &&
1737 alink.get(1).equals(linkFail.src().deviceId()))) {
1738 log.debug("Impacted route:{}->{}", targetSw, rootSw);
1739 ArrayList<DeviceId> aRoute = new ArrayList<>();
1740 aRoute.add(targetSw); // switch with rules to populate
1741 aRoute.add(rootSw); // towards this destination
1742 routes.add(aRoute);
1743 break;
1744 }
sanghofb7c7292015-04-13 15:15:58 -07001745 }
1746 }
1747 }
Saurav Dasfe0b05e2017-08-14 16:44:43 -07001748
sanghofb7c7292015-04-13 15:15:58 -07001749 }
sangho28d0b6d2015-05-07 13:30:57 -07001750
sanghofb7c7292015-04-13 15:15:58 -07001751 }
sanghofb7c7292015-04-13 15:15:58 -07001752 return routes;
1753 }
1754
Saurav Das1b391d52016-11-29 14:27:25 -08001755 /**
1756 * Computes set of affected routes due to new links or failed switches.
1757 *
Saurav Dasdc7f2752018-03-18 21:28:15 -07001758 * @param failedSwitch deviceId of failed switch if any
Saurav Das1b391d52016-11-29 14:27:25 -08001759 * @return the set of affected routes which may be empty if no routes were
1760 * affected
1761 */
Saurav Dascea556f2018-03-05 14:37:16 -08001762 private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
Saurav Das261c3002017-06-13 15:35:54 -07001763 ImmutableSet.Builder<ArrayList<DeviceId>> changedRtBldr =
Saurav Das1b391d52016-11-29 14:27:25 -08001764 ImmutableSet.builder();
sanghofb7c7292015-04-13 15:15:58 -07001765
1766 for (Device sw : srManager.deviceService.getDevices()) {
Saurav Das261c3002017-06-13 15:35:54 -07001767 log.debug("Computing the impacted routes for device {}", sw.id());
Charles Chand66d6712018-03-29 16:03:41 -07001768 if (!shouldProgram(sw.id())) {
Saurav Das00e553b2018-04-21 17:19:48 -07001769 lastProgrammed.remove(sw.id());
sanghofb7c7292015-04-13 15:15:58 -07001770 continue;
1771 }
Charles Chand66d6712018-03-29 16:03:41 -07001772 for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
Saurav Das261c3002017-06-13 15:35:54 -07001773 if (log.isTraceEnabled()) {
1774 log.trace("Device links for dev: {}", rootSw);
1775 for (Link link: srManager.linkService.getDeviceLinks(rootSw)) {
1776 log.trace("{} -> {} ", link.src().deviceId(),
1777 link.dst().deviceId());
1778 }
Saurav Dasb149be12016-06-07 10:08:06 -07001779 }
pierventre37dcf4c2021-09-16 18:43:06 +02001780 // check for leadership change since last run
Saurav Das00e553b2018-04-21 17:19:48 -07001781 if (!lastProgrammed.contains(sw.id())) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07001782 log.warn("New responsibility for this node to program dev:{}"
Saurav Das00e553b2018-04-21 17:19:48 -07001783 + " ... nuking current ECMPspg", sw.id());
1784 currentEcmpSpgMap.remove(sw.id());
1785 }
Saurav Das68e1b6a2018-06-11 17:02:31 -07001786 lastProgrammed.add(sw.id());
Saurav Das261c3002017-06-13 15:35:54 -07001787 EcmpShortestPathGraph currEcmpSpg = currentEcmpSpgMap.get(rootSw);
1788 if (currEcmpSpg == null) {
1789 log.debug("No existing ECMP graph for device {}.. adding self as "
1790 + "changed route", rootSw);
1791 changedRtBldr.add(Lists.newArrayList(rootSw));
1792 continue;
1793 }
1794 EcmpShortestPathGraph newEcmpSpg = updatedEcmpSpgMap.get(rootSw);
Saurav Dasdebcf882018-04-06 20:16:01 -07001795 if (newEcmpSpg == null) {
1796 log.warn("Cannot find updated ECMP graph for dev:{}", rootSw);
1797 continue;
1798 }
Saurav Das261c3002017-06-13 15:35:54 -07001799 if (log.isDebugEnabled()) {
1800 log.debug("Root switch: {}", rootSw);
1801 log.debug(" Current/Existing SPG: {}", currEcmpSpg);
1802 log.debug(" New/Updated SPG: {}", newEcmpSpg);
1803 }
1804 // first use the updated/new map to compare to current/existing map
1805 // as new links may have come up
1806 changedRtBldr.addAll(compareGraphs(newEcmpSpg, currEcmpSpg, rootSw));
1807 // then use the current/existing map to compare to updated/new map
1808 // as switch may have been removed
1809 changedRtBldr.addAll(compareGraphs(currEcmpSpg, newEcmpSpg, rootSw));
sangho28d0b6d2015-05-07 13:30:57 -07001810 }
Saurav Das1b391d52016-11-29 14:27:25 -08001811 }
sanghofb7c7292015-04-13 15:15:58 -07001812
Saurav Dascea556f2018-03-05 14:37:16 -08001813 // handle clearing state for a failed switch in case the switch does
1814 // not have a pair, or the pair is not available
1815 if (failedSwitch != null) {
Charles Chan6dbcd252018-04-02 11:46:38 -07001816 Optional<DeviceId> pairDev = srManager.getPairDeviceId(failedSwitch);
1817 if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001818 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
1819 srManager.deviceService.getDevices().forEach(dev -> {
pierventre37dcf4c2021-09-16 18:43:06 +02001820 if (!dev.id().equals(failedSwitch) && shouldProgram(dev.id())) {
Saurav Dascea556f2018-03-05 14:37:16 -08001821 log.debug(" : {}", dev.id());
1822 changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
1823 }
1824 });
1825 }
1826 }
1827
Saurav Das261c3002017-06-13 15:35:54 -07001828 Set<ArrayList<DeviceId>> changedRoutes = changedRtBldr.build();
Saurav Das1b391d52016-11-29 14:27:25 -08001829 for (ArrayList<DeviceId> route: changedRoutes) {
1830 log.debug("Route changes Target -> Root");
1831 if (route.size() == 1) {
1832 log.debug(" : all -> {}", route.get(0));
1833 } else {
1834 log.debug(" : {} -> {}", route.get(0), route.get(1));
1835 }
1836 }
1837 return changedRoutes;
1838 }
1839
pier572d4a92019-04-25 18:51:51 +02001840 // Utility method to expands the route changes in two elements array using
1841 // the ECMP graph. Caller represents all to dst switch routes with an
1842 // array containing only the dst switch.
1843 private Set<ArrayList<DeviceId>> getExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1844 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1845 // Ensure each routeChanges entry has two elements
1846 for (ArrayList<DeviceId> route : routeChanges) {
1847 if (route.size() == 1) {
1848 DeviceId dstSw = route.get(0);
1849 EcmpShortestPathGraph ec = updatedEcmpSpgMap.get(dstSw);
1850 if (ec == null) {
1851 log.warn("No graph found for {} .. aborting redoRouting", dstSw);
1852 return Collections.emptySet();
1853 }
1854 ec.getAllLearnedSwitchesAndVia().keySet().forEach(key -> {
1855 ec.getAllLearnedSwitchesAndVia().get(key).keySet().forEach(target -> {
1856 changedRoutes.add(Lists.newArrayList(target, dstSw));
1857 });
1858 });
1859 } else {
1860 DeviceId targetSw = route.get(0);
1861 DeviceId dstSw = route.get(1);
1862 changedRoutes.add(Lists.newArrayList(targetSw, dstSw));
1863 }
1864 }
1865 return changedRoutes;
1866 }
1867
1868 // Utility method to expands the route changes in two elements array using
1869 // the available devices. Caller represents all to dst switch routes with an
1870 // array containing only the dst switch.
1871 private Set<ArrayList<DeviceId>> getAllExpandedRoutes(Set<ArrayList<DeviceId>> routeChanges) {
1872 Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
1873 // Ensure each routeChanges entry has two elements
1874 for (ArrayList<DeviceId> route : routeChanges) {
1875 if (route.size() == 1) {
1876 // route-path changes are from everyone else to this switch
1877 DeviceId dstSw = route.get(0);
1878 srManager.deviceService.getAvailableDevices().forEach(sw -> {
1879 if (!sw.id().equals(dstSw)) {
1880 changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
1881 }
1882 });
1883 } else {
1884 changedRoutes.add(route);
1885 }
1886 }
1887 return changedRoutes;
1888 }
1889
Saurav Das1b391d52016-11-29 14:27:25 -08001890 /**
1891 * For the root switch, searches all the target nodes reachable in the base
1892 * graph, and compares paths to the ones in the comp graph.
1893 *
1894 * @param base the graph that is indexed for all reachable target nodes
1895 * from the root node
1896 * @param comp the graph that the base graph is compared to
1897 * @param rootSw both ecmp graphs are calculated for the root node
1898 * @return all the routes that have changed in the base graph
1899 */
1900 private Set<ArrayList<DeviceId>> compareGraphs(EcmpShortestPathGraph base,
1901 EcmpShortestPathGraph comp,
1902 DeviceId rootSw) {
1903 ImmutableSet.Builder<ArrayList<DeviceId>> changedRoutesBuilder =
1904 ImmutableSet.builder();
1905 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> baseMap =
1906 base.getAllLearnedSwitchesAndVia();
1907 HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> compMap =
1908 comp.getAllLearnedSwitchesAndVia();
1909 for (Integer itrIdx : baseMap.keySet()) {
1910 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> baseViaMap =
1911 baseMap.get(itrIdx);
1912 for (DeviceId targetSw : baseViaMap.keySet()) {
1913 ArrayList<ArrayList<DeviceId>> basePath = baseViaMap.get(targetSw);
1914 ArrayList<ArrayList<DeviceId>> compPath = getVia(compMap, targetSw);
1915 if ((compPath == null) || !basePath.equals(compPath)) {
Saurav Das62ae6792017-05-15 15:34:25 -07001916 log.trace("Impacted route:{} -> {}", targetSw, rootSw);
Saurav Das1b391d52016-11-29 14:27:25 -08001917 ArrayList<DeviceId> route = new ArrayList<>();
Saurav Das261c3002017-06-13 15:35:54 -07001918 route.add(targetSw); // switch with rules to populate
1919 route.add(rootSw); // towards this destination
Saurav Das1b391d52016-11-29 14:27:25 -08001920 changedRoutesBuilder.add(route);
sanghofb7c7292015-04-13 15:15:58 -07001921 }
1922 }
sangho28d0b6d2015-05-07 13:30:57 -07001923 }
Saurav Das1b391d52016-11-29 14:27:25 -08001924 return changedRoutesBuilder.build();
sanghofb7c7292015-04-13 15:15:58 -07001925 }
1926
Saurav Das261c3002017-06-13 15:35:54 -07001927 /**
1928 * Returns the ECMP paths traversed to reach the target switch.
1929 *
1930 * @param switchVia a per-iteration view of the ECMP graph for a root switch
1931 * @param targetSw the switch to reach from the root switch
1932 * @return the nodes traversed on ECMP paths to the target switch
1933 */
sanghofb7c7292015-04-13 15:15:58 -07001934 private ArrayList<ArrayList<DeviceId>> getVia(HashMap<Integer, HashMap<DeviceId,
Saurav Das1b391d52016-11-29 14:27:25 -08001935 ArrayList<ArrayList<DeviceId>>>> switchVia, DeviceId targetSw) {
sanghofb7c7292015-04-13 15:15:58 -07001936 for (Integer itrIdx : switchVia.keySet()) {
1937 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
1938 switchVia.get(itrIdx);
Saurav Das1b391d52016-11-29 14:27:25 -08001939 if (swViaMap.get(targetSw) == null) {
sanghofb7c7292015-04-13 15:15:58 -07001940 continue;
1941 } else {
Saurav Das1b391d52016-11-29 14:27:25 -08001942 return swViaMap.get(targetSw);
sanghofb7c7292015-04-13 15:15:58 -07001943 }
1944 }
1945
Srikanth Vavilapalli64d96c12015-05-14 20:22:47 -07001946 return null;
sanghofb7c7292015-04-13 15:15:58 -07001947 }
1948
Saurav Das261c3002017-06-13 15:35:54 -07001949 /**
1950 * Utility method to break down a path from src to dst device into a collection
1951 * of links.
1952 *
1953 * @param src src device of the path
1954 * @param dst dst device of the path
1955 * @param viaMap path taken from src to dst device
1956 * @return collection of links in the path
1957 */
sanghofb7c7292015-04-13 15:15:58 -07001958 private Set<ArrayList<DeviceId>> computeLinks(DeviceId src,
1959 DeviceId dst,
1960 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> viaMap) {
1961 Set<ArrayList<DeviceId>> subLinks = Sets.newHashSet();
1962 for (ArrayList<DeviceId> via : viaMap.get(src)) {
1963 DeviceId linkSrc = src;
1964 DeviceId linkDst = dst;
1965 for (DeviceId viaDevice: via) {
1966 ArrayList<DeviceId> link = new ArrayList<>();
1967 linkDst = viaDevice;
1968 link.add(linkSrc);
1969 link.add(linkDst);
1970 subLinks.add(link);
1971 linkSrc = viaDevice;
1972 }
1973 ArrayList<DeviceId> link = new ArrayList<>();
1974 link.add(linkSrc);
1975 link.add(dst);
1976 subLinks.add(link);
1977 }
1978
1979 return subLinks;
1980 }
1981
Charles Chanc22cef32016-04-29 14:38:22 -07001982 /**
pierventre37dcf4c2021-09-16 18:43:06 +02001983 * Determines whether this controller instance should program the given deviceId, based on
1984 * workPartitionService and pairDeviceId if one exists. Once an instance is elected, it will
1985 * be the only instance responsible for programming both devices in the pair until it goes down.
Charles Chanc22cef32016-04-29 14:38:22 -07001986 *
pierventre37dcf4c2021-09-16 18:43:06 +02001987 * @param deviceId the device id
1988 * @return true if this instance leads the programming, false otherwise
Charles Chanc22cef32016-04-29 14:38:22 -07001989 */
pierventre37dcf4c2021-09-16 18:43:06 +02001990 public boolean shouldProgram(DeviceId deviceId) {
1991 NodeId leader = shouldProgram.get(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07001992 NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
pierventre37dcf4c2021-09-16 18:43:06 +02001993 if (leader != null) {
1994 log.trace("shouldProgram dev:{} leader:{}", deviceId, leader);
1995 return currentNodeId.equals(leader);
sangho80f11cb2015-04-01 13:05:26 -07001996 }
Charles Chand66d6712018-03-29 16:03:41 -07001997
pierventre37dcf4c2021-09-16 18:43:06 +02001998 // hash function is independent from the order of the devices in the edge pair
1999 Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
2000 EdgePair edgePair = new EdgePair(deviceId, pairDeviceId.orElse(DeviceId.NONE));
Charles Chand66d6712018-03-29 16:03:41 -07002001
pierventre37dcf4c2021-09-16 18:43:06 +02002002 leader = srManager.workPartitionService.getLeader(edgePair, HASH_FUNCTION);
2003 if (leader != null) {
2004 log.debug("{} is the leader, should handle routing for {}/pair={}", leader, deviceId,
2005 pairDeviceId);
2006 shouldProgram.put(deviceId, leader);
2007 return leader.equals(currentNodeId);
Charles Chand66d6712018-03-29 16:03:41 -07002008 } else {
pierventre37dcf4c2021-09-16 18:43:06 +02002009 log.error("Fail to elect a leader for {}/pair={}. Abort.", deviceId, pairDeviceId);
2010 shouldProgram.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07002011 return false;
2012 }
2013 }
2014
pierventre37dcf4c2021-09-16 18:43:06 +02002015 void invalidateShouldProgram(DeviceId deviceId) {
2016 shouldProgram.remove(deviceId);
Charles Chand66d6712018-03-29 16:03:41 -07002017 }
2018
pierventre37dcf4c2021-09-16 18:43:06 +02002019 void invalidateShouldProgram() {
2020 shouldProgram.clear();
Charles Chanfbcb8812018-04-18 18:41:05 -07002021 }
2022
pierventre37dcf4c2021-09-16 18:43:06 +02002023
Charles Chand66d6712018-03-29 16:03:41 -07002024 /**
2025 * Returns a set of device ID, containing given device and its pair device if exist.
2026 *
2027 * @param deviceId Device ID
2028 * @return a set of device ID, containing given device and its pair device if exist.
2029 */
2030 private Set<DeviceId> deviceAndItsPair(DeviceId deviceId) {
2031 Set<DeviceId> ret = Sets.newHashSet(deviceId);
2032 srManager.getPairDeviceId(deviceId).ifPresent(ret::add);
2033 return ret;
sangho80f11cb2015-04-01 13:05:26 -07002034 }
2035
Charles Chanc22cef32016-04-29 14:38:22 -07002036 /**
Saurav Das261c3002017-06-13 15:35:54 -07002037 * Returns the set of deviceIds which are the next hops from the targetSw
2038 * to the dstSw according to the latest ECMP spg.
2039 *
2040 * @param targetSw the switch for which the next-hops are desired
2041 * @param dstSw the switch to which the next-hops lead to from the targetSw
2042 * @return set of next hop deviceIds, could be empty if no next hops are found
2043 */
2044 private Set<DeviceId> getNextHops(DeviceId targetSw, DeviceId dstSw) {
2045 boolean targetIsEdge = false;
2046 try {
2047 targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
2048 } catch (DeviceConfigNotFoundException e) {
2049 log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
2050 + "continuing to getNextHops", targetSw);
2051 }
2052
2053 EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(dstSw);
2054 if (ecmpSpg == null) {
2055 log.debug("No ecmpSpg found for dstSw: {}", dstSw);
2056 return ImmutableSet.of();
2057 }
2058 HashMap<Integer,
2059 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
2060 ecmpSpg.getAllLearnedSwitchesAndVia();
2061 for (Integer itrIdx : switchVia.keySet()) {
2062 HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
2063 switchVia.get(itrIdx);
2064 for (DeviceId target : swViaMap.keySet()) {
2065 if (!target.equals(targetSw)) {
2066 continue;
2067 }
Saurav Das49368392018-04-23 18:42:12 -07002068 // optimization for spines to not use leaves to get
2069 // to a spine or other leaves. Also leaves should not use other
2070 // leaves to get to the destination
2071 if ((!targetIsEdge && itrIdx > 1) || targetIsEdge) {
Saurav Das97241862018-02-14 14:14:54 -08002072 boolean pathdevIsEdge = false;
2073 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
Saurav Das49368392018-04-23 18:42:12 -07002074 log.debug("Evaluating next-hop in path: {}", via);
Saurav Das97241862018-02-14 14:14:54 -08002075 for (DeviceId pathdev : via) {
2076 try {
2077 pathdevIsEdge = srManager.deviceConfiguration
2078 .isEdgeDevice(pathdev);
2079 } catch (DeviceConfigNotFoundException e) {
2080 log.warn(e.getMessage());
2081 }
2082 if (pathdevIsEdge) {
Saurav Das68e1b6a2018-06-11 17:02:31 -07002083 log.debug("Avoiding {} hop path for targetSw:{}"
Saurav Das97241862018-02-14 14:14:54 -08002084 + " --> dstSw:{} which goes through an edge"
2085 + " device {} in path {}", itrIdx,
2086 targetSw, dstSw, pathdev, via);
2087 return ImmutableSet.of();
2088 }
2089 }
2090 }
Saurav Das261c3002017-06-13 15:35:54 -07002091 }
2092 Set<DeviceId> nextHops = new HashSet<>();
2093 for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
2094 if (via.isEmpty()) {
2095 // the dstSw is the next-hop from the targetSw
2096 nextHops.add(dstSw);
2097 } else {
2098 // first elem is next-hop in each ECMP path
2099 nextHops.add(via.get(0));
2100 }
2101 }
Saurav Das49368392018-04-23 18:42:12 -07002102 log.debug("target {} --> dst: {} has next-hops:{}", targetSw,
2103 dstSw, nextHops);
Saurav Das261c3002017-06-13 15:35:54 -07002104 return nextHops;
2105 }
2106 }
Saurav Das49368392018-04-23 18:42:12 -07002107 log.debug("No next hops found for target:{} --> dst: {}", targetSw, dstSw);
Saurav Das261c3002017-06-13 15:35:54 -07002108 return ImmutableSet.of(); //no next-hops found
2109 }
2110
Saurav Das261c3002017-06-13 15:35:54 -07002111 //////////////////////////////////////
2112 // Filtering rule creation
2113 //////////////////////////////////////
2114
2115 /**
Saurav Dasf9332192017-02-18 14:05:44 -08002116 * Populates filtering rules for port, and punting rules
2117 * for gateway IPs, loopback IPs and arp/ndp traffic.
pierventre37dcf4c2021-09-16 18:43:06 +02002118 * Should only be called by the instance leading the programming
2119 * for this device/port.
sangho80f11cb2015-04-01 13:05:26 -07002120 *
2121 * @param deviceId Switch ID to set the rules
2122 */
Charles Chanfbcb8812018-04-18 18:41:05 -07002123 void populatePortAddressingRules(DeviceId deviceId) {
Saurav Das07c74602016-04-27 18:35:50 -07002124 // Although device is added, sometimes device store does not have the
2125 // ports for this device yet. It results in missing filtering rules in the
2126 // switch. We will attempt it a few times. If it still does not work,
2127 // user can manually repopulate using CLI command sr-reroute-network
Charles Chan18fa4252017-02-08 16:10:40 -08002128 PortFilterInfo firstRun = rulePopulator.populateVlanMacFilters(deviceId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002129 if (firstRun == null) {
2130 firstRun = new PortFilterInfo(0, 0, 0);
Saurav Das07c74602016-04-27 18:35:50 -07002131 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002132 executorService.schedule(new RetryFilters(deviceId, firstRun),
2133 RETRY_INTERVAL_MS, TimeUnit.MILLISECONDS);
sangho80f11cb2015-04-01 13:05:26 -07002134 }
2135
2136 /**
Saurav Dasd1872b02016-12-02 15:43:47 -08002137 * RetryFilters populates filtering objectives for a device and keeps retrying
2138 * till the number of ports filtered are constant for a predefined number
2139 * of attempts.
2140 */
2141 protected final class RetryFilters implements Runnable {
2142 int constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS;
2143 DeviceId devId;
2144 int counter;
2145 PortFilterInfo prevRun;
2146
2147 private RetryFilters(DeviceId deviceId, PortFilterInfo previousRun) {
Saurav Das07c74602016-04-27 18:35:50 -07002148 devId = deviceId;
Saurav Dasd1872b02016-12-02 15:43:47 -08002149 prevRun = previousRun;
2150 counter = 0;
Saurav Das07c74602016-04-27 18:35:50 -07002151 }
2152
2153 @Override
2154 public void run() {
Charles Chan077314e2017-06-22 14:27:17 -07002155 log.debug("RETRY FILTER ATTEMPT {} ** dev:{}", ++counter, devId);
Charles Chan18fa4252017-02-08 16:10:40 -08002156 PortFilterInfo thisRun = rulePopulator.populateVlanMacFilters(devId);
Saurav Dasd1872b02016-12-02 15:43:47 -08002157 boolean sameResult = prevRun.equals(thisRun);
2158 log.debug("dev:{} prevRun:{} thisRun:{} sameResult:{}", devId, prevRun,
2159 thisRun, sameResult);
Ray Milkey614352e2018-02-26 09:36:31 -08002160 if (thisRun == null || !sameResult || (--constantAttempts > 0)) {
Saurav Dasf9332192017-02-18 14:05:44 -08002161 // exponentially increasing intervals for retries
2162 executorService.schedule(this,
2163 RETRY_INTERVAL_MS * (int) Math.pow(counter, RETRY_INTERVAL_SCALE),
2164 TimeUnit.MILLISECONDS);
Saurav Dasd1872b02016-12-02 15:43:47 -08002165 if (!sameResult) {
2166 constantAttempts = MAX_CONSTANT_RETRY_ATTEMPTS; //reset
2167 }
Saurav Das07c74602016-04-27 18:35:50 -07002168 }
Saurav Dasd1872b02016-12-02 15:43:47 -08002169 prevRun = (thisRun == null) ? prevRun : thisRun;
Saurav Das07c74602016-04-27 18:35:50 -07002170 }
Saurav Das07c74602016-04-27 18:35:50 -07002171 }
piera9941192019-04-24 16:12:47 +02002172
2173 // Check jobs completion. It returns false if one of the job fails
2174 // and cancel the remaining
2175 private boolean checkJobs(List<Future<Boolean>> futures) {
2176 boolean completed = true;
2177 for (Future<Boolean> future : futures) {
2178 try {
2179 if (completed) {
2180 if (!future.get()) {
2181 completed = false;
2182 }
2183 } else {
2184 future.cancel(true);
2185 }
2186 } catch (InterruptedException | ExecutionException e) {
2187 completed = false;
2188 }
2189 }
2190 return completed;
2191 }
sangho80f11cb2015-04-01 13:05:26 -07002192}