Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 1 | /* |
Brian O'Connor | a09fe5b | 2017-08-03 21:12:30 -0700 | [diff] [blame] | 2 | * Copyright 2017-present Open Networking Foundation |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Ray Milkey | 69ec871 | 2017-08-08 13:00:43 -0700 | [diff] [blame] | 17 | package org.onosproject.routeservice.impl; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 18 | |
| 19 | import org.onosproject.cluster.ClusterEvent; |
| 20 | import org.onosproject.cluster.ClusterEventListener; |
| 21 | import org.onosproject.cluster.ClusterService; |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 22 | import org.onosproject.cluster.ControllerNode; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 23 | import org.onosproject.cluster.NodeId; |
Ray Milkey | 69ec871 | 2017-08-08 13:00:43 -0700 | [diff] [blame] | 24 | import org.onosproject.routeservice.ResolvedRoute; |
| 25 | import org.onosproject.routeservice.Route; |
| 26 | import org.onosproject.routeservice.RouteAdminService; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 27 | import org.onosproject.store.serializers.KryoNamespaces; |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 28 | import org.onosproject.store.service.AsyncDistributedLock; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 29 | import org.onosproject.store.service.DistributedPrimitive; |
| 30 | import org.onosproject.store.service.Serializer; |
| 31 | import org.onosproject.store.service.StorageService; |
| 32 | import org.onosproject.store.service.WorkQueue; |
| 33 | import org.slf4j.Logger; |
| 34 | import org.slf4j.LoggerFactory; |
| 35 | |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 36 | import java.time.Duration; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 37 | import java.util.Collection; |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 38 | import java.util.concurrent.ExecutorService; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 39 | import java.util.concurrent.ScheduledExecutorService; |
| 40 | import java.util.stream.Collectors; |
| 41 | |
| 42 | import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; |
| 43 | import static org.onlab.util.Tools.groupedThreads; |
| 44 | |
| 45 | /** |
| 46 | * Monitors cluster nodes and removes routes if a cluster node becomes unavailable. |
| 47 | */ |
| 48 | public class RouteMonitor { |
| 49 | |
| 50 | private final Logger log = LoggerFactory.getLogger(this.getClass()); |
| 51 | |
| 52 | private static final String TOPIC = "route-reaper"; |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 53 | private static final String LOCK_NAME = "route-monitor-lock"; |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 54 | private static final int NUM_PARALLEL_JOBS = 10; |
| 55 | |
| 56 | private RouteAdminService routeService; |
| 57 | private final ClusterService clusterService; |
| 58 | private StorageService storageService; |
| 59 | |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 60 | private final AsyncDistributedLock asyncLock; |
| 61 | |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 62 | private WorkQueue<NodeId> queue; |
| 63 | |
| 64 | private final InternalClusterListener clusterListener = new InternalClusterListener(); |
| 65 | |
| 66 | private final ScheduledExecutorService reaperExecutor = |
| 67 | newSingleThreadScheduledExecutor(groupedThreads("route/reaper", "", log)); |
| 68 | |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 69 | private final ExecutorService eventExecutor = newSingleThreadScheduledExecutor(groupedThreads( |
| 70 | "onos/routemonitor", "events-%d", log)); |
| 71 | |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 72 | /** |
| 73 | * Creates a new route monitor. |
| 74 | * |
| 75 | * @param routeService route service |
| 76 | * @param clusterService cluster service |
| 77 | * @param storageService storage service |
| 78 | */ |
| 79 | public RouteMonitor(RouteAdminService routeService, |
| 80 | ClusterService clusterService, StorageService storageService) { |
| 81 | this.routeService = routeService; |
| 82 | this.clusterService = clusterService; |
| 83 | this.storageService = storageService; |
| 84 | |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 85 | asyncLock = storageService.lockBuilder().withName(LOCK_NAME).build(); |
| 86 | |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 87 | clusterService.addListener(clusterListener); |
| 88 | |
| 89 | queue = storageService.getWorkQueue(TOPIC, Serializer.using(KryoNamespaces.API)); |
| 90 | queue.addStatusChangeListener(this::statusChange); |
| 91 | |
| 92 | startProcessing(); |
| 93 | } |
| 94 | |
| 95 | /** |
| 96 | * Shuts down the route monitor. |
| 97 | */ |
| 98 | public void shutdown() { |
| 99 | stopProcessing(); |
| 100 | clusterService.removeListener(clusterListener); |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 101 | eventExecutor.shutdownNow(); |
| 102 | reaperExecutor.shutdownNow(); |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 103 | asyncLock.unlock(); |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 104 | } |
| 105 | |
| 106 | private void statusChange(DistributedPrimitive.Status status) { |
| 107 | switch (status) { |
| 108 | case ACTIVE: |
| 109 | startProcessing(); |
| 110 | break; |
| 111 | case SUSPENDED: |
| 112 | stopProcessing(); |
| 113 | break; |
| 114 | case INACTIVE: |
| 115 | default: |
| 116 | break; |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | private void startProcessing() { |
| 121 | queue.registerTaskProcessor(this::cleanRoutes, NUM_PARALLEL_JOBS, reaperExecutor); |
| 122 | } |
| 123 | |
| 124 | private void stopProcessing() { |
| 125 | queue.stopProcessing(); |
| 126 | } |
| 127 | |
| 128 | private void cleanRoutes(NodeId node) { |
| 129 | log.info("Cleaning routes from unavailable node {}", node); |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 130 | Collection<Route> routes = routeService.getRouteTables().stream() |
| 131 | .flatMap(id -> routeService.getRoutes(id).stream()) |
| 132 | .flatMap(route -> route.allRoutes().stream()) |
| 133 | .map(ResolvedRoute::route) |
| 134 | .filter(r -> r.sourceNode().equals(node)) |
| 135 | .collect(Collectors.toList()); |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 136 | if (node.equals(clusterService.getLocalNode().id())) { |
| 137 | log.debug("Do not remove routes from local nodes {}", node); |
| 138 | return; |
| 139 | } |
| 140 | |
| 141 | if (clusterService.getState(node) == ControllerNode.State.READY) { |
| 142 | log.debug("Do not remove routes from active nodes {}", node); |
| 143 | return; |
| 144 | } |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 145 | |
| 146 | log.debug("Withdrawing routes: {}", routes); |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 147 | routeService.withdraw(routes); |
| 148 | } |
| 149 | |
| 150 | private class InternalClusterListener implements ClusterEventListener { |
| 151 | |
| 152 | @Override |
| 153 | public void event(ClusterEvent event) { |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 154 | eventExecutor.execute(() -> { |
pierventre | e73a727 | 2021-11-09 20:06:01 +0100 | [diff] [blame^] | 155 | if (event.instanceType() == ClusterEvent.InstanceType.STORAGE) { |
| 156 | log.debug("Skipping cluster event for {}", event.subject().id().id()); |
| 157 | return; |
| 158 | } |
| 159 | |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 160 | switch (event.type()) { |
| 161 | case INSTANCE_DEACTIVATED: |
| 162 | NodeId id = event.subject().id(); |
| 163 | log.info("Node {} deactivated", id); |
Charles Chan | 0cc4450 | 2018-01-29 15:25:52 -0800 | [diff] [blame] | 164 | |
pierventre | 52ef933 | 2021-07-09 22:42:17 +0200 | [diff] [blame] | 165 | // DistributedLock is introduced to guarantee that minority nodes won't try to remove |
| 166 | // routes that originated from majority nodes. |
| 167 | // Adding 15 seconds retry for the leadership election to be completed. |
| 168 | asyncLock.tryLock(Duration.ofSeconds(15)).whenComplete((result, error) -> { |
| 169 | if (result != null && result.isPresent()) { |
| 170 | log.debug("Lock obtained. Put {} into removal queue", id); |
| 171 | queue.addOne(id); |
| 172 | asyncLock.unlock(); |
| 173 | } else { |
| 174 | log.debug("Fail to obtain lock. Do not remove routes from {}", id); |
| 175 | } |
| 176 | }); |
| 177 | break; |
| 178 | case INSTANCE_ADDED: |
| 179 | case INSTANCE_REMOVED: |
| 180 | case INSTANCE_ACTIVATED: |
| 181 | case INSTANCE_READY: |
| 182 | default: |
| 183 | break; |
| 184 | } |
| 185 | }); |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 186 | } |
| 187 | } |
Jonathan Hart | d4be52f | 2017-05-25 14:21:44 -0700 | [diff] [blame] | 188 | } |