blob: 27f38aff16e5c08791a94ce68ad2546451d405e7 [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.net.intent.impl;
17
18import org.apache.felix.scr.annotations.Activate;
19import org.apache.felix.scr.annotations.Component;
20import org.apache.felix.scr.annotations.Deactivate;
21import org.apache.felix.scr.annotations.Modified;
22import org.apache.felix.scr.annotations.Property;
23import org.apache.felix.scr.annotations.Reference;
24import org.apache.felix.scr.annotations.ReferenceCardinality;
25import org.onosproject.cfg.ComponentConfigService;
26import org.onosproject.net.intent.IntentData;
27import org.onosproject.net.intent.IntentEvent;
28import org.onosproject.net.intent.IntentListener;
29import org.onosproject.net.intent.IntentService;
30import org.onosproject.net.intent.IntentStore;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070031import org.onosproject.net.intent.Key;
Pier Luigie6caf682017-01-26 15:25:09 -080032import org.onosproject.store.service.WallClockTimestamp;
Brian O'Connor3c58e962015-04-28 23:21:51 -070033import org.osgi.service.component.ComponentContext;
34import org.slf4j.Logger;
35
36import java.util.Dictionary;
37import java.util.Properties;
38import java.util.Timer;
39import java.util.TimerTask;
40import java.util.concurrent.ExecutorService;
41
42import static com.google.common.base.Strings.isNullOrEmpty;
43import static java.util.concurrent.Executors.newSingleThreadExecutor;
44import static org.onlab.util.Tools.get;
45import static org.onlab.util.Tools.groupedThreads;
Brian O'Connor3c58e962015-04-28 23:21:51 -070046import static org.slf4j.LoggerFactory.getLogger;
47
48/**
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070049 * This component cleans up intents that have encountered errors or otherwise
50 * stalled during installation or withdrawal.
51 * <p>
52 * It periodically polls (based on configured period) for pending and CORRUPT
53 * intents from the store and retries. It also listens for CORRUPT event
54 * notifications, which signify errors in processing, and retries.
55 * </p>
Brian O'Connor3c58e962015-04-28 23:21:51 -070056 */
57@Component(immediate = true)
58public class IntentCleanup implements Runnable, IntentListener {
59
Brian O'Connorcdec4932015-04-30 16:16:47 -070060 private static final Logger log = getLogger(IntentCleanup.class);
Brian O'Connor3c58e962015-04-28 23:21:51 -070061
Pier Luigie6caf682017-01-26 15:25:09 -080062 // Logical timeout for stuck Intents in INSTALLING or WITHDRAWING. The unit is seconds
63 private static final int INSTALLING_WITHDRAWING_PERIOD = 120;
64
Brian O'Connor3c58e962015-04-28 23:21:51 -070065 private static final int DEFAULT_PERIOD = 5; //seconds
Brian O'Connor6d8e3172015-04-30 15:43:57 -070066 private static final int DEFAULT_THRESHOLD = 5; //tries
Brian O'Connor3c58e962015-04-28 23:21:51 -070067
Brian O'Connor5fcf6f52015-05-28 17:34:26 -070068 @Property(name = "enabled", boolValue = true,
69 label = "Enables/disables the intent cleanup component")
70 private boolean enabled = true;
71
Brian O'Connor3c58e962015-04-28 23:21:51 -070072 @Property(name = "period", intValue = DEFAULT_PERIOD,
73 label = "Frequency in ms between cleanup runs")
74 protected int period = DEFAULT_PERIOD;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070075 private long periodMs;
Pier Luigie6caf682017-01-26 15:25:09 -080076 private long periodMsForStuck;
Brian O'Connor3c58e962015-04-28 23:21:51 -070077
Brian O'Connor6d8e3172015-04-30 15:43:57 -070078 @Property(name = "retryThreshold", intValue = DEFAULT_THRESHOLD,
79 label = "Number of times to retry CORRUPT intent without delay")
Brian O'Connorcdec4932015-04-30 16:16:47 -070080 protected int retryThreshold = DEFAULT_THRESHOLD;
Brian O'Connor6d8e3172015-04-30 15:43:57 -070081
Brian O'Connor3c58e962015-04-28 23:21:51 -070082 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
83 protected IntentService service;
84
85 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
86 protected IntentStore store;
87
88 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
89 protected ComponentConfigService cfgService;
90
91 private ExecutorService executor;
92 private Timer timer;
93 private TimerTask timerTask;
94
95 @Activate
96 public void activate() {
97 cfgService.registerProperties(getClass());
HIGUCHI Yutad9e01052016-04-14 09:31:42 -070098 executor = newSingleThreadExecutor(groupedThreads("onos/intent", "cleanup", log));
Brian O'Connor3c58e962015-04-28 23:21:51 -070099 timer = new Timer("onos-intent-cleanup-timer");
100 service.addListener(this);
101 adjustRate();
102 log.info("Started");
103 }
104
105 @Deactivate
106 public void deactivate() {
107 cfgService.unregisterProperties(getClass(), false);
108 service.removeListener(this);
109 timer.cancel();
110 timerTask = null;
111 executor.shutdown();
112 log.info("Stopped");
113 }
114
115 @Modified
116 public void modified(ComponentContext context) {
117 Dictionary<?, ?> properties = context != null ? context.getProperties() : new Properties();
118
119 int newPeriod;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700120 boolean newEnabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700121 try {
122 String s = get(properties, "period");
123 newPeriod = isNullOrEmpty(s) ? period : Integer.parseInt(s.trim());
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700124
125 s = get(properties, "retryThreshold");
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700126 retryThreshold = isNullOrEmpty(s) ? retryThreshold : Integer.parseInt(s.trim());
127
128 s = get(properties, "enabled");
129 newEnabled = isNullOrEmpty(s) ? enabled : Boolean.parseBoolean(s.trim());
Brian O'Connor3c58e962015-04-28 23:21:51 -0700130 } catch (NumberFormatException e) {
131 log.warn(e.getMessage());
132 newPeriod = period;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700133 newEnabled = enabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700134 }
135
136 // Any change in the following parameters implies hard restart
Pier Luigie6caf682017-01-26 15:25:09 -0800137 // We could further restrict only for values multiple of the period
138 // of the stuck intents
139 if (newPeriod != period || enabled != newEnabled || newPeriod <= INSTALLING_WITHDRAWING_PERIOD) {
Brian O'Connor3c58e962015-04-28 23:21:51 -0700140 period = newPeriod;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700141 enabled = newEnabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700142 adjustRate();
143 }
144
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700145 log.info("Settings: enabled={}, period={}, retryThreshold={}",
146 enabled, period, retryThreshold);
Brian O'Connor3c58e962015-04-28 23:21:51 -0700147 }
148
Brian O'Connoreba4e342015-04-30 22:50:13 -0700149 protected void adjustRate() {
Brian O'Connor3c58e962015-04-28 23:21:51 -0700150 if (timerTask != null) {
151 timerTask.cancel();
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700152 timerTask = null;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700153 }
154
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700155 if (enabled) {
156 timerTask = new TimerTask() {
157 @Override
158 public void run() {
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700159 executor.execute(IntentCleanup.this);
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700160 }
161 };
Pier Luigie6caf682017-01-26 15:25:09 -0800162 // Convert to ms
Ray Milkey3717e602018-02-01 13:49:47 -0800163 periodMs = period * 1_000L;
164 periodMsForStuck = INSTALLING_WITHDRAWING_PERIOD * 1000L;
Pier Luigie6caf682017-01-26 15:25:09 -0800165 // Schedule the executions
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700166 timer.scheduleAtFixedRate(timerTask, periodMs, periodMs);
167 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700168 }
169
170
171 @Override
172 public void run() {
173 try {
174 cleanup();
175 } catch (Exception e) {
176 log.warn("Caught exception during Intent cleanup", e);
177 }
178 }
179
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700180 private void resubmitCorrupt(IntentData intentData, boolean checkThreshold) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700181 if (checkThreshold && intentData.errorCount() >= retryThreshold) {
Brian O'Connor38224302016-08-02 22:03:01 -0700182 //FIXME trace or debug statement?
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700183 return; // threshold met or exceeded
Brian O'Connor38224302016-08-02 22:03:01 -0700184 } // FIXME should we backoff here?
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700185
186 switch (intentData.request()) {
187 case INSTALL_REQ:
188 service.submit(intentData.intent());
189 break;
190 case WITHDRAW_REQ:
191 service.withdraw(intentData.intent());
192 break;
193 default:
Jonathan Hartaae93b22015-07-22 14:59:47 -0700194 log.warn("Trying to resubmit corrupt/failed intent {} in state {} with request {}",
Brian O'Connorb55d6e62015-06-01 15:25:53 -0700195 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700196 break;
197 }
198 }
199
200 private void resubmitPendingRequest(IntentData intentData) {
Brian O'Connor38224302016-08-02 22:03:01 -0700201 // FIXME should we back off here?
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700202 switch (intentData.request()) {
203 case INSTALL_REQ:
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700204 case WITHDRAW_REQ:
Brian O'Connor105cf532016-04-19 13:07:38 -0700205 case PURGE_REQ:
Brian O'Connor38224302016-08-02 22:03:01 -0700206 service.addPending(intentData);
Brian O'Connor105cf532016-04-19 13:07:38 -0700207 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700208 default:
Brian O'Connorc90d1842015-10-01 15:48:00 -0700209 log.warn("Failed to resubmit pending intent {} in state {} with request {}",
Brian O'Connorb55d6e62015-06-01 15:25:53 -0700210 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700211 break;
212 }
213 }
214
Brian O'Connor3c58e962015-04-28 23:21:51 -0700215 /**
Jonathan Hartaae93b22015-07-22 14:59:47 -0700216 * Iterates through corrupt, failed and pending intents and
217 * re-submit/withdraw appropriately.
Brian O'Connor3c58e962015-04-28 23:21:51 -0700218 */
219 private void cleanup() {
Pier Luigie6caf682017-01-26 15:25:09 -0800220 int corruptCount = 0, failedCount = 0, stuckCount = 0, pendingCount = 0, skipped = 0;
Jonathan Hartaae93b22015-07-22 14:59:47 -0700221
Brian O'Connorc590ebb2016-12-08 18:16:41 -0800222 // Check the pending map first, because the check of the current map
223 // will add items to the pending map.
224 for (IntentData intentData : store.getPendingData(true, periodMs)) {
Pier Luigi13b287f2017-01-10 15:07:52 -0800225 log.debug("Resubmit Pending Intent: key {}, state {}, request {}",
226 intentData.key(), intentData.state(), intentData.request());
Brian O'Connorc590ebb2016-12-08 18:16:41 -0800227 resubmitPendingRequest(intentData);
228 pendingCount++;
229 }
230
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700231 for (IntentData intentData : store.getIntentData(true, periodMs)) {
jaegonkimab7e59f2018-05-07 13:04:05 +0900232 IntentData pendingIntentData = store.getPendingData(intentData.key());
233 if (pendingIntentData != null) {
234 continue;
235 }
236
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700237 switch (intentData.state()) {
Jonathan Hartaae93b22015-07-22 14:59:47 -0700238 case FAILED:
Pier Luigi13b287f2017-01-10 15:07:52 -0800239 log.debug("Resubmit Failed Intent: key {}, state {}, request {}",
jaegonkimab7e59f2018-05-07 13:04:05 +0900240 intentData.key(), intentData.state(), intentData.request());
Jonathan Hartaae93b22015-07-22 14:59:47 -0700241 resubmitCorrupt(intentData, false);
242 failedCount++;
243 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700244 case CORRUPT:
Pier Luigi13b287f2017-01-10 15:07:52 -0800245 log.debug("Resubmit Corrupt Intent: key {}, state {}, request {}",
jaegonkimab7e59f2018-05-07 13:04:05 +0900246 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700247 resubmitCorrupt(intentData, false);
248 corruptCount++;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700249 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700250 case INSTALLING: //FALLTHROUGH
251 case WITHDRAWING:
Pier Luigie6caf682017-01-26 15:25:09 -0800252 // Instances can have different clocks and potentially we can have problems
253 // An Intent can be submitted again before the real period of the stuck intents
254 final WallClockTimestamp time = new WallClockTimestamp(
255 System.currentTimeMillis() - periodMsForStuck
256 );
257 if (intentData.version().isOlderThan(time)) {
258 resubmitPendingRequest(intentData);
259 stuckCount++;
260 } else {
261 skipped++;
262 }
Brian O'Connoreba4e342015-04-30 22:50:13 -0700263 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700264 default:
265 //NOOP
266 break;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700267 }
268 }
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700269
Jonathan Hart82efa692015-10-10 18:30:28 -0700270 if (corruptCount + failedCount + stuckCount + pendingCount > 0) {
271 log.debug("Intent cleanup ran and resubmitted {} corrupt, {} failed, {} stuck, and {} pending intents",
272 corruptCount, failedCount, stuckCount, pendingCount);
273 }
Pier Luigie6caf682017-01-26 15:25:09 -0800274 if (skipped > 0) {
275 log.debug("Intent cleanup skipped {} intents", skipped);
276 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700277 }
278
279 @Override
280 public void event(IntentEvent event) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700281 // this is the fast path for CORRUPT intents, retry on event notification.
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700282 //TODO we might consider using the timer to back off for subsequent retries
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700283 if (enabled && event.type() == IntentEvent.Type.CORRUPT) {
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700284 Key key = event.subject().key();
285 if (store.isMaster(key)) {
286 IntentData data = store.getIntentData(event.subject().key());
287 resubmitCorrupt(data, true);
288 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700289 }
290 }
291}