blob: 73b67080f007942e07a3284b1b8b2b2a29a3dd7f [file] [log] [blame]
/*
 * Copyright 2015-present Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.net.intent.impl;
17
18import org.apache.felix.scr.annotations.Activate;
19import org.apache.felix.scr.annotations.Component;
20import org.apache.felix.scr.annotations.Deactivate;
21import org.apache.felix.scr.annotations.Modified;
22import org.apache.felix.scr.annotations.Property;
23import org.apache.felix.scr.annotations.Reference;
24import org.apache.felix.scr.annotations.ReferenceCardinality;
25import org.onosproject.cfg.ComponentConfigService;
26import org.onosproject.net.intent.IntentData;
27import org.onosproject.net.intent.IntentEvent;
28import org.onosproject.net.intent.IntentListener;
29import org.onosproject.net.intent.IntentService;
30import org.onosproject.net.intent.IntentStore;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070031import org.onosproject.net.intent.Key;
Brian O'Connor3c58e962015-04-28 23:21:51 -070032import org.osgi.service.component.ComponentContext;
33import org.slf4j.Logger;
34
35import java.util.Dictionary;
36import java.util.Properties;
37import java.util.Timer;
38import java.util.TimerTask;
39import java.util.concurrent.ExecutorService;
40
41import static com.google.common.base.Strings.isNullOrEmpty;
42import static java.util.concurrent.Executors.newSingleThreadExecutor;
43import static org.onlab.util.Tools.get;
44import static org.onlab.util.Tools.groupedThreads;
Brian O'Connor3c58e962015-04-28 23:21:51 -070045import static org.slf4j.LoggerFactory.getLogger;
46
47/**
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070048 * This component cleans up intents that have encountered errors or otherwise
49 * stalled during installation or withdrawal.
50 * <p>
51 * It periodically polls (based on configured period) for pending and CORRUPT
52 * intents from the store and retries. It also listens for CORRUPT event
53 * notifications, which signify errors in processing, and retries.
54 * </p>
Brian O'Connor3c58e962015-04-28 23:21:51 -070055 */
56@Component(immediate = true)
57public class IntentCleanup implements Runnable, IntentListener {
58
Brian O'Connorcdec4932015-04-30 16:16:47 -070059 private static final Logger log = getLogger(IntentCleanup.class);
Brian O'Connor3c58e962015-04-28 23:21:51 -070060
61 private static final int DEFAULT_PERIOD = 5; //seconds
Brian O'Connor6d8e3172015-04-30 15:43:57 -070062 private static final int DEFAULT_THRESHOLD = 5; //tries
Brian O'Connor3c58e962015-04-28 23:21:51 -070063
Brian O'Connor5fcf6f52015-05-28 17:34:26 -070064 @Property(name = "enabled", boolValue = true,
65 label = "Enables/disables the intent cleanup component")
66 private boolean enabled = true;
67
Brian O'Connor3c58e962015-04-28 23:21:51 -070068 @Property(name = "period", intValue = DEFAULT_PERIOD,
69 label = "Frequency in ms between cleanup runs")
70 protected int period = DEFAULT_PERIOD;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070071 private long periodMs;
Brian O'Connor3c58e962015-04-28 23:21:51 -070072
Brian O'Connor6d8e3172015-04-30 15:43:57 -070073 @Property(name = "retryThreshold", intValue = DEFAULT_THRESHOLD,
74 label = "Number of times to retry CORRUPT intent without delay")
Brian O'Connorcdec4932015-04-30 16:16:47 -070075 protected int retryThreshold = DEFAULT_THRESHOLD;
Brian O'Connor6d8e3172015-04-30 15:43:57 -070076
Brian O'Connor3c58e962015-04-28 23:21:51 -070077 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
78 protected IntentService service;
79
80 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
81 protected IntentStore store;
82
83 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
84 protected ComponentConfigService cfgService;
85
86 private ExecutorService executor;
87 private Timer timer;
88 private TimerTask timerTask;
89
90 @Activate
91 public void activate() {
92 cfgService.registerProperties(getClass());
HIGUCHI Yutad9e01052016-04-14 09:31:42 -070093 executor = newSingleThreadExecutor(groupedThreads("onos/intent", "cleanup", log));
Brian O'Connor3c58e962015-04-28 23:21:51 -070094 timer = new Timer("onos-intent-cleanup-timer");
95 service.addListener(this);
96 adjustRate();
97 log.info("Started");
98 }
99
100 @Deactivate
101 public void deactivate() {
102 cfgService.unregisterProperties(getClass(), false);
103 service.removeListener(this);
104 timer.cancel();
105 timerTask = null;
106 executor.shutdown();
107 log.info("Stopped");
108 }
109
110 @Modified
111 public void modified(ComponentContext context) {
112 Dictionary<?, ?> properties = context != null ? context.getProperties() : new Properties();
113
114 int newPeriod;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700115 boolean newEnabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700116 try {
117 String s = get(properties, "period");
118 newPeriod = isNullOrEmpty(s) ? period : Integer.parseInt(s.trim());
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700119
120 s = get(properties, "retryThreshold");
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700121 retryThreshold = isNullOrEmpty(s) ? retryThreshold : Integer.parseInt(s.trim());
122
123 s = get(properties, "enabled");
124 newEnabled = isNullOrEmpty(s) ? enabled : Boolean.parseBoolean(s.trim());
Brian O'Connor3c58e962015-04-28 23:21:51 -0700125 } catch (NumberFormatException e) {
126 log.warn(e.getMessage());
127 newPeriod = period;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700128 newEnabled = enabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700129 }
130
131 // Any change in the following parameters implies hard restart
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700132 if (newPeriod != period || enabled != newEnabled) {
Brian O'Connor3c58e962015-04-28 23:21:51 -0700133 period = newPeriod;
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700134 enabled = newEnabled;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700135 adjustRate();
136 }
137
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700138 log.info("Settings: enabled={}, period={}, retryThreshold={}",
139 enabled, period, retryThreshold);
Brian O'Connor3c58e962015-04-28 23:21:51 -0700140 }
141
Brian O'Connoreba4e342015-04-30 22:50:13 -0700142 protected void adjustRate() {
Brian O'Connor3c58e962015-04-28 23:21:51 -0700143 if (timerTask != null) {
144 timerTask.cancel();
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700145 timerTask = null;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700146 }
147
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700148 if (enabled) {
149 timerTask = new TimerTask() {
150 @Override
151 public void run() {
HIGUCHI Yutad9e01052016-04-14 09:31:42 -0700152 executor.execute(IntentCleanup.this);
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700153 }
154 };
Brian O'Connor3c58e962015-04-28 23:21:51 -0700155
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700156 periodMs = period * 1_000; //convert to ms
157 timer.scheduleAtFixedRate(timerTask, periodMs, periodMs);
158 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700159 }
160
161
162 @Override
163 public void run() {
164 try {
165 cleanup();
166 } catch (Exception e) {
167 log.warn("Caught exception during Intent cleanup", e);
168 }
169 }
170
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700171 private void resubmitCorrupt(IntentData intentData, boolean checkThreshold) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700172 if (checkThreshold && intentData.errorCount() >= retryThreshold) {
Brian O'Connor38224302016-08-02 22:03:01 -0700173 //FIXME trace or debug statement?
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700174 return; // threshold met or exceeded
Brian O'Connor38224302016-08-02 22:03:01 -0700175 } // FIXME should we backoff here?
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700176
177 switch (intentData.request()) {
178 case INSTALL_REQ:
179 service.submit(intentData.intent());
180 break;
181 case WITHDRAW_REQ:
182 service.withdraw(intentData.intent());
183 break;
184 default:
Jonathan Hartaae93b22015-07-22 14:59:47 -0700185 log.warn("Trying to resubmit corrupt/failed intent {} in state {} with request {}",
Brian O'Connorb55d6e62015-06-01 15:25:53 -0700186 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700187 break;
188 }
189 }
190
191 private void resubmitPendingRequest(IntentData intentData) {
Brian O'Connor38224302016-08-02 22:03:01 -0700192 // FIXME should we back off here?
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700193 switch (intentData.request()) {
194 case INSTALL_REQ:
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700195 case WITHDRAW_REQ:
Brian O'Connor105cf532016-04-19 13:07:38 -0700196 case PURGE_REQ:
Brian O'Connor38224302016-08-02 22:03:01 -0700197 service.addPending(intentData);
Brian O'Connor105cf532016-04-19 13:07:38 -0700198 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700199 default:
Brian O'Connorc90d1842015-10-01 15:48:00 -0700200 log.warn("Failed to resubmit pending intent {} in state {} with request {}",
Brian O'Connorb55d6e62015-06-01 15:25:53 -0700201 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700202 break;
203 }
204 }
205
Brian O'Connor3c58e962015-04-28 23:21:51 -0700206 /**
Jonathan Hartaae93b22015-07-22 14:59:47 -0700207 * Iterates through corrupt, failed and pending intents and
208 * re-submit/withdraw appropriately.
Brian O'Connor3c58e962015-04-28 23:21:51 -0700209 */
210 private void cleanup() {
Jonathan Hartaae93b22015-07-22 14:59:47 -0700211 int corruptCount = 0, failedCount = 0, stuckCount = 0, pendingCount = 0;
212
Brian O'Connorc590ebb2016-12-08 18:16:41 -0800213 // Check the pending map first, because the check of the current map
214 // will add items to the pending map.
215 for (IntentData intentData : store.getPendingData(true, periodMs)) {
Pier Luigi13b287f2017-01-10 15:07:52 -0800216 log.debug("Resubmit Pending Intent: key {}, state {}, request {}",
217 intentData.key(), intentData.state(), intentData.request());
Brian O'Connorc590ebb2016-12-08 18:16:41 -0800218 resubmitPendingRequest(intentData);
219 pendingCount++;
220 }
221
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700222 for (IntentData intentData : store.getIntentData(true, periodMs)) {
223 switch (intentData.state()) {
Jonathan Hartaae93b22015-07-22 14:59:47 -0700224 case FAILED:
Pier Luigi13b287f2017-01-10 15:07:52 -0800225 log.debug("Resubmit Failed Intent: key {}, state {}, request {}",
226 intentData.key(), intentData.state(), intentData.request());
Jonathan Hartaae93b22015-07-22 14:59:47 -0700227 resubmitCorrupt(intentData, false);
228 failedCount++;
229 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700230 case CORRUPT:
Pier Luigi13b287f2017-01-10 15:07:52 -0800231 log.debug("Resubmit Corrupt Intent: key {}, state {}, request {}",
232 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700233 resubmitCorrupt(intentData, false);
234 corruptCount++;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700235 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700236 case INSTALLING: //FALLTHROUGH
237 case WITHDRAWING:
Pier Luigi13b287f2017-01-10 15:07:52 -0800238 log.debug("Resubmit Pending Intent: key {}, state {}, request {}",
239 intentData.key(), intentData.state(), intentData.request());
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700240 resubmitPendingRequest(intentData);
241 stuckCount++;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700242 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700243 default:
244 //NOOP
245 break;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700246 }
247 }
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700248
Jonathan Hart82efa692015-10-10 18:30:28 -0700249 if (corruptCount + failedCount + stuckCount + pendingCount > 0) {
250 log.debug("Intent cleanup ran and resubmitted {} corrupt, {} failed, {} stuck, and {} pending intents",
251 corruptCount, failedCount, stuckCount, pendingCount);
252 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700253 }
254
255 @Override
256 public void event(IntentEvent event) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700257 // this is the fast path for CORRUPT intents, retry on event notification.
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700258 //TODO we might consider using the timer to back off for subsequent retries
Brian O'Connor5fcf6f52015-05-28 17:34:26 -0700259 if (enabled && event.type() == IntentEvent.Type.CORRUPT) {
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700260 Key key = event.subject().key();
261 if (store.isMaster(key)) {
262 IntentData data = store.getIntentData(event.subject().key());
263 resubmitCorrupt(data, true);
264 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700265 }
266 }
267}