blob: 7d5417c96047de172b803e3c2b11c34d03695fef [file] [log] [blame]
/*
 * Copyright 2015 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16package org.onosproject.net.intent.impl;
17
18import org.apache.felix.scr.annotations.Activate;
19import org.apache.felix.scr.annotations.Component;
20import org.apache.felix.scr.annotations.Deactivate;
21import org.apache.felix.scr.annotations.Modified;
22import org.apache.felix.scr.annotations.Property;
23import org.apache.felix.scr.annotations.Reference;
24import org.apache.felix.scr.annotations.ReferenceCardinality;
25import org.onosproject.cfg.ComponentConfigService;
26import org.onosproject.net.intent.IntentData;
27import org.onosproject.net.intent.IntentEvent;
28import org.onosproject.net.intent.IntentListener;
29import org.onosproject.net.intent.IntentService;
30import org.onosproject.net.intent.IntentStore;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070031import org.onosproject.net.intent.Key;
Brian O'Connor3c58e962015-04-28 23:21:51 -070032import org.osgi.service.component.ComponentContext;
33import org.slf4j.Logger;
34
35import java.util.Dictionary;
36import java.util.Properties;
37import java.util.Timer;
38import java.util.TimerTask;
39import java.util.concurrent.ExecutorService;
40
41import static com.google.common.base.Strings.isNullOrEmpty;
42import static java.util.concurrent.Executors.newSingleThreadExecutor;
43import static org.onlab.util.Tools.get;
44import static org.onlab.util.Tools.groupedThreads;
Brian O'Connor3c58e962015-04-28 23:21:51 -070045import static org.slf4j.LoggerFactory.getLogger;
46
47/**
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070048 * This component cleans up intents that have encountered errors or otherwise
49 * stalled during installation or withdrawal.
50 * <p>
51 * It periodically polls (based on configured period) for pending and CORRUPT
52 * intents from the store and retries. It also listens for CORRUPT event
53 * notifications, which signify errors in processing, and retries.
54 * </p>
Brian O'Connor3c58e962015-04-28 23:21:51 -070055 */
56@Component(immediate = true)
57public class IntentCleanup implements Runnable, IntentListener {
58
Brian O'Connorcdec4932015-04-30 16:16:47 -070059 private static final Logger log = getLogger(IntentCleanup.class);
Brian O'Connor3c58e962015-04-28 23:21:51 -070060
61 private static final int DEFAULT_PERIOD = 5; //seconds
Brian O'Connor6d8e3172015-04-30 15:43:57 -070062 private static final int DEFAULT_THRESHOLD = 5; //tries
Brian O'Connor3c58e962015-04-28 23:21:51 -070063
64 @Property(name = "period", intValue = DEFAULT_PERIOD,
65 label = "Frequency in ms between cleanup runs")
66 protected int period = DEFAULT_PERIOD;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -070067 private long periodMs;
Brian O'Connor3c58e962015-04-28 23:21:51 -070068
Brian O'Connor6d8e3172015-04-30 15:43:57 -070069 @Property(name = "retryThreshold", intValue = DEFAULT_THRESHOLD,
70 label = "Number of times to retry CORRUPT intent without delay")
Brian O'Connorcdec4932015-04-30 16:16:47 -070071 protected int retryThreshold = DEFAULT_THRESHOLD;
Brian O'Connor6d8e3172015-04-30 15:43:57 -070072
Brian O'Connor3c58e962015-04-28 23:21:51 -070073 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
74 protected IntentService service;
75
76 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
77 protected IntentStore store;
78
79 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
80 protected ComponentConfigService cfgService;
81
82 private ExecutorService executor;
83 private Timer timer;
84 private TimerTask timerTask;
85
86 @Activate
87 public void activate() {
88 cfgService.registerProperties(getClass());
89 executor = newSingleThreadExecutor(groupedThreads("onos/intent", "cleanup"));
90 timer = new Timer("onos-intent-cleanup-timer");
91 service.addListener(this);
92 adjustRate();
93 log.info("Started");
94 }
95
96 @Deactivate
97 public void deactivate() {
98 cfgService.unregisterProperties(getClass(), false);
99 service.removeListener(this);
100 timer.cancel();
101 timerTask = null;
102 executor.shutdown();
103 log.info("Stopped");
104 }
105
106 @Modified
107 public void modified(ComponentContext context) {
108 Dictionary<?, ?> properties = context != null ? context.getProperties() : new Properties();
109
110 int newPeriod;
111 try {
112 String s = get(properties, "period");
113 newPeriod = isNullOrEmpty(s) ? period : Integer.parseInt(s.trim());
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700114
115 s = get(properties, "retryThreshold");
116 retryThreshold = isNullOrEmpty(s) ? period : Integer.parseInt(s.trim());
Brian O'Connor3c58e962015-04-28 23:21:51 -0700117 } catch (NumberFormatException e) {
118 log.warn(e.getMessage());
119 newPeriod = period;
120 }
121
122 // Any change in the following parameters implies hard restart
123 if (newPeriod != period) {
124 period = newPeriod;
125 adjustRate();
126 }
127
128 log.info("Settings: period={}", period);
129 }
130
Brian O'Connoreba4e342015-04-30 22:50:13 -0700131 protected void adjustRate() {
Brian O'Connor3c58e962015-04-28 23:21:51 -0700132 if (timerTask != null) {
133 timerTask.cancel();
134 }
135
136 timerTask = new TimerTask() {
137 @Override
138 public void run() {
139 executor.submit(IntentCleanup.this);
140 }
141 };
142
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700143 periodMs = period * 1_000; //convert to ms
Brian O'Connor3c58e962015-04-28 23:21:51 -0700144 timer.scheduleAtFixedRate(timerTask, periodMs, periodMs);
145 }
146
147
148 @Override
149 public void run() {
150 try {
151 cleanup();
152 } catch (Exception e) {
153 log.warn("Caught exception during Intent cleanup", e);
154 }
155 }
156
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700157 private void resubmitCorrupt(IntentData intentData, boolean checkThreshold) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700158 if (checkThreshold && intentData.errorCount() >= retryThreshold) {
159 return; // threshold met or exceeded
160 }
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700161
162 switch (intentData.request()) {
163 case INSTALL_REQ:
164 service.submit(intentData.intent());
165 break;
166 case WITHDRAW_REQ:
167 service.withdraw(intentData.intent());
168 break;
169 default:
170 //TODO this is an error, might want to log it
171 break;
172 }
173 }
174
175 private void resubmitPendingRequest(IntentData intentData) {
176 switch (intentData.request()) {
177 case INSTALL_REQ:
178 service.submit(intentData.intent());
179 break;
180 case WITHDRAW_REQ:
181 service.withdraw(intentData.intent());
182 break;
183 default:
184 //TODO this is an error (or could be purge), might want to log it
185 break;
186 }
187 }
188
Brian O'Connor3c58e962015-04-28 23:21:51 -0700189 /**
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700190 * Iterate through CORRUPT intents and re-submit/withdraw appropriately.
Brian O'Connor3c58e962015-04-28 23:21:51 -0700191 *
Brian O'Connor3c58e962015-04-28 23:21:51 -0700192 */
193 private void cleanup() {
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700194 int corruptCount = 0, stuckCount = 0, pendingCount = 0;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700195 store.getIntentData(true, periodMs);
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700196 for (IntentData intentData : store.getIntentData(true, periodMs)) {
197 switch (intentData.state()) {
198 case CORRUPT:
199 resubmitCorrupt(intentData, false);
200 corruptCount++;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700201 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700202 case INSTALLING: //FALLTHROUGH
203 case WITHDRAWING:
204 resubmitPendingRequest(intentData);
205 stuckCount++;
Brian O'Connoreba4e342015-04-30 22:50:13 -0700206 break;
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700207 default:
208 //NOOP
209 break;
Brian O'Connor3c58e962015-04-28 23:21:51 -0700210 }
211 }
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700212
213 for (IntentData intentData : store.getPendingData(true, periodMs)) {
214 //TODO should we do age check here, or in the store?
215 resubmitPendingRequest(intentData);
216 stuckCount++;
217 }
218
219 log.debug("Intent cleanup ran and resubmitted {} corrupt, {} stuck, and {} pending intents",
220 corruptCount, stuckCount, pendingCount);
Brian O'Connor3c58e962015-04-28 23:21:51 -0700221 }
222
223 @Override
224 public void event(IntentEvent event) {
Brian O'Connor6d8e3172015-04-30 15:43:57 -0700225 // this is the fast path for CORRUPT intents, retry on event notification.
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700226 //TODO we might consider using the timer to back off for subsequent retries
Brian O'Connor3c58e962015-04-28 23:21:51 -0700227 if (event.type() == IntentEvent.Type.CORRUPT) {
Brian O'Connora6c9b5c2015-04-29 22:38:29 -0700228 Key key = event.subject().key();
229 if (store.isMaster(key)) {
230 IntentData data = store.getIntentData(event.subject().key());
231 resubmitCorrupt(data, true);
232 }
Brian O'Connor3c58e962015-04-28 23:21:51 -0700233 }
234 }
235}