Added performance-related metrics for the Topology and Intents:
===
* "Topology.EventNotification.LastEventTimestamp"
Timestamp of the last Topology event (system nanoseconds)
* "Topology.EventNotification.ListenerEventRate"
Rate of the Topology events published to the Topology listeners
===
* "Intents.AddOperation.BeginOperationTimestamp"
Timestamp of the incoming Add Intent API operation (system nanoseconds)
* "Intents.AddOperation.EndOperationTimestamp"
Timestamp of the Add Intent operation completion (system nanoseconds)
* "Intents.AddOperation.IncomingRate"
Rate of the incoming Add Intent API operations
* "Intents.AddOperation.ProcessingRate"
Rate of processing the Add Intent operations
===
* "Intents.RemoveOperation.BeginOperationTimestamp"
Timestamp of the incoming Remove Intent API operation (system nanoseconds)
* "Intents.RemoveOperation.EndOperationTimestamp"
Timestamp of the Remove Intent operation completion (system nanoseconds)
* "Intents.RemoveOperation.IncomingRate"
Rate of the incoming Remove Intent API operations
* "Intents.RemoveOperation.ProcessingRate"
Rate of processing the Remove Intent operations
===
All performance metrics are exposed via the Metrics REST API:
* GET all metrics:
url = "http://%s:%s/wm/onos/metrics" % (self.onos_ip, self.onos_port)
* GET a specific metric:
url = "http://%s:%s/wm/onos/metrics?ids=%s" % (self.onos_ip, self.onos_port, args.metric_id)
where "metric_id" is the name of the Metric. E.g.:
ids=Topology.EventNotification.LastEventTimestamp
* GET multiple metrics:
url = "http://%s:%s/wm/onos/metrics?ids=%s" % (self.onos_ip, self.onos_port, args.metric_id)
where "metric_id" is a comma-separated list of Metric names. E.g.:
ids=Topology.EventNotification.LastEventTimestamp,Topology.EventNotification.ListenerEventRate
===
The JSON format of the output is the following:
{
"meters": [
{
"name": "Intents.AddOperation.IncomingRate",
"meter": {
"count": 2,
"mean_rate": 0.007488255279864508,
"m5_rate": 0.006082798637345469,
"m15_rate": 0.021083988195124116,
"units": "events/second",
"m1_rate": 0.002155350653737004
}
},
...
],
"histograms": [],
"timers": [],
"gauges": [
{
"gauge": {
"value": 179956769775795
},
"name": "Intents.AddOperation.BeginOperationTimestamp"
},
...
],
"counters": []
}
where
- "meter.count" is the number of events
- "meter.mean_rate" is the mean rate of the events
- "meter.m5_rate" is the rate of the events over the last 5-minute interval
- "meter.m15_rate" is the rate of the events over the last 15-minute interval
- "meter.m1_rate" is the rate of the events over the last 1-minute interval
- "meter.units" is the units of the rate (should be "events/second")
- "gauge.value" is the value of the particular metric.
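In case of the "*Timestamp" metrics, it is the system nanoseconds
timestamp (Java System.nanoTime()) for the particular event or operation.
That value is not wall-clock time: it is only meaningful relative to
other timestamps taken in the same JVM instance.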
Also, fixed some of the unit tests.
NOTE: Currently, for some of the tests we have to explicitly
remove all metrics when tearing down a test.
In the future we should have a single base unit test class,
and only its tearDown() method should do such cleanup.
Change-Id: Iad5b47b908a29dcfd9fb08e7a010ddf2627fd808
diff --git a/src/main/java/net/onrc/onos/core/intent/runtime/PathCalcRuntimeModule.java b/src/main/java/net/onrc/onos/core/intent/runtime/PathCalcRuntimeModule.java
index 15a320f..f27cb1a 100644
--- a/src/main/java/net/onrc/onos/core/intent/runtime/PathCalcRuntimeModule.java
+++ b/src/main/java/net/onrc/onos/core/intent/runtime/PathCalcRuntimeModule.java
@@ -37,6 +37,9 @@
import net.onrc.onos.core.intent.PathIntentMap;
import net.onrc.onos.core.intent.ShortestPathIntent;
import net.onrc.onos.core.intent.runtime.web.IntentWebRoutable;
+import net.onrc.onos.core.metrics.OnosMetrics;
+import net.onrc.onos.core.metrics.OnosMetrics.MetricsComponent;
+import net.onrc.onos.core.metrics.OnosMetrics.MetricsFeature;
import net.onrc.onos.core.registry.IControllerRegistryService;
import net.onrc.onos.core.topology.ITopologyListener;
import net.onrc.onos.core.topology.ITopologyService;
@@ -50,6 +53,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.codahale.metrics.Gauge;
+import com.codahale.metrics.Meter;
+
/**
* The PathCalcRuntimeModule contains the PathCalcRuntime and PersistIntent.
* <p>
@@ -105,6 +111,61 @@
}
/**
+ * Listens for high-level intent changes and updates the Intents metrics:
+ * INST_ACK counts as a completed Add, DEL_ACK as a completed Remove.
+ */
+ private class HighLevelIntentsTracker implements ChangedListener {
+ @Override
+ public void intentsChange(LinkedList<ChangedEvent> events) {
+ //
+ // Log each event, then update the metrics for the state it reports.
+ //
+ for (ChangedEvent event : events) {
+ log.debug("HighLevelIntentsTracker: Intent ID {}, eventType {}, intentState {}",
+ event.intent.getId(), event.eventType,
+ event.intent.getState());
+
+ //
+ // Only STATE_CHANGED events reaching INST_ACK or DEL_ACK touch a metric.
+ //
+ switch (event.eventType) {
+ case ADDED:
+ break;
+ case REMOVED:
+ break;
+ case STATE_CHANGED:
+ IntentState state = event.intent.getState();
+ switch (state) {
+ case INST_REQ:
+ break;
+ case INST_ACK:
+ intentAddProcessingRate.mark(1);
+ intentAddEndTimestamp = System.nanoTime();
+ break;
+ case INST_NACK:
+ break;
+ case DEL_REQ:
+ break;
+ case DEL_ACK:
+ intentRemoveProcessingRate.mark(1);
+ intentRemoveEndTimestamp = System.nanoTime();
+ break;
+ case DEL_PENDING:
+ break;
+ case REROUTE_REQ:
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ /**
* A class to track the deletion of intents and purge them as appropriate.
*/
private class DeleteIntentsTracker implements ChangedListener {
@@ -119,7 +180,7 @@
//
for (ChangedEvent event : events) {
log.debug("DeleteIntentsTracker: Intent ID {}, eventType {}",
- event.intent.getId() , event.eventType);
+ event.intent.getId(), event.eventType);
PathIntent pathIntent = (PathIntent) pathIntents.getIntent(event.intent.getId());
if (pathIntent == null) {
continue;
@@ -209,6 +270,87 @@
private ConcurrentMap<String, Intent> staleIntents = new ConcurrentHashMap<String, Intent>();
private DeleteIntentsTracker deleteIntentsTracker = new DeleteIntentsTracker();
private Set<String> removedApplicationIntentIds = new HashSet<String>();
+ private HighLevelIntentsTracker highLevelIntentsTracker = new HighLevelIntentsTracker();
+
+ //
+ // Metrics
+ //
+ private static final MetricsComponent METRICS_COMPONENT =
+ OnosMetrics.registerComponent("Intents");
+ private static final MetricsFeature METRICS_FEATURE_ADD_OPERATION =
+ METRICS_COMPONENT.registerFeature("AddOperation");
+ private static final MetricsFeature METRICS_FEATURE_REMOVE_OPERATION =
+ METRICS_COMPONENT.registerFeature("RemoveOperation");
+ //
+ // Timestamp of the incoming Add Intent API operation (system nanoseconds)
+ private volatile long intentAddBeginTimestamp = 0;
+ private final Gauge<Long> gaugeIntentAddBeginTimestamp =
+ OnosMetrics.registerMetric(METRICS_COMPONENT,
+ METRICS_FEATURE_ADD_OPERATION,
+ "BeginOperationTimestamp",
+ new Gauge<Long>() {
+ @Override
+ public Long getValue() {
+ return intentAddBeginTimestamp;
+ }
+ });
+ // Timestamp of the Add Intent operation completion (system nanoseconds)
+ private volatile long intentAddEndTimestamp = 0;
+ private final Gauge<Long> gaugeIntentAddEndTimestamp =
+ OnosMetrics.registerMetric(METRICS_COMPONENT,
+ METRICS_FEATURE_ADD_OPERATION,
+ "EndOperationTimestamp",
+ new Gauge<Long>() {
+ @Override
+ public Long getValue() {
+ return intentAddEndTimestamp;
+ }
+ });
+ // Timestamp of the incoming Remove Intent API operation (system nanoseconds)
+ private volatile long intentRemoveBeginTimestamp = 0;
+ private final Gauge<Long> gaugeIntentRemoveBeginTimestamp =
+ OnosMetrics.registerMetric(METRICS_COMPONENT,
+ METRICS_FEATURE_REMOVE_OPERATION,
+ "BeginOperationTimestamp",
+ new Gauge<Long>() {
+ @Override
+ public Long getValue() {
+ return intentRemoveBeginTimestamp;
+ }
+ });
+ // Timestamp of the Remove Intent operation completion (system nanoseconds)
+ private volatile long intentRemoveEndTimestamp = 0;
+ private final Gauge<Long> gaugeIntentRemoveEndTimestamp =
+ OnosMetrics.registerMetric(METRICS_COMPONENT,
+ METRICS_FEATURE_REMOVE_OPERATION,
+ "EndOperationTimestamp",
+ new Gauge<Long>() {
+ @Override
+ public Long getValue() {
+ return intentRemoveEndTimestamp;
+ }
+ });
+ //
+ // Rate of the incoming Add Intent API operations
+ private final Meter intentAddIncomingRate =
+ OnosMetrics.createMeter(METRICS_COMPONENT,
+ METRICS_FEATURE_ADD_OPERATION,
+ "IncomingRate");
+ // Rate of processing the Add Intent operations
+ private final Meter intentAddProcessingRate =
+ OnosMetrics.createMeter(METRICS_COMPONENT,
+ METRICS_FEATURE_ADD_OPERATION,
+ "ProcessingRate");
+ // Rate of the incoming Remove Intent API operations
+ private final Meter intentRemoveIncomingRate =
+ OnosMetrics.createMeter(METRICS_COMPONENT,
+ METRICS_FEATURE_REMOVE_OPERATION,
+ "IncomingRate");
+ // Rate of processing the Remove Intent operations
+ private final Meter intentRemoveProcessingRate =
+ OnosMetrics.createMeter(METRICS_COMPONENT,
+ METRICS_FEATURE_REMOVE_OPERATION,
+ "ProcessingRate");
// ================================================================================
// private methods
@@ -376,6 +518,7 @@
@Override
public void startUp(FloodlightModuleContext context) {
highLevelIntents = new IntentMap();
+ highLevelIntents.addChangeListener(highLevelIntentsTracker);
runtime = new PathCalcRuntime(topologyService.getTopology());
pathIntents = new PathIntentMap();
pathIntents.addChangeListener(deleteIntentsTracker);
@@ -399,6 +542,14 @@
final String appId,
Collection<ApplicationIntent> appIntents) {
//
+ // Update the metrics
+ //
+ if (!appIntents.isEmpty()) {
+ this.intentAddBeginTimestamp = System.nanoTime();
+ this.intentAddIncomingRate.mark(appIntents.size());
+ }
+
+ //
// Process all intents one-by-one
//
// TODO: The Intent Type should be enum instead of a string,
@@ -460,6 +611,11 @@
@Override
public boolean removeApplicationIntents(final String appId,
Collection<String> intentIds) {
+ //
+ // Prepare the timestamp for metrics
+ //
+ long nanoTimeTimestamp = System.nanoTime();
+
IntentMap intentMap = getHighLevelIntents();
List<String> removeIntentIds = new LinkedList<String>();
@@ -481,8 +637,19 @@
}
}
+ //
+ // Update the metrics
+ //
+ if (!operations.isEmpty()) {
+ this.intentRemoveBeginTimestamp = nanoTimeTimestamp;
+ this.intentRemoveIncomingRate.mark(operations.size());
+ }
+
+ //
// Purge intents
+ //
if (!removeIntentIds.isEmpty()) {
+
lock.lock(); // TODO optimize locking using smaller steps
try {
highLevelIntents.purge(removeIntentIds);
@@ -501,14 +668,20 @@
*/
@Override
public boolean removeAllApplicationIntents(final String appId) {
- IntentMap intentMap = getHighLevelIntents();
- List<String> removeIntentIds = new LinkedList<String>();
+ //
+ // Prepare the timestamp for metrics
+ //
+ long nanoTimeTimestamp = System.nanoTime();
+
+ Collection<Intent> allHighLevelIntents =
+ getHighLevelIntents().getAllIntents();
//
// Remove all intents
//
+ List<String> removeIntentIds = new LinkedList<String>();
IntentOperationList operations = new IntentOperationList();
- for (Intent intent : intentMap.getAllIntents()) {
+ for (Intent intent : allHighLevelIntents) {
if (intent.getState() == IntentState.INST_NACK) {
// TODO: A hack to remove intents stuck in INST_NACK state
removeIntentIds.add(intent.getId());
@@ -518,7 +691,17 @@
removedApplicationIntentIds.add(intent.getId());
}
+ //
+ // Update the metrics
+ //
+ if (!operations.isEmpty()) {
+ this.intentRemoveBeginTimestamp = nanoTimeTimestamp;
+ this.intentRemoveIncomingRate.mark(operations.size());
+ }
+
+ //
// Purge intents
+ //
if (!removeIntentIds.isEmpty()) {
lock.lock(); // TODO optimize locking using smaller steps
try {
diff --git a/src/main/java/net/onrc/onos/core/topology/TopologyEvents.java b/src/main/java/net/onrc/onos/core/topology/TopologyEvents.java
index 1900132..9726269 100644
--- a/src/main/java/net/onrc/onos/core/topology/TopologyEvents.java
+++ b/src/main/java/net/onrc/onos/core/topology/TopologyEvents.java
@@ -30,7 +30,7 @@
*/
@JsonSerialize(using = TopologyEventsSerializer.class)
public final class TopologyEvents {
- private final long timestamp;
+ private final long timestamp; // Topology event timestamp (system ns)
private final Collection<SwitchEvent> addedSwitchEvents;
private final Collection<SwitchEvent> removedSwitchEvents;
private final Collection<PortEvent> addedPortEvents;
@@ -43,7 +43,7 @@
/**
* Constructor.
*
- * @param timestamp the timestamp for the event.
+ * @param timestamp the timestamp for the event (system nanoseconds)
* @param addedSwitchEvents the collection of added Switch Events.
* @param removedSwitchEvents the collection of removed Switch Events.
* @param addedPortEvents the collection of added Port Events.
@@ -84,9 +84,9 @@
}
/**
- * Gets the timestamp for the events.
+ * Gets the timestamp for the events (system nanoseconds).
*
- * @return the timestamp for the events.
+ * @return the timestamp for the events (system nanoseconds).
*/
public long getTimestamp() {
return timestamp;
diff --git a/src/main/java/net/onrc/onos/core/topology/TopologyManager.java b/src/main/java/net/onrc/onos/core/topology/TopologyManager.java
index 7e7f2b8..aeb06c2 100644
--- a/src/main/java/net/onrc/onos/core/topology/TopologyManager.java
+++ b/src/main/java/net/onrc/onos/core/topology/TopologyManager.java
@@ -26,6 +26,9 @@
import net.onrc.onos.core.datastore.topology.KVLink;
import net.onrc.onos.core.datastore.topology.KVPort;
import net.onrc.onos.core.datastore.topology.KVSwitch;
+import net.onrc.onos.core.metrics.OnosMetrics;
+import net.onrc.onos.core.metrics.OnosMetrics.MetricsComponent;
+import net.onrc.onos.core.metrics.OnosMetrics.MetricsFeature;
import net.onrc.onos.core.registry.IControllerRegistryService;
import net.onrc.onos.core.util.Dpid;
import net.onrc.onos.core.util.EventEntry;
@@ -36,6 +39,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.codahale.metrics.Gauge;
+import com.codahale.metrics.Meter;
import com.esotericsoftware.kryo.Kryo;
/**
@@ -69,6 +74,32 @@
private Kryo kryo = KryoFactory.newKryoObject();
//
+ // Metrics
+ //
+ private static final MetricsComponent METRICS_COMPONENT =
+ OnosMetrics.registerComponent("Topology");
+ private static final MetricsFeature METRICS_FEATURE_EVENT_NOTIFICATION =
+ METRICS_COMPONENT.registerFeature("EventNotification");
+ //
+ // Timestamp of the last Topology event (system nanoseconds)
+ private volatile long lastEventTimestamp = 0;
+ private final Gauge<Long> gaugeLastEventTimestamp =
+ OnosMetrics.registerMetric(METRICS_COMPONENT,
+ METRICS_FEATURE_EVENT_NOTIFICATION,
+ "LastEventTimestamp",
+ new Gauge<Long>() {
+ @Override
+ public Long getValue() {
+ return lastEventTimestamp;
+ }
+ });
+ // Rate of the Topology events published to the Topology listeners
+ private final Meter listenerEventRate =
+ OnosMetrics.createMeter(METRICS_COMPONENT,
+ METRICS_FEATURE_EVENT_NOTIFICATION,
+ "ListenerEventRate");
+
+ //
// Local state for keeping track of reordered events.
// NOTE: Switch Events are not affected by the event reordering.
//
@@ -466,11 +497,23 @@
}
}
+ //
+ // Update the metrics
+ //
+ long totalEvents =
+ apiAddedSwitchEvents.size() + apiRemovedSwitchEvents.size() +
+ apiAddedPortEvents.size() + apiRemovedPortEvents.size() +
+ apiAddedLinkEvents.size() + apiRemovedLinkEvents.size() +
+ apiAddedHostEvents.size() + apiRemovedHostEvents.size();
+ this.listenerEventRate.mark(totalEvents);
+ this.lastEventTimestamp = System.nanoTime();
+
+ //
// Deliver the events
- long timestamp = System.nanoTime();
+ //
for (ITopologyListener listener : this.topologyListeners) {
TopologyEvents events =
- new TopologyEvents(timestamp,
+ new TopologyEvents(lastEventTimestamp,
kryo.copy(apiAddedSwitchEvents),
kryo.copy(apiRemovedSwitchEvents),
kryo.copy(apiAddedPortEvents),