Improve scalability of P4Runtime subsystem
The P4Runtime client was hanging (deadlock) on a master arbitration
request. As such, all other requests (e.g. table write) were waiting
for the client's request lock to become available.
Apart from fixing those deadlocks, this patch brings a number of
improvements that all together allow to run networks of 100+ P4Runtime
devices on a single ONOS instance (before only ~20 devices)
Includes:
- Asynchrounous mastership handling in DevicHandshaker (as defined in
the P4Runtime and OpenFlow spec)
- Refactored arbitration handling in the P4RuntimeClient
to be consistent with the P4Runtime spec
- Report suspect deadlocks in P4RuntimeClientImpl
- Exploit write errors in P4RuntimeClient to quickly report
channel/mastership errors to upper layers
- Complete all futures with deadlines in P4Runtime driver
- Dump all tables in one request
- Re-purposed ChannelEvent to DeviceAgentEvent to carry also mastership
response events
- Fixed IntelliJ warnings
- Various code and log clean-ups
Change-Id: I9376793a9fe69d8eddf7e8ac2ef0ee4c14fbd198
diff --git a/core/api/src/main/java/org/onosproject/net/device/ChannelEvent.java b/core/api/src/main/java/org/onosproject/net/device/ChannelEvent.java
deleted file mode 100644
index 98ea5cb..0000000
--- a/core/api/src/main/java/org/onosproject/net/device/ChannelEvent.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 2018-present Open Networking Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.onosproject.net.device;
-
-import org.onlab.util.Tools;
-import org.onosproject.event.AbstractEvent;
-import org.onosproject.net.DeviceId;
-
-import static com.google.common.base.MoreObjects.toStringHelper;
-
-/**
- * Describes event related to a channel established by ONOS with a device.
- */
-public class ChannelEvent extends AbstractEvent<ChannelEvent.Type, DeviceId> {
-
- private final Throwable throwable;
-
- /**
- * Type of device events.
- */
- public enum Type {
- /**
- * Signifies that the channel has properly connected.
- */
- CHANNEL_CONNECTED,
-
- /**
- * Signifies that the channel has disconnected.
- */
- CHANNEL_DISCONNECTED,
-
- /**
- * Signifies that an error happened on the channel with the given device.
- */
- CHANNEL_ERROR
-
- }
-
- /**
- * Creates an event of a given type and for the specified device.
- *
- * @param type device event type
- * @param deviceId event device subject
- */
- public ChannelEvent(Type type, DeviceId deviceId) {
- this(type, deviceId, null);
- }
-
- /**
- * Creates an event of a given type and for the specified device, given a certain throwable.
- *
- * @param type device event type
- * @param deviceId event device subject
- * @param throwable exception happened on the channel
- */
- public ChannelEvent(Type type, DeviceId deviceId, Throwable throwable) {
- super(type, deviceId);
- this.throwable = throwable;
- }
-
- /**
- * Creates an event of a given type and for the specified device and the current time.
- *
- * @param type device event type
- * @param deviceId event device subject
- * @param throwable exception happened on the channel
- * @param time occurrence time
- */
- public ChannelEvent(Type type, DeviceId deviceId, Throwable throwable, long time) {
- super(type, deviceId, time);
- this.throwable = throwable;
- }
-
- /**
- * Returns the exception that happened on the channel.
- *
- * @return a throwable if associated to the event, otherwise null.
- */
- public Throwable throwable() {
- return throwable;
- }
-
- @Override
- public String toString() {
- if (throwable == null) {
- return super.toString();
- }
- return toStringHelper(this)
- .add("time", Tools.defaultOffsetDataTime(time()))
- .add("type", type())
- .add("subject", subject())
- .add("throwable", throwable)
- .toString();
- }
-}
diff --git a/core/api/src/main/java/org/onosproject/net/device/DeviceAgentEvent.java b/core/api/src/main/java/org/onosproject/net/device/DeviceAgentEvent.java
new file mode 100644
index 0000000..22eab99
--- /dev/null
+++ b/core/api/src/main/java/org/onosproject/net/device/DeviceAgentEvent.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.onosproject.net.device;
+
+import org.onosproject.event.AbstractEvent;
+import org.onosproject.net.DeviceId;
+
+/**
+ * Describes and event related to a protocol agent used to interact with an
+ * infrastructure device.
+ */
+public final class DeviceAgentEvent
+ extends AbstractEvent<DeviceAgentEvent.Type, DeviceId> {
+
+ /**
+ * Type of device events.
+ */
+ public enum Type {
+ /**
+ * Signifies that a channel between the agent and the device is open and
+ * the two can communicate.
+ */
+ CHANNEL_OPEN,
+
+ /**
+ * Signifies that a channel between the agent and the device is closed
+ * and the two cannot communicate.
+ */
+ CHANNEL_CLOSED,
+
+ /**
+ * Signifies that a channel error has been detected. Further
+ * investigation should be performed to check if the channel is still
+ * open or closed.
+ */
+ CHANNEL_ERROR,
+
+ /**
+ * Signifies that the agent has acquired master role.
+ */
+ ROLE_MASTER,
+
+ /**
+ * Signifies that the agent has standby/slave mastership role.
+ */
+ ROLE_STANDBY,
+
+ /**
+ * Signifies that the agent cannot acquire any valid mastership role for
+ * the device.
+ */
+ ROLE_NONE,
+
+ }
+
+ /**
+ * Creates a new device agent event for the given type and device ID.
+ *
+ * @param type event type
+ * @param deviceId device ID
+ */
+ public DeviceAgentEvent(Type type, DeviceId deviceId) {
+ super(type, deviceId);
+ }
+
+ /**
+ * Creates a new device agent event for the given type, device ID and time.
+ *
+ * @param type event type
+ * @param deviceId device ID
+ * @param time occurrence time
+ */
+ public DeviceAgentEvent(Type type, DeviceId deviceId, long time) {
+ super(type, deviceId, time);
+ }
+}
diff --git a/core/api/src/main/java/org/onosproject/net/device/ChannelListener.java b/core/api/src/main/java/org/onosproject/net/device/DeviceAgentListener.java
similarity index 79%
rename from core/api/src/main/java/org/onosproject/net/device/ChannelListener.java
rename to core/api/src/main/java/org/onosproject/net/device/DeviceAgentListener.java
index cdc90bf70..c034dc2 100644
--- a/core/api/src/main/java/org/onosproject/net/device/ChannelListener.java
+++ b/core/api/src/main/java/org/onosproject/net/device/DeviceAgentListener.java
@@ -18,7 +18,8 @@
import org.onosproject.event.EventListener;
/**
- * A listener to receive events related to a transport channel established with a device.
+ * A listener to receive events related to a protocol agent controlling an
+ * infrastructure device.
*/
-public interface ChannelListener extends EventListener<ChannelEvent> {
+public interface DeviceAgentListener extends EventListener<DeviceAgentEvent> {
}
diff --git a/core/api/src/main/java/org/onosproject/net/device/DeviceHandshaker.java b/core/api/src/main/java/org/onosproject/net/device/DeviceHandshaker.java
index 606248a..bc870d3 100644
--- a/core/api/src/main/java/org/onosproject/net/device/DeviceHandshaker.java
+++ b/core/api/src/main/java/org/onosproject/net/device/DeviceHandshaker.java
@@ -30,38 +30,41 @@
public interface DeviceHandshaker extends DeviceConnect {
/**
- * Checks the reachability (connectivity) of a device.
- * Reachability, unlike availability, denotes whether THIS particular node
- * can send messages and receive replies from the specified device.
+ * Checks the reachability (connectivity) of a device. Reachability, unlike
+ * availability, denotes whether THIS particular node can send messages and
+ * receive replies from the specified device.
*
* @return CompletableFuture eventually true if reachable, false otherwise
*/
CompletableFuture<Boolean> isReachable();
/**
- * Applies on the device a mastership role change as decided by the core.
+ * Notifies the device a mastership role change as decided by the core. The
+ * implementation of this method should trigger a {@link DeviceAgentEvent}
+ * signaling the mastership role accepted by the device.
*
- * @param newRole newly determined mastership role
- * @return CompletableFuture with the mastership role accepted from the device
+ * @param newRole new mastership role
*/
- CompletableFuture<MastershipRole> roleChanged(MastershipRole newRole);
+ void roleChanged(MastershipRole newRole);
/**
- * Applies a listener to a channel established with the device.
+ * Adds a device agent listener.
*
- * @param listener the channel listener
+ * @param listener device agent listener
*/
- default void addChannelListener(ChannelListener listener) {
- throw new UnsupportedOperationException("Listener Registration not supported");
+ default void addDeviceAgentListener(DeviceAgentListener listener) {
+ throw new UnsupportedOperationException(
+ "Device agent listener registration not supported");
}
/**
- * Removes a listener to a channel established with the device.
+ * Removes a device agent listener.
*
- * @param listener the channel listener
+ * @param listener device agent listener
*/
- default void removeChannelListener(ChannelListener listener) {
- throw new UnsupportedOperationException("Listener Removal not supported");
+ default void removeDeviceAgentListener(DeviceAgentListener listener) {
+ throw new UnsupportedOperationException(
+ "Device agent listener removal not supported");
}
}
diff --git a/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java b/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
index 414db4a..09d95b4 100644
--- a/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
+++ b/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
@@ -798,7 +798,8 @@
}
} else {
// we didn't get back what we asked for. Reelect someone else.
- log.warn("Failed to assert role [{}] onto Device {}", response, deviceId);
+ log.warn("Failed to assert role onto device {}. requested={}, response={}",
+ deviceId, requested, response);
if (requested == MastershipRole.MASTER) {
mastershipService.relinquishMastership(deviceId);
// TODO: Shouldn't we be triggering event?
diff --git a/core/net/src/main/java/org/onosproject/net/pi/impl/PiFlowRuleTranslatorImpl.java b/core/net/src/main/java/org/onosproject/net/pi/impl/PiFlowRuleTranslatorImpl.java
index 5741e65..38efdb9 100644
--- a/core/net/src/main/java/org/onosproject/net/pi/impl/PiFlowRuleTranslatorImpl.java
+++ b/core/net/src/main/java/org/onosproject/net/pi/impl/PiFlowRuleTranslatorImpl.java
@@ -146,7 +146,7 @@
if (tableModel.supportsAging()) {
tableEntryBuilder.withTimeout((double) rule.timeout());
} else {
- log.warn("Flow rule is temporary, but table '{}' doesn't support " +
+ log.debug("Flow rule is temporary, but table '{}' doesn't support " +
"aging, translating to permanent.", tableModel.id());
}
diff --git a/core/net/src/main/java/org/onosproject/net/pi/impl/PiPipeconfManager.java b/core/net/src/main/java/org/onosproject/net/pi/impl/PiPipeconfManager.java
index 212cf1c..55a4a11 100644
--- a/core/net/src/main/java/org/onosproject/net/pi/impl/PiPipeconfManager.java
+++ b/core/net/src/main/java/org/onosproject/net/pi/impl/PiPipeconfManager.java
@@ -139,7 +139,6 @@
@Override
public void register(PiPipeconf pipeconf) throws IllegalStateException {
- log.warn("Currently using local maps, needs to be moved to a distributed store");
if (piPipeconfs.containsKey(pipeconf.id())) {
throw new IllegalStateException(format("Pipeconf %s is already registered", pipeconf.id()));
}
@@ -262,7 +261,7 @@
cfgService.getConfig(deviceId, PiPipeconfConfig.class);
PiPipeconfId id = pipeconfConfig.piPipeconfId();
if (id.id().equals("")) {
- log.warn("Not adding empty pipeconfId for device {}", deviceId);
+ log.debug("Ignoring empty pipeconf ID for device {}", deviceId);
} else {
pipeconfMappingStore.createOrUpdateBinding(deviceId, id);
}