/*
 * Copyright 2016-present Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | package org.onosproject.bmv2.demo.app.wcmp; |
| 18 | |
import com.eclipsesource.json.Json;
import com.eclipsesource.json.JsonObject;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.onosproject.bmv2.api.context.Bmv2Configuration;
import org.onosproject.bmv2.api.context.Bmv2DefaultConfiguration;
import org.onosproject.bmv2.api.context.Bmv2DeviceContext;
import org.onosproject.bmv2.api.runtime.Bmv2Action;
import org.onosproject.bmv2.api.runtime.Bmv2DeviceAgent;
import org.onosproject.bmv2.api.runtime.Bmv2ExtensionSelector;
import org.onosproject.bmv2.api.runtime.Bmv2ExtensionTreatment;
import org.onosproject.bmv2.api.runtime.Bmv2RuntimeException;
import org.onosproject.bmv2.api.service.Bmv2Controller;
import org.onosproject.bmv2.demo.app.common.AbstractUpgradableFabricApp;
import org.onosproject.net.DeviceId;
import org.onosproject.net.Host;
import org.onosproject.net.Path;
import org.onosproject.net.PortNumber;
import org.onosproject.net.flow.DefaultTrafficSelector;
import org.onosproject.net.flow.DefaultTrafficTreatment;
import org.onosproject.net.flow.FlowRule;
import org.onosproject.net.flow.TrafficTreatment;
import org.onosproject.net.flow.criteria.ExtensionSelector;
import org.onosproject.net.flow.instructions.ExtensionTreatment;
import org.onosproject.net.topology.DefaultTopologyVertex;
import org.onosproject.net.topology.Topology;
import org.onosproject.net.topology.TopologyGraph;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;
import static org.onlab.packet.EthType.EtherType.IPV4;
import static org.onosproject.bmv2.api.utils.Bmv2TranslatorUtils.roundToBytes;
import static org.onosproject.bmv2.demo.app.wcmp.WcmpInterpreter.*;
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 69 | |
| 70 | /** |
| 71 | * Implementation of an upgradable fabric app for the WCMP configuration. |
| 72 | */ |
| 73 | @Component(immediate = true) |
| 74 | public class WcmpFabricApp extends AbstractUpgradableFabricApp { |
| 75 | |
| 76 | private static final String APP_NAME = "org.onosproject.bmv2-wcmp-fabric"; |
| 77 | private static final String MODEL_NAME = "WCMP"; |
| 78 | private static final String JSON_CONFIG_PATH = "/wcmp.json"; |
| 79 | |
| 80 | private static final double MULTI_PORT_WEIGHT_COEFFICIENT = 0.85; |
| 81 | |
| 82 | private static final Bmv2Configuration WCMP_CONFIGURATION = loadConfiguration(); |
| 83 | private static final WcmpInterpreter WCMP_INTERPRETER = new WcmpInterpreter(); |
| 84 | protected static final Bmv2DeviceContext WCMP_CONTEXT = new Bmv2DeviceContext(WCMP_CONFIGURATION, WCMP_INTERPRETER); |
| 85 | |
Carmelo Cascone | 0ec92f1 | 2016-06-17 14:41:40 -0700 | [diff] [blame] | 86 | private static final Map<DeviceId, Map<Map<PortNumber, Double>, Integer>> DEVICE_GROUP_ID_MAP = Maps.newHashMap(); |
| 87 | |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 88 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
| 89 | private Bmv2Controller bmv2Controller; |
| 90 | |
| 91 | /** |
| 92 | * TODO. |
| 93 | */ |
| 94 | public WcmpFabricApp() { |
| 95 | super(APP_NAME, MODEL_NAME, WCMP_CONTEXT); |
| 96 | } |
| 97 | |
| 98 | |
| 99 | @Override |
| 100 | public boolean initDevice(DeviceId deviceId) { |
| 101 | try { |
| 102 | Bmv2DeviceAgent agent = bmv2Controller.getAgent(deviceId); |
| 103 | for (Map.Entry<String, Bmv2Action> entry : WCMP_INTERPRETER.defaultActionsMap().entrySet()) { |
| 104 | agent.setTableDefaultAction(entry.getKey(), entry.getValue()); |
| 105 | } |
| 106 | return true; |
| 107 | } catch (Bmv2RuntimeException e) { |
Carmelo Cascone | fbc577b | 2016-06-17 23:19:09 -0700 | [diff] [blame] | 108 | log.debug("Exception while initializing device {}: {}", deviceId, e.explain()); |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 109 | return false; |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | @Override |
| 114 | public List<FlowRule> generateLeafRules(DeviceId deviceId, Host srcHost, Collection<Host> dstHosts, |
| 115 | Collection<DeviceId> availableSpines, Topology topo) |
| 116 | throws FlowRuleGeneratorException { |
| 117 | |
| 118 | Set<PortNumber> hostPortNumbers = Sets.newHashSet(); |
| 119 | Set<PortNumber> fabricPortNumbers = Sets.newHashSet(); |
| 120 | deviceService.getPorts(deviceId) |
| 121 | .forEach(p -> (isFabricPort(p, topo) ? fabricPortNumbers : hostPortNumbers).add(p.number())); |
| 122 | |
| 123 | if (hostPortNumbers.size() != 1 || fabricPortNumbers.size() == 0) { |
| 124 | log.error("Leaf switch has invalid port configuration: hostPorts={}, fabricPorts={}", |
| 125 | hostPortNumbers.size(), fabricPortNumbers.size()); |
| 126 | throw new FlowRuleGeneratorException(); |
| 127 | } |
| 128 | PortNumber hostPort = hostPortNumbers.iterator().next(); |
| 129 | |
| 130 | TopologyGraph graph = topologyService.getGraph(topo); |
| 131 | // Map key: spine device id, value: leaf switch ports which connect to spine in the key. |
| 132 | Map<DeviceId, Set<PortNumber>> spineToPortsMap = Maps.newHashMap(); |
| 133 | graph.getEdgesFrom(new DefaultTopologyVertex(deviceId)).forEach(edge -> { |
| 134 | spineToPortsMap.putIfAbsent(edge.dst().deviceId(), Sets.newHashSet()); |
| 135 | spineToPortsMap.get(edge.dst().deviceId()).add(edge.link().src().port()); |
| 136 | }); |
| 137 | |
| 138 | double baseWeight = 1d / spineToPortsMap.size(); |
| 139 | |
| 140 | int numSinglePorts = (int) spineToPortsMap.values().stream().filter(s -> s.size() == 1).count(); |
| 141 | int numMultiPorts = spineToPortsMap.size() - numSinglePorts; |
| 142 | |
| 143 | // Reduce weight portion assigned to multi-ports to mitigate flow assignment imbalance (measured empirically). |
| 144 | double multiPortBaseWeight = baseWeight * MULTI_PORT_WEIGHT_COEFFICIENT; |
| 145 | double excess = (baseWeight - multiPortBaseWeight) * numMultiPorts; |
| 146 | double singlePortBaseWeight = baseWeight + (excess / numSinglePorts); |
| 147 | |
| 148 | Map<PortNumber, Double> weighedPortNumbers = Maps.newHashMap(); |
| 149 | spineToPortsMap.forEach((did, portSet) -> { |
| 150 | double base = (portSet.size() == 1) ? singlePortBaseWeight : multiPortBaseWeight; |
| 151 | double weight = base / portSet.size(); |
| 152 | portSet.forEach(portNumber -> weighedPortNumbers.put(portNumber, weight)); |
| 153 | }); |
| 154 | |
| 155 | List<FlowRule> rules = Lists.newArrayList(); |
| 156 | |
| 157 | |
| 158 | Pair<ExtensionTreatment, List<FlowRule>> result = provisionWcmpTreatment(deviceId, weighedPortNumbers); |
| 159 | ExtensionTreatment wcmpTreatment = result.getLeft(); |
| 160 | rules.addAll(result.getRight()); |
| 161 | |
| 162 | // From src host to dst hosts, WCMP to all fabric ports. |
| 163 | for (Host dstHost : dstHosts) { |
| 164 | FlowRule rule = flowRuleBuilder(deviceId, TABLE0) |
| 165 | .withSelector( |
| 166 | DefaultTrafficSelector.builder() |
| 167 | .matchInPort(hostPort) |
| 168 | .matchEthType(IPV4.ethType().toShort()) |
| 169 | .matchEthSrc(srcHost.mac()) |
| 170 | .matchEthDst(dstHost.mac()) |
| 171 | .build()) |
| 172 | .withTreatment( |
| 173 | DefaultTrafficTreatment.builder() |
| 174 | .extension(wcmpTreatment, deviceId) |
| 175 | .build()) |
| 176 | .build(); |
| 177 | rules.add(rule); |
| 178 | } |
| 179 | |
| 180 | // From fabric ports to src host. |
| 181 | for (PortNumber port : fabricPortNumbers) { |
| 182 | FlowRule rule = flowRuleBuilder(deviceId, TABLE0) |
| 183 | .withSelector( |
| 184 | DefaultTrafficSelector.builder() |
| 185 | .matchInPort(port) |
| 186 | .matchEthType(IPV4.ethType().toShort()) |
| 187 | .matchEthDst(srcHost.mac()) |
| 188 | .build()) |
| 189 | .withTreatment( |
| 190 | DefaultTrafficTreatment.builder() |
| 191 | .setOutput(hostPort) |
| 192 | .build()) |
| 193 | .build(); |
| 194 | rules.add(rule); |
| 195 | } |
| 196 | |
| 197 | return rules; |
| 198 | } |
| 199 | |
| 200 | @Override |
| 201 | public List<FlowRule> generateSpineRules(DeviceId deviceId, Collection<Host> dstHosts, Topology topo) |
| 202 | throws FlowRuleGeneratorException { |
| 203 | |
| 204 | List<FlowRule> rules = Lists.newArrayList(); |
| 205 | |
| 206 | for (Host dstHost : dstHosts) { |
| 207 | |
| 208 | Set<Path> paths = topologyService.getPaths(topo, deviceId, dstHost.location().deviceId()); |
| 209 | |
| 210 | if (paths.size() == 0) { |
| 211 | log.warn("Can't find any path between spine {} and host {}", deviceId, dstHost); |
| 212 | throw new FlowRuleGeneratorException(); |
| 213 | } |
| 214 | |
| 215 | TrafficTreatment treatment; |
| 216 | |
| 217 | if (paths.size() == 1) { |
| 218 | // Only one path. |
| 219 | PortNumber port = paths.iterator().next().src().port(); |
| 220 | treatment = DefaultTrafficTreatment.builder().setOutput(port).build(); |
| 221 | } else { |
| 222 | // Multiple paths, do WCMP. |
| 223 | Set<PortNumber> portNumbers = paths.stream().map(p -> p.src().port()).collect(toSet()); |
| 224 | double weight = 1d / portNumbers.size(); |
| 225 | // Same weight for all ports. |
| 226 | Map<PortNumber, Double> weightedPortNumbers = portNumbers.stream() |
| 227 | .collect(Collectors.toMap(p -> p, p -> weight)); |
| 228 | Pair<ExtensionTreatment, List<FlowRule>> result = provisionWcmpTreatment(deviceId, weightedPortNumbers); |
| 229 | rules.addAll(result.getRight()); |
| 230 | treatment = DefaultTrafficTreatment.builder().extension(result.getLeft(), deviceId).build(); |
| 231 | } |
| 232 | |
| 233 | FlowRule rule = flowRuleBuilder(deviceId, TABLE0) |
| 234 | .withSelector( |
| 235 | DefaultTrafficSelector.builder() |
| 236 | .matchEthType(IPV4.ethType().toShort()) |
| 237 | .matchEthDst(dstHost.mac()) |
| 238 | .build()) |
| 239 | .withTreatment(treatment) |
| 240 | .build(); |
| 241 | |
| 242 | rules.add(rule); |
| 243 | } |
| 244 | |
| 245 | return rules; |
| 246 | } |
| 247 | |
| 248 | private Pair<ExtensionTreatment, List<FlowRule>> provisionWcmpTreatment(DeviceId deviceId, |
| 249 | Map<PortNumber, Double> weightedFabricPorts) |
| 250 | throws FlowRuleGeneratorException { |
| 251 | |
| 252 | // Install WCMP group table entries that map from hash values to fabric ports. |
| 253 | |
| 254 | int groupId = groupIdOf(deviceId, weightedFabricPorts); |
| 255 | List<PortNumber> portNumbers = Lists.newArrayList(); |
| 256 | List<Double> weights = Lists.newArrayList(); |
| 257 | weightedFabricPorts.forEach((p, w) -> { |
| 258 | portNumbers.add(p); |
| 259 | weights.add(w); |
| 260 | }); |
Carmelo Cascone | 0ec92f1 | 2016-06-17 14:41:40 -0700 | [diff] [blame] | 261 | List<Integer> prefixLengths = toPrefixLengths(weights); |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 262 | |
| 263 | List<FlowRule> rules = Lists.newArrayList(); |
| 264 | for (int i = 0; i < portNumbers.size(); i++) { |
Carmelo Cascone | 0ec92f1 | 2016-06-17 14:41:40 -0700 | [diff] [blame] | 265 | ExtensionSelector extSelector = buildWcmpSelector(groupId, prefixLengths.get(i)); |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 266 | FlowRule rule = flowRuleBuilder(deviceId, WCMP_GROUP_TABLE) |
| 267 | .withSelector(DefaultTrafficSelector.builder() |
| 268 | .extension(extSelector, deviceId) |
| 269 | .build()) |
| 270 | .withTreatment( |
| 271 | DefaultTrafficTreatment.builder() |
| 272 | .setOutput(portNumbers.get(i)) |
| 273 | .build()) |
| 274 | .build(); |
| 275 | rules.add(rule); |
| 276 | } |
| 277 | |
Carmelo Cascone | 0ec92f1 | 2016-06-17 14:41:40 -0700 | [diff] [blame] | 278 | ExtensionTreatment extTreatment = buildWcmpTreatment(groupId); |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 279 | |
| 280 | return Pair.of(extTreatment, rules); |
| 281 | } |
| 282 | |
Carmelo Cascone | 0ec92f1 | 2016-06-17 14:41:40 -0700 | [diff] [blame] | 283 | private Bmv2ExtensionSelector buildWcmpSelector(int groupId, int prefixLength) { |
| 284 | byte[] ones = new byte[roundToBytes(prefixLength)]; |
| 285 | Arrays.fill(ones, (byte) 0xFF); |
| 286 | return Bmv2ExtensionSelector.builder() |
| 287 | .forConfiguration(WCMP_CONTEXT.configuration()) |
| 288 | .matchExact(WCMP_META, GROUP_ID, groupId) |
| 289 | .matchLpm(WCMP_META, SELECTOR, ones, prefixLength) |
| 290 | .build(); |
| 291 | } |
| 292 | |
| 293 | private Bmv2ExtensionTreatment buildWcmpTreatment(int groupId) { |
| 294 | return Bmv2ExtensionTreatment.builder() |
| 295 | .forConfiguration(WCMP_CONTEXT.configuration()) |
| 296 | .setActionName(WCMP_GROUP) |
| 297 | .addParameter(GROUP_ID, groupId) |
| 298 | .build(); |
| 299 | } |
| 300 | |
| 301 | public int groupIdOf(DeviceId did, Map<PortNumber, Double> weightedPorts) { |
| 302 | DEVICE_GROUP_ID_MAP.putIfAbsent(did, Maps.newHashMap()); |
| 303 | // Counts the number of unique portNumber sets for each device ID. |
| 304 | // Each distinct set of portNumbers will have a unique ID. |
| 305 | return DEVICE_GROUP_ID_MAP.get(did).computeIfAbsent(weightedPorts, |
| 306 | (pp) -> DEVICE_GROUP_ID_MAP.get(did).size() + 1); |
| 307 | } |
| 308 | |
| 309 | public List<Integer> toPrefixLengths(List<Double> weigths) { |
| 310 | |
| 311 | final double weightSum = weigths.stream() |
| 312 | .mapToDouble(Double::doubleValue) |
| 313 | .map(this::roundDouble) |
| 314 | .sum(); |
| 315 | |
| 316 | if (Math.abs(weightSum - 1) > 0.0001) { |
| 317 | throw new RuntimeException("WCMP weights sum is expected to be 1, found was " + weightSum); |
| 318 | } |
| 319 | |
| 320 | final int selectorBitWidth = WCMP_CONTEXT.configuration().headerType(WCMP_META_T).field(SELECTOR).bitWidth(); |
| 321 | final int availableBits = selectorBitWidth - 1; |
| 322 | |
| 323 | List<Long> prefixDiffs = weigths.stream().map(w -> Math.round(w * availableBits)).collect(toList()); |
| 324 | |
| 325 | final long bitSum = prefixDiffs.stream().mapToLong(Long::longValue).sum(); |
| 326 | final long error = availableBits - bitSum; |
| 327 | |
| 328 | if (error != 0) { |
| 329 | // Lazy intuition here is that the error can be absorbed by the longest prefixDiff with the minor impact. |
| 330 | Long maxDiff = Collections.max(prefixDiffs); |
| 331 | int idx = prefixDiffs.indexOf(maxDiff); |
| 332 | prefixDiffs.remove(idx); |
| 333 | prefixDiffs.add(idx, maxDiff + error); |
| 334 | } |
| 335 | List<Integer> prefixLengths = Lists.newArrayList(); |
| 336 | |
| 337 | int prefix = 1; |
| 338 | for (Long p : prefixDiffs) { |
| 339 | prefixLengths.add(prefix); |
| 340 | prefix += p; |
| 341 | } |
| 342 | return ImmutableList.copyOf(prefixLengths); |
| 343 | } |
| 344 | |
| 345 | private double roundDouble(double n) { |
| 346 | // 5 digits precision. |
| 347 | return (double) Math.round(n * 100000d) / 100000d; |
| 348 | } |
| 349 | |
Carmelo Cascone | efc0a92 | 2016-06-14 14:32:33 -0700 | [diff] [blame] | 350 | private static Bmv2Configuration loadConfiguration() { |
| 351 | try { |
| 352 | JsonObject json = Json.parse(new BufferedReader(new InputStreamReader( |
| 353 | WcmpFabricApp.class.getResourceAsStream(JSON_CONFIG_PATH)))).asObject(); |
| 354 | return Bmv2DefaultConfiguration.parse(json); |
| 355 | } catch (IOException e) { |
| 356 | throw new RuntimeException("Unable to load configuration", e); |
| 357 | } |
| 358 | } |
| 359 | } |