loci: only mask partially wildcarded fields

The of_match_values_mask function was consuming about 7% CPU time in my
benchmarks. For realistic flows most of the work it did was pointless, because
most fields are fully wildcarded and most of the rest are not wildcarded. Only
in the uncommon case of partially wildcarded fields does this function do
anything.

Fixed by calling a new of_memmask function only when parsing a masked OXM.
diff --git a/c_gen/c_match.py b/c_gen/c_match.py
index 0da5227..a3cc42a 100644
--- a/c_gen/c_match.py
+++ b/c_gen/c_match.py
@@ -189,6 +189,18 @@
     }
 }
 
+/* AND each of the len bytes at _fields with the matching byte at _masks, in place. */
+static inline void
+of_memmask(void *_fields, void *_masks, size_t len)
+{
+    uint8_t *fields = _fields;
+    const uint8_t *masks = _masks; /* masks are only read */
+    size_t idx; /* same type as len, so no signed/unsigned comparison */
+    for (idx = 0; idx < len; idx++) {
+        fields[idx] &= masks[idx];
+    }
+}
+
 /**
  * IP Mask map.  IP maks wildcards from OF 1.0 are interpretted as
  * indices into the map below.
@@ -811,6 +823,7 @@
             of_oxm_%(key)s_masked_value_get(
                 &oxm_entry.%(key)s,
                 &dst->fields.%(key)s);
+            of_memmask(&dst->fields.%(key)s, &dst->masks.%(key)s, sizeof(dst->fields.%(key)s)); /* size of the field, not of a pointer to it */
             break;
         case OF_OXM_%(ku)s:
             OF_MATCH_MASK_%(ku)s_EXACT_SET(dst);
@@ -828,9 +841,6 @@
         rv = of_list_oxm_next(&oxm_list, &oxm_entry);
     } /* end OXM iteration */
 
-    /* Clear values outside of masks */
-    of_match_values_mask(dst);
-
     return OF_ERROR_NONE;
 }
 """)