loci: lazily zero wire buffer

Previously we zeroed the entire wire buffer when it was allocated. This was
wasteful because the message often didn't grow to fill the entire allocation,
and the memset was consuming 80% of CPU time during one of my benchmarks.

With this change, we only zero memory right before giving it to the user: when
the buffer's in-use size (current_bytes) grows, just the newly exposed bytes
are zeroed. Additionally, of_octets_t fields like packet-in data aren't zeroed
at all.
diff --git a/c_gen/c_match.py b/c_gen/c_match.py
index 0da5227..d7fa178 100644
--- a/c_gen/c_match.py
+++ b/c_gen/c_match.py
@@ -873,7 +873,8 @@
                 of_match_v%(version)s_delete(wire_match);
                 return rv;
             }
-            octets->bytes = OF_MATCH_BYTES(wire_match->length);
+            of_wire_buffer_grow(wire_match->wbuf, OF_MATCH_BYTES(wire_match->length));
+            octets->bytes = wire_match->wbuf->current_bytes;
             of_object_wire_buffer_steal((of_object_t *)wire_match,
                                         &octets->data);
             of_match_v%(version)s_delete(wire_match);
diff --git a/c_gen/templates/of_wire_buf.h b/c_gen/templates/of_wire_buf.h
index 0723454..ec65098 100644
--- a/c_gen/templates/of_wire_buf.h
+++ b/c_gen/templates/of_wire_buf.h
@@ -174,7 +174,6 @@
         FREE(wbuf);
         return NULL;
     }
-    MEMSET(wbuf->buf, 0, a_bytes);
     wbuf->current_bytes = 0;
     wbuf->alloc_bytes = a_bytes;
 
@@ -245,6 +244,7 @@
     LOCI_ASSERT(wbuf != NULL);
     LOCI_ASSERT(wbuf->alloc_bytes >= bytes);
     if (bytes > wbuf->current_bytes) {
+        MEMSET(wbuf->buf + wbuf->current_bytes, 0, bytes - wbuf->current_bytes);
         wbuf->current_bytes = bytes;
     }
 }