loci: chunk multiple classes into each C file

Parsing headers was taking more than half of the compile time for many classes.
By putting multiple classes into a single C file we amortize the cost of header
parsing. These C files are still much smaller than the old loci.c.

I also experimented with chunking the list and header class files, but there
are relatively few of these and so there was little gain.

The code size reduction is presumably due to better sharing of code between
similar classes. Since the classes are sorted by name before chunking, the
action, instruction, etc. inheritance hierarchies will be grouped together.

Results:
time make -j4 loci.a: -44% (27.4s to 15.4s)
strip loci.a && du -sh loci.a: -25% (2.3 MB to 1.7 MB)
diff --git a/c_gen/codegen.py b/c_gen/codegen.py
index 3249747..121bc42 100644
--- a/c_gen/codegen.py
+++ b/c_gen/codegen.py
@@ -35,6 +35,7 @@
 from itertools import groupby
 from StringIO import StringIO
 import template_utils
+from generic_utils import chunks
 import loxi_globals
 import loxi_ir.ir as ir
 import util
@@ -43,6 +44,8 @@
 import c_gen.type_maps as type_maps
 import c_gen.c_type_maps as c_type_maps
 
+CLASS_CHUNK_SIZE = 32
+
 PushWireTypesData = namedtuple('PushWireTypesData',
     ['class_name', 'versioned_type_members'])
 PushWireTypesMember = namedtuple('PushWireTypesMember',
@@ -75,14 +78,18 @@
         class_name=uclass.name,
         versioned_type_members=versioned_type_members)
 
+# Output multiple LOCI classes into each C file. This reduces the overhead of
+# parsing header files, which takes longer than compiling the actual code
+# for many classes. It also reduces the compiled code size.
 def generate_classes(install_dir):
-    for uclass in loxi_globals.unified.classes:
-        with template_utils.open_output(install_dir, "loci/src/%s.c" % uclass.name) as out:
-            util.render_template(out, "class.c",
-                push_wire_types_data=push_wire_types_data(uclass))
-            # Append legacy generated code
-            c_code_gen.gen_new_function_definitions(out, uclass.name)
-            c_code_gen.gen_accessor_definitions(out, uclass.name)
+    for i, chunk in enumerate(chunks(loxi_globals.unified.classes, CLASS_CHUNK_SIZE)):
+        with template_utils.open_output(install_dir, "loci/src/class%02d.c" % i) as out:
+            for uclass in chunk:
+                util.render_template(out, "class.c",
+                    push_wire_types_data=push_wire_types_data(uclass))
+                # Append legacy generated code
+                c_code_gen.gen_new_function_definitions(out, uclass.name)
+                c_code_gen.gen_accessor_definitions(out, uclass.name)
 
 # TODO remove header classes and use the corresponding class instead
 def generate_header_classes(install_dir):
diff --git a/generic_utils.py b/generic_utils.py
index 1cfba86..5683aa1 100644
--- a/generic_utils.py
+++ b/generic_utils.py
@@ -214,3 +214,11 @@
         if func(i):
             c +=1
     return c
+
+def chunks(l, n):
+    """
+    Yield successive n-sized chunks from l.
+    From http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python
+    """
+    for i in xrange(0, len(l), n):
+        yield l[i:i+n]