Wilsonhut

Deal with it or don't

Monthly Archives: September 2011

Chunk

I wrote before about partitioning a string, which was a way to bust a string into Chunks. It used the string like an IEnumerable<char>. I thought it would be nice to be able to Chunk any enumerable (for batching, or whatever).

The Partition was written without regard to performance, but this time, it’s all about performance. Just try to make it faster.

Here’s the extension method:

    1 public static class Extensions

    2 {

    3   public static IEnumerable<IEnumerable<T>> ToChunks<T>(this IEnumerable<T> list, int chunkSize)

    4   {

    5     var enumerator = list.GetEnumerator();

    6

    7     for (;;)

    8     {

    9       var chunk = enumerator.GetNext(chunkSize);

   10       if (chunk.Length == 0)

   11       {

   12         break;

   13       }

   14       yield return chunk;

   15     }

   16   }

   17

   18   private static T[] GetNext<T>(this IEnumerator<T> enumerator, int count)

   19   {

   20     var ts = new T[count];

   21     int i;

   22     for (i = 0; i < count; i++)

   23     {

   24       if (!enumerator.MoveNext()) break;

   25       ts[i] = enumerator.Current;

   26     }

   27     if (i < count)

   28     {

   29       Array.Resize(ref ts, i);

   30     }

   31     return ts;

   32   }

   33 }

When I needed this recently, I also needed to know where each chunk sat in the original IEnumerable, so instead of returning an IEnumerable of IEnumerables, I returned an IEnumerable of a new Chunk type that implements IEnumerable<T>. I just had to make a small change in three places: the return type of ToChunks, the for loop (which now counts the chunks), and the yield return (which now wraps the array in a Chunk):

    1 public static class Extensions

    2 {

    3   public static IEnumerable<Chunk<T>> ToChunks<T>(this IEnumerable<T> list, int chunkSize)

    4   {

    5     var enumerator = list.GetEnumerator();

    6

    7     for (var i = 0;; i++)

    8     {

    9       var chunk = enumerator.GetNext(chunkSize);

   10       if (chunk.Length == 0)

   11       {

   12         break;

   13       }

   14       yield return new Chunk<T>(chunk, i*chunkSize, chunk.Length);

   15     }

   16   }

   17

   18   private static T[] GetNext<T>(this IEnumerator<T> enumerator, int count)

   19   {

   20     var ts = new T[count];

   21     int i;

   22     for (i = 0; i < count; i++)

   23     {

   24       if (!enumerator.MoveNext()) break;

   25       ts[i] = enumerator.Current;

   26     }

   27     if (i < count)

   28     {

   29       Array.Resize(ref ts, i);

   30     }

   31     return ts;

   32   }

   33 }

   34

…and here’s Chunk

    1 public class Chunk<T> : IEnumerable<T>

    2 {

    3   private readonly IEnumerable<T> _chunk;

    4

    5   public Chunk(IEnumerable<T> chunk, int first, int length)

    6   {

    7     _chunk = chunk;

    8     FirstIndex = first;

    9     Length = length;

   10   }

   11

   12   public int FirstIndex { get; private set; }

   13   public int Length { get; private set; }

   14   public int LastIndex { get { return FirstIndex + Length 1; } }

   15

   16   public IEnumerator<T> GetEnumerator()

   17   {

   18     return _chunk.GetEnumerator();

   19   }

   20

   21   IEnumerator IEnumerable.GetEnumerator()

   22   {

   23     return GetEnumerator();

   24   }

   25 }