Wilsonhut

Deal with it or don't

Chunk

I wrote before about partitioning a string, which was a way to bust a string into Chunks. It used the string like an IEnumerable<char>. I thought it would be nice to be able to Chunk any enumerable (for batching, or whatever).

The Partition was written without regard to performance, but this time, it’s all about performance. Just try to make it faster.

Here’s the extension method:

    1 public static class Extensions

    2 {

    3   public static IEnumerable<IEnumerable<T>> ToChunks<T>(this IEnumerable<T> list, int chunkSize)

    4   {

    5     var enumerator = list.GetEnumerator();

    6

    7     for (;;)

    8     {

    9       var chunk = enumerator.GetNext(chunkSize);

   10       if (chunk.Length == 0)

   11       {

   12         break;

   13       }

   14       yield return chunk;

   15     }

   16   }

   17

   18   private static T[] GetNext<T>(this IEnumerator<T> enumerator, int count)

   19   {

   20     var ts = new T[count];

   21     int i;

   22     for (i = 0; i < count; i++)

   23     {

   24       if (!enumerator.MoveNext()) break;

   25       ts[i] = enumerator.Current;

   26     }

   27     if (i < count)

   28     {

   29       Array.Resize(ref ts, i);

   30     }

   31     return ts;

   32   }

   33 }

When I needed this recently, I also needed to know where each chunk sat in the original IEnumerable, so instead of returning an IEnumerable of IEnumerables, I returned an IEnumerable of a new Chunk type that implements IEnumerable<T>. I just had to make a small change to the ToChunks signature, the for loop header, and the yield return statement:

    1 public static class Extensions

    2 {

    3   public static IEnumerable<Chunk<T>> ToChunks<T>(this IEnumerable<T> list, int chunkSize)

    4   {

    5     var enumerator = list.GetEnumerator();

    6

    7     for (var i = 0;; i++)

    8     {

    9       var chunk = enumerator.GetNext(chunkSize);

   10       if (chunk.Length == 0)

   11       {

   12         break;

   13       }

   14       yield return new Chunk<T>(chunk, i*chunkSize, chunk.Length);

   15     }

   16   }

   17

   18   private static T[] GetNext<T>(this IEnumerator<T> enumerator, int count)

   19   {

   20     var ts = new T[count];

   21     int i;

   22     for (i = 0; i < count; i++)

   23     {

   24       if (!enumerator.MoveNext()) break;

   25       ts[i] = enumerator.Current;

   26     }

   27     if (i < count)

   28     {

   29       Array.Resize(ref ts, i);

   30     }

   31     return ts;

   32   }

   33 }

   34

…and here’s Chunk

    1 public class Chunk<T> : IEnumerable<T>

    2 {

    3   private readonly IEnumerable<T> _chunk;

    4

    5   public Chunk(IEnumerable<T> chunk, int first, int length)

    6   {

    7     _chunk = chunk;

    8     FirstIndex = first;

    9     Length = length;

   10   }

   11

   12   public int FirstIndex { get; private set; }

   13   public int Length { get; private set; }

   14   public int LastIndex { get { return FirstIndex + Length 1; } }

   15

   16   public IEnumerator<T> GetEnumerator()

   17   {

   18     return _chunk.GetEnumerator();

   19   }

   20

   21   IEnumerator IEnumerable.GetEnumerator()

   22   {

   23     return GetEnumerator();

   24   }

   25 }

Advertisements

One response to “Chunk”

  1. Pingback: Chunk is now public-er « Wilsonhut

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: