Basic compression in .NET

Following on from my previous post on encryption, a similar technique often used in conjunction with encryption is compression.

As an aside, if you are going to use encryption and compression together, it’s important that you compress first and then encrypt, rather than the other way round. This is due to the way compression engines work: they reduce data size by looking for repeating patterns in the data and then storing each pattern once, along with the locations in the data at which it appears. Unencrypted data is usually far more repetitive than the seemingly random output you get from encryption. This is especially true for human-readable text. Consider how many times words are repeated in a piece of prose versus how many repeating patterns there are likely to be in an encrypted version of that prose.

As with encryption, we’ll start with a basic interface so that compression can be injected into other objects using dependency injection:

namespace Compression
{
    /// <summary>
    /// Abstraction over a string-to-string compressor, declared as an interface
    /// so that implementations can be swapped or injected.
    /// </summary>
    public interface ICompressor
    {
        /// <summary>
        /// Compresses the supplied text.
        /// </summary>
        /// <param name="text">The text to compress.</param>
        /// <returns>The compressed form of <paramref name="text"/>, carried in a string.</returns>
        string Compress(string text);

        /// <summary>
        /// Reverses <see cref="Compress"/>.
        /// </summary>
        /// <param name="compressedText">
        /// A string presumably produced by <see cref="Compress"/> on the same
        /// implementation; the interface itself does not enforce this.
        /// </param>
        /// <returns>The decompressed text.</returns>
        string Decompress(string compressedText);
    }
}

The implementation for this is as follows:

using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Text;

namespace Compression
{
    /// <summary>
    /// GZip-based implementation of <see cref="ICompressor"/>.
    /// </summary>
    /// <remarks>
    /// The text is encoded as UTF-8 before compression so that every Unicode
    /// character survives a round trip. The compressed bytes are returned as a
    /// string holding one character (U+0000 to U+00FF) per byte; that string may
    /// contain control characters and is not meant to be human readable.
    /// </remarks>
    public class Compressor : ICompressor
    {
        // Strict decoder: throws on invalid UTF-8 instead of silently inserting
        // U+FFFD, which lets Decompress detect payloads written by the older
        // one-byte-per-character implementation.
        private static readonly Encoding StrictUtf8 = new UTF8Encoding(false, true);

        /// <summary>
        /// Compresses <paramref name="value"/> with GZip.
        /// </summary>
        /// <param name="value">The text to compress.</param>
        /// <returns>
        /// The GZip output as a string with one character per compressed byte,
        /// or <paramref name="value"/> itself when it is null or empty.
        /// </returns>
        public string Compress(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return value;
            }

            // UTF-8 rather than a (byte) cast per char: the cast discards the
            // high byte of any character above U+00FF and corrupts the text.
            var textBytes = Encoding.UTF8.GetBytes(value);

            using (var outputStream = new MemoryStream())
            {
                // leaveOpen: true keeps outputStream usable after the GZipStream
                // is disposed. Disposing (not merely flushing) the GZipStream is
                // what writes the final block and the GZip trailer.
                using (var compressionStream = new GZipStream(outputStream, CompressionMode.Compress, true))
                {
                    compressionStream.Write(textBytes, 0, textBytes.Length);
                }

                return ByteArrayToString(outputStream.ToArray());
            }
        }

        /// <summary>
        /// Decompresses a string produced by <see cref="Compress"/>.
        /// </summary>
        /// <param name="value">The compressed string, one character per byte.</param>
        /// <returns>
        /// The original text, or <paramref name="value"/> itself when it is
        /// null or empty.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// <paramref name="value"/> does not contain valid GZip data.
        /// </exception>
        public string Decompress(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return value;
            }

            var compressedBytes = StringToByteArray(value);

            using (var inputStream = new MemoryStream(compressedBytes))
            using (var decompressionStream = new GZipStream(inputStream, CompressionMode.Decompress))
            using (var outputStream = new MemoryStream())
            {
                // Buffered copy instead of one ReadByte call per output byte.
                decompressionStream.CopyTo(outputStream);
                var textBytes = outputStream.ToArray();

                try
                {
                    return StrictUtf8.GetString(textBytes);
                }
                catch (DecoderFallbackException)
                {
                    // Not valid UTF-8: treat it as a payload written by the
                    // earlier implementation, which stored one byte per
                    // character, and decode it the same way that version did.
                    return ByteArrayToString(textBytes);
                }
            }
        }

        /// <summary>
        /// Maps each character of <paramref name="value"/> to a single byte.
        /// Only meaningful for strings built by <see cref="ByteArrayToString"/>,
        /// whose characters are all in the range U+0000 to U+00FF; the high byte
        /// of any other character is discarded by the cast.
        /// </summary>
        private static byte[] StringToByteArray(string value)
        {
            var array = new byte[value.Length];
            for (var i = 0; i < array.Length; i++)
            {
                array[i] = (byte)value[i];
            }

            return array;
        }

        /// <summary>
        /// Builds a string containing one character per byte of
        /// <paramref name="array"/>, each character having the byte's value.
        /// </summary>
        private static string ByteArrayToString(byte[] array)
        {
            var stringBuilder = new StringBuilder(array.Length);
            foreach (var b in array)
            {
                stringBuilder.Append((char)b);
            }

            return stringBuilder.ToString();
        }
    }
}

This can be demonstrated via the following simple test harness:

using System;
using Compression;

namespace TestHarness
{
    /// <summary>
    /// Console demo: compresses a sample paragraph, decompresses it again and
    /// prints all three strings together with their lengths.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Runs the round trip, prints the results and waits for Enter.
        /// </summary>
        public static void Main()
        {
            const string sample = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

            var engine = new Compressor();
            var packed = engine.Compress(sample);
            var unpacked = engine.Decompress(packed);

            ShowText("Text", sample);
            ShowText("CompressedText", packed);
            ShowText("DecompressedText", unpacked);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Writes a heading, the length of the content, the content itself and
        /// a trailing blank line to the console.
        /// </summary>
        private static void ShowText(string heading, string content)
        {
            var length = content.Length;

            Console.WriteLine("{0}:", heading);
            Console.WriteLine("Length: {0}", length);
            Console.WriteLine(content);
            Console.WriteLine();
        }
    }
}

I’m not feeling particularly creative this morning so I opted for “Lorem ipsum” rather than anything witty as my input text.

The output of the program is as follows:

compression

Note that the compressed text is 282 characters in length while the original text is 445 characters.

Extending the Compressor class to be able to compress arrays and other data would be pretty simple too, given that the input strings are converted to arrays before the compression is performed.