Following on from my last post on stripping HTML from text using C#, once I had removed all signs of HTML from the incoming text, I was also required to show a short preview of the text. I originally went with a truncation method, as follows:
namespace ExtensionMethods
{
    /// <summary>
    /// Extension methods for <see cref="string"/>.
    /// </summary>
    public static class StringExtensionMethods
    {
        /// <summary>
        /// Cuts <paramref name="text"/> down to at most
        /// <paramref name="maximumLength"/> characters, regardless of where
        /// in a word the cut lands.
        /// </summary>
        /// <param name="text">The text to shorten; may be null or empty.</param>
        /// <param name="maximumLength">The largest number of characters to keep.</param>
        /// <returns>
        /// <paramref name="text"/> itself when it is null, empty, or no longer
        /// than <paramref name="maximumLength"/>; otherwise its first
        /// <paramref name="maximumLength"/> characters.
        /// </returns>
        public static string Truncate(this string text, int maximumLength)
        {
            // Nothing to cut: null/empty input, or the text already fits.
            if (string.IsNullOrEmpty(text) || text.Length <= maximumLength)
            {
                return text;
            }

            return text.Substring(0, maximumLength);
        }
    }
}
This works, but the results look a little odd if the truncation happens halfway through a word.
Instead, I came up with this method to truncate at the last word break within the allowed number of characters:
using System.Linq;
namespace ExtensionMethods
{
    /// <summary>
    /// Extension methods for <see cref="string"/>.
    /// </summary>
    public static class StringExtensionMethods
    {
        // Characters that, like white space, count as a place where a word ends.
        private static readonly char[] Punctuation = {'.', ',', ';', ':'};

        /// <summary>
        /// Shortens <paramref name="text"/> to at most
        /// <paramref name="maximumLength"/> characters, preferring to cut at
        /// the last white-space or punctuation character inside that limit
        /// so that a word is not split in half.
        /// </summary>
        /// <param name="text">The text to shorten; may be null or empty.</param>
        /// <param name="maximumLength">The largest number of characters to keep.</param>
        /// <returns>
        /// <paramref name="text"/> itself when it is null, empty, or already
        /// short enough. Otherwise the text cut at a word boundary; if no
        /// usable boundary exists within the limit (for example a single very
        /// long word), the text cut at exactly
        /// <paramref name="maximumLength"/> characters.
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="maximumLength"/> is negative and
        /// <paramref name="text"/> is not null or empty.
        /// </exception>
        public static string TruncateAtWordBoundary(this string text, int maximumLength)
        {
            if (string.IsNullOrEmpty(text) || text.Length <= maximumLength)
            {
                return text;
            }

            if (maximumLength < 0)
            {
                // Fail with a descriptive argument error rather than letting
                // the indexer below throw IndexOutOfRangeException.
                throw new System.ArgumentOutOfRangeException(
                    "maximumLength",
                    maximumLength,
                    "Maximum length cannot be negative.");
            }

            var truncated = text.Substring(0, maximumLength);

            // If the first character being dropped is white space or
            // punctuation, the cut already falls exactly at the end of a word.
            if (IsWordBoundary(text[maximumLength]))
            {
                return truncated;
            }

            // Otherwise the cut is mid-word: walk backwards to the last
            // boundary inside the kept text. -1 means "none found".
            var boundaryIndex = -1;
            for (var i = truncated.Length - 1; i >= 0; i--)
            {
                if (IsWordBoundary(truncated[i]))
                {
                    boundaryIndex = i;
                    break;
                }
            }

            var preview = boundaryIndex > 0
                ? truncated.Substring(0, boundaryIndex).Trim()
                : string.Empty;

            // With no boundary (or only leading white space) cutting at the
            // boundary would leave nothing to show, so fall back to the
            // plain hard cut instead of returning an empty preview.
            return preview.Length > 0 ? preview : truncated;
        }

        // True when the character is white space or one of the Punctuation characters.
        private static bool IsWordBoundary(char character)
        {
            return char.IsWhiteSpace(character) || Punctuation.Contains(character);
        }
    }
}
While not perfect, this approach works a lot better. Please feel free to suggest improvements.