///
/// Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
/// *
/// Author: Alexander Gnauck AG-Software, mailto:gnauck@ag-software.de
/// *
/// This file is part of GNU Libidn.
/// *
/// This library is free software; you can redistribute it and/or
/// modify it under the terms of the GNU Lesser General Public License
/// as published by the Free Software Foundation; either version 2.1 of
/// the License, or (at your option) any later version.
/// *
/// This library is distributed in the hope that it will be useful, but
/// WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
/// Lesser General Public License for more details.
/// *
/// You should have received a copy of the GNU Lesser General Public
/// License along with this library; if not, write to the Free Software
/// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
/// USA
///
using System;
using System.Text;
namespace Gnu.Inet.Encoding
{
public class NFKC
{
///
/// Applies NFKC normalization to a string.
///
/// The string to normalize.
/// An NFKC normalized string.
public static string NormalizeNFKC(string sbIn)
{
StringBuilder sbOut = new StringBuilder();
for (int i = 0; i < sbIn.Length; i++)
{
char code = sbIn[i];
// In Unicode 3.0, Hangul was defined as the block from U+AC00
// to U+D7A3, however, since Unicode 3.2 the block extends until
// U+D7AF. The decomposeHangul function only decomposes until
// U+D7A3. Should this be changed?
if (code >= 0xAC00 && code <= 0xD7AF)
{
sbOut.Append(decomposeHangul(code));
}
else
{
int index = decomposeIndex(code);
if (index == - 1)
{
sbOut.Append(code);
}
else
{
sbOut.Append(DecompositionMappings.m[index]);
}
}
}
// Bring the stringbuffer into canonical order.
canonicalOrdering(sbOut);
// Do the canonical composition.
int last_cc = 0;
int last_start = 0;
for (int i = 0; i < sbOut.Length; i++)
{
int cc = combiningClass(sbOut[i]);
if (i > 0 && (last_cc == 0 || last_cc != cc))
{
// Try to combine characters
char a = sbOut[last_start];
char b = sbOut[i];
int c = compose(a, b);
if (c != - 1)
{
sbOut[last_start] = (char) c;
//sbOut.deleteCharAt(i);
sbOut.Remove(i, 1);
i--;
if (i == last_start)
{
last_cc = 0;
}
else
{
last_cc = combiningClass(sbOut[i - 1]);
}
continue;
}
}
if (cc == 0)
{
last_start = i;
}
last_cc = cc;
}
return sbOut.ToString();
}
///
/// Returns the index inside the decomposition table, implemented
/// using a binary search.
///
/// Character to look up.
/// Index if found, -1 otherwise.
internal static int decomposeIndex(char c)
{
int start = 0;
int end = DecompositionKeys.k.Length / 2;
while (true)
{
int half = (start + end) / 2;
int code = DecompositionKeys.k[half * 2];
if (c == code)
{
return DecompositionKeys.k[half * 2 + 1];
}
if (half == start)
{
// Character not found
return - 1;
}
else if (c > code)
{
start = half;
}
else
{
end = half;
}
}
}
///
/// Returns the combining class of a given character.
///
/// The character.
/// The combining class.
internal static int combiningClass(char c)
{
int h = c >> 8;
int l = c & 0xff;
int i = CombiningClass.i[h];
if (i > - 1)
{
return CombiningClass.c[i, l];
}
else
{
return 0;
}
}
///
/// Rearranges characters in a stringbuffer in order to respect the
/// canonical ordering properties.
///
/// StringBuffer to rearrange.
internal static void canonicalOrdering(StringBuilder sbIn)
{
bool isOrdered = false;
while (!isOrdered)
{
isOrdered = true;
// 24.10.2005
int lastCC = 0;
if (sbIn.Length > 0)
lastCC = combiningClass(sbIn[0]);
for (int i = 0; i < sbIn.Length - 1; i++)
{
int nextCC = combiningClass(sbIn[i + 1]);
if (nextCC != 0 && lastCC > nextCC)
{
for (int j = i + 1; j > 0; j--)
{
if (combiningClass(sbIn[j - 1]) <= nextCC)
{
break;
}
char t = sbIn[j];
sbIn[j] = sbIn[j - 1];
sbIn[j - 1] = t;
isOrdered = false;
}
nextCC = lastCC;
}
lastCC = nextCC;
}
}
}
///
/// Returns the index inside the composition table.
///
/// Character to look up.
/// Index if found, -1 otherwise.
internal static int composeIndex(char a)
{
if (a >> 8 >= Composition.composePage.Length)
{
return - 1;
}
int ap = Composition.composePage[a >> 8];
if (ap == - 1)
{
return - 1;
}
return Composition.composeData[ap, a & 0xff];
}
///
/// Tries to compose two characters canonically.
///
/// First character.
/// Second character.
/// The composed character or -1 if no composition could be found.
internal static int compose(char a, char b)
{
int h = composeHangul(a, b);
if (h != - 1)
{
return h;
}
int ai = composeIndex(a);
if (ai >= Composition.singleFirstStart && ai < Composition.singleSecondStart)
{
if (b == Composition.singleFirst[ai - Composition.singleFirstStart, 0])
{
return Composition.singleFirst[ai - Composition.singleFirstStart, 1];
}
else
{
return - 1;
}
}
int bi = composeIndex(b);
if (bi >= Composition.singleSecondStart)
{
if (a == Composition.singleSecond[bi - Composition.singleSecondStart,0])
{
return Composition.singleSecond[bi - Composition.singleSecondStart,1];
}
else
{
return - 1;
}
}
if (ai >= 0 && ai < Composition.multiSecondStart && bi >= Composition.multiSecondStart && bi < Composition.singleFirstStart)
{
char[] f = Composition.multiFirst[ai];
if (bi - Composition.multiSecondStart < f.Length)
{
char r = f[bi - Composition.multiSecondStart];
if (r == 0)
{
return - 1;
}
else
{
return r;
}
}
}
return - 1;
}
///
/// Entire hangul code copied from:
/// http://www.unicode.org/unicode/reports/tr15/
/// Several hangul specific constants
///
internal const int SBase = 0xAC00;
internal const int LBase = 0x1100;
internal const int VBase = 0x1161;
internal const int TBase = 0x11A7;
internal const int LCount = 19;
internal const int VCount = 21;
internal const int TCount = 28;
internal static readonly int NCount = VCount * TCount;
internal static readonly int SCount = LCount * NCount;
///
/// Decomposes a hangul character.
///
/// A character to decompose.
/// A string containing the hangul decomposition of the input
/// character. If no hangul decomposition can be found, a string
/// containing the character itself is returned.
internal static string decomposeHangul(char s)
{
int SIndex = s - SBase;
if (SIndex < 0 || SIndex >= SCount)
{
return s.ToString();
}
StringBuilder result = new StringBuilder();
int L = LBase + SIndex / NCount;
int V = VBase + (SIndex % NCount) / TCount;
int T = TBase + SIndex % TCount;
result.Append((char) L);
result.Append((char) V);
if (T != TBase)
result.Append((char) T);
return result.ToString();
}
///
/// Composes two hangul characters.
///
/// First character.
/// Second character.
/// Returns the composed character or -1 if the two characters cannot be composed.
internal static int composeHangul(char a, char b)
{
// 1. check to see if two current characters are L and V
int LIndex = a - LBase;
if (0 <= LIndex && LIndex < LCount)
{
int VIndex = b - VBase;
if (0 <= VIndex && VIndex < VCount)
{
// make syllable of form LV
return SBase + (LIndex * VCount + VIndex) * TCount;
}
}
// 2. check to see if two current characters are LV and T
int SIndex = a - SBase;
if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0)
{
int TIndex = b - TBase;
if (0 <= TIndex && TIndex <= TCount)
{
// make syllable of form LVT
return a + TIndex;
}
}
return - 1;
}
}
}