///
/// Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
/// *
/// Author: Alexander Gnauck AG-Software, mailto:gnauck@ag-software.de
/// *
/// This file is part of GNU Libidn.
/// *
/// This library is free software; you can redistribute it and/or
/// modify it under the terms of the GNU Lesser General Public License
/// as published by the Free Software Foundation; either version 2.1 of
/// the License, or (at your option) any later version.
/// *
/// This library is distributed in the hope that it will be useful, but
/// WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
/// Lesser General Public License for more details.
/// *
/// You should have received a copy of the GNU Lesser General Public
/// License along with this library; if not, write to the Free Software
/// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
/// USA
///
using System;
using System.Text;
namespace Gnu.Inet.Encoding
{
/// <summary>
/// Implements the ToASCII and ToUnicode operations described in RFC 3490
/// (Internationalizing Domain Names in Applications), both for single
/// labels and for whole domain names made of dot-separated labels.
/// </summary>
public class IDNA
{
    /// <summary>
    /// Prefix that marks a label as ACE (ASCII Compatible Encoding).
    /// </summary>
    public const string ACE_PREFIX = "xn--";

    /// <summary>
    /// Converts a Unicode string to ASCII using the procedure in RFC 3490
    /// section 4.1. Unassigned characters are not allowed and STD3 ASCII
    /// rules are enforced. The input string may be a domain name
    /// containing dots; every label separator (U+002E, U+3002, U+FF0E,
    /// U+FF61) is written to the output as a plain '.'.
    /// </summary>
    /// <param name="input">Unicode string.</param>
    /// <returns>Encoded string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="IDNAException">A label could not be converted.</exception>
    public static string ToASCII(string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        StringBuilder result = new StringBuilder();
        StringBuilder label = new StringBuilder();
        foreach (char c in input)
        {
            if (IsLabelSeparator(c))
            {
                result.Append(ToASCII(label.ToString(), false, true));
                result.Append('.');
                label.Length = 0;
            }
            else
            {
                label.Append(c);
            }
        }

        // The text after the last separator (or the whole input when there
        // is no separator) is the final label.
        result.Append(ToASCII(label.ToString(), false, true));
        return result.ToString();
    }

    /// <summary>
    /// Converts a single Unicode label to ASCII using the procedure in
    /// RFC 3490 section 4.1.
    /// </summary>
    /// <param name="input">Unicode string (one label, no separators).</param>
    /// <param name="allowUnassigned">Unassigned characters, allowed or not?</param>
    /// <param name="useSTD3ASCIIRules">STD3 ASCII rules, enforced or not?</param>
    /// <returns>Encoded string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="IDNAException">
    /// Nameprep or Punycode encoding failed, the label violates the STD3
    /// rules (non-LDH character, leading or trailing hyphen), a non-ASCII
    /// label already starts with the ACE prefix, or the resulting length
    /// is outside 1..63 (reported as <c>IDNAException.TOO_LONG</c>, also
    /// for an empty label).
    /// </exception>
    public static string ToASCII(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        // Step 1: Check if the string contains code points outside
        //         the ASCII range 0..0x7f.
        // Step 2: If so, perform the nameprep operation.
        if (ContainsNonAscii(input))
        {
            try
            {
                input = Stringprep.NamePrep(input, allowUnassigned);
            }
            catch (StringprepException e)
            {
                throw new IDNAException(e);
            }
        }

        // Step 3: - Verify the absence of non-LDH ASCII code points
        //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
        //         - Verify the absence of leading and trailing
        //           hyphen-minus
        if (useSTD3ASCIIRules)
        {
            foreach (char c in input)
            {
                if (IsNonLdhAscii(c))
                {
                    throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
                }
            }

            // Compare chars directly: a culture-sensitive StartsWith/EndsWith
            // may skip ignorable characters and is not an exact match.
            if (input.Length > 0 && (input[0] == '-' || input[input.Length - 1] == '-'))
            {
                throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
            }
        }

        // Step 4: If all code points are inside 0..0x7f, skip to step 8.
        string output = input;
        if (ContainsNonAscii(input))
        {
            // Step 5: Verify that the sequence does not begin with the ACE prefix.
            if (input.StartsWith(ACE_PREFIX, StringComparison.Ordinal))
            {
                throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
            }

            // Step 6: Punycode
            try
            {
                output = Punycode.Encode(input);
            }
            catch (PunycodeException e)
            {
                throw new IDNAException(e);
            }

            // Step 7: Prepend the ACE prefix.
            output = ACE_PREFIX + output;
        }

        // Step 8: Check that the length is inside 1..63.
        if (output.Length < 1 || output.Length > 63)
        {
            throw new IDNAException(IDNAException.TOO_LONG);
        }

        return output;
    }

    /// <summary>
    /// Converts an ASCII-encoded string to Unicode. Unassigned
    /// characters are not allowed and STD3 hostnames are enforced. Input
    /// may be a domain name containing dots. The whole input is lower-cased
    /// (culture-invariant) before it is split, and each label separator is
    /// copied to the output unchanged.
    /// </summary>
    /// <param name="input">ASCII input string.</param>
    /// <returns>Unicode string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static string ToUnicode(string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        // Invariant lower-casing: a culture-sensitive ToLower() would turn
        // 'I' into a dotless i under Turkish cultures and corrupt ASCII names.
        input = input.ToLowerInvariant();

        StringBuilder result = new StringBuilder();
        StringBuilder label = new StringBuilder();
        foreach (char c in input)
        {
            if (IsLabelSeparator(c))
            {
                result.Append(ToUnicode(label.ToString(), false, true));
                result.Append(c);
                label.Length = 0;
            }
            else
            {
                label.Append(c);
            }
        }

        result.Append(ToUnicode(label.ToString(), false, true));
        return result.ToString();
    }

    /// <summary>
    /// Converts a single ASCII-encoded label to Unicode following the steps
    /// of RFC 3490 section 4.2. Any failure along the way makes the method
    /// return the input unchanged instead of throwing.
    /// </summary>
    /// <param name="input">ASCII input string (one label, no separators).</param>
    /// <param name="allowUnassigned">Allow unassigned Unicode characters.</param>
    /// <param name="useSTD3ASCIIRules">Check that the output conforms to STD3.</param>
    /// <returns>
    /// The decoded Unicode string, or the original input when it is not a
    /// valid ACE label.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static string ToUnicode(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        string original = input;

        // Step 1: If all code points are inside 0..0x7f, skip to step 3.
        // Step 2: Otherwise perform the Nameprep operation.
        if (ContainsNonAscii(input))
        {
            try
            {
                input = Stringprep.NamePrep(input, allowUnassigned);
            }
            catch (StringprepException)
            {
                // ToUnicode never fails!
                return original;
            }
        }

        // Step 3: Verify the sequence starts with the ACE prefix.
        if (!input.StartsWith(ACE_PREFIX, StringComparison.Ordinal))
        {
            // ToUnicode never fails!
            return original;
        }

        // Keep the prefixed form for the round-trip comparison in step 7.
        string stored = input;

        // Step 4: Remove the ACE prefix.
        input = input.Substring(ACE_PREFIX.Length);

        // Step 5: Decode using punycode
        string output;
        try
        {
            output = Punycode.Decode(input);
        }
        catch (PunycodeException)
        {
            // ToUnicode never fails!
            return original;
        }

        // Step 6: Apply ToASCII
        string ascii;
        try
        {
            ascii = ToASCII(output, allowUnassigned, useSTD3ASCIIRules);
        }
        catch (IDNAException)
        {
            // ToUnicode never fails!
            return original;
        }

        // Step 7: Compare case-insensitively (ordinal, culture-independent).
        if (!string.Equals(ascii, stored, StringComparison.OrdinalIgnoreCase))
        {
            // ToUnicode never fails!
            return original;
        }

        // Step 8: Return the result.
        return output;
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the label separators
    /// recognised by this class: U+002E, U+3002, U+FF0E or U+FF61.
    /// </summary>
    private static bool IsLabelSeparator(char c)
    {
        return c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61';
    }

    /// <summary>
    /// Tells whether <paramref name="value"/> contains at least one code
    /// unit above 0x7f.
    /// </summary>
    private static bool ContainsNonAscii(string value)
    {
        foreach (char c in value)
        {
            if (c > 0x7f)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> is an ASCII code point that is
    /// not a letter, digit or hyphen-minus (the ranges rejected by step 3
    /// of ToASCII).
    /// </summary>
    private static bool IsNonLdhAscii(char c)
    {
        return c <= 0x2c
            || (c >= 0x2e && c <= 0x2f)
            || (c >= 0x3a && c <= 0x40)
            || (c >= 0x5b && c <= 0x60)
            || (c >= 0x7b && c <= 0x7f);
    }
}
}