summaryrefslogtreecommitdiff
path: root/src/mscorlib/src/System/Globalization/IdnMapping.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/src/System/Globalization/IdnMapping.cs')
-rw-r--r--src/mscorlib/src/System/Globalization/IdnMapping.cs1189
1 files changed, 1189 insertions, 0 deletions
diff --git a/src/mscorlib/src/System/Globalization/IdnMapping.cs b/src/mscorlib/src/System/Globalization/IdnMapping.cs
new file mode 100644
index 0000000000..599a32ad87
--- /dev/null
+++ b/src/mscorlib/src/System/Globalization/IdnMapping.cs
@@ -0,0 +1,1189 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file contains the IDN functions and implementation.
+//
+// This allows encoding of non-ASCII domain names in a "punycode" form,
+// for example:
+//
+// \u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS
+//
+// is encoded as:
+//
+// xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
+//
+// Additional options are provided to allow unassigned IDN characters and
+// to validate according to the Std3ASCII Rules (like DNS names).
+//
+// There are also rules regarding bidirectionality of text and the length
+// of segments.
+//
+// For additional rules see also:
+// RFC 3490 - Internationalizing Domain Names in Applications (IDNA)
+// RFC 3491 - Nameprep: A Stringprep Profile for Internationalized Domain Names (IDN)
+// RFC 3492 - Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
+//
+
+/*
+
+The punycode implementation is based on the sample code in RFC 3492
+
+Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+This document and translations of it may be copied and furnished to
+others, and derivative works that comment on or otherwise explain it
+or assist in its implementation may be prepared, copied, published
+and distributed, in whole or in part, without restriction of any
+kind, provided that the above copyright notice and this paragraph are
+included on all such copies and derivative works. However, this
+document itself may not be modified in any way, such as by removing
+the copyright notice or references to the Internet Society or other
+Internet organizations, except as needed for the purpose of
+developing Internet standards in which case the procedures for
+copyrights defined in the Internet Standards process must be
+followed, or as required to translate it into languages other than
+English.
+
+The limited permissions granted above are perpetual and will not be
+revoked by the Internet Society or its successors or assigns.
+
+This document and the information contained herein is provided on an
+"AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+namespace System.Globalization
+{
+ using System;
+ using System.Security;
+ using System.Globalization;
+ using System.Text;
+ using System.Runtime.Versioning;
+ using System.Runtime.InteropServices;
+ using System.Diagnostics.Contracts;
+
+ // IdnMapping class used to map names to Punycode
+
+ public sealed class IdnMapping
+ {
+ // Legal name lengths for domain names
+ // M_labelLimit bounds a single label (the text between two dots);
+ // M_defaultNameLimit bounds the whole name.
+ const int M_labelLimit = 63; // Not including dots
+ const int M_defaultNameLimit = 255; // Including dots
+
+ // IDNA prefix that marks a label as ACE (punycode) encoded
+ const String M_strAcePrefix = "xn--";
+
+ // Legal "dot" separators (i.e: . in www.microsoft.com):
+ // U+002E (full stop), U+3002 (ideographic full stop),
+ // U+FF0E (fullwidth full stop) and U+FF61 (halfwidth ideographic full stop).
+ static char[] M_Dots =
+ {
+ '.', '\u3002', '\uFF0E', '\uFF61'
+ };
+
+ // Backing fields for the AllowUnassigned and UseStd3AsciiRules properties
+ bool m_bAllowUnassigned;
+ bool m_bUseStd3AsciiRules;
+
+        // Creates a mapping whose AllowUnassigned and UseStd3AsciiRules options
+        // both start out as false (the default value of their backing fields).
+        public IdnMapping() { }
+
+        // Gets or sets the option that, when true, adds the IDN_ALLOW_UNASSIGNED
+        // flag to the OS conversion calls made by this instance.
+        public bool AllowUnassigned
+        {
+            get { return m_bAllowUnassigned; }
+            set { m_bAllowUnassigned = value; }
+        }
+
+        // Gets or sets the option that, when true, adds the IDN_USE_STD3_ASCII_RULES
+        // flag to the OS conversion calls made by this instance.
+        public bool UseStd3AsciiRules
+        {
+            get { return m_bUseStd3AsciiRules; }
+            set { m_bUseStd3AsciiRules = value; }
+        }
+
+        // Gets the ASCII (Punycode) version of the entire string.
+        // Equivalent to calling the (unicode, index) overload with index 0.
+        public String GetAscii(String unicode)
+        {
+            const int startIndex = 0;
+            return GetAscii(unicode, startIndex);
+        }
+
+        // Gets the ASCII (Punycode) version of the part of the string that starts
+        // at index and runs to the end of the string.
+        // Throws ArgumentNullException when unicode is null.
+        public String GetAscii(String unicode, int index)
+        {
+            if (unicode == null)
+            {
+                throw new ArgumentNullException("unicode");
+            }
+            Contract.EndContractBlock();
+
+            int count = unicode.Length - index;
+            return GetAscii(unicode, index, count);
+        }
+
+ // Gets the ASCII (Punycode) version of the count characters of unicode starting at index.
+ //
+ // NOTE: in this revision the method is a stub. Its only executable statement is
+ // `throw null`, which raises a NullReferenceException for every call, before any
+ // argument validation happens. The other GetAscii overloads forward here, and so does
+ // the round-trip check in GetUnicode(String, int, int) when the OS path is not taken.
+ // The intended implementation is kept below inside a block comment and is not compiled.
+ public String GetAscii(String unicode, int index, int count)
+ {
+ throw null;
+ /*if (unicode==null) throw new ArgumentNullException("unicode");
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0) ? "index" : "count",
+ Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
+ if (index > unicode.Length)
+ throw new ArgumentOutOfRangeException("byteIndex",
+ Environment.GetResourceString("ArgumentOutOfRange_Index"));
+ if (index > unicode.Length - count)
+ throw new ArgumentOutOfRangeException("unicode",
+ Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
+ Contract.EndContractBlock();
+
+ // We're only using part of the string
+ unicode = unicode.Substring(index, count);
+
+ if (Environment.IsWindows8OrAbove)
+ {
+ return GetAsciiUsingOS(unicode);
+ }
+
+ // Check for ASCII only string, which will be unchanged
+ if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true))
+ {
+ return unicode;
+ }
+
+ // Cannot be null terminated (normalization won't help us with this one, and
+ // may have returned false before checking the whole string above)
+ Contract.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii]Expected 0 length strings to fail before now.");
+ if (unicode[unicode.Length - 1] <= 0x1f)
+ {
+ throw new ArgumentException(
+ Environment.GetResourceString("Argument_InvalidCharSequence", unicode.Length-1 ),
+ "unicode");
+ }
+
+ // Have to correctly IDNA normalize the string and Unassigned flags
+ bool bHasLastDot = (unicode.Length > 0) && IsDot(unicode[unicode.Length - 1]);
+ unicode = unicode.Normalize((NormalizationForm)(m_bAllowUnassigned ?
+ ExtendedNormalizationForms.FormIdna : ExtendedNormalizationForms.FormIdnaDisallowUnassigned));
+
+ // Make sure we didn't normalize away something after a last dot
+ if ((!bHasLastDot) && unicode.Length > 0 && IsDot(unicode[unicode.Length - 1]))
+ {
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "unicode");
+ }
+
+ // May need to check Std3 rules again for non-ascii
+ if (UseStd3AsciiRules)
+ {
+ ValidateStd3AndAscii(unicode, true, false);
+ }
+
+ // Go ahead and encode it
+ return punycode_encode(unicode);*/
+ }
+
+
+        // Converts the string to its ASCII (Punycode) form through IdnToAscii.
+        // The API is called twice: first with a null buffer and size 0, whose return
+        // value is used to size the output buffer, then again to fill that buffer.
+        // Throws ArgumentException for an empty string, a string ending in a null
+        // character, or when either IdnToAscii call returns 0.
+        [System.Security.SecuritySafeCritical]
+        private String GetAsciiUsingOS(String unicode)
+        {
+            int cchInput = unicode.Length;
+
+            // Empty names are reported as a bad label size
+            if (cchInput == 0)
+            {
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnBadLabelSize"), "unicode");
+            }
+
+            // A terminating null is rejected; its index is part of the message
+            int iLast = cchInput - 1;
+            if (unicode[iLast] == 0)
+            {
+                throw new ArgumentException(
+                    Environment.GetResourceString("Argument_InvalidCharSequence", iLast),
+                    "unicode");
+            }
+
+            uint flags = (uint) ((AllowUnassigned ? IDN_ALLOW_UNASSIGNED : 0) | (UseStd3AsciiRules ? IDN_USE_STD3_ASCII_RULES : 0));
+
+            // Pass 1: no buffer, just obtain the size
+            int cchNeeded = IdnToAscii(flags, unicode, cchInput, null, 0);
+            if (cchNeeded == 0)
+            {
+                // Read the Win32 error right after the failing call
+                if (Marshal.GetLastWin32Error() == ERROR_INVALID_NAME)
+                {
+                    throw new ArgumentException(Environment.GetResourceString("Argument_IdnIllegalName"), "unicode");
+                }
+
+                throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"), "unicode");
+            }
+
+            // Pass 2: perform the conversion into a buffer of the reported size
+            char[] buffer = new char[cchNeeded];
+            int cchWritten = IdnToAscii(flags, unicode, cchInput, buffer, cchNeeded);
+            if (cchWritten == 0)
+            {
+                if (Marshal.GetLastWin32Error() == ERROR_INVALID_NAME)
+                {
+                    throw new ArgumentException(Environment.GetResourceString("Argument_IdnIllegalName"), "unicode");
+                }
+
+                throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"), "unicode");
+            }
+
+            return new String(buffer, 0, cchWritten);
+        }
+
+        // Gets the Unicode version of the entire string.
+        // Equivalent to calling the (ascii, index) overload with index 0.
+        public String GetUnicode(String ascii)
+        {
+            const int startIndex = 0;
+            return GetUnicode(ascii, startIndex);
+        }
+
+        // Gets the Unicode version of the part of the string that starts at index
+        // and runs to the end of the string.
+        // Throws ArgumentNullException when ascii is null.
+        public String GetUnicode(String ascii, int index)
+        {
+            if (ascii == null)
+            {
+                throw new ArgumentNullException("ascii");
+            }
+            Contract.EndContractBlock();
+
+            int count = ascii.Length - index;
+            return GetUnicode(ascii, index, count);
+        }
+
+        // Gets the Unicode version of the count characters of ascii starting at index.
+        //
+        // Throws:
+        //   ArgumentNullException       - ascii is null
+        //   ArgumentOutOfRangeException - index or count is negative, index is past the end
+        //                                 of the string, or index + count exceeds its length
+        //   ArgumentException           - the selected range ends with a null character, the
+        //                                 punycode cannot be decoded, or the decoded name does
+        //                                 not round-trip back to the input through GetAscii
+        public String GetUnicode(String ascii, int index, int count)
+        {
+            if (ascii==null) throw new ArgumentNullException("ascii");
+            if (index < 0 || count < 0)
+                throw new ArgumentOutOfRangeException((index < 0) ? "index" : "count",
+                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
+            // Report the real parameter name ("index"); this method has no "byteIndex" parameter
+            if (index > ascii.Length)
+                throw new ArgumentOutOfRangeException("index",
+                    Environment.GetResourceString("ArgumentOutOfRange_Index"));
+            if (index > ascii.Length - count)
+                throw new ArgumentOutOfRangeException("ascii",
+                    Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
+
+            // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ.
+            // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null.
+            // The Win32 APIs fail on an embedded null, but not on a terminating null.
+            // ArgumentException takes (message, paramName) in that order.
+            if (count > 0 && ascii[index + count - 1] == (char)0)
+                throw new ArgumentException(
+                    Environment.GetResourceString("Argument_IdnBadPunycode"), "ascii");
+            Contract.EndContractBlock();
+
+            // We're only using part of the string
+            ascii = ascii.Substring(index, count);
+
+            if (Environment.IsWindows8OrAbove)
+            {
+                return GetUnicodeUsingOS(ascii);
+            }
+
+            // Convert Punycode to Unicode
+            String strUnicode = punycode_decode(ascii);
+
+            // Output name MUST obey IDNA rules & round trip (casing differences are allowed)
+            if (!ascii.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase))
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnIllegalName"), "ascii");
+
+            return strUnicode;
+        }
+
+
+        // Converts the string to its Unicode form through IdnToUnicode.
+        // The API is called twice: first with a null buffer and size 0, whose return
+        // value is used to size the output buffer, then again to fill that buffer.
+        // Throws ArgumentException when either call returns 0.
+        [System.Security.SecuritySafeCritical]
+        private string GetUnicodeUsingOS(string ascii)
+        {
+            int cchInput = ascii.Length;
+            uint flags = (uint)((AllowUnassigned ? IDN_ALLOW_UNASSIGNED : 0) | (UseStd3AsciiRules ? IDN_USE_STD3_ASCII_RULES : 0));
+
+            // Pass 1: no buffer, just obtain the size
+            int cchNeeded = IdnToUnicode(flags, ascii, cchInput, null, 0);
+            if (cchNeeded == 0)
+            {
+                // Read the Win32 error right after the failing call
+                if (Marshal.GetLastWin32Error() == ERROR_INVALID_NAME)
+                {
+                    throw new ArgumentException(Environment.GetResourceString("Argument_IdnIllegalName"), "ascii");
+                }
+
+                throw new ArgumentException(Environment.GetResourceString("Argument_IdnBadPunycode"), "ascii");
+            }
+
+            // Pass 2: perform the conversion into a buffer of the reported size
+            char[] buffer = new char[cchNeeded];
+            int cchWritten = IdnToUnicode(flags, ascii, cchInput, buffer, cchNeeded);
+            if (cchWritten == 0)
+            {
+                if (Marshal.GetLastWin32Error() == ERROR_INVALID_NAME)
+                {
+                    throw new ArgumentException(Environment.GetResourceString("Argument_IdnIllegalName"), "ascii");
+                }
+
+                throw new ArgumentException(Environment.GetResourceString("Argument_IdnBadPunycode"), "ascii");
+            }
+
+            return new String(buffer, 0, cchWritten);
+        }
+
+        // Two IdnMapping instances are equal when both of their option flags match.
+        // Anything that is not an IdnMapping (including null) compares unequal.
+        public override bool Equals(Object obj)
+        {
+            IdnMapping other = obj as IdnMapping;
+            if (other == null)
+            {
+                return false;
+            }
+
+            return m_bAllowUnassigned == other.m_bAllowUnassigned
+                && m_bUseStd3AsciiRules == other.m_bUseStd3AsciiRules;
+        }
+
+        // Hash derived only from the two option flags, so equal instances hash alike.
+        // Possible values: 1100, 1200, 2100, 2200.
+        public override int GetHashCode()
+        {
+            int hash = m_bAllowUnassigned ? 100 : 200;
+            hash += m_bUseStd3AsciiRules ? 1000 : 2000;
+            return hash;
+        }
+
+        // Helpers
+
+        // True when the code point lies above U+FFFF, i.e. is 0x10000 or greater.
+        static bool IsSupplementary(int cTest)
+        {
+            return cTest > 0xFFFF;
+        }
+
+        // Is it a dot?
+        // Recognizes the four label separators: U+002E (full stop), U+3002 (ideographic
+        // full stop), U+FF0E (fullwidth full stop) and U+FF61 (halfwidth ideographic full stop).
+        // Note: IDNA Normalization gets rid of dots now, but testing for last dot is before normalization
+        static bool IsDot(char c)
+        {
+            switch (c)
+            {
+                case '.':
+                case '\u3002':
+                case '\uFF0E':
+                case '\uFF61':
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+
+
+        // Validates label/name lengths and (optionally) the Std3 ASCII rules, and reports
+        // whether the string is ASCII-only.
+        //
+        //   unicode     - the name to check; must not be empty
+        //   bUseStd3    - when true, characters are additionally checked with ValidateStd3
+        //   bCheckAscii - when true, scanning stops and false is returned at the first
+        //                 character >= 0x7f (the rest of the string is then NOT validated)
+        //
+        // Returns true when the whole string was scanned without an early "non-ASCII" exit.
+        // Throws ArgumentException for an empty string, a control character (<= 0x1f),
+        // an empty or over-long label, an over-long name, or a Std3 violation.
+        static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCheckAscii)
+        {
+            // If its empty, then its too small
+            if (unicode.Length == 0)
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnBadLabelSize"), "unicode");
+            Contract.EndContractBlock();
+
+            int iLastDot = -1;
+
+            // Loop the whole string
+            for (int i = 0; i < unicode.Length; i++)
+            {
+                // Aren't allowing control chars (or 7f, but idn tables catch that, they don't catch \0 at end though)
+                if (unicode[i] <= 0x1f)
+                {
+                    throw new ArgumentException(
+                        Environment.GetResourceString("Argument_InvalidCharSequence", i ),
+                        "unicode");
+                }
+
+                // If its Unicode or a control character, return false (non-ascii)
+                if (bCheckAscii && unicode[i] >= 0x7f)
+                    return false;
+
+                // Check for dots
+                if (IsDot(unicode[i]))
+                {
+                    // Can't have 2 dots in a row
+                    if (i == iLastDot + 1)
+                        throw new ArgumentException(Environment.GetResourceString(
+                            "Argument_IdnBadLabelSize"), "unicode");
+
+                    // If its too far between dots then fail
+                    // (parameter name spelled "unicode" like every other throw in this method)
+                    if (i - iLastDot > M_labelLimit + 1)
+                        throw new ArgumentException(Environment.GetResourceString(
+                            "Argument_IdnBadLabelSize"), "unicode");
+
+                    // If validating Std3, then char before dot can't be - char
+                    if (bUseStd3 && i > 0)
+                        ValidateStd3(unicode[i-1], true);
+
+                    // Remember where the last dot is
+                    iLastDot = i;
+                    continue;
+                }
+
+                // If necessary, make sure its a valid std3 character
+                if (bUseStd3)
+                {
+                    ValidateStd3(unicode[i], (i == iLastDot + 1));
+                }
+            }
+
+            // If we never had a dot, then we need to be shorter than the label limit
+            if (iLastDot == -1 && unicode.Length > M_labelLimit)
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnBadLabelSize"), "unicode");
+
+            // Need to validate entire string length, 1 shorter if last char wasn't a dot
+            bool bEndsWithDot = IsDot(unicode[unicode.Length-1]);
+            int nameLimit = M_defaultNameLimit - (bEndsWithDot ? 0 : 1);
+            if (unicode.Length > nameLimit)
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnBadNameSize", nameLimit),
+                    "unicode");
+
+            // If last char wasn't a dot we need to check for trailing -
+            if (bUseStd3 && !bEndsWithDot)
+                ValidateStd3(unicode[unicode.Length-1], true);
+
+            return true;
+        }
+
+        // Validate Std3 rules for a character.
+        //
+        //   c          - the character to check
+        //   bNextToDot - true when c sits at the start or end of a label; '-' is
+        //                rejected in that position
+        //
+        // Throws ArgumentException when c is <= ',', is '/', lies in ':'..'@', '['..'`'
+        // or '{'..0x7F, or is a '-' next to a dot. The exception names the "unicode"
+        // parameter, matching the spelling used by the other validation errors in this file.
+        static void ValidateStd3(char c, bool bNextToDot)
+        {
+            // Check for illegal characters
+            if ((c <= ',' || c == '/' || (c >= ':' && c <= '@') || // Lots of characters not allowed
+                (c >= '[' && c <= '`') || (c >= '{' && c <= (char)0x7F)) ||
+                (c == '-' && bNextToDot))
+                throw new ArgumentException(Environment.GetResourceString(
+                    "Argument_IdnBadStd3", c), "unicode");
+        }
+
+ //
+ // The following punycode implementation is ported from the sample punycode.c in RFC 3492
+ // Original sample code was written by Adam M. Costello.
+ //
+
+        // Return whether a punycode code point is flagged as being upper case,
+        // i.e. whether it is one of the ASCII letters 'A'..'Z'.
+        static bool HasUpperCaseFlag(char punychar)
+        {
+            return !(punychar < 'A' || punychar > 'Z');
+        }
+
+
+ /**********************************************************/
+ /* Implementation (would normally go in its own .c file): */
+
+ /*** Bootstring parameters for Punycode ***/
+ // punycodeBase: number of digit values (encode_digit/decode_digit work in 0..punycodeBase-1)
+ // tmin / tmax:  lower and upper clamp of the per-digit threshold t in the encode/decode loops
+ // skew / damp:  scaling constants used by adapt() when computing the next bias
+ // initial_bias: starting value of bias for each label
+ // initial_n:    starting value of n for each label (0x80, the first non-basic code point)
+ // delimiter:    separates the copied basic code points from the encoded part of a label
+ const int punycodeBase = 36;
+ const int tmin = 1;
+ const int tmax = 26;
+ const int skew = 38;
+ const int damp = 700;
+ const int initial_bias = 72;
+ const int initial_n = 0x80;
+ const char delimiter = '-';
+
+        /* basic(cp) tests whether cp is a basic code point, */
+        /* i.e. whether it falls in the ASCII range 0x00..0x7F. */
+        static bool basic(uint cp)
+        {
+            return cp <= 0x7F;
+        }
+
+        // decode_digit(cp) returns the numeric value of a basic code point
+        // (for use in representing integers) in the range 0 to punycodeBase-1:
+        // letters of either case map to 0-25 and the digits '0'-'9' map to 26-35.
+        // Any other character throws an ArgumentException.
+        static int decode_digit(char cp)
+        {
+            // Lower case letters
+            if (cp >= 'a' && cp <= 'z')
+            {
+                return cp - 'a';
+            }
+
+            // Upper case letters carry the same value as lower case ones
+            if (cp >= 'A' && cp <= 'Z')
+            {
+                return cp - 'A';
+            }
+
+            // Decimal digits follow the letters
+            if (cp >= '0' && cp <= '9')
+            {
+                return 26 + (cp - '0');
+            }
+
+            // Expected 0-9, A-Z or a-z, everything else is illegal
+            throw new ArgumentException(Environment.GetResourceString(
+                "Argument_IdnBadPunycode"), "ascii");
+        }
+
+        /* encode_digit(d) returns the basic code point whose value */
+        /* (when used for representing integers) is d, which needs to be in */
+        /* the range 0 to punycodeBase-1. Letters are always produced in */
+        /* their lowercase form. */
+
+        static char encode_digit(int d)
+        {
+            Contract.Assert(d >= 0 && d < punycodeBase, "[IdnMapping.encode_digit]Expected 0 <= d < punycodeBase");
+
+            // 0-25 map to a-z
+            if (d <= 25)
+            {
+                return (char)('a' + d);
+            }
+
+            // 26-35 map to ASCII 0-9
+            return (char)('0' + (d - 26));
+        }
+
+
+
+        /* encode_basic(bcp) forces a basic code point to lowercase and */
+        /* returns the resulting code point. Only 'A'..'Z' are changed; */
+        /* every other code point is returned as it was passed in. */
+
+        static char encode_basic(char bcp)
+        {
+            return HasUpperCaseFlag(bcp) ? (char)(bcp + ('a' - 'A')) : bcp;
+        }
+
+ /*** Platform-specific constants ***/
+
+ /* maxint is the upper bound used by the overflow checks in the punycode */
+ /* encoder and decoder, and the starting value when searching for the */
+ /* next code point to encode. */
+ /* NOTE: the literal is 0x7ffffff (seven hex digits, 134217727), which is */
+ /* smaller than Int32.MaxValue (0x7fffffff); it is not the maximum value */
+ /* of an int or uint. */
+ const int maxint = 0x7ffffff;
+
+        /*** Bias adaptation function ***/
+
+        // Computes the bias to use for the next delta.
+        //   delta     - the delta that was just encoded/decoded
+        //   numpoints - number of code points handled so far (must be non-zero)
+        //   firsttime - true for the first delta of a label, which is divided by damp
+        //               instead of by 2
+        static int adapt(
+            int delta, int numpoints, bool firsttime )
+        {
+            // Scale delta down: heavily on the first call, halved afterwards
+            if (firsttime)
+            {
+                delta /= damp;
+            }
+            else
+            {
+                delta /= 2;
+            }
+
+            Contract.Assert(numpoints != 0, "[IdnMapping.adapt]Expected non-zero numpoints.");
+            delta += delta / numpoints;
+
+            // Repeatedly divide delta until it drops to the threshold, counting
+            // punycodeBase for every division performed
+            const int threshold = ((punycodeBase - tmin) * tmax) / 2;
+            uint k = 0;
+            while (delta > threshold)
+            {
+                delta /= punycodeBase - tmin;
+                k += punycodeBase;
+            }
+
+            Contract.Assert(delta + skew != 0, "[IdnMapping.adapt]Expected non-zero delta+skew.");
+            return (int)(k + (punycodeBase - tmin + 1) * delta / (delta + skew));
+        }
+
+ /*** Main encode function ***/
+
+ /* punycode_encode() converts Unicode to Punycode. The input */
+ /* is represented as an array of Unicode code points (not code */
+ /* units; surrogate pairs are not allowed), and the output */
+ /* will be represented as an array of ASCII code points. The */
+ /* output string is *not* null-terminated; it will contain */
+ /* zeros if and only if the input contains zeros. (Of course */
+ /* the caller can leave room for a terminator and add one if */
+ /* needed.) The input_length is the number of code points in */
+ /* the input. The output_length is an in/out argument: the */
+ /* caller passes in the maximum number of code points that it */
+
+ /* can receive, and on successful return it will contain the */
+ /* number of code points actually output. The case_flags array */
+ /* holds input_length boolean values, where nonzero suggests that */
+ /* the corresponding Unicode character be forced to uppercase */
+ /* after being decoded (if possible), and zero suggests that */
+ /* it be forced to lowercase (if possible). ASCII code points */
+ /* are encoded literally, except that ASCII letters are forced */
+ /* to uppercase or lowercase according to the corresponding */
+ /* uppercase flags. If case_flags is a null pointer then ASCII */
+ /* letters are left as they are, and other code points are */
+ /* treated as if their uppercase flags were zero. The return */
+ /* value can be any of the punycode_status values defined above */
+ /* except punycode_bad_input; if not punycode_success, then */
+ /* output_size and output might contain garbage. */
+
+ // Encodes a (dot-separated) name, one label at a time, and returns the result as a string.
+ //
+ // For every label:
+ //  * the basic (< 0x80) characters are copied first, lower-cased by encode_basic;
+ //  * a label made only of basic characters is emitted without the "xn--" prefix;
+ //  * otherwise the label is emitted as "xn--" + basic characters + '-' (only when there
+ //    were basic characters) + the variable-length-integer encoding of the remaining
+ //    code points, taken in increasing code point order.
+ // Any of the separators in M_Dots ends a label; the output always uses '.'.
+ //
+ // Throws ArgumentException when:
+ //  * the input is empty or contains an empty label other than a trailing one;
+ //  * a label mixes right-to-left and left-to-right characters, or starts with a
+ //    right-to-left character without also ending with one;
+ //  * a label containing non-basic characters already starts with "xn--";
+ //  * an encoded label is longer than M_labelLimit or the whole output is too long.
+ static String punycode_encode(String unicode)
+ {
+ // 0 length strings aren't allowed
+ if (unicode.Length == 0)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "unicode");
+ Contract.EndContractBlock();
+
+ StringBuilder output = new StringBuilder(unicode.Length);
+ // iNextDot: index of the separator ending the current label (or unicode.Length)
+ // iAfterLastDot: index of the first character of the current label
+ // iOutputAfterLastDot: index in output where the current label starts
+ int iNextDot = 0;
+ int iAfterLastDot = 0;
+ int iOutputAfterLastDot = 0;
+
+ // Find the next dot
+ while (iNextDot < unicode.Length)
+ {
+ // Find end of this segment
+ iNextDot = unicode.IndexOfAny(M_Dots, iAfterLastDot);
+ Contract.Assert(iNextDot <= unicode.Length, "[IdnMapping.punycode_encode]IndexOfAny is broken");
+ if (iNextDot < 0)
+ iNextDot = unicode.Length;
+
+ // Only allowed to have empty . section at end (www.microsoft.com.)
+ if (iNextDot == iAfterLastDot)
+ {
+ // Only allowed to have empty sections as trailing .
+ if (iNextDot != unicode.Length)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "unicode");
+ // Last dot, stop
+ break;
+ }
+
+ // We'll need an Ace prefix
+ // (it is appended optimistically and removed again below for all-basic labels)
+ output.Append(M_strAcePrefix);
+
+ // Everything resets every segment.
+ bool bRightToLeft = false;
+
+ // Check for RTL. If right-to-left, then 1st & last chars must be RTL
+ BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iAfterLastDot);
+ if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
+ {
+ // It has to be right to left.
+ bRightToLeft = true;
+
+ // Check last char
+ // (step back onto the high surrogate when the label ends with a surrogate pair)
+ int iTest = iNextDot - 1;
+ if (Char.IsLowSurrogate(unicode, iTest))
+ {
+ iTest--;
+ }
+
+ eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iTest);
+ if (eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
+ {
+ // Oops, last wasn't RTL, last should be RTL if first is RTL
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadBidi"), "unicode");
+ }
+ }
+
+ // Handle the basic code points
+ // NOTE: basicCount is used as an index into unicode here, not as a count
+ int basicCount;
+ int numProcessed = 0; // Num code points that have been processed so far (this segment)
+ for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++)
+ {
+ // Can't be lonely surrogate because it would've thrown in normalization
+ Contract.Assert(Char.IsLowSurrogate(unicode, basicCount) == false,
+ "[IdnMapping.punycode_encode]Unexpected low surrogate");
+
+ // Double check our bidi rules
+ BidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount);
+
+ // If we're RTL, we can't have LTR chars
+ if (bRightToLeft && testBidi == BidiCategory.LeftToRight)
+ {
+ // Oops, throw error
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadBidi"), "unicode");
+ }
+
+ // If we're not RTL we can't have RTL chars
+ if (!bRightToLeft && (testBidi == BidiCategory.RightToLeft ||
+ testBidi == BidiCategory.RightToLeftArabic))
+ {
+ // Oops, throw error
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadBidi"), "unicode");
+ }
+
+ // If its basic then add it
+ if (basic(unicode[basicCount]))
+ {
+ output.Append(encode_basic(unicode[basicCount]));
+ numProcessed++;
+ }
+ // If its a surrogate, skip the next since our bidi category tester doesn't handle it.
+ else if (Char.IsSurrogatePair(unicode, basicCount))
+ basicCount++;
+ }
+
+ int numBasicCodePoints = numProcessed; // number of basic code points
+
+ // Stop if we ONLY had basic code points
+ if (numBasicCodePoints == iNextDot - iAfterLastDot)
+ {
+ // Get rid of xn-- and this segments done
+ output.Remove(iOutputAfterLastDot, M_strAcePrefix.Length);
+ }
+ else
+ {
+ // If it has some non-basic code points the input cannot start with xn--
+ if (unicode.Length - iAfterLastDot >= M_strAcePrefix.Length &&
+ unicode.Substring(iAfterLastDot, M_strAcePrefix.Length).Equals(
+ M_strAcePrefix, StringComparison.OrdinalIgnoreCase))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "unicode");
+
+ // Need to do ACE encoding
+ int numSurrogatePairs = 0; // number of surrogate pairs so far
+
+ // Add a delimiter (-) if we had any basic code points (between basic and encoded pieces)
+ if (numBasicCodePoints > 0)
+ {
+ output.Append(delimiter);
+ }
+
+ // Initialize the state
+ int n = initial_n;
+ int delta = 0;
+ int bias = initial_bias;
+
+ // Main loop
+ // numProcessed counts UTF-16 code units (a surrogate pair adds 2), so it is
+ // compared against the label length in chars; numSurrogatePairs converts it
+ // back to a code point count where one is needed.
+ while (numProcessed < (iNextDot - iAfterLastDot))
+ {
+ /* All non-basic code points < n have been */
+ /* handled already. Find the next larger one: */
+ int j;
+ int m;
+ int test = 0;
+ for (m = maxint, j = iAfterLastDot;
+ j < iNextDot;
+ j += IsSupplementary(test) ? 2 : 1)
+ {
+ test = Char.ConvertToUtf32(unicode, j);
+ if (test >= n && test < m) m = test;
+ }
+
+ /* Increase delta enough to advance the decoder's */
+ /* <n,i> state to <m,0>, but guard against overflow: */
+ // NOTE: overflow is only checked by the Contract.Assert calls below
+ delta += (int)((m - n) * ((numProcessed - numSurrogatePairs) + 1));
+ Contract.Assert(delta > 0, "[IdnMapping.cs]1 punycode_encode - delta overflowed int");
+ n = m;
+
+ for (j = iAfterLastDot; j < iNextDot; j+= IsSupplementary(test) ? 2 : 1)
+ {
+ // Make sure we're aware of surrogates
+ test = Char.ConvertToUtf32(unicode, j);
+
+ // Adjust for character position (only the chars in our string already, some
+ // haven't been processed.
+
+ if (test < n)
+ {
+ delta++;
+ Contract.Assert(delta > 0, "[IdnMapping.cs]2 punycode_encode - delta overflowed int");
+ }
+
+ if (test == n)
+ {
+ // Represent delta as a generalized variable-length integer:
+ int q, k;
+ for (q = delta, k = punycodeBase; ; k += punycodeBase)
+ {
+ // t is the threshold for this digit position, clamped to tmin..tmax
+ int t = k <= bias ? tmin :
+ k >= bias + tmax ? tmax : k - bias;
+ if (q < t) break;
+ Contract.Assert(punycodeBase != t, "[IdnMapping.punycode_encode]Expected punycodeBase (36) to be != t");
+ output.Append(encode_digit(t + (q - t) % (punycodeBase - t)));
+ q = (q - t) / (punycodeBase - t);
+ }
+
+ // The final (most significant) digit is the one below the threshold
+ output.Append(encode_digit(q));
+ bias = adapt(delta, (numProcessed - numSurrogatePairs) + 1, numProcessed == numBasicCodePoints);
+ delta = 0;
+ numProcessed++;
+
+ if (IsSupplementary(m))
+ {
+ numProcessed++;
+ numSurrogatePairs++;
+ }
+ }
+ }
+ ++delta;
+ ++n;
+ Contract.Assert(delta > 0, "[IdnMapping.cs]3 punycode_encode - delta overflowed int");
+ }
+ }
+
+ // Make sure its not too big
+ if (output.Length - iOutputAfterLastDot > M_labelLimit)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "unicode");
+
+ // Done with this segment, add dot if necessary
+ if (iNextDot != unicode.Length)
+ output.Append('.');
+
+ iAfterLastDot = iNextDot + 1;
+ iOutputAfterLastDot = output.Length;
+ }
+
+ // Throw if we're too long
+ // (the limit is one shorter when the name does not end with a dot)
+ if (output.Length > M_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadNameSize",
+ M_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1)),
+ "unicode");
+
+ // Return our output string
+ return output.ToString();
+ }
+
+ /*** Main decode function ***/
+
+ /* punycode_decode() converts Punycode to Unicode. The input is */
+ /* represented as an array of ASCII code points, and the output */
+ /* will be represented as an array of Unicode code points. The */
+ /* input_length is the number of code points in the input. The */
+ /* output_length is an in/out argument: the caller passes in */
+ /* the maximum number of code points that it can receive, and */
+ /* on successful return it will contain the actual number of */
+ /* code points output. The case_flags array needs room for at */
+ /* least output_length values, or it can be a null pointer if the */
+ /* case information is not needed. A nonzero flag suggests that */
+ /* the corresponding Unicode character be forced to uppercase */
+ /* by the caller (if possible), while zero suggests that it be */
+ /* forced to lowercase (if possible). ASCII code points are */
+ /* output already in the proper case, but their flags will be set */
+ /* appropriately so that applying the flags would be harmless. */
+ /* The return value can be any of the punycode_status values */
+ /* defined above; if not punycode_success, then output_length, */
+ /* output, and case_flags might contain garbage. On success, the */
+ /* decoder will never need to write an output_length greater than */
+ /* input_length, because of how the encoding is defined. */
+
+ static String punycode_decode( String ascii )
+ {
+ // 0 length strings aren't allowed
+ if (ascii.Length == 0)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "ascii");
+ Contract.EndContractBlock();
+
+ // Throw if we're too long
+ if (ascii.Length > M_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadNameSize",
+ M_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1)), "ascii");
+
+ // output stringbuilder
+ StringBuilder output = new StringBuilder(ascii.Length);
+
+ // Dot searching
+ int iNextDot = 0;
+ int iAfterLastDot = 0;
+ int iOutputAfterLastDot = 0;
+
+ while (iNextDot < ascii.Length)
+ {
+ // Find end of this segment
+ iNextDot = ascii.IndexOf('.', iAfterLastDot);
+ if (iNextDot < 0 || iNextDot > ascii.Length)
+ iNextDot = ascii.Length;
+
+ // Only allowed to have empty . section at end (www.microsoft.com.)
+ if (iNextDot == iAfterLastDot)
+ {
+ // Only allowed to have empty sections as trailing .
+ if (iNextDot != ascii.Length)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "ascii");
+
+ // Last dot, stop
+ break;
+ }
+
+ // In either case it can't be bigger than segment size
+ if (iNextDot - iAfterLastDot > M_labelLimit)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "ascii");
+
+ // See if this section's ASCII or ACE
+ if (ascii.Length < M_strAcePrefix.Length + iAfterLastDot ||
+ !ascii.Substring(iAfterLastDot, M_strAcePrefix.Length).Equals(
+ M_strAcePrefix, StringComparison.OrdinalIgnoreCase))
+ {
+ // Its supposed to be just ASCII
+ // Actually, for non xn-- stuff do we want to allow Unicode?
+ // for (int i = iAfterLastDot; i < iNextDot; i++)
+ // {
+ // // Only ASCII is allowed
+ // if (ascii[i] >= 0x80)
+ // throw new ArgumentException(Environment.GetResourceString(
+ // "Argument_IdnBadPunycode"), "ascii");
+// }
+
+ // Its ASCII, copy it
+ output.Append(ascii.Substring(iAfterLastDot, iNextDot - iAfterLastDot));
+
+ // ASCII doesn't have BIDI issues
+ }
+ else
+ {
+ // Not ASCII, bump up iAfterLastDot to be after ACE Prefix
+ iAfterLastDot += M_strAcePrefix.Length;
+
+ // Get number of basic code points (where delimiter is)
+ // numBasicCodePoints < 0 if there're no basic code points
+ int iTemp = ascii.LastIndexOf(delimiter, iNextDot - 1);
+
+ // Trailing - not allowed
+ if (iTemp == iNextDot - 1)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+
+ int numBasicCodePoints;
+ if (iTemp <= iAfterLastDot)
+ numBasicCodePoints = 0;
+ else
+ {
+ numBasicCodePoints = iTemp - iAfterLastDot;
+
+ // Copy all the basic code points, making sure they're all in the allowed range,
+ // and losing the casing for all of them.
+ for (int copyAscii = iAfterLastDot;
+ copyAscii < iAfterLastDot + numBasicCodePoints;
+ copyAscii++)
+ {
+ // Make sure we don't allow unicode in the ascii part
+ if (ascii[copyAscii] > 0x7f)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+
+ // When appending make sure they get lower cased
+ output.Append((char)(ascii[copyAscii] >= 'A' && ascii[copyAscii] <='Z' ?
+ ascii[copyAscii] - 'A' + 'a' :
+ ascii[copyAscii]));
+ }
+ }
+
+ // Get ready for main loop. Start at beginning if we didn't have any
+ // basic code points, otherwise start after the -.
+ // asciiIndex will be next character to read from ascii
+ int asciiIndex = iAfterLastDot +
+ ( numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0);
+
+ // initialize our state
+ int n = initial_n;
+ int bias = initial_bias;
+ int i = 0;
+
+ int w, k;
+
+ // no Supplementary characters yet
+ int numSurrogatePairs = 0;
+
+ // Main loop, read rest of ascii
+ while (asciiIndex < iNextDot)
+ {
+ /* Decode a generalized variable-length integer into delta, */
+ /* which gets added to i. The overflow checking is easier */
+ /* if we increase i as we go, then subtract off its starting */
+ /* value at the end to obtain delta. */
+ int oldi = i;
+
+ for (w = 1, k = punycodeBase; ; k += punycodeBase)
+ {
+ // Check to make sure we aren't overrunning our ascii string
+ if (asciiIndex >= iNextDot)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+
+ // decode the digit from the next char
+ int digit = decode_digit(ascii[asciiIndex++]);
+
+ Contract.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0");
+ if (digit > (maxint - i) / w)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+
+ i += (int)(digit * w);
+ int t = k <= bias ? tmin :
+ k >= bias + tmax ? tmax : k - bias;
+ if (digit < t) break;
+ Contract.Assert(punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != punycodeBase (36)");
+ if (w > maxint / (punycodeBase - t))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+ w *= (punycodeBase - t);
+ }
+
+ bias = adapt(i - oldi,
+ (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1, oldi == 0);
+
+ /* i was supposed to wrap around from output.Length to 0, */
+ /* incrementing n each time, so we'll fix that now: */
+ Contract.Assert((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1 > 0,
+ "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment");
+ if (i / ((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1) > maxint - n)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+ n += (int)(i / (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1));
+ i %= (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1);
+
+ // If it was flagged it needs to be capitalized
+ // if (HasUpperCaseFlag(ascii[asciiIndex - 1]))
+ // {
+ // /* Case of last character determines uppercase flag: */
+ // // Any casing stuff need to happen last.
+ // If we wanted to reverse the IDNA casing data
+ // n = MakeNUpperCase(n)
+ // }
+
+ // Make sure n is legal
+ if ((n < 0 || n > 0x10ffff) || (n >= 0xD800 && n <= 0xDFFF))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+
+ // insert n at position i of the output: Really tricky if we have surrogates
+ int iUseInsertLocation;
+ String strTemp = Char.ConvertFromUtf32(n);
+
+ // If we have supplementary characters
+ if (numSurrogatePairs > 0)
+ {
+ // Hard way, we have supplementary characters
+ int iCount;
+ for (iCount = i, iUseInsertLocation = iOutputAfterLastDot;
+ iCount > 0;
+ iCount--, iUseInsertLocation++)
+ {
+ // If its a surrogate, we have to go one more
+ if (iUseInsertLocation >= output.Length)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadPunycode"), "ascii");
+ if (Char.IsSurrogate(output[iUseInsertLocation]))
+ iUseInsertLocation++;
+ }
+ }
+ else
+ {
+ // No Supplementary chars yet, just add i
+ iUseInsertLocation = iOutputAfterLastDot + i;
+ }
+
+ // Insert it
+ output.Insert(iUseInsertLocation, strTemp);
+
+ // If it was a surrogate increment our counter
+ if (IsSupplementary(n))
+ numSurrogatePairs++;
+
+ // Index gets updated
+ i++;
+ }
+
+ // Do BIDI testing
+ bool bRightToLeft = false;
+
+ // Check for RTL. If right-to-left, then 1st & last chars must be RTL
+ BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iOutputAfterLastDot);
+ if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
+ {
+ // It has to be right to left.
+ bRightToLeft = true;
+ }
+
+ // Check the rest of them to make sure RTL/LTR is consistent
+ for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++)
+ {
+ // This might happen if we run into a pair
+ if (Char.IsLowSurrogate(output.ToString(), iTest)) continue;
+
+ // Check to see if its LTR
+ eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iTest);
+ if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
+ (!bRightToLeft && (eBidi == BidiCategory.RightToLeft ||
+ eBidi == BidiCategory.RightToLeftArabic)))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadBidi"), "ascii");
+
+ // Make it lower case if we must (so we can test IsNormalized later)
+ // if (output[iTest] >= 'A' && output[iTest] <= 'Z')
+ // output[iTest] = (char)(output[iTest] + (char)('a' - 'A'));
+ }
+
+ // It's also a requirement that the last character be RTL if the first is RTL
+ if (bRightToLeft && eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
+ {
+ // Oops, last wasn't RTL, last should be RTL if first is RTL
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadBidi"), "ascii");
+ }
+ }
+
+ // See if this label was too long
+ if (iNextDot - iAfterLastDot > M_labelLimit)
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadLabelSize"), "ascii");
+
+ // Done with this segment, add dot if necessary
+ if (iNextDot != ascii.Length)
+ output.Append('.');
+
+ iAfterLastDot = iNextDot + 1;
+ iOutputAfterLastDot = output.Length;
+ }
+
+ // Throw if we're too long
+ if (output.Length > M_defaultNameLimit - (IsDot(output[output.Length-1]) ? 0 : 1))
+ throw new ArgumentException(Environment.GetResourceString(
+ "Argument_IdnBadNameSize",
+ M_defaultNameLimit -(IsDot(output[output.Length-1]) ? 0 : 1)), "ascii");
+
+ // Return our output string
+ return output.ToString();
+ }
+
+ /*
+ The punycode implementation above is based on the sample code in RFC 3492
+
+ Full Copyright Statement
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+
+ private const int IDN_ALLOW_UNASSIGNED = 0x1; // Bit 0. Presumably combined into the dwFlags argument of the Idn* imports to permit unassigned code points -- TODO confirm at call sites.
+ private const int IDN_USE_STD3_ASCII_RULES = 0x2; // Bit 1. Presumably combined into dwFlags to request Std3ASCII rule validation -- TODO confirm at call sites.
+
+ private const int ERROR_INVALID_NAME = 123; // Numeric error code. Presumably compared against the last Win32 error after an Idn* call fails -- TODO confirm at call sites.
+
+
+ /// <summary>
+ /// P/Invoke declaration bound to the IdnToAscii export of kernel32.dll.
+ /// Strings are marshaled as Unicode and the native last-error value is
+ /// captured for the caller (SetLastError = true).
+ /// </summary>
+ /// <param name="dwFlags">Option flags forwarded to the native function.</param>
+ /// <param name="lpUnicodeCharStr">Input string, marshaled as LPWStr.</param>
+ /// <param name="cchUnicodeChar">Length value paired with <paramref name="lpUnicodeCharStr"/>.</param>
+ /// <param name="lpASCIICharStr">Caller-supplied char array, marshaled as an [Out] buffer.</param>
+ /// <param name="cchASCIIChar">Length value paired with <paramref name="lpASCIICharStr"/>.</param>
+ /// <returns>
+ /// The int returned by the native function.
+ /// NOTE(review): presumably the number of characters produced, with 0 signalling failure -- confirm against the Win32 documentation.
+ /// </returns>
+ [SecurityCritical]
+ [SuppressUnmanagedCodeSecurity]
+ [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+ private static extern int IdnToAscii(
+ uint dwFlags,
+ [In, MarshalAs(UnmanagedType.LPWStr)] string lpUnicodeCharStr,
+ int cchUnicodeChar,
+ [Out] char[] lpASCIICharStr,
+ int cchASCIIChar);
+
+ /// <summary>
+ /// P/Invoke declaration bound to the IdnToUnicode export of kernel32.dll.
+ /// Strings are marshaled as Unicode and the native last-error value is
+ /// captured for the caller (SetLastError = true).
+ /// </summary>
+ /// <param name="dwFlags">Option flags forwarded to the native function.</param>
+ /// <param name="lpASCIICharStr">Input string, marshaled as LPWStr.</param>
+ /// <param name="cchASCIIChar">Length value paired with <paramref name="lpASCIICharStr"/>.</param>
+ /// <param name="lpUnicodeCharStr">Caller-supplied char array, marshaled as an [Out] buffer.</param>
+ /// <param name="cchUnicodeChar">Length value paired with <paramref name="lpUnicodeCharStr"/>.</param>
+ /// <returns>
+ /// The int returned by the native function.
+ /// NOTE(review): presumably the number of characters produced, with 0 signalling failure -- confirm against the Win32 documentation.
+ /// </returns>
+ [SecurityCritical]
+ [SuppressUnmanagedCodeSecurity]
+ [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+ private static extern int IdnToUnicode(
+ uint dwFlags,
+ [In, MarshalAs(UnmanagedType.LPWStr)] string lpASCIICharStr,
+ int cchASCIIChar,
+ [Out] char[] lpUnicodeCharStr,
+ int cchUnicodeChar);
+ }
+}
+