summaryrefslogtreecommitdiff
path: root/src/mscorlib/src/System/Text/GB18030Encoding.cs
blob: 8ed52a6ab835aa7ccaa401741c1947fdefb7a4db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.


//
//  Ported to managed code from c_gb18030.c and related gb18030 dll files
//
//
//  Abstract:
//
//      Managed implementation of GB18030-2000 (code page 54936) ported from the implementation in c_g18030.dll
//      This file contains functions to convert GB18030-2000 (code page 54936) into Unicode, and vice versa.
//
//  Notes:
//      GB18030-2000 (aka GBK2K) is designed to be mostly compatible with GBK (codepage 936),
//      while supporting the full range of Unicode code points (BMP + 16 supplementary planes).
//
//      The structure for GB18030 is:
//          * Single byte:
//              0x00 ~ 0x7f
//          * Two-byte:
//              0x81 ~ 0xfe, 0x40 ~ 0x7e    (leading byte, trailing byte)
//              0x81 ~ 0xfe, 0x80 ~ 0xfe    (leading byte, trailing byte)
//          * Four-byte:
//              0x81 ~ 0xfe, 0x30 ~ 0x39, 0x81 ~ 0xfe, 0x30 ~ 0x39.
//              Surrogate pairs are encoded starting from 0x90, 0x30, 0x81, 0x30
//
//      The BMP range is fully supported in GB18030 using 1-byte, 2-byte and 4-byte sequences.
//      In valid 4-byte GB18030, there are two gaps that can not be mapped to Unicode characters.
//          0x84, 0x31, 0xa5, 0x30 (just after the GB18030 bytes for U+FFFF(*)) ~ 0x8f, 0x39, 0xfe, 0x39 (just before the first GB18030 bytes for U+D800,U+DC00)
//          0xe3, 0x32, 0x9a, 0x36 (just after the GB18030 bytes for U+DBFF U+DFFF(**)) ~ 0xfe, 0x39, 0xfe, 0x39
//
//
//          Note1: U+FFFF = 0x84, 0x31, 0xa4, 0x39
//          Note2: U+DBFF U+DFFF = 0xe3, 0x32, 0x9a, 0x35
//
//      Tables used in GB18030Encoding:
//
//          Our data is similar to the 936 Code Page, so we start from there to build our tables.  We build the
//          normal double byte mapUnicodeToBytes and mapBytesToUnicode tables by applying differences from 936.
//          We also build a map4BytesToUnicode table and a mapUnicodeTo4BytesFlags
//
//          * mapUnicodeTo4BytesFlags
//              This is an array of bytes, so we have to do a / 8 and << %8 to check the appropriate bit (see Is4Byte())
//              If the bit is set its true.
//
//              true    - If set/true this is a 4 byte code.  The value in mapUnicodeToBytes will be the 4 byte offset
//              false   - If cleared/false this is a 1 or 2 byte code.  The value in mapUnicodeToBytes will be the 2 bytes.
//
//          * mapUnicodeToBytes
//              Contains either the 2 byte value of double byte GB18030 or the 4 byte offset for 4 byte GB18030,
//              depending on the value of the flag in mapUnicodeTo4BytesFlags
//
//          * mapBytesToUnicode
//              mapBytesToUnicode maps 2 byte GB 18030 to Unicode like other DBCS code pages.
//
//          * map4BytesToUnicode
//              map4BytesToUnicode is indexed by the 4 byte offset and contains the unicode value for each 4 byte offset
//
//
//      4 Byte sequences
//          We generally use the offset for the 4 byte sequence, such as:
//
//          The index value is the offset of the 4-byte GB18030.
//
//          4-byte GB18030      Index value
//          ==============      ===========
//          81,30,81,30         0
//          81,30,81,31         1
//          81,30,81,32         2
//          ...                 ...
//
//          Each entry of map4BytesToUnicode contains the Unicode codepoint for the offset of the
//          corresponding 4-byte GB18030.
//
//          E.g. map4BytesToUnicode[0] = 0x0080.  This means that GB18030 0x81, 0x30, 0x81, 0x30 will be converted to Unicode U+0080.
//
//      4 Byte Surrogate Sequences
//          Those work similarly to the normal 4 byte sequences, but start at a different offset
//
//  We don't override IsAlwaysNormalized because GB18030 covers all of the unicode space, so isn't guaranteed to be normal.
//
#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
namespace System.Text
{
    using System;
    using System.Diagnostics;
    using System.Diagnostics.Contracts;
    using System.Text;
    using System.Runtime.InteropServices;
    using System.Security;
    using System.Runtime.CompilerServices;
    using System.Runtime.Serialization;
    using System.Runtime.Versioning;
    using System.Security.Permissions;
    using System.Globalization;

    /*=================================GB18030Encoding============================
    **
    ** This is used to support GB18030-2000 encoding (code page 54936).
    **
    ==============================================================================*/

    [Serializable]
    internal sealed class GB18030Encoding : DBCSCodePageEncoding, ISerializable
    {
        // This is the table of 4 byte conversions.
        // GBLast4ByteCode is the highest 4 byte offset (counted from GB18030 0x81,0x30,0x81,0x30) that maps
        // to a BMP character; LoadManagedCodePage asserts that exactly GBLast4ByteCode + 1 entries get filled.
        private const int GBLast4ByteCode = 0x99FB;
        // Both pointers are assigned in LoadManagedCodePage and point into the code page's memory section,
        // which is why they are excluded from serialization.
        [NonSerialized]
        unsafe internal char* map4BytesToUnicode = null;       // new char[GBLast4ByteCode + 1]; // Need to map all 4 byte sequences to Unicode
        [NonSerialized]
        unsafe internal byte* mapUnicodeTo4BytesFlags = null;  // new byte[0x10000 / 8];         // Need 1 bit for each code point to say if its 4 byte or not

        // Code page number of GB18030; passed to the base constructor together with 936.
        private const int GB18030       = 54936;

        // First and last character of surrogate range as offset from 4 byte GB18030 GB81308130
        private const int GBSurrogateOffset = 0x2E248;      // GB90308130
        private const int GBLastSurrogateOffset = 0x12E247; // GBE3329A35

        // Default constructor.  GB18030 is built on top of the code page 936 tables, so 936 is
        // handed to the base class as the data code page while we report ourselves as 54936.
        internal GB18030Encoding()
            : base(GB18030, 936)
        {
        }

        // Deserialization constructor.  Uses the same (54936, 936) base setup as the default
        // constructor and then restores the serialized base Encoding state.
        internal GB18030Encoding(SerializationInfo info, StreamingContext context)
            : base(GB18030, 936)
        {
            // Restores the base state.  A null info is expected to throw inside this call,
            // so the assert below only fires if that expectation is ever broken.
            DeserializeEncoding(info, context);
            Debug.Assert(info != null, "[GB18030Encoding(Serialization...)] Expected null info to throw");

            // Code page, fallbacks and read-only state are set up by now; nothing else to restore.
        }

        // ISerializable implementation.
        // Only the base Encoding state is written; this class adds nothing of its own.
        void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
        {
            // A null info is expected to throw inside SerializeEncoding, hence the assert afterwards.
            SerializeEncoding(info, context);
            Debug.Assert(info != null, "[GB18030.GetObjectData] Expected null info to throw");

            // Everett doesn't need more than the basics
        }

        // Loads the base 936 code page and then applies the changes from the tableUnicodeToGBDiffs table.
        // The table is walked once in Unicode order.  Each entry is one of:
        //   * high bit clear                         - the next <entry> code points are consecutive 4 byte codes
        //   * high bit set, > 0x9000 and != 0xD1A6   - the exact 2 byte GB18030 value of the current code point
        //   * high bit set, anything else            - skip (entry & 0x7FFF) code points that 936 already has right
        // See table comments for table format.
        protected override unsafe void LoadManagedCodePage()
        {
            // Use base code page loading algorithm.
            // We need to use our main CP as our flag, and we need room behind the base tables
            // for the 4 byte -> Unicode map (2 bytes per entry) plus one flag bit per UTF-16 code unit.
            this.bFlagDataTable = false;
            this.iExtraBytes = (GBLast4ByteCode + 1) * 2 + 0x10000 / 8;

            // Load most of our code page
            base.LoadManagedCodePage();

            // Our extra data starts 65536 * 2 * 2 bytes into the section: flags first, then the 4 byte map.
            byte* sectionStart = (byte*)safeMemorySectionHandle.DangerousGetHandle();
            byte* extraData = sectionStart + 65536 * 2 * 2;
            mapUnicodeTo4BytesFlags = extraData;
            map4BytesToUnicode = (char*)(extraData + 0x10000 / 8);

            // The section is stamped with our code page once the diffs are applied; if the stamp
            // is already there the tables are complete and there is nothing left to do.
            if (*mapCodePageCached == this.CodePage)
                return;

            // Once we've done our base LoadManagedCodePage, we'll have to add our fixes
            char nextChar = (char)0;          // Unicode value the next table entry applies to
            ushort next4ByteOffset = 0;       // next unused 4 byte offset
            for (int entryIndex = 0; entryIndex < tableUnicodeToGBDiffs.Length; entryIndex++)
            {
                ushort entry = tableUnicodeToGBDiffs[entryIndex];

                if ((entry & 0x8000) == 0)
                {
                    // High bit clear: GB 18030 4 byte data, next <entry> characters are 4 byte sequences.
                    for (ushort remaining = entry; remaining > 0; remaining--)
                    {
                        Debug.Assert(next4ByteOffset <= GBLast4ByteCode,
                            "[GB18030Encoding.LoadManagedCodePage] Found too many 4 byte codes in data table.");

                        // 4 byte offset -> Unicode
                        map4BytesToUnicode[next4ByteOffset] = nextChar;
                        // Unicode -> 4 byte offset (shares the table with the 2 byte values)
                        mapUnicodeToBytes[nextChar] = next4ByteOffset;
                        // Flag bit telling Is4Byte() that the entry above is an offset, not a 2 byte value
                        mapUnicodeTo4BytesFlags[nextChar / 8] |= unchecked((byte)(1 << (nextChar % 8)));
                        nextChar++;
                        next4ByteOffset++;
                    }
                }
                else if (entry > 0x9000 && entry != 0xD1A6)
                {
                    // It was an exact value (gb18030[entry] = unicode)
                    mapBytesToUnicode[entry] = nextChar;
                    mapUnicodeToBytes[nextChar] = entry;
                    nextChar++;
                }
                else
                {
                    // It was CP 936 compatible data, that table's already loaded, just skip ahead
                    nextChar += unchecked((char)(entry & 0x7FFF));
                }
            }

            // nextChar should've wrapped back to 0
            Debug.Assert(nextChar == 0,
                "[GB18030Encoding.LoadManagedCodePage] Expected unicodeCount to wrap around to 0 as all chars were processed");

            // We should've read in GBLast4ByteCode 4 byte sequences
            Debug.Assert(next4ByteOffset == GBLast4ByteCode + 1,
                "[GB18030Encoding.LoadManagedCodePage] Expected 0x99FB to be last 4 byte offset, found 0x" + next4ByteOffset.ToString("X4", CultureInfo.InvariantCulture));

            // Need to flag ourselves saying we've built this CP.
            *mapCodePageCached = this.CodePage;
        }

        // Installs the replacement fallbacks for both directions.
        // (Per the original note, GB18030 only needs a fallback for bad surrogates.)
        internal override void SetDefaultFallbacks()
        {
            encoderFallback = EncoderFallback.ReplacementFallback;
            decoderFallback = DecoderFallback.ReplacementFallback;
        }

        // Is4Byte
        // Returns true when charTest is encoded as a 4 byte GB18030 sequence, i.e. when its bit
        // is set in mapUnicodeTo4BytesFlags (one bit per UTF-16 code unit, eight per byte).
        internal unsafe bool Is4Byte(char charTest)
        {
            int flagByte = mapUnicodeTo4BytesFlags[charTest >> 3];   // byte holding the flag (charTest / 8)
            int bitMask = 1 << (charTest & 7);                       // bit within that byte (charTest % 8)
            return (flagByte & bitMask) != 0;
        }

        // GetByteCount
        // Counting is done by running the encoder with no output buffer:
        // GetBytes accepts null bytes and then only reports the length.
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
        {
            byte* noOutput = null;
            return GetBytes(chars, count, noOutput, 0, encoder);
        }

        // GetBytes
        // Encodes charCount chars starting at chars as GB18030 and returns buffer.Count.
        // bytes may be null, in which case this is only used to count (see GetByteCount).
        //   * ASCII (<= 0x7f) is emitted as a single byte.
        //   * Other non-surrogate chars are looked up in mapUnicodeToBytes; Is4Byte() decides whether the
        //     value is a 2 byte code or a 4 byte offset that still has to be split into its 4 bytes.
        //   * A high surrogate is remembered in charLeftOver and combined with the following low
        //     surrogate into a 4 byte sequence whose lead byte starts at 0x90.
        //   * Unpaired surrogates are handed to buffer.Fallback().
        internal override unsafe int GetBytes(char* chars, int charCount,
                                                byte* bytes, int byteCount, EncoderNLS encoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            // We'll allow null bytes as a count
//            Debug.Assert(bytes != null, "[GB18030Encoding.GetBytes]bytes is null");
            Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetBytes]byteCount is negative");
            Debug.Assert(chars != null, "[GB18030Encoding.GetBytes]chars is null");
            Debug.Assert(charCount >= 0, "[GB18030Encoding.GetBytes]charCount is negative");

            // Assert because we shouldn't be able to have a null encoder.
            Debug.Assert(encoderFallback != null, "[GB18030Encoding.GetBytes]Attempting to use null encoder fallback");

            // Get any left over characters (a high surrogate remembered from a previous call)
            char charLeftOver = (char)0;
            if (encoder != null)
                charLeftOver = encoder.charLeftOver;

            // prepare our helpers
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
                this, encoder, bytes, byteCount, chars, charCount);

            // Try again if we were MustFlush
            TryAgain:

            // Go ahead and do it, including the fallback.
            while (buffer.MoreData)
            {
                // Get next char
                char ch = buffer.GetNextChar();

                // Have to check for charLeftOver
                if (charLeftOver != 0)
                {
                    Debug.Assert(Char.IsHighSurrogate(charLeftOver),
                        "[GB18030Encoding.GetBytes] leftover character should be high surrogate, not 0x" + ((int)charLeftOver).ToString("X4", CultureInfo.InvariantCulture));

                    // If our next char isn't a low surrogate, then we need to do fallback.
                    if (!Char.IsLowSurrogate(ch))
                    {
                        // No low surrogate, fallback high surrogate & try this one again
                        buffer.MovePrevious(false);                  // (Ignoring this character, don't throw)
                        if (!buffer.Fallback(charLeftOver))
                        {
                            charLeftOver = (char)0;
                            break;
                        }
                        charLeftOver = (char)0;
                        continue;
                    }
                    else
                    {
                        // Next is a surrogate, add it as surrogate pair

                        // Need 4 bytes for surrogates
                        // Get our offset: the supplementary code point minus 0x10000
                        int offset = ((charLeftOver - 0xd800) << 10) + (ch - 0xdc00);

                        // Split the offset into mixed-radix digits, least significant first:
                        // byte4 has 10 values (0x30..0x39), byte3 has 0x7e values (0x81..0xfe),
                        // byte2 has 10 values (0x30..0x39); what remains selects the lead byte.
                        byte byte4 = (byte)((offset % 0x0a) + 0x30);
                        offset /= 0x0a;
                        byte byte3 = (byte)((offset % 0x7e) + 0x81);
                        offset /= 0x7e;
                        byte byte2 = (byte)((offset % 0x0a) + 0x30);
                        offset /= 0x0a;
                        Debug.Assert(offset < 0x6f,
                            "[GB18030Encoding.GetBytes](1) Expected offset < 0x6f, not 0x" + offset.ToString("X2", CultureInfo.InvariantCulture));

                        // Surrogate pairs use lead bytes from 0x90 upwards
                        charLeftOver = (char)0;
                        if (!buffer.AddByte((byte)(offset + 0x90),byte2,byte3,byte4))
                        {
                            // Didn't work, need to back up for both surrogates (AddByte already backed up one)
                            buffer.MovePrevious(false);             // (don't throw)
                            break;
                        }
                    }
                    // (charLeftOver is already zero on every path that gets here)
                    charLeftOver = '\0';
                }
                // ASCII's easiest
                else if (ch <= 0x7f)
                {
                    // Need a byte
                    if (!buffer.AddByte((byte)ch))
                        break;
                }
                // See if its a surrogate pair
                else if (Char.IsHighSurrogate(ch))
                {
                    // Remember it for next time
                    charLeftOver = ch;
                }
                else if (Char.IsLowSurrogate(ch))
                {
                    // Low surrogates should've been found already
                    if (!buffer.Fallback(ch))
                        break;
                }
                else
                {
                    // Not surrogate or ASCII, get value
                    // (either a 2 byte code or a 4 byte offset, see Is4Byte below)
                    ushort iBytes = mapUnicodeToBytes[ch];

                    // See what kind it is
                    if (Is4Byte(ch))
                    {
                        //
                        // This Unicode character will be converted to four-byte GB18030.
                        //
                        // Need 4 bytes; same digit split as for surrogates, but BMP lead bytes start at 0x81
                        byte byte4 = (byte)((iBytes % 0x0a) + 0x30);
                        iBytes /= 0x0a;
                        byte byte3 = (byte)((iBytes % 0x7e) + 0x81);
                        iBytes /= 0x7e;
                        byte byte2 = (byte)((iBytes % 0x0a) + 0x30);
                        iBytes /= 0x0a;
                        Debug.Assert(iBytes < 0x7e,
                            "[GB18030Encoding.GetBytes]Expected iBytes < 0x7e, not 0x" + iBytes.ToString("X2", CultureInfo.InvariantCulture));
                        if (!buffer.AddByte((byte)(iBytes + 0x81), byte2, byte3, byte4))
                            break;
                    }
                    else
                    {
                        // Its 2 byte, use it (high byte is the lead byte, low byte the trail byte)
                        if (!buffer.AddByte(unchecked((byte)(iBytes >> 8)), unchecked((byte)(iBytes & 0xff))))
                            break;
                    }
                }
            }

            // Do we need to flush our charLeftOver?
            // (no encoder means no later call could supply the low surrogate)
            if ((encoder == null || encoder.MustFlush) && (charLeftOver > 0))
            {
                // Fall it back, then go around again to emit whatever the fallback produced
                buffer.Fallback(charLeftOver);
                charLeftOver = (char)0;
                goto TryAgain;
            }

            // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
            // (Check bytes != null, don't clear it if we're just counting)
            if (encoder != null)
            {
                // Remember our charLeftOver
                if (bytes != null)
                    encoder.charLeftOver = charLeftOver;

                encoder.m_charsUsed = buffer.CharsUsed;
            }

            // Return our length
            return buffer.Count;
        }

        // Helper methods
        // True when ch lies in the GB18030 lead byte range 0x81..0xfe (inclusive).
        internal bool IsGBLeadByte(short ch)
        {
            if (ch < 0x81)
                return false;

            return ch <= 0xfe;
        }

        // True when ch can be the second byte of a 2 byte sequence:
        // anything in 0x40..0xfe except 0x7f (i.e. 0x40..0x7e or 0x80..0xfe).
        internal bool IsGBTwoByteTrailing(short ch)
        {
            bool inOuterRange = ch >= 0x40 && ch <= 0xfe;
            return inOuterRange && ch != 0x7f;
        }

        // True when ch can be the 2nd or 4th byte of a 4 byte sequence, i.e. 0x30..0x39 inclusive.
        internal bool IsGBFourByteTrailing(short ch)
        {
            if (ch < 0x30)
                return false;

            return ch <= 0x39;
        }

        // Converts the four bytes of a 4 byte GB18030 sequence into its linear offset, where
        // 0x81,0x30,0x81,0x30 is offset 0.  The bytes are mixed-radix digits: byte 1 counts from
        // 0x81, byte 2 has 10 values from 0x30, byte 3 has 0x7e values from 0x81 and byte 4 has
        // 10 values from 0x30.  No range checking is done here.
        internal int GetFourBytesOffset(short offset1, short offset2, short offset3, short offset4)
        {
            int linear = offset1 - 0x81;
            linear = linear * 0x0a + (offset2 - 0x30);
            linear = linear * 0x7e + (offset3 - 0x81);
            linear = linear * 0x0a + (offset4 - 0x30);
            return linear;
        }

        // This is internal and called by something else.
        // Counting is done by running the decoder with no output buffer:
        // GetChars accepts null chars and then only reports the length.
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            char* noOutput = null;
            return GetChars(bytes, count, noOutput, 0, baseDecoder);
        }

        // Decodes GB18030 bytes into UTF-16 chars, or only counts them when chars is null.
        //
        //   bytes, byteCount  - input bytes (non-null / non-negative; only asserted here).
        //   chars, charCount  - output buffer and its capacity; chars may be null to request a count.
        //   baseDecoder       - optional GB18030Decoder (may be null) holding up to four left-over
        //                       bytes from a previous call.
        //
        // The method works in two phases:
        //   1. If the decoder holds left-over bytes, finish (or reject) that pending sequence,
        //      pulling further bytes from the input as needed.
        //   2. Walk the remaining input byte by byte, recognizing ASCII, 2 byte and 4 byte sequences.
        //
        // On exit, when a decoder was supplied, its m_bytesUsed is set from the buffer; its left-over
        // bytes are only written when chars is non-null (i.e. never while counting).
        //
        // Returns buffer.Count.
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            // We'll allow null chars as a count
            Debug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
//            Debug.Assert(chars != null, "[GB18030Encoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

            // Fix our decoder
            GB18030Decoder decoder = (GB18030Decoder)baseDecoder;

            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            // Need temp bytes because we can't muss up decoder
            // (-1 means "no byte in this slot")
            short byte1 = -1;
            short byte2 = -1;
            short byte3 = -1;
            short byte4 = -1;

            // See if there was anything to get out of the decoder
            if (decoder != null && decoder.bLeftOver1 != -1)
            {
                // Need temp bytes because we can't muss up decoder
                byte1 = decoder.bLeftOver1;
                byte2 = decoder.bLeftOver2;
                byte3 = decoder.bLeftOver3;
                byte4 = decoder.bLeftOver4;

                // Loop because we might have too many in buffer
                // This could happen if we are working on a 4 byte sequence, but it isn't valid.
                while (byte1 != -1)
                {
                    // If it's not a lead byte, use ? or its value, then scoot them down & try again
                    // This could happen if we previously had a bad 4 byte sequence and this is a trail byte
                    if (!IsGBLeadByte(byte1))
                    {
                        // This is either a ? or ASCII, need 1 char output
                        if (byte1 <= 0x7f)
                        {
                            if (!buffer.AddChar((char)byte1))      // It's ASCII
                                break;
                        }
                        else
                        {
                            if (!buffer.Fallback((byte)byte1))     // Not a valid byte
                                break;
                        }

                        byte1 = byte2;
                        byte2 = byte3;
                        byte3 = byte4;
                        byte4 = -1;
                        continue;
                    }

                    // Read in more bytes as needed
                    // (need byte2 always; need byte3 and byte4 as well when byte2 looks like a 4 byte trail)
                    while (byte2 == -1 ||
                           (IsGBFourByteTrailing(byte2) && byte4 == -1))
                    {
                        // Do we have room?
                        if (!buffer.MoreData)
                        {
                            // No input left to read, do we have to flush?
                            if (!decoder.MustFlush)
                            {
                                // Don't stick stuff in decoder when counting
                                if (chars != null)
                                {
                                    // Don't have to flush, won't have any chars
                                    // Decoder is correct, just return
                                    decoder.bLeftOver1 = byte1;
                                    decoder.bLeftOver2 = byte2;
                                    decoder.bLeftOver3 = byte3;
                                    decoder.bLeftOver4 = byte4;
                                }

                                decoder.m_bytesUsed = buffer.BytesUsed;
                                return buffer.Count;
                            }

                            // We'll have to flush, add a ? and scoot them down to try again
                            // We could be trying for a 4 byte sequence but byte 3 could be ascii and should be spit out
                            // Breaking will do this because the missing bytes are still -1, so the
                            // sequence checks below fail and we take the "not a valid sequence" path
                            break;
                        }

                        // Read them in
                        if (byte2 == -1) byte2 = buffer.GetNextByte();
                        else if (byte3 == -1) byte3 = buffer.GetNextByte();
                        else byte4 = buffer.GetNextByte();
                    }

                    // Now we have our 2 or 4 bytes
                    if (IsGBTwoByteTrailing(byte2))
                    {
                        //
                        // The trailing byte is a GB18030 two-byte sequence trailing byte.
                        //
                        int iTwoBytes = byte1 << 8;
                        iTwoBytes |= unchecked((byte)byte2);
                        if (!buffer.AddChar(this.mapBytesToUnicode[iTwoBytes], 2))
                            break;

                        // We're done with it
                        byte1 = -1;
                        byte2 = -1;
                    }
                    else if (IsGBFourByteTrailing(byte2) &&
                             IsGBLeadByte(byte3) &&
                             IsGBFourByteTrailing(byte4))
                    {
                        //
                        // Four-byte GB18030
                        //

                        int sFourBytesOffset = GetFourBytesOffset(
                            byte1, byte2, byte3, byte4);

                        // What kind is it?
                        if (sFourBytesOffset <= GBLast4ByteCode)
                        {
                            //
                            // The Unicode will be in the BMP range.
                            //
                            if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset], 4))
                                break;
                        }
                        else if (sFourBytesOffset >= GBSurrogateOffset &&
                                 sFourBytesOffset <= GBLastSurrogateOffset)
                        {
                            //
                            // This will be converted to a surrogate pair, need another char
                            //

                            // Use our surrogate
                            sFourBytesOffset -= GBSurrogateOffset;
                            if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
                                                unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))), 4))
                                break;
                        }
                        else
                        {
                            // Real GB18030 codepoint, but can't be mapped to unicode
                            // We already checked our buffer space.
                            // Do fallback here if we implement decoderfallbacks.
                            if (!buffer.Fallback((byte)byte1, (byte)byte2, (byte)byte3, (byte)byte4))
                                break;
                        }

                        // We're done with this one
                        byte1 = -1;
                        byte2 = -1;
                        byte3 = -1;
                        byte4 = -1;
                    }
                    else
                    {
                        // Not a valid sequence, use '?' for 1st byte & scoot them all down 1
                        if (!buffer.Fallback((byte)byte1))
                            break;

                        // Move all bytes down 1
                        byte1 = byte2;
                        byte2 = byte3;
                        byte3 = byte4;
                        byte4 = -1;
                    }
                }
            }

            // Loop, just do '?' replacement because we don't have fallbacks for decodings.
            while (buffer.MoreData)
            {
                byte ch = buffer.GetNextByte();

                // ASCII case is easy
                if (ch <= 0x7f)
                {
                    // ASCII, have room?
                    if (!buffer.AddChar((char)ch))
                        break;              // No room in convert buffer, so stop
                }
                // See if it's a lead byte
                else if (IsGBLeadByte(ch))
                {
                    // ch is a lead byte, have room for more?
                    if (buffer.MoreData)
                    {
                        byte ch2 = buffer.GetNextByte();
                        if (IsGBTwoByteTrailing(ch2))
                        {
                            //
                            // The trailing byte is a GB18030 two-byte sequence trailing byte.
                            //

                            //
                            // Two-byte GB18030
                            //
                            int iTwoBytes = ch << 8;
                            iTwoBytes |= ch2;
                            if (!buffer.AddChar(this.mapBytesToUnicode[iTwoBytes], 2))
                                break;
                        }
                        else if (IsGBFourByteTrailing(ch2))
                        {
                            // Do we have room for Four Byte Sequence? (already have 1 byte)
                            if (buffer.EvenMoreData(2))
                            {
                                // Is it a valid 4 byte sequence?
                                byte ch3 = buffer.GetNextByte();
                                byte ch4 = buffer.GetNextByte();
                                if (IsGBLeadByte(ch3) &&
                                    IsGBFourByteTrailing(ch4))
                                {
                                    //
                                    // Four-byte GB18030
                                    //
                                    int sFourBytesOffset = GetFourBytesOffset(ch, ch2, ch3, ch4);

                                    // What kind is it?
                                    // We'll be at least 1 BMP char or a '?' char.

                                    if (sFourBytesOffset <= GBLast4ByteCode)
                                    {
                                        //
                                        // The Unicode will be in the BMP range.
                                        //
                                        if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset],4))
                                            break;
                                    }
                                    else if (sFourBytesOffset >= GBSurrogateOffset &&
                                             sFourBytesOffset <= GBLastSurrogateOffset)
                                    {
                                        //
                                        // This will be converted to a surrogate pair, need another char
                                        //

                                        // Use our surrogate
                                        sFourBytesOffset -= GBSurrogateOffset;
                                        if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
                                                             unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))),4))
                                            break;
                                    }
                                    else
                                    {
                                        // Real GB18030 codepoint, but can't be mapped to unicode
                                        if (!buffer.Fallback(ch, ch2, ch3, ch4))
                                            break;
                                    }
                                }
                                else
                                {
                                    // Not a valid 2 or 4 byte sequence, use '?' for ch and try other 3 again
                                    buffer.AdjustBytes(-3);
                                    if (!buffer.Fallback(ch))
                                        break;
                                }
                            }
                            else
                            {
                                // No room for 4 bytes, have 2 already, may be one more
                                // Lead byte but no place to stick it
                                if (decoder != null && !decoder.MustFlush)
                                {
                                    // (make sure not to set decoder if counting, so check chars)
                                    if (chars != null)
                                    {
                                        // We'll be able to stick the remainder in the decoder
                                        byte1 = ch;
                                        byte2 = ch2;

                                        if (buffer.MoreData)
                                            byte3 = buffer.GetNextByte();
                                        else
                                            byte3 = -1;

                                        byte4=-1;
                                    }
                                    break;
                                }

                                // Won't go in decoder, we'll use '?' for it.
                                if (!buffer.Fallback(ch, ch2))
                                    break;
                            }
                        }
                        else
                        {
                            // Unknown byte sequence, fall back lead byte and try 2nd one again
                            buffer.AdjustBytes(-1);
                            if (!buffer.Fallback(ch))
                                break;
                        }
                    }
                    else
                    {
                        // Lead byte but don't know about trail byte
                        // (make sure not to set decoder if counting, so check chars)
                        if (decoder != null && !decoder.MustFlush)
                        {
                            // We'll be able to stick it in the decoder
                            // (don't actually do it when counting though)
                            if (chars != null)
                            {
                                byte1 = ch;
                                byte2 = -1;
                                byte3 = -1;
                                byte4 = -1;
                            }
                            break;
                        }

                        if (!buffer.Fallback(ch))
                            break;
                    }
                }
                else
                {
                    // Not ASCII and not a lead byte, we'll use '?' for it if we have room
                    if (!buffer.Fallback(ch))
                        break;
                }
            }

            // Need to flush the decoder if necessary
            // (make sure not to set decoder if counting, so check chars)
            if (decoder != null)
            {
                if (chars != null)
                {
                    // Whatever is still in byte1..byte4 (possibly all -1) becomes the decoder's left-over state
                    decoder.bLeftOver1 = byte1;
                    decoder.bLeftOver2 = byte2;
                    decoder.bLeftOver3 = byte3;
                    decoder.bLeftOver4 = byte4;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }

            // Return the # of characters we found
            return buffer.Count;
        }

        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(charCount),
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            }
            Contract.EndContractBlock();

            // Up to 4 bytes may be produced per char; surrogates need no extra allowance
            // because GB18030 covers the whole Unicode range.
            const int MaxBytesPerChar = 4;

            // One extra char of headroom in case a pending high surrogate turns into a '?'.
            long worstCase = (long)charCount + 1;

            // Each char may expand to the fallback's maximum replacement length.
            int fallbackWidth = EncoderFallback.MaxCharCount;
            if (fallbackWidth > 1)
            {
                worstCase *= fallbackWidth;
            }

            worstCase *= MaxBytesPerChar;

            // The public contract is an int, so refuse anything that does not fit.
            if (worstCase > int.MaxValue)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(charCount),
                    Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
            }

            return (int)worstCase;
        }

        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(byteCount),
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            }
            Contract.EndContractBlock();

            // Worst case is one char per input byte, plus up to 3 extra '?' chars
            // that a decoder holding a broken sequence could emit.
            const int DecoderSlack = 3;
            long worstCase = (long)byteCount + DecoderSlack;

            // Each of those may expand to the fallback's maximum replacement length.
            int fallbackWidth = DecoderFallback.MaxCharCount;
            if (fallbackWidth > 1)
            {
                worstCase *= fallbackWidth;
            }

            // The public contract is an int, so refuse anything that does not fit.
            if (worstCase > int.MaxValue)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(byteCount),
                    Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
            }

            return (int)worstCase;
        }

        public override Decoder GetDecoder()
        {
            // Each call hands out a fresh decoder bound to this encoding instance.
            Decoder decoder = new GB18030Decoder(this);
            return decoder;
        }

        [Serializable]
        internal sealed class GB18030Decoder : System.Text.DecoderNLS, ISerializable
        {
            // Up to four bytes of a sequence that was cut off at the end of a previous call.
            // A slot holding -1 is empty; HasState reports true when the first slot is >= 0.
            internal short bLeftOver1 = -1;
            internal short bLeftOver2 = -1;
            internal short bLeftOver3 = -1;
            internal short bLeftOver4 = -1;

            internal GB18030Decoder(EncodingNLS encoding) : base(encoding)
            {
                // Nothing else to do here; per the original note, DecoderNLS calls Reset().
            }

            // Deserialization constructor. Reads the Whidbey (V2.0) field set first and, if that
            // is not present, falls back to the Everett layout by binding to a new GB18030Encoding.
            internal GB18030Decoder(SerializationInfo info, StreamingContext context)
            {
                if (info == null)
                {
                    throw new ArgumentNullException(nameof(info));
                }
                Contract.EndContractBlock();

                try
                {
                    // Whidbey V2.0 fields
                    m_encoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
                    m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
                    bLeftOver1 = (short)info.GetValue("bLeftOver1", typeof(short));
                    bLeftOver2 = (short)info.GetValue("bLeftOver2", typeof(short));
                    bLeftOver3 = (short)info.GetValue("bLeftOver3", typeof(short));
                    bLeftOver4 = (short)info.GetValue("bLeftOver4", typeof(short));
                }
                catch (SerializationException)
                {
                    // No Whidbey data, so treat it as Everett
                    // (original note: DecoderNLS already called Reset()).
                    m_encoding = new GB18030Encoding();
                }
            }

            // ISerializable implementation: writes the Whidbey fields followed by the
            // placeholder entries that Everett expects.
            void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
            {
                if (info == null)
                {
                    throw new ArgumentNullException(nameof(info));
                }
                Contract.EndContractBlock();

                // Whidbey data
                info.AddValue("m_encoding", m_encoding);
                info.AddValue("m_fallback", m_fallback);
                info.AddValue("bLeftOver1", bLeftOver1);
                info.AddValue("bLeftOver2", bLeftOver2);
                info.AddValue("bLeftOver3", bLeftOver3);
                info.AddValue("bLeftOver4", bLeftOver4);

                // Everett reads different entries; they are written empty.
                info.AddValue("m_leftOverBytes", (int)0);
                info.AddValue("leftOver", new byte[8]);
            }

            // Drops any pending bytes and resets the fallback buffer when one exists.
            public override void Reset()
            {
                bLeftOver1 = bLeftOver2 = bLeftOver3 = bLeftOver4 = -1;
                m_fallbackBuffer?.Reset();
            }

            // True when a left-over byte from an earlier call is still pending.
            internal override bool HasState => bLeftOver1 >= 0;
        }

        // tableUnicodeToGBDiffs
        //
        // This compressed data enumerates the differences between gb18030 and the 936 code page as follows:
        //
        // <count> & 0x8000 == 0x8000      The next count <count> characters are identical to 936 characters.
        // <count> & 0x8000 == 0x0000      The next count <count> characters are 4 byte gb18030 characters.
        // Except for:
        // <count> >= 0x9000 && <count> != 0xD1A6     This character is this 2 byte GB18030 value.
        //
        readonly ushort[] tableUnicodeToGBDiffs =
        {
            0x8080, // U+0000 - U+007F (  128 chars) use CP 936 conversion.
            0x0024, // U+0080 - U+00A3 (   36 chars) are GB18030 81 30 81 30 - 81 30 84 35 (offset 0000 - 0023)
            0x8001, // U+00A4 - U+00A4 (    1 chars) use CP 936 conversion.
            0x0002, // U+00A5 - U+00A6 (    2 chars) are GB18030 81 30 84 36 - 81 30 84 37 (offset 0024 - 0025)
            0x8002, // U+00A7 - U+00A8 (    2 chars) use CP 936 conversion.
            0x0007, // U+00A9 - U+00AF (    7 chars) are GB18030 81 30 84 38 - 81 30 85 34 (offset 0026 - 002C)
            0x8002, // U+00B0 - U+00B1 (    2 chars) use CP 936 conversion.
            0x0005, // U+00B2 - U+00B6 (    5 chars) are GB18030 81 30 85 35 - 81 30 85 39 (offset 002D - 0031)
            0x8001, // U+00B7 - U+00B7 (    1 chars) use CP 936 conversion.
            0x001F, // U+00B8 - U+00D6 (   31 chars) are GB18030 81 30 86 30 - 81 30 89 30 (offset 0032 - 0050)
            0x8001, // U+00D7 - U+00D7 (    1 chars) use CP 936 conversion.
            0x0008, // U+00D8 - U+00DF (    8 chars) are GB18030 81 30 89 31 - 81 30 89 38 (offset 0051 - 0058)
            0x8002, // U+00E0 - U+00E1 (    2 chars) use CP 936 conversion.
            0x0006, // U+00E2 - U+00E7 (    6 chars) are GB18030 81 30 89 39 - 81 30 8A 34 (offset 0059 - 005E)
            0x8003, // U+00E8 - U+00EA (    3 chars) use CP 936 conversion.
            0x0001, // U+00EB - U+00EB (    1 chars) are GB18030 81 30 8A 35 - 81 30 8A 35 (offset 005F - 005F)
            0x8002, // U+00EC - U+00ED (    2 chars) use CP 936 conversion.
            0x0004, // U+00EE - U+00F1 (    4 chars) are GB18030 81 30 8A 36 - 81 30 8A 39 (offset 0060 - 0063)
            0x8002, // U+00F2 - U+00F3 (    2 chars) use CP 936 conversion.
            0x0003, // U+00F4 - U+00F6 (    3 chars) are GB18030 81 30 8B 30 - 81 30 8B 32 (offset 0064 - 0066)
            0x8001, // U+00F7 - U+00F7 (    1 chars) use CP 936 conversion.
            0x0001, // U+00F8 - U+00F8 (    1 chars) are GB18030 81 30 8B 33 - 81 30 8B 33 (offset 0067 - 0067)
            0x8002, // U+00F9 - U+00FA (    2 chars) use CP 936 conversion.
            0x0001, // U+00FB - U+00FB (    1 chars) are GB18030 81 30 8B 34 - 81 30 8B 34 (offset 0068 - 0068)
            0x8001, // U+00FC - U+00FC (    1 chars) use CP 936 conversion.
            0x0004, // U+00FD - U+0100 (    4 chars) are GB18030 81 30 8B 35 - 81 30 8B 38 (offset 0069 - 006C)
            0x8001, // U+0101 - U+0101 (    1 chars) use CP 936 conversion.
            0x0011, // U+0102 - U+0112 (   17 chars) are GB18030 81 30 8B 39 - 81 30 8D 35 (offset 006D - 007D)
            0x8001, // U+0113 - U+0113 (    1 chars) use CP 936 conversion.
            0x0007, // U+0114 - U+011A (    7 chars) are GB18030 81 30 8D 36 - 81 30 8E 32 (offset 007E - 0084)
            0x8001, // U+011B - U+011B (    1 chars) use CP 936 conversion.
            0x000F, // U+011C - U+012A (   15 chars) are GB18030 81 30 8E 33 - 81 30 8F 37 (offset 0085 - 0093)
            0x8001, // U+012B - U+012B (    1 chars) use CP 936 conversion.
            0x0018, // U+012C - U+0143 (   24 chars) are GB18030 81 30 8F 38 - 81 30 92 31 (offset 0094 - 00AB)
            0x8001, // U+0144 - U+0144 (    1 chars) use CP 936 conversion.
            0x0003, // U+0145 - U+0147 (    3 chars) are GB18030 81 30 92 32 - 81 30 92 34 (offset 00AC - 00AE)
            0x8001, // U+0148 - U+0148 (    1 chars) use CP 936 conversion.
            0x0004, // U+0149 - U+014C (    4 chars) are GB18030 81 30 92 35 - 81 30 92 38 (offset 00AF - 00B2)
            0x8001, // U+014D - U+014D (    1 chars) use CP 936 conversion.
            0x001D, // U+014E - U+016A (   29 chars) are GB18030 81 30 92 39 - 81 30 95 37 (offset 00B3 - 00CF)
            0x8001, // U+016B - U+016B (    1 chars) use CP 936 conversion.
            0x0062, // U+016C - U+01CD (   98 chars) are GB18030 81 30 95 38 - 81 30 9F 35 (offset 00D0 - 0131)
            0x8001, // U+01CE - U+01CE (    1 chars) use CP 936 conversion.
            0x0001, // U+01CF - U+01CF (    1 chars) are GB18030 81 30 9F 36 - 81 30 9F 36 (offset 0132 - 0132)
            0x8001, // U+01D0 - U+01D0 (    1 chars) use CP 936 conversion.
            0x0001, // U+01D1 - U+01D1 (    1 chars) are GB18030 81 30 9F 37 - 81 30 9F 37 (offset 0133 - 0133)
            0x8001, // U+01D2 - U+01D2 (    1 chars) use CP 936 conversion.
            0x0001, // U+01D3 - U+01D3 (    1 chars) are GB18030 81 30 9F 38 - 81 30 9F 38 (offset 0134 - 0134)
            0x8001, // U+01D4 - U+01D4 (    1 chars) use CP 936 conversion.
            0x0001, // U+01D5 - U+01D5 (    1 chars) are GB18030 81 30 9F 39 - 81 30 9F 39 (offset 0135 - 0135)
            0x8001, // U+01D6 - U+01D6 (    1 chars) use CP 936 conversion.
            0x0001, // U+01D7 - U+01D7 (    1 chars) are GB18030 81 30 A0 30 - 81 30 A0 30 (offset 0136 - 0136)
            0x8001, // U+01D8 - U+01D8 (    1 chars) use CP 936 conversion.
            0x0001, // U+01D9 - U+01D9 (    1 chars) are GB18030 81 30 A0 31 - 81 30 A0 31 (offset 0137 - 0137)
            0x8001, // U+01DA - U+01DA (    1 chars) use CP 936 conversion.
            0x0001, // U+01DB - U+01DB (    1 chars) are GB18030 81 30 A0 32 - 81 30 A0 32 (offset 0138 - 0138)
            0x8001, // U+01DC - U+01DC (    1 chars) use CP 936 conversion.
            0x001C, // U+01DD - U+01F8 (   28 chars) are GB18030 81 30 A0 33 - 81 30 A3 30 (offset 0139 - 0154)
            0xA8BF, // U+01F9 is non-936 GB18030 value A8 BF.
            0x0057, // U+01FA - U+0250 (   87 chars) are GB18030 81 30 A3 31 - 81 30 AB 37 (offset 0155 - 01AB)
            0x8001, // U+0251 - U+0251 (    1 chars) use CP 936 conversion.
            0x000F, // U+0252 - U+0260 (   15 chars) are GB18030 81 30 AB 38 - 81 30 AD 32 (offset 01AC - 01BA)
            0x8001, // U+0261 - U+0261 (    1 chars) use CP 936 conversion.
            0x0065, // U+0262 - U+02C6 (  101 chars) are GB18030 81 30 AD 33 - 81 30 B7 33 (offset 01BB - 021F)
            0x8001, // U+02C7 - U+02C7 (    1 chars) use CP 936 conversion.
            0x0001, // U+02C8 - U+02C8 (    1 chars) are GB18030 81 30 B7 34 - 81 30 B7 34 (offset 0220 - 0220)
            0x8003, // U+02C9 - U+02CB (    3 chars) use CP 936 conversion.
            0x000D, // U+02CC - U+02D8 (   13 chars) are GB18030 81 30 B7 35 - 81 30 B8 37 (offset 0221 - 022D)
            0x8001, // U+02D9 - U+02D9 (    1 chars) use CP 936 conversion.
            0x00B7, // U+02DA - U+0390 (  183 chars) are GB18030 81 30 B8 38 - 81 30 CB 30 (offset 022E - 02E4)
            0x8011, // U+0391 - U+03A1 (   17 chars) use CP 936 conversion.
            0x0001, // U+03A2 - U+03A2 (    1 chars) are GB18030 81 30 CB 31 - 81 30 CB 31 (offset 02E5 - 02E5)
            0x8007, // U+03A3 - U+03A9 (    7 chars) use CP 936 conversion.
            0x0007, // U+03AA - U+03B0 (    7 chars) are GB18030 81 30 CB 32 - 81 30 CB 38 (offset 02E6 - 02EC)
            0x8011, // U+03B1 - U+03C1 (   17 chars) use CP 936 conversion.
            0x0001, // U+03C2 - U+03C2 (    1 chars) are GB18030 81 30 CB 39 - 81 30 CB 39 (offset 02ED - 02ED)
            0x8007, // U+03C3 - U+03C9 (    7 chars) use CP 936 conversion.
            0x0037, // U+03CA - U+0400 (   55 chars) are GB18030 81 30 CC 30 - 81 30 D1 34 (offset 02EE - 0324)
            0x8001, // U+0401 - U+0401 (    1 chars) use CP 936 conversion.
            0x000E, // U+0402 - U+040F (   14 chars) are GB18030 81 30 D1 35 - 81 30 D2 38 (offset 0325 - 0332)
            0x8040, // U+0410 - U+044F (   64 chars) use CP 936 conversion.
            0x0001, // U+0450 - U+0450 (    1 chars) are GB18030 81 30 D2 39 - 81 30 D2 39 (offset 0333 - 0333)
            0x8001, // U+0451 - U+0451 (    1 chars) use CP 936 conversion.
            0x1BBE, // U+0452 - U+200F ( 7102 chars) are GB18030 81 30 D3 30 - 81 36 A5 31 (offset 0334 - 1EF1)
            0x8001, // U+2010 - U+2010 (    1 chars) use CP 936 conversion.
            0x0002, // U+2011 - U+2012 (    2 chars) are GB18030 81 36 A5 32 - 81 36 A5 33 (offset 1EF2 - 1EF3)
            0x8004, // U+2013 - U+2016 (    4 chars) use CP 936 conversion.
            0x0001, // U+2017 - U+2017 (    1 chars) are GB18030 81 36 A5 34 - 81 36 A5 34 (offset 1EF4 - 1EF4)
            0x8002, // U+2018 - U+2019 (    2 chars) use CP 936 conversion.
            0x0002, // U+201A - U+201B (    2 chars) are GB18030 81 36 A5 35 - 81 36 A5 36 (offset 1EF5 - 1EF6)
            0x8002, // U+201C - U+201D (    2 chars) use CP 936 conversion.
            0x0007, // U+201E - U+2024 (    7 chars) are GB18030 81 36 A5 37 - 81 36 A6 33 (offset 1EF7 - 1EFD)
            0x8002, // U+2025 - U+2026 (    2 chars) use CP 936 conversion.
            0x0009, // U+2027 - U+202F (    9 chars) are GB18030 81 36 A6 34 - 81 36 A7 32 (offset 1EFE - 1F06)
            0x8001, // U+2030 - U+2030 (    1 chars) use CP 936 conversion.
            0x0001, // U+2031 - U+2031 (    1 chars) are GB18030 81 36 A7 33 - 81 36 A7 33 (offset 1F07 - 1F07)
            0x8002, // U+2032 - U+2033 (    2 chars) use CP 936 conversion.
            0x0001, // U+2034 - U+2034 (    1 chars) are GB18030 81 36 A7 34 - 81 36 A7 34 (offset 1F08 - 1F08)
            0x8001, // U+2035 - U+2035 (    1 chars) use CP 936 conversion.
            0x0005, // U+2036 - U+203A (    5 chars) are GB18030 81 36 A7 35 - 81 36 A7 39 (offset 1F09 - 1F0D)
            0x8001, // U+203B - U+203B (    1 chars) use CP 936 conversion.
            0x0070, // U+203C - U+20AB (  112 chars) are GB18030 81 36 A8 30 - 81 36 B3 31 (offset 1F0E - 1F7D)
            0xA2E3, // U+20AC is non-936 GB18030 value A2 E3.
            0x0056, // U+20AD - U+2102 (   86 chars) are GB18030 81 36 B3 32 - 81 36 BB 37 (offset 1F7E - 1FD3)
            0x8001, // U+2103 - U+2103 (    1 chars) use CP 936 conversion.
            0x0001, // U+2104 - U+2104 (    1 chars) are GB18030 81 36 BB 38 - 81 36 BB 38 (offset 1FD4 - 1FD4)
            0x8001, // U+2105 - U+2105 (    1 chars) use CP 936 conversion.
            0x0003, // U+2106 - U+2108 (    3 chars) are GB18030 81 36 BB 39 - 81 36 BC 31 (offset 1FD5 - 1FD7)
            0x8001, // U+2109 - U+2109 (    1 chars) use CP 936 conversion.
            0x000C, // U+210A - U+2115 (   12 chars) are GB18030 81 36 BC 32 - 81 36 BD 33 (offset 1FD8 - 1FE3)
            0x8001, // U+2116 - U+2116 (    1 chars) use CP 936 conversion.
            0x000A, // U+2117 - U+2120 (   10 chars) are GB18030 81 36 BD 34 - 81 36 BE 33 (offset 1FE4 - 1FED)
            0x8001, // U+2121 - U+2121 (    1 chars) use CP 936 conversion.
            0x003E, // U+2122 - U+215F (   62 chars) are GB18030 81 36 BE 34 - 81 36 C4 35 (offset 1FEE - 202B)
            0x800C, // U+2160 - U+216B (   12 chars) use CP 936 conversion.
            0x0004, // U+216C - U+216F (    4 chars) are GB18030 81 36 C4 36 - 81 36 C4 39 (offset 202C - 202F)
            0x800A, // U+2170 - U+2179 (   10 chars) use CP 936 conversion.
            0x0016, // U+217A - U+218F (   22 chars) are GB18030 81 36 C5 30 - 81 36 C7 31 (offset 2030 - 2045)
            0x8004, // U+2190 - U+2193 (    4 chars) use CP 936 conversion.
            0x0002, // U+2194 - U+2195 (    2 chars) are GB18030 81 36 C7 32 - 81 36 C7 33 (offset 2046 - 2047)
            0x8004, // U+2196 - U+2199 (    4 chars) use CP 936 conversion.
            0x006E, // U+219A - U+2207 (  110 chars) are GB18030 81 36 C7 34 - 81 36 D2 33 (offset 2048 - 20B5)
            0x8001, // U+2208 - U+2208 (    1 chars) use CP 936 conversion.
            0x0006, // U+2209 - U+220E (    6 chars) are GB18030 81 36 D2 34 - 81 36 D2 39 (offset 20B6 - 20BB)
            0x8001, // U+220F - U+220F (    1 chars) use CP 936 conversion.
            0x0001, // U+2210 - U+2210 (    1 chars) are GB18030 81 36 D3 30 - 81 36 D3 30 (offset 20BC - 20BC)
            0x8001, // U+2211 - U+2211 (    1 chars) use CP 936 conversion.
            0x0003, // U+2212 - U+2214 (    3 chars) are GB18030 81 36 D3 31 - 81 36 D3 33 (offset 20BD - 20BF)
            0x8001, // U+2215 - U+2215 (    1 chars) use CP 936 conversion.
            0x0004, // U+2216 - U+2219 (    4 chars) are GB18030 81 36 D3 34 - 81 36 D3 37 (offset 20C0 - 20C3)
            0x8001, // U+221A - U+221A (    1 chars) use CP 936 conversion.
            0x0002, // U+221B - U+221C (    2 chars) are GB18030 81 36 D3 38 - 81 36 D3 39 (offset 20C4 - 20C5)
            0x8004, // U+221D - U+2220 (    4 chars) use CP 936 conversion.
            0x0002, // U+2221 - U+2222 (    2 chars) are GB18030 81 36 D4 30 - 81 36 D4 31 (offset 20C6 - 20C7)
            0x8001, // U+2223 - U+2223 (    1 chars) use CP 936 conversion.
            0x0001, // U+2224 - U+2224 (    1 chars) are GB18030 81 36 D4 32 - 81 36 D4 32 (offset 20C8 - 20C8)
            0x8001, // U+2225 - U+2225 (    1 chars) use CP 936 conversion.
            0x0001, // U+2226 - U+2226 (    1 chars) are GB18030 81 36 D4 33 - 81 36 D4 33 (offset 20C9 - 20C9)
            0x8005, // U+2227 - U+222B (    5 chars) use CP 936 conversion.
            0x0002, // U+222C - U+222D (    2 chars) are GB18030 81 36 D4 34 - 81 36 D4 35 (offset 20CA - 20CB)
            0x8001, // U+222E - U+222E (    1 chars) use CP 936 conversion.
            0x0005, // U+222F - U+2233 (    5 chars) are GB18030 81 36 D4 36 - 81 36 D5 30 (offset 20CC - 20D0)
            0x8004, // U+2234 - U+2237 (    4 chars) use CP 936 conversion.
            0x0005, // U+2238 - U+223C (    5 chars) are GB18030 81 36 D5 31 - 81 36 D5 35 (offset 20D1 - 20D5)
            0x8001, // U+223D - U+223D (    1 chars) use CP 936 conversion.
            0x000A, // U+223E - U+2247 (   10 chars) are GB18030 81 36 D5 36 - 81 36 D6 35 (offset 20D6 - 20DF)
            0x8001, // U+2248 - U+2248 (    1 chars) use CP 936 conversion.
            0x0003, // U+2249 - U+224B (    3 chars) are GB18030 81 36 D6 36 - 81 36 D6 38 (offset 20E0 - 20E2)
            0x8001, // U+224C - U+224C (    1 chars) use CP 936 conversion.
            0x0005, // U+224D - U+2251 (    5 chars) are GB18030 81 36 D6 39 - 81 36 D7 33 (offset 20E3 - 20E7)
            0x8001, // U+2252 - U+2252 (    1 chars) use CP 936 conversion.
            0x000D, // U+2253 - U+225F (   13 chars) are GB18030 81 36 D7 34 - 81 36 D8 36 (offset 20E8 - 20F4)
            0x8002, // U+2260 - U+2261 (    2 chars) use CP 936 conversion.
            0x0002, // U+2262 - U+2263 (    2 chars) are GB18030 81 36 D8 37 - 81 36 D8 38 (offset 20F5 - 20F6)
            0x8004, // U+2264 - U+2267 (    4 chars) use CP 936 conversion.
            0x0006, // U+2268 - U+226D (    6 chars) are GB18030 81 36 D8 39 - 81 36 D9 34 (offset 20F7 - 20FC)
            0x8002, // U+226E - U+226F (    2 chars) use CP 936 conversion.
            0x0025, // U+2270 - U+2294 (   37 chars) are GB18030 81 36 D9 35 - 81 36 DD 31 (offset 20FD - 2121)
            0x8001, // U+2295 - U+2295 (    1 chars) use CP 936 conversion.
            0x0003, // U+2296 - U+2298 (    3 chars) are GB18030 81 36 DD 32 - 81 36 DD 34 (offset 2122 - 2124)
            0x8001, // U+2299 - U+2299 (    1 chars) use CP 936 conversion.
            0x000B, // U+229A - U+22A4 (   11 chars) are GB18030 81 36 DD 35 - 81 36 DE 35 (offset 2125 - 212F)
            0x8001, // U+22A5 - U+22A5 (    1 chars) use CP 936 conversion.
            0x0019, // U+22A6 - U+22BE (   25 chars) are GB18030 81 36 DE 36 - 81 36 E1 30 (offset 2130 - 2148)
            0x8001, // U+22BF - U+22BF (    1 chars) use CP 936 conversion.
            0x0052, // U+22C0 - U+2311 (   82 chars) are GB18030 81 36 E1 31 - 81 36 E9 32 (offset 2149 - 219A)
            0x8001, // U+2312 - U+2312 (    1 chars) use CP 936 conversion.
            0x014D, // U+2313 - U+245F (  333 chars) are GB18030 81 36 E9 33 - 81 37 8C 35 (offset 219B - 22E7)
            0x800A, // U+2460 - U+2469 (   10 chars) use CP 936 conversion.
            0x000A, // U+246A - U+2473 (   10 chars) are GB18030 81 37 8C 36 - 81 37 8D 35 (offset 22E8 - 22F1)
            0x8028, // U+2474 - U+249B (   40 chars) use CP 936 conversion.
            0x0064, // U+249C - U+24FF (  100 chars) are GB18030 81 37 8D 36 - 81 37 97 35 (offset 22F2 - 2355)
            0x804C, // U+2500 - U+254B (   76 chars) use CP 936 conversion.
            0x0004, // U+254C - U+254F (    4 chars) are GB18030 81 37 97 36 - 81 37 97 39 (offset 2356 - 2359)
            0x8024, // U+2550 - U+2573 (   36 chars) use CP 936 conversion.
            0x000D, // U+2574 - U+2580 (   13 chars) are GB18030 81 37 98 30 - 81 37 99 32 (offset 235A - 2366)
            0x800F, // U+2581 - U+258F (   15 chars) use CP 936 conversion.
            0x0003, // U+2590 - U+2592 (    3 chars) are GB18030 81 37 99 33 - 81 37 99 35 (offset 2367 - 2369)
            0x8003, // U+2593 - U+2595 (    3 chars) use CP 936 conversion.
            0x000A, // U+2596 - U+259F (   10 chars) are GB18030 81 37 99 36 - 81 37 9A 35 (offset 236A - 2373)
            0x8002, // U+25A0 - U+25A1 (    2 chars) use CP 936 conversion.
            0x0010, // U+25A2 - U+25B1 (   16 chars) are GB18030 81 37 9A 36 - 81 37 9C 31 (offset 2374 - 2383)
            0x8002, // U+25B2 - U+25B3 (    2 chars) use CP 936 conversion.
            0x0008, // U+25B4 - U+25BB (    8 chars) are GB18030 81 37 9C 32 - 81 37 9C 39 (offset 2384 - 238B)
            0x8002, // U+25BC - U+25BD (    2 chars) use CP 936 conversion.
            0x0008, // U+25BE - U+25C5 (    8 chars) are GB18030 81 37 9D 30 - 81 37 9D 37 (offset 238C - 2393)
            0x8002, // U+25C6 - U+25C7 (    2 chars) use CP 936 conversion.
            0x0003, // U+25C8 - U+25CA (    3 chars) are GB18030 81 37 9D 38 - 81 37 9E 30 (offset 2394 - 2396)
            0x8001, // U+25CB - U+25CB (    1 chars) use CP 936 conversion.
            0x0002, // U+25CC - U+25CD (    2 chars) are GB18030 81 37 9E 31 - 81 37 9E 32 (offset 2397 - 2398)
            0x8002, // U+25CE - U+25CF (    2 chars) use CP 936 conversion.
            0x0012, // U+25D0 - U+25E1 (   18 chars) are GB18030 81 37 9E 33 - 81 37 A0 30 (offset 2399 - 23AA)
            0x8004, // U+25E2 - U+25E5 (    4 chars) use CP 936 conversion.
            0x001F, // U+25E6 - U+2604 (   31 chars) are GB18030 81 37 A0 31 - 81 37 A3 31 (offset 23AB - 23C9)
            0x8002, // U+2605 - U+2606 (    2 chars) use CP 936 conversion.
            0x0002, // U+2607 - U+2608 (    2 chars) are GB18030 81 37 A3 32 - 81 37 A3 33 (offset 23CA - 23CB)
            0x8001, // U+2609 - U+2609 (    1 chars) use CP 936 conversion.
            0x0036, // U+260A - U+263F (   54 chars) are GB18030 81 37 A3 34 - 81 37 A8 37 (offset 23CC - 2401)
            0x8001, // U+2640 - U+2640 (    1 chars) use CP 936 conversion.
            0x0001, // U+2641 - U+2641 (    1 chars) are GB18030 81 37 A8 38 - 81 37 A8 38 (offset 2402 - 2402)
            0x8001, // U+2642 - U+2642 (    1 chars) use CP 936 conversion.
            0x083E, // U+2643 - U+2E80 ( 2110 chars) are GB18030 81 37 A8 39 - 81 38 FD 38 (offset 2403 - 2C40)
            0xFE50, // U+2E81 is non-936 GB18030 value FE 50.
            0x0002, // U+2E82 - U+2E83 (    2 chars) are GB18030 81 38 FD 39 - 81 38 FE 30 (offset 2C41 - 2C42)
            0xFE54, // U+2E84 is non-936 GB18030 value FE 54.
            0x0003, // U+2E85 - U+2E87 (    3 chars) are GB18030 81 38 FE 31 - 81 38 FE 33 (offset 2C43 - 2C45)
            0xFE57, // U+2E88 is non-936 GB18030 value FE 57.
            0x0002, // U+2E89 - U+2E8A (    2 chars) are GB18030 81 38 FE 34 - 81 38 FE 35 (offset 2C46 - 2C47)
            0xFE58, // U+2E8B is non-936 GB18030 value FE 58.
            0xFE5D, // U+2E8C is non-936 GB18030 value FE 5D.
            0x000A, // U+2E8D - U+2E96 (   10 chars) are GB18030 81 38 FE 36 - 81 39 81 35 (offset 2C48 - 2C51)
            0xFE5E, // U+2E97 is non-936 GB18030 value FE 5E.
            0x000F, // U+2E98 - U+2EA6 (   15 chars) are GB18030 81 39 81 36 - 81 39 83 30 (offset 2C52 - 2C60)
            0xFE6B, // U+2EA7 is non-936 GB18030 value FE 6B.
            0x0002, // U+2EA8 - U+2EA9 (    2 chars) are GB18030 81 39 83 31 - 81 39 83 32 (offset 2C61 - 2C62)
            0xFE6E, // U+2EAA is non-936 GB18030 value FE 6E.
            0x0003, // U+2EAB - U+2EAD (    3 chars) are GB18030 81 39 83 33 - 81 39 83 35 (offset 2C63 - 2C65)
            0xFE71, // U+2EAE is non-936 GB18030 value FE 71.
            0x0004, // U+2EAF - U+2EB2 (    4 chars) are GB18030 81 39 83 36 - 81 39 83 39 (offset 2C66 - 2C69)
            0xFE73, // U+2EB3 is non-936 GB18030 value FE 73.
            0x0002, // U+2EB4 - U+2EB5 (    2 chars) are GB18030 81 39 84 30 - 81 39 84 31 (offset 2C6A - 2C6B)
            0xFE74, // U+2EB6 is non-936 GB18030 value FE 74.
            0xFE75, // U+2EB7 is non-936 GB18030 value FE 75.
            0x0003, // U+2EB8 - U+2EBA (    3 chars) are GB18030 81 39 84 32 - 81 39 84 34 (offset 2C6C - 2C6E)
            0xFE79, // U+2EBB is non-936 GB18030 value FE 79.
            0x000E, // U+2EBC - U+2EC9 (   14 chars) are GB18030 81 39 84 35 - 81 39 85 38 (offset 2C6F - 2C7C)
            0xFE84, // U+2ECA is non-936 GB18030 value FE 84.
            0x0125, // U+2ECB - U+2FEF (  293 chars) are GB18030 81 39 85 39 - 81 39 A3 31 (offset 2C7D - 2DA1)
            0xA98A, // U+2FF0 is non-936 GB18030 value A9 8A.
            0xA98B, // U+2FF1 is non-936 GB18030 value A9 8B.
            0xA98C, // U+2FF2 is non-936 GB18030 value A9 8C.
            0xA98D, // U+2FF3 is non-936 GB18030 value A9 8D.
            0xA98E, // U+2FF4 is non-936 GB18030 value A9 8E.
            0xA98F, // U+2FF5 is non-936 GB18030 value A9 8F.
            0xA990, // U+2FF6 is non-936 GB18030 value A9 90.
            0xA991, // U+2FF7 is non-936 GB18030 value A9 91.
            0xA992, // U+2FF8 is non-936 GB18030 value A9 92.
            0xA993, // U+2FF9 is non-936 GB18030 value A9 93.
            0xA994, // U+2FFA is non-936 GB18030 value A9 94.
            0xA995, // U+2FFB is non-936 GB18030 value A9 95.
            0x0004, // U+2FFC - U+2FFF (    4 chars) are GB18030 81 39 A3 32 - 81 39 A3 35 (offset 2DA2 - 2DA5)
            0x8004, // U+3000 - U+3003 (    4 chars) use CP 936 conversion.
            0x0001, // U+3004 - U+3004 (    1 chars) are GB18030 81 39 A3 36 - 81 39 A3 36 (offset 2DA6 - 2DA6)
            0x8013, // U+3005 - U+3017 (   19 chars) use CP 936 conversion.
            0x0005, // U+3018 - U+301C (    5 chars) are GB18030 81 39 A3 37 - 81 39 A4 31 (offset 2DA7 - 2DAB)
            0x8002, // U+301D - U+301E (    2 chars) use CP 936 conversion.
            0x0002, // U+301F - U+3020 (    2 chars) are GB18030 81 39 A4 32 - 81 39 A4 33 (offset 2DAC - 2DAD)
            0x8009, // U+3021 - U+3029 (    9 chars) use CP 936 conversion.
            0x0014, // U+302A - U+303D (   20 chars) are GB18030 81 39 A4 34 - 81 39 A6 33 (offset 2DAE - 2DC1)
            0xA989, // U+303E is non-936 GB18030 value A9 89.
            0x0002, // U+303F - U+3040 (    2 chars) are GB18030 81 39 A6 34 - 81 39 A6 35 (offset 2DC2 - 2DC3)
            0x8053, // U+3041 - U+3093 (   83 chars) use CP 936 conversion.
            0x0007, // U+3094 - U+309A (    7 chars) are GB18030 81 39 A6 36 - 81 39 A7 32 (offset 2DC4 - 2DCA)
            0x8004, // U+309B - U+309E (    4 chars) use CP 936 conversion.
            0x0002, // U+309F - U+30A0 (    2 chars) are GB18030 81 39 A7 33 - 81 39 A7 34 (offset 2DCB - 2DCC)
            0x8056, // U+30A1 - U+30F6 (   86 chars) use CP 936 conversion.
            0x0005, // U+30F7 - U+30FB (    5 chars) are GB18030 81 39 A7 35 - 81 39 A7 39 (offset 2DCD - 2DD1)
            0x8003, // U+30FC - U+30FE (    3 chars) use CP 936 conversion.
            0x0006, // U+30FF - U+3104 (    6 chars) are GB18030 81 39 A8 30 - 81 39 A8 35 (offset 2DD2 - 2DD7)
            0x8025, // U+3105 - U+3129 (   37 chars) use CP 936 conversion.
            0x00F6, // U+312A - U+321F (  246 chars) are GB18030 81 39 A8 36 - 81 39 C1 31 (offset 2DD8 - 2ECD)
            0x800A, // U+3220 - U+3229 (   10 chars) use CP 936 conversion.
            0x0007, // U+322A - U+3230 (    7 chars) are GB18030 81 39 C1 32 - 81 39 C1 38 (offset 2ECE - 2ED4)
            0x8001, // U+3231 - U+3231 (    1 chars) use CP 936 conversion.
            0x0071, // U+3232 - U+32A2 (  113 chars) are GB18030 81 39 C1 39 - 81 39 CD 31 (offset 2ED5 - 2F45)
            0x8001, // U+32A3 - U+32A3 (    1 chars) use CP 936 conversion.
            0x00EA, // U+32A4 - U+338D (  234 chars) are GB18030 81 39 CD 32 - 81 39 E4 35 (offset 2F46 - 302F)
            0x8002, // U+338E - U+338F (    2 chars) use CP 936 conversion.
            0x000C, // U+3390 - U+339B (   12 chars) are GB18030 81 39 E4 36 - 81 39 E5 37 (offset 3030 - 303B)
            0x8003, // U+339C - U+339E (    3 chars) use CP 936 conversion.
            0x0002, // U+339F - U+33A0 (    2 chars) are GB18030 81 39 E5 38 - 81 39 E5 39 (offset 303C - 303D)
            0x8001, // U+33A1 - U+33A1 (    1 chars) use CP 936 conversion.
            0x0022, // U+33A2 - U+33C3 (   34 chars) are GB18030 81 39 E6 30 - 81 39 E9 33 (offset 303E - 305F)
            0x8001, // U+33C4 - U+33C4 (    1 chars) use CP 936 conversion.
            0x0009, // U+33C5 - U+33CD (    9 chars) are GB18030 81 39 E9 34 - 81 39 EA 32 (offset 3060 - 3068)
            0x8001, // U+33CE - U+33CE (    1 chars) use CP 936 conversion.
            0x0002, // U+33CF - U+33D0 (    2 chars) are GB18030 81 39 EA 33 - 81 39 EA 34 (offset 3069 - 306A)
            0x8002, // U+33D1 - U+33D2 (    2 chars) use CP 936 conversion.
            0x0002, // U+33D3 - U+33D4 (    2 chars) are GB18030 81 39 EA 35 - 81 39 EA 36 (offset 306B - 306C)
            0x8001, // U+33D5 - U+33D5 (    1 chars) use CP 936 conversion.
            0x0071, // U+33D6 - U+3446 (  113 chars) are GB18030 81 39 EA 37 - 81 39 F5 39 (offset 306D - 30DD)
            0xFE56, // U+3447 is non-936 GB18030 value FE 56.
            0x002B, // U+3448 - U+3472 (   43 chars) are GB18030 81 39 F6 30 - 81 39 FA 32 (offset 30DE - 3108)
            0xFE55, // U+3473 is non-936 GB18030 value FE 55.
            0x012A, // U+3474 - U+359D (  298 chars) are GB18030 81 39 FA 33 - 82 30 9A 30 (offset 3109 - 3232)
            0xFE5A, // U+359E is non-936 GB18030 value FE 5A.
            0x006F, // U+359F - U+360D (  111 chars) are GB18030 82 30 9A 31 - 82 30 A5 31 (offset 3233 - 32A1)
            0xFE5C, // U+360E is non-936 GB18030 value FE 5C.
            0x000B, // U+360F - U+3619 (   11 chars) are GB18030 82 30 A5 32 - 82 30 A6 32 (offset 32A2 - 32AC)
            0xFE5B, // U+361A is non-936 GB18030 value FE 5B.
            0x02FD, // U+361B - U+3917 (  765 chars) are GB18030 82 30 A6 33 - 82 30 F2 37 (offset 32AD - 35A9)
            0xFE60, // U+3918 is non-936 GB18030 value FE 60.
            0x0055, // U+3919 - U+396D (   85 chars) are GB18030 82 30 F2 38 - 82 30 FB 32 (offset 35AA - 35FE)
            0xFE5F, // U+396E is non-936 GB18030 value FE 5F.
            0x0060, // U+396F - U+39CE (   96 chars) are GB18030 82 30 FB 33 - 82 31 86 38 (offset 35FF - 365E)
            0xFE62, // U+39CF is non-936 GB18030 value FE 62.
            0xFE65, // U+39D0 is non-936 GB18030 value FE 65.
            0x000E, // U+39D1 - U+39DE (   14 chars) are GB18030 82 31 86 39 - 82 31 88 32 (offset 365F - 366C)
            0xFE63, // U+39DF is non-936 GB18030 value FE 63.
            0x0093, // U+39E0 - U+3A72 (  147 chars) are GB18030 82 31 88 33 - 82 31 96 39 (offset 366D - 36FF)
            0xFE64, // U+3A73 is non-936 GB18030 value FE 64.
            0x00DA, // U+3A74 - U+3B4D (  218 chars) are GB18030 82 31 97 30 - 82 31 AC 37 (offset 3700 - 37D9)
            0xFE68, // U+3B4E is non-936 GB18030 value FE 68.
            0x011F, // U+3B4F - U+3C6D (  287 chars) are GB18030 82 31 AC 38 - 82 31 C9 34 (offset 37DA - 38F8)
            0xFE69, // U+3C6E is non-936 GB18030 value FE 69.
            0x0071, // U+3C6F - U+3CDF (  113 chars) are GB18030 82 31 C9 35 - 82 31 D4 37 (offset 38F9 - 3969)
            0xFE6A, // U+3CE0 is non-936 GB18030 value FE 6A.
            0x0375, // U+3CE1 - U+4055 (  885 chars) are GB18030 82 31 D4 38 - 82 32 AF 32 (offset 396A - 3CDE)
            0xFE6F, // U+4056 is non-936 GB18030 value FE 6F.
            0x0108, // U+4057 - U+415E (  264 chars) are GB18030 82 32 AF 33 - 82 32 C9 36 (offset 3CDF - 3DE6)
            0xFE70, // U+415F is non-936 GB18030 value FE 70.
            0x01D7, // U+4160 - U+4336 (  471 chars) are GB18030 82 32 C9 37 - 82 32 F8 37 (offset 3DE7 - 3FBD)
            0xFE72, // U+4337 is non-936 GB18030 value FE 72.
            0x0074, // U+4338 - U+43AB (  116 chars) are GB18030 82 32 F8 38 - 82 33 86 33 (offset 3FBE - 4031)
            0xFE78, // U+43AC is non-936 GB18030 value FE 78.
            0x0004, // U+43AD - U+43B0 (    4 chars) are GB18030 82 33 86 34 - 82 33 86 37 (offset 4032 - 4035)
            0xFE77, // U+43B1 is non-936 GB18030 value FE 77.
            0x002B, // U+43B2 - U+43DC (   43 chars) are GB18030 82 33 86 38 - 82 33 8B 30 (offset 4036 - 4060)
            0xFE7A, // U+43DD is non-936 GB18030 value FE 7A.
            0x00F8, // U+43DE - U+44D5 (  248 chars) are GB18030 82 33 8B 31 - 82 33 A3 38 (offset 4061 - 4158)
            0xFE7B, // U+44D6 is non-936 GB18030 value FE 7B.
            0x0175, // U+44D7 - U+464B (  373 chars) are GB18030 82 33 A3 39 - 82 33 C9 31 (offset 4159 - 42CD)
            0xFE7D, // U+464C is non-936 GB18030 value FE 7D.
            0x0014, // U+464D - U+4660 (   20 chars) are GB18030 82 33 C9 32 - 82 33 CB 31 (offset 42CE - 42E1)
            0xFE7C, // U+4661 is non-936 GB18030 value FE 7C.
            0x00C1, // U+4662 - U+4722 (  193 chars) are GB18030 82 33 CB 32 - 82 33 DE 34 (offset 42E2 - 43A2)
            0xFE80, // U+4723 is non-936 GB18030 value FE 80.
            0x0005, // U+4724 - U+4728 (    5 chars) are GB18030 82 33 DE 35 - 82 33 DE 39 (offset 43A3 - 43A7)
            0xFE81, // U+4729 is non-936 GB18030 value FE 81.
            0x0052, // U+472A - U+477B (   82 chars) are GB18030 82 33 DF 30 - 82 33 E7 31 (offset 43A8 - 43F9)
            0xFE82, // U+477C is non-936 GB18030 value FE 82.
            0x0010, // U+477D - U+478C (   16 chars) are GB18030 82 33 E7 32 - 82 33 E8 37 (offset 43FA - 4409)
            0xFE83, // U+478D is non-936 GB18030 value FE 83.
            0x01B9, // U+478E - U+4946 (  441 chars) are GB18030 82 33 E8 38 - 82 34 96 38 (offset 440A - 45C2)
            0xFE85, // U+4947 is non-936 GB18030 value FE 85.
            0x0032, // U+4948 - U+4979 (   50 chars) are GB18030 82 34 96 39 - 82 34 9B 38 (offset 45C3 - 45F4)
            0xFE86, // U+497A is non-936 GB18030 value FE 86.
            0x0002, // U+497B - U+497C (    2 chars) are GB18030 82 34 9B 39 - 82 34 9C 30 (offset 45F5 - 45F6)
            0xFE87, // U+497D is non-936 GB18030 value FE 87.
            0x0004, // U+497E - U+4981 (    4 chars) are GB18030 82 34 9C 31 - 82 34 9C 34 (offset 45F7 - 45FA)
            0xFE88, // U+4982 is non-936 GB18030 value FE 88.
            0xFE89, // U+4983 is non-936 GB18030 value FE 89.
            0x0001, // U+4984 - U+4984 (    1 chars) are GB18030 82 34 9C 35 - 82 34 9C 35 (offset 45FB - 45FB)
            0xFE8A, // U+4985 is non-936 GB18030 value FE 8A.
            0xFE8B, // U+4986 is non-936 GB18030 value FE 8B.
            0x0014, // U+4987 - U+499A (   20 chars) are GB18030 82 34 9C 36 - 82 34 9E 35 (offset 45FC - 460F)
            0xFE8D, // U+499B is non-936 GB18030 value FE 8D.
            0x0003, // U+499C - U+499E (    3 chars) are GB18030 82 34 9E 36 - 82 34 9E 38 (offset 4610 - 4612)
            0xFE8C, // U+499F is non-936 GB18030 value FE 8C.
            0x0016, // U+49A0 - U+49B5 (   22 chars) are GB18030 82 34 9E 39 - 82 34 A1 30 (offset 4613 - 4628)
            0xFE8F, // U+49B6 is non-936 GB18030 value FE 8F.
            0xFE8E, // U+49B7 is non-936 GB18030 value FE 8E.
            0x02BF, // U+49B8 - U+4C76 (  703 chars) are GB18030 82 34 A1 31 - 82 34 E7 33 (offset 4629 - 48E7)
            0xFE96, // U+4C77 is non-936 GB18030 value FE 96.
            0x0027, // U+4C78 - U+4C9E (   39 chars) are GB18030 82 34 E7 34 - 82 34 EB 32 (offset 48E8 - 490E)
            0xFE93, // U+4C9F is non-936 GB18030 value FE 93.
            0xFE94, // U+4CA0 is non-936 GB18030 value FE 94.
            0xFE95, // U+4CA1 is non-936 GB18030 value FE 95.
            0xFE97, // U+4CA2 is non-936 GB18030 value FE 97.
            0xFE92, // U+4CA3 is non-936 GB18030 value FE 92.
            0x006F, // U+4CA4 - U+4D12 (  111 chars) are GB18030 82 34 EB 33 - 82 34 F6 33 (offset 490F - 497D)
            0xFE98, // U+4D13 is non-936 GB18030 value FE 98.
            0xFE99, // U+4D14 is non-936 GB18030 value FE 99.
            0xFE9A, // U+4D15 is non-936 GB18030 value FE 9A.
            0xFE9B, // U+4D16 is non-936 GB18030 value FE 9B.
            0xFE9C, // U+4D17 is non-936 GB18030 value FE 9C.
            0xFE9D, // U+4D18 is non-936 GB18030 value FE 9D.
            0xFE9E, // U+4D19 is non-936 GB18030 value FE 9E.
            0x0094, // U+4D1A - U+4DAD (  148 chars) are GB18030 82 34 F6 34 - 82 35 87 31 (offset 497E - 4A11)
            0xFE9F, // U+4DAE is non-936 GB18030 value FE 9F.
            0x0051, // U+4DAF - U+4DFF (   81 chars) are GB18030 82 35 87 32 - 82 35 8F 32 (offset 4A12 - 4A62)
            0xD1A6, // U+4E00 - U+9FA5 (20902 chars) use CP 936 conversion.
            0x385A, // U+9FA6 - U+D7FF (14426 chars) are GB18030 82 35 8F 33 - 83 36 C7 38 (offset 4A63 - 82BC)
            0x8F6C, // U+D800 - U+E76B ( 3948 chars) use CP 936 conversion.
            0x0001, // U+E76C - U+E76C (    1 chars) are GB18030 83 36 C7 39 - 83 36 C7 39 (offset 82BD - 82BD)
            0x805B, // U+E76D - U+E7C7 (   91 chars) use CP 936 conversion.
            0x0001, // U+E7C8 - U+E7C8 (    1 chars) are GB18030 83 36 C8 30 - 83 36 C8 30 (offset 82BE - 82BE)
            0x801E, // U+E7C9 - U+E7E6 (   30 chars) use CP 936 conversion.
            0x000D, // U+E7E7 - U+E7F3 (   13 chars) are GB18030 83 36 C8 31 - 83 36 C9 33 (offset 82BF - 82CB)
            0x8021, // U+E7F4 - U+E814 (   33 chars) use CP 936 conversion.
            0x0001, // U+E815 - U+E815 (    1 chars) are GB18030 83 36 C9 34 - 83 36 C9 34 (offset 82CC - 82CC)
            0x8003, // U+E816 - U+E818 (    3 chars) use CP 936 conversion.
            0x0005, // U+E819 - U+E81D (    5 chars) are GB18030 83 36 C9 35 - 83 36 C9 39 (offset 82CD - 82D1)
            0x8001, // U+E81E - U+E81E (    1 chars) use CP 936 conversion.
            0x0007, // U+E81F - U+E825 (    7 chars) are GB18030 83 36 CA 30 - 83 36 CA 36 (offset 82D2 - 82D8)
            0x8001, // U+E826 - U+E826 (    1 chars) use CP 936 conversion.
            0x0004, // U+E827 - U+E82A (    4 chars) are GB18030 83 36 CA 37 - 83 36 CB 30 (offset 82D9 - 82DC)
            0x8002, // U+E82B - U+E82C (    2 chars) use CP 936 conversion.
            0x0004, // U+E82D - U+E830 (    4 chars) are GB18030 83 36 CB 31 - 83 36 CB 34 (offset 82DD - 82E0)
            0x8002, // U+E831 - U+E832 (    2 chars) use CP 936 conversion.
            0x0008, // U+E833 - U+E83A (    8 chars) are GB18030 83 36 CB 35 - 83 36 CC 32 (offset 82E1 - 82E8)
            0x8001, // U+E83B - U+E83B (    1 chars) use CP 936 conversion.
            0x0007, // U+E83C - U+E842 (    7 chars) are GB18030 83 36 CC 33 - 83 36 CC 39 (offset 82E9 - 82EF)
            0x8001, // U+E843 - U+E843 (    1 chars) use CP 936 conversion.
            0x0010, // U+E844 - U+E853 (   16 chars) are GB18030 83 36 CD 30 - 83 36 CE 35 (offset 82F0 - 82FF)
            0x8002, // U+E854 - U+E855 (    2 chars) use CP 936 conversion.
            0x000E, // U+E856 - U+E863 (   14 chars) are GB18030 83 36 CE 36 - 83 36 CF 39 (offset 8300 - 830D)
            0x8001, // U+E864 - U+E864 (    1 chars) use CP 936 conversion.
            0x10C7, // U+E865 - U+F92B ( 4295 chars) are GB18030 83 36 D0 30 - 84 30 85 34 (offset 830E - 93D4)
            0x8001, // U+F92C - U+F92C (    1 chars) use CP 936 conversion.
            0x004C, // U+F92D - U+F978 (   76 chars) are GB18030 84 30 85 35 - 84 30 8D 30 (offset 93D5 - 9420)
            0x8001, // U+F979 - U+F979 (    1 chars) use CP 936 conversion.
            0x001B, // U+F97A - U+F994 (   27 chars) are GB18030 84 30 8D 31 - 84 30 8F 37 (offset 9421 - 943B)
            0x8001, // U+F995 - U+F995 (    1 chars) use CP 936 conversion.
            0x0051, // U+F996 - U+F9E6 (   81 chars) are GB18030 84 30 8F 38 - 84 30 97 38 (offset 943C - 948C)
            0x8001, // U+F9E7 - U+F9E7 (    1 chars) use CP 936 conversion.
            0x0009, // U+F9E8 - U+F9F0 (    9 chars) are GB18030 84 30 97 39 - 84 30 98 37 (offset 948D - 9495)
            0x8001, // U+F9F1 - U+F9F1 (    1 chars) use CP 936 conversion.
            0x001A, // U+F9F2 - U+FA0B (   26 chars) are GB18030 84 30 98 38 - 84 30 9B 33 (offset 9496 - 94AF)
            0x8004, // U+FA0C - U+FA0F (    4 chars) use CP 936 conversion.
            0x0001, // U+FA10 - U+FA10 (    1 chars) are GB18030 84 30 9B 34 - 84 30 9B 34 (offset 94B0 - 94B0)
            0x8001, // U+FA11 - U+FA11 (    1 chars) use CP 936 conversion.
            0x0001, // U+FA12 - U+FA12 (    1 chars) are GB18030 84 30 9B 35 - 84 30 9B 35 (offset 94B1 - 94B1)
            0x8002, // U+FA13 - U+FA14 (    2 chars) use CP 936 conversion.
            0x0003, // U+FA15 - U+FA17 (    3 chars) are GB18030 84 30 9B 36 - 84 30 9B 38 (offset 94B2 - 94B4)
            0x8001, // U+FA18 - U+FA18 (    1 chars) use CP 936 conversion.
            0x0006, // U+FA19 - U+FA1E (    6 chars) are GB18030 84 30 9B 39 - 84 30 9C 34 (offset 94B5 - 94BA)
            0x8003, // U+FA1F - U+FA21 (    3 chars) use CP 936 conversion.
            0x0001, // U+FA22 - U+FA22 (    1 chars) are GB18030 84 30 9C 35 - 84 30 9C 35 (offset 94BB - 94BB)
            0x8002, // U+FA23 - U+FA24 (    2 chars) use CP 936 conversion.
            0x0002, // U+FA25 - U+FA26 (    2 chars) are GB18030 84 30 9C 36 - 84 30 9C 37 (offset 94BC - 94BD)
            0x8003, // U+FA27 - U+FA29 (    3 chars) use CP 936 conversion.
            0x0406, // U+FA2A - U+FE2F ( 1030 chars) are GB18030 84 30 9C 38 - 84 31 85 37 (offset 94BE - 98C3)
            0x8002, // U+FE30 - U+FE31 (    2 chars) use CP 936 conversion.
            0x0001, // U+FE32 - U+FE32 (    1 chars) are GB18030 84 31 85 38 - 84 31 85 38 (offset 98C4 - 98C4)
            0x8012, // U+FE33 - U+FE44 (   18 chars) use CP 936 conversion.
            0x0004, // U+FE45 - U+FE48 (    4 chars) are GB18030 84 31 85 39 - 84 31 86 32 (offset 98C5 - 98C8)
            0x800A, // U+FE49 - U+FE52 (   10 chars) use CP 936 conversion.
            0x0001, // U+FE53 - U+FE53 (    1 chars) are GB18030 84 31 86 33 - 84 31 86 33 (offset 98C9 - 98C9)
            0x8004, // U+FE54 - U+FE57 (    4 chars) use CP 936 conversion.
            0x0001, // U+FE58 - U+FE58 (    1 chars) are GB18030 84 31 86 34 - 84 31 86 34 (offset 98CA - 98CA)
            0x800E, // U+FE59 - U+FE66 (   14 chars) use CP 936 conversion.
            0x0001, // U+FE67 - U+FE67 (    1 chars) are GB18030 84 31 86 35 - 84 31 86 35 (offset 98CB - 98CB)
            0x8004, // U+FE68 - U+FE6B (    4 chars) use CP 936 conversion.
            0x0095, // U+FE6C - U+FF00 (  149 chars) are GB18030 84 31 86 36 - 84 31 95 34 (offset 98CC - 9960)
            0x805E, // U+FF01 - U+FF5E (   94 chars) use CP 936 conversion.
            0x0081, // U+FF5F - U+FFDF (  129 chars) are GB18030 84 31 95 35 - 84 31 A2 33 (offset 9961 - 99E1)
            0x8006, // U+FFE0 - U+FFE5 (    6 chars) use CP 936 conversion.
            0x001A, // U+FFE6 - U+FFFF (   26 chars) are GB18030 84 31 A2 34 - 84 31 A4 39 (offset 99E2 - 99FB)
        };
    }
}
#endif // FEATURE_CODEPAGES_FILE