1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
|
/* pr29.h --- Detect strings that are non-idempotent under NFKC in Unicode 3.2.
* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Simon
* Josefsson.
*
* This file is part of GNU Libidn.
*
* GNU Libidn is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* GNU Libidn is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GNU Libidn; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*
*/
#include <config.h>
#include "pr29.h"
/* Get stringprep_utf8_to_ucs4. */
#include <stringprep.h>
/*
* The tables used in this file was extracted by Simon Josefsson from
* pr-29.html and DerivedCombiningClass-3.2.0.txt, as published by
* Unicode Inc., for the GNU Libidn project.
*
*/
/* These are the characters with non-zero combination class, extracted
from DerivedCombiningClass-3.2.0.txt. */
static uint32_t nzcc[] = {
/* 1 # Mn [5] COMBINING TILDE OVERLAY..
* ..COMBINING LONG SOLIDUS OVERLAY */
0x0334,
0x0335,
0x0336,
0x0337,
0x0338,
/* 1 # Mn [2] COMBINING LONG VERTICAL LINE OVERLAY..
* ..COMBINING SHORT VERTICAL LINE OVERLAY */
0x20D2,
0x20D3,
/* 1 # Mn [3] COMBINING RING OVERLAY..
* ..COMBINING ANTICLOCKWISE RING OVERLAY */
0x20D8,
0x20D9,
0x20DA,
/* 1 # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..
* ..COMBINING DOUBLE VERTICAL STROKE OVERLAY */
0x20E5,
0x20E6,
/* 1 # Mn COMBINING LEFTWARDS ARROW OVERLAY */
0x20EA,
/* 1 # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..
* ..MUSICAL SYMBOL COMBINING TREMOLO-3 */
0x1D167,
0x1D168,
0x1D169,
/* 7 # Mn DEVANAGARI SIGN NUKTA */
0x093C,
/* 7 # Mn BENGALI SIGN NUKTA */
0x09BC,
/* 7 # Mn GURMUKHI SIGN NUKTA */
0x0A3C,
/* 7 # Mn GUJARATI SIGN NUKTA */
0x0ABC,
/* 7 # Mn ORIYA SIGN NUKTA */
0x0B3C,
/* 7 # Mn MYANMAR SIGN DOT BELOW */
0x1037,
/* 8 # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..
* ..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
0x3099,
0x309A,
/* 9 # Mn DEVANAGARI SIGN VIRAMA */
0x094D,
/* 9 # Mn BENGALI SIGN VIRAMA */
0x09CD,
/* 9 # Mn GURMUKHI SIGN VIRAMA */
0x0A4D,
/* 9 # Mn GUJARATI SIGN VIRAMA */
0x0ACD,
/* 9 # Mn ORIYA SIGN VIRAMA */
0x0B4D,
/* 9 # Mn TAMIL SIGN VIRAMA */
0x0BCD,
/* 9 # Mn TELUGU SIGN VIRAMA */
0x0C4D,
/* 9 # Mn KANNADA SIGN VIRAMA */
0x0CCD,
/* 9 # Mn MALAYALAM SIGN VIRAMA */
0x0D4D,
/* 9 # Mn SINHALA SIGN AL-LAKUNA */
0x0DCA,
/* 9 # Mn THAI CHARACTER PHINTHU */
0x0E3A,
/* 9 # Mn TIBETAN MARK HALANTA */
0x0F84,
/* 9 # Mn MYANMAR SIGN VIRAMA */
0x1039,
/* 9 # Mn TAGALOG SIGN VIRAMA */
0x1714,
/* 9 # Mn HANUNOO SIGN PAMUDPOD */
0x1734,
/* 9 # Mn KHMER SIGN COENG */
0x17D2,
/* 10 # Mn HEBREW POINT SHEVA */
0x05B0,
/* 11 # Mn HEBREW POINT HATAF SEGOL */
0x05B1,
/* 12 # Mn HEBREW POINT HATAF PATAH */
0x05B2,
/* 13 # Mn HEBREW POINT HATAF QAMATS */
0x05B3,
/* 14 # Mn HEBREW POINT HIRIQ */
0x05B4,
/* 15 # Mn HEBREW POINT TSERE */
0x05B5,
/* 16 # Mn HEBREW POINT SEGOL */
0x05B6,
/* 17 # Mn HEBREW POINT PATAH */
0x05B7,
/* 18 # Mn HEBREW POINT QAMATS */
0x05B8,
/* 19 # Mn HEBREW POINT HOLAM */
0x05B9,
/* 20 # Mn HEBREW POINT QUBUTS */
0x05BB,
/* 21 # Mn HEBREW POINT DAGESH OR MAPIQ */
0x05BC,
/* 22 # Mn HEBREW POINT METEG */
0x05BD,
/* 23 # Mn HEBREW POINT RAFE */
0x05BF,
/* 24 # Mn HEBREW POINT SHIN DOT */
0x05C1,
/* 25 # Mn HEBREW POINT SIN DOT */
0x05C2,
/* 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA */
0xFB1E,
/* 27 # Mn ARABIC FATHATAN */
0x064B,
/* 28 # Mn ARABIC DAMMATAN */
0x064C,
/* 29 # Mn ARABIC KASRATAN */
0x064D,
/* 30 # Mn ARABIC FATHA */
0x064E,
/* 31 # Mn ARABIC DAMMA */
0x064F,
/* 32 # Mn ARABIC KASRA */
0x0650,
/* 33 # Mn ARABIC SHADDA */
0x0651,
/* 34 # Mn ARABIC SUKUN */
0x0652,
/* 35 # Mn ARABIC LETTER SUPERSCRIPT ALEF */
0x0670,
/* 36 # Mn SYRIAC LETTER SUPERSCRIPT ALAPH */
0x0711,
/* 84 # Mn TELUGU LENGTH MARK */
0x0C55,
/* 91 # Mn TELUGU AI LENGTH MARK */
0x0C56,
/* 103 # Mn [2] THAI CHARACTER SARA U..
* ..THAI CHARACTER SARA UU */
0x0E38,
0x0E39,
/* 107 # Mn [4] THAI CHARACTER MAI EK..
* ..THAI CHARACTER MAI CHATTAWA */
0x0E48,
0x0E49,
0x0E4A,
0x04EB,
/* 118 # Mn [2] LAO VOWEL SIGN U..
* ..LAO VOWEL SIGN UU */
0x0EB8,
0x0EB9,
/* 122 # Mn [4] LAO TONE MAI EK..
* ..LAO TONE MAI CATAWA */
0x0EC8,
0x0EC9,
0x0ECA,
0x0ECB,
/* 129 # Mn TIBETAN VOWEL SIGN AA */
0x0F71,
/* 130 # Mn TIBETAN VOWEL SIGN I */
0x0F72,
/* 130 # Mn [4] TIBETAN VOWEL SIGN E..
* ..TIBETAN VOWEL SIGN OO */
0x0F7A,
0x0F7B,
0x0F7C,
0x0F7D,
/* 130 # Mn TIBETAN VOWEL SIGN REVERSED I */
0x0F80,
/* 132 # Mn TIBETAN VOWEL SIGN U */
0x0F74,
/* 202 # Mn [2] COMBINING PALATALIZED HOOK BELOW..
* ..COMBINING RETROFLEX HOOK BELOW */
0x0321,
0x0322,
/* 202 # Mn [2] COMBINING CEDILLA..
* ..COMBINING OGONEK */
0x0327,
0x0328,
/* 216 # Mn COMBINING HORN */
0x031B,
/* 216 # Mn TIBETAN MARK TSA -PHRU */
0x0F39,
/* 216 # Mc [2] MUSICAL SYMBOL COMBINING STEM..
* ..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM */
0x1D165,
0x1D166,
/* 216 # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..
* ..MUSICAL SYMBOL COMBINING FLAG-5 */
0x1D16E,
0x1D16F,
0x1D170,
0x1D171,
0x1D172,
/* 218 # Mn IDEOGRAPHIC LEVEL TONE MARK */
0x302A,
/* 220 # Mn [4] COMBINING GRAVE ACCENT BELOW..
* ..COMBINING RIGHT TACK BELOW */
0x0316,
0x0317,
0x0318,
0x0319,
/* 220 # Mn [5] COMBINING LEFT HALF RING BELOW..
* ..COMBINING MINUS SIGN BELOW */
0x031C,
0x031D,
0x031E,
0x031F,
0x0320,
/* 220 # Mn [4] COMBINING DOT BELOW..
* ..COMBINING COMMA BELOW */
0x0323,
0x0324,
0x0325,
0x0326,
/* 220 # Mn [11] COMBINING VERTICAL LINE BELOW..
* ..COMBINING DOUBLE LOW LINE */
0x0329,
0x032A,
0x032B,
0x032C,
0x032D,
0x032E,
0x032F,
0x0330,
0x0331,
0x0332,
0x0333,
/* 220 # Mn [4] COMBINING RIGHT HALF RING BELOW..
* ..COMBINING SEAGULL BELOW */
0x0339,
0x033A,
0x033B,
0x033C,
/* 220 # Mn [3] COMBINING EQUALS SIGN BELOW..
* ..COMBINING LEFT ANGLE BELOW */
0x0347,
0x0348,
0x0349,
/* 220 # Mn [2] COMBINING LEFT RIGHT ARROW BELOW..
* ..COMBINING UPWARDS ARROW BELOW */
0x034D,
0x034E,
/* 220 # Mn HEBREW ACCENT ETNAHTA */
0x0591,
/* 220 # Mn HEBREW ACCENT TIPEHA */
0x0596,
/* 220 # Mn HEBREW ACCENT TEVIR */
0x059B,
/* 220 # Mn [5] HEBREW ACCENT MUNAH..
* ..HEBREW ACCENT DARGA */
0x05A3,
0x05A4,
0x05A5,
0x05A6,
0x05A7,
/* 220 # Mn HEBREW ACCENT YERAH BEN YOMO */
0x05AA,
/* 220 # Mn ARABIC HAMZA BELOW */
0x0655,
/* 220 # Mn ARABIC SMALL LOW SEEN */
0x06E3,
/* 220 # Mn ARABIC EMPTY CENTRE LOW STOP */
0x06EA,
/* 220 # Mn ARABIC SMALL LOW MEEM */
0x06ED,
/* 220 # Mn SYRIAC PTHAHA BELOW */
0x0731,
/* 220 # Mn SYRIAC ZQAPHA BELOW */
0x0734,
/* 220 # Mn [3] SYRIAC RBASA BELOW..
* ..SYRIAC DOTTED ZLAMA ANGULAR */
0x0737,
0x0738,
0x0739,
/* 220 # Mn [2] SYRIAC HBASA BELOW..
* ..SYRIAC HBASA-ESASA DOTTED */
0x073B,
0x073C,
/* 220 # Mn SYRIAC ESASA BELOW */
0x073E,
/* 220 # Mn SYRIAC RUKKAKHA */
0x0742,
/* 220 # Mn SYRIAC TWO VERTICAL DOTS BELOW */
0x0744,
/* 220 # Mn SYRIAC THREE DOTS BELOW */
0x0746,
/* 220 # Mn SYRIAC OBLIQUE LINE BELOW */
0x0748,
/* 220 # Mn DEVANAGARI STRESS SIGN ANUDATTA */
0x0952,
/* 220 # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..
* ..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */
0x0F18,
0x0F19,
/* 220 # Mn TIBETAN MARK NGAS BZUNG NYI ZLA */
0x0F35,
/* 220 # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS */
0x0F37,
/* 220 # Mn TIBETAN SYMBOL PADMA GDAN */
0x0FC6,
/* 220 # Mn COMBINING TRIPLE UNDERDOT */
0x20E8,
/* 220 # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..
* ..MUSICAL SYMBOL COMBINING LOURE */
0x1D17B,
0x1D17C,
0x1D17D,
0x1D17E,
0x1D17F,
0x1D180,
0x1D181,
0x1D182,
/* 220 # Mn [2] MUSICAL SYMBOL COMBINING DOUBLE TONGUE..
* ..MUSICAL SYMBOL COMBINING TRIPLE TONGUE */
0x1D18A,
0x1D18B,
/* 222 # Mn HEBREW ACCENT YETIV */
0x059A,
/* 222 # Mn HEBREW ACCENT DEHI */
0x05AD,
/* 222 # Mn IDEOGRAPHIC ENTERING TONE MARK */
0x302D,
/* 224 # Mn [2] HANGUL SINGLE DOT TONE MARK..
* ..HANGUL DOUBLE DOT TONE MARK */
0x302E,
0x302F,
/* 226 # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT */
0x1D16D,
/* 228 # Mn HEBREW ACCENT ZINOR */
0x05AE,
/* 228 # Mn MONGOLIAN LETTER ALI GALI DAGALGA */
0x18A9,
/* 228 # Mn IDEOGRAPHIC RISING TONE MARK */
0x302B,
/* 230 # Mn [21] COMBINING GRAVE ACCENT..
* ..COMBINING REVERSED COMMA ABOVE */
0x0300,
0x0301,
0x0302,
0x0303,
0x0304,
0x0305,
0x0306,
0x0307,
0x0308,
0x0309,
0x030A,
0x030B,
0x030C,
0x030D,
0x030E,
0x030F,
0x0310,
0x0311,
0x0312,
0x0313,
0x0314,
/* 230 # Mn [8] COMBINING X ABOVE..
* ..COMBINING GREEK DIALYTIKA TONOS */
0x033D,
0x033E,
0x033F,
0x0340,
0x0341,
0x0342,
0x0343,
0x0344,
/* 230 # Mn COMBINING BRIDGE ABOVE */
0x0346,
/* 230 # Mn [3] COMBINING NOT TILDE ABOVE..
* ..COMBINING ALMOST EQUAL TO ABOVE */
0x034A,
0x034B,
0x034C,
/* 230 # Mn [13] COMBINING LATIN SMALL LETTER A..
* ..COMBINING LATIN SMALL LETTER X */
0x0363,
0x0364,
0x0365,
0x0366,
0x0367,
0x0368,
0x0369,
0x036A,
0x036B,
0x036C,
0x036D,
0x036E,
0x036F,
/* 230 # Mn [4] COMBINING CYRILLIC TITLO..
* ..COMBINING CYRILLIC PSILI PNEUMATA */
0x0483,
0x0484,
0x0485,
0x0486,
/* 230 # Mn [4] HEBREW ACCENT SEGOL..
* ..HEBREW ACCENT ZAQEF GADOL */
0x0592,
0x0593,
0x0594,
0x0595,
/* 230 # Mn [3] HEBREW ACCENT REVIA..
* ..HEBREW ACCENT PASHTA */
0x0597,
0x0598,
0x0599,
/* 230 # Mn [6] HEBREW ACCENT GERESH..
* ..HEBREW ACCENT PAZER */
0x059C,
0x059D,
0x059E,
0x059F,
0x05A0,
0x05A1,
/* 230 # Mn [2] HEBREW ACCENT QADMA..
* ..HEBREW ACCENT TELISHA QETANA */
0x05A8,
0x05A9,
/* 230 # Mn [2] HEBREW ACCENT OLE..
* ..HEBREW ACCENT ILUY */
0x05AB,
0x05AC,
/* 230 # Mn HEBREW MARK MASORA CIRCLE */
0x05AF,
/* 230 # Mn HEBREW MARK UPPER DOT */
0x05C4,
/* 230 # Mn [2] ARABIC MADDAH ABOVE..
* ..ARABIC HAMZA ABOVE */
0x0653,
0x0654,
/* 230 # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..
* ..ARABIC SMALL HIGH SEEN */
0x06D6,
0x06D7,
0x06D8,
0x06D9,
0x06DA,
0x06DB,
0x06DC,
/* 230 # Mn [4] ARABIC SMALL HIGH ROUNDED ZERO..
* ..ARABIC SMALL HIGH MEEM ISOLATED FORM */
0x06DF,
0x06E0,
0x06E1,
0x06E2,
/* 230 # Mn ARABIC SMALL HIGH MADDA */
0x06E4,
/* 230 # Mn [2] ARABIC SMALL HIGH YEH..
* ..ARABIC SMALL HIGH NOON */
0x06E7,
0x06E8,
/* 230 # Mn [2] ARABIC EMPTY CENTRE HIGH STOP..
* ..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
0x06EB,
0x06EC,
/* 230 # Mn SYRIAC PTHAHA ABOVE */
0x0730,
/* 230 # Mn [2] SYRIAC PTHAHA DOTTED..
* ..SYRIAC ZQAPHA ABOVE */
0x0732,
0x0733,
/* 230 # Mn [2] SYRIAC ZQAPHA DOTTED..
* ..SYRIAC RBASA ABOVE */
0x0735,
0x0736,
/* 230 # Mn SYRIAC HBASA ABOVE */
0x073A,
/* 230 # Mn SYRIAC ESASA ABOVE */
0x073D,
/* 230 # Mn [3] SYRIAC RWAHA..
* ..SYRIAC QUSHSHAYA */
0x073F,
0x0740,
0x0741,
/* 230 # Mn SYRIAC TWO VERTICAL DOTS ABOVE */
0x0743,
/* 230 # Mn SYRIAC THREE DOTS ABOVE */
0x0745,
/* 230 # Mn SYRIAC OBLIQUE LINE ABOVE */
0x0747,
/* 230 # Mn [2] SYRIAC MUSIC..
* ..SYRIAC BARREKH */
0x0749,
0x074A,
/* 230 # Mn DEVANAGARI STRESS SIGN UDATTA */
0x0951,
/* 230 # Mn [2] DEVANAGARI GRAVE ACCENT..
* ..DEVANAGARI ACUTE ACCENT */
0x0953,
0x0954,
/* 230 # Mn [2] TIBETAN SIGN NYI ZLA NAA DA..
* ..TIBETAN SIGN SNA LDAN */
0x0F82,
0x0F83,
/* 230 # Mn [2] TIBETAN SIGN LCI RTAGS..
* ..TIBETAN SIGN YANG RTAGS */
0x0F86,
0x0F87,
/* 230 # Mn [2] COMBINING LEFT HARPOON ABOVE..
* ..COMBINING RIGHT HARPOON ABOVE */
0x20D0,
0x20D1,
/* 230 # Mn [4] COMBINING ANTICLOCKWISE ARROW ABOVE..
* ..COMBINING RIGHT ARROW ABOVE */
0x20D4,
0x20D5,
0x20D6,
0x20D7,
/* 230 # Mn [2] COMBINING THREE DOTS ABOVE..
* ..COMBINING FOUR DOTS ABOVE */
0x20DB,
0x20DC,
/* 230 # Mn COMBINING LEFT RIGHT ARROW ABOVE */
0x20E1,
/* 230 # Mn COMBINING ANNUITY SYMBOL */
0x20E7,
/* 230 # Mn COMBINING WIDE BRIDGE ABOVE */
0x20E9,
/* 230 # Mn [4] COMBINING LIGATURE LEFT HALF..
* ..COMBINING DOUBLE TILDE RIGHT HALF */
0xFE20,
0xFE21,
0xFE22,
0xFE23,
/* 230 # Mn [5] MUSICAL SYMBOL COMBINING DOIT..
* ..MUSICAL SYMBOL COMBINING BEND */
0x1D185,
0x1D186,
0x1D187,
0x1D188,
0x1D189,
/* 230 # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..
* ..MUSICAL SYMBOL COMBINING SNAP PIZZICATO */
0x1D1AA,
0x1D1AB,
0x1D1AC,
0x1D1AD,
/* 232 # Mn COMBINING COMMA ABOVE RIGHT */
0x0315,
/* 232 # Mn COMBINING LEFT ANGLE ABOVE */
0x031A,
/* 232 # Mn IDEOGRAPHIC DEPARTING TONE MARK */
0x302C,
/* 233 # Mn COMBINING DOUBLE RIGHTWARDS ARROW BELOW */
0x0362,
/* 234 # Mn [2] COMBINING DOUBLE TILDE..
* ..COMBINING DOUBLE INVERTED BREVE */
0x0360,
0x0361,
/* 240 # Mn COMBINING GREEK YPOGEGRAMMENI */
0x0345,
0
};
/*
* 09C7 BENGALI VOWEL SIGN E 09BE BENGALI VOWEL SIGN AA or
* 09D7 BENGALI AU LENGTH MARK
*/
static const uint32_t pr29_1_1[] = {
0x09C7, 0
};
static const uint32_t pr29_1_2[] = {
0x09BE, 0x09D7, 0
};
/*
* 0B47 ORIYA VOWEL SIGN E 0B3E ORIYA VOWEL SIGN AA or
* 0B56 ORIYA AI LENGTH MARK or
* 0B57 ORIYA AU LENGTH MARK
*/
static const uint32_t pr29_2_1[] = {
0x0B47, 0
};
static const uint32_t pr29_2_2[] = {
0x0B3E, 0x0B56, 0x0B57, 0
};
/*
* 0BC6 TAMIL VOWEL SIGN E 0BBE TAMIL VOWEL SIGN AA or
* 0BD7 TAMIL AU LENGTH MARK
*/
static const uint32_t pr29_3_1[] = {
0x0BC6, 0
};
static const uint32_t pr29_3_2[] = {
0x0BBE, 0x0BD7, 0
};
/*
* 0BC7 TAMIL VOWEL SIGN EE 0BBE TAMIL VOWEL SIGN AA
*/
static const uint32_t pr29_4_1[] = {
0x0BC7, 0
};
static const uint32_t pr29_4_2[] = {
0x0BBE, 0
};
/*
* 0B92 TAMIL LETTER O 0BD7 TAMIL AU LENGTH MARK
*/
static const uint32_t pr29_5_1[] = {
0x0B92, 0
};
static const uint32_t pr29_5_2[] = {
0x0BD7, 0
};
/*
* 0CC6 KANNADA VOWEL SIGN E 0CC2 KANNADA VOWEL SIGN UU or
* 0CD5 KANNADA LENGTH MARK or
* 0CD6 KANNADA AI LENGTH MARK
*/
static const uint32_t pr29_6_1[] = {
0x0CC6, 0
};
static const uint32_t pr29_6_2[] = {
0x0CC2, 0xCD5, 0xCD6, 0
};
/*
* 0CBF KANNADA VOWEL SIGN I or
* 0CCA KANNADA VOWEL SIGN O 0CD5 KANNADA LENGTH MARK
*/
static const uint32_t pr29_7_1[] = {
0x0CBF, 0xCCA, 0
};
static const uint32_t pr29_7_2[] = {
0x0CD5, 0
};
/*
* 0D47 MALAYALAM VOWEL SIGN EE 0D3E MALAYALAM VOWEL SIGN AA
*/
static const uint32_t pr29_8_1[] = {
0x0D47, 0
};
static const uint32_t pr29_8_2[] = {
0x0D3E, 0
};
/*
* 0D46 MALAYALAM VOWEL SIGN E 0D3E MALAYALAM VOWEL SIGN AA or
* 0D57 MALAYALAM AU LENGTH MARK
*/
static const uint32_t pr29_9_1[] = {
0x0D46, 0
};
static const uint32_t pr29_9_2[] = {
0x0D3E, 0x0D57, 0
};
/*
* 1025 MYANMAR LETTER U 102E MYANMAR VOWEL SIGN II
*/
static const uint32_t pr29_10_1[] = {
0x1025, 0
};
static const uint32_t pr29_10_2[] = {
0x102E, 0
};
/*
* 0DD9 SINHALA VOWEL SIGN KOMBUVA 0DCF SINHALA VOWEL SIGN AELA-PILLA or
* 0DDF SINHALA VOWEL SIGN GAYANUKITTA
*/
static const uint32_t pr29_11_1[] = {
0x0DD9, 0
};
static const uint32_t pr29_11_2[] = {
0x0DCF, 0x0DDF, 0
};
/*
* 1100..1112 HANGUL CHOSEONG KIYEOK..HIEUH [19 instances]
* 1161..1175 HANGUL JUNGSEONG A..I [21 instances]
*/
static const uint32_t pr29_12_1[] = {
0x1100, 0x1101, 0x1102, 0x1103, 0x1104, 0x1105, 0x1106, 0x1107,
0x1108, 0x1109, 0x110A, 0x110B, 0x110C, 0x110D, 0x110E, 0x110F,
0x1110, 0x1111, 0x1112, 0
};
static const uint32_t pr29_12_2[] = {
0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167, 0x1168,
0x1169, 0x116A, 0x116B, 0x116C, 0x116D, 0x116E, 0x116F, 0x1170,
0x1171, 0x1172, 0x1173, 0x1174, 0x1175, 0
};
/*
* [:HangulSyllableType=LV:]
* 11A8..11C2 HANGUL JONGSEONG KIYEOK..HIEUH [27 instances]
*/
static const uint32_t pr29_13_1[] = {
0xAC00, /* LV # Lo HANGUL SYLLABLE GA */
0xAC1C, /* LV # Lo HANGUL SYLLABLE GAE */
0xAC38, /* LV # Lo HANGUL SYLLABLE GYA */
0xAC54, /* LV # Lo HANGUL SYLLABLE GYAE */
0xAC70, /* LV # Lo HANGUL SYLLABLE GEO */
0xAC8C, /* LV # Lo HANGUL SYLLABLE GE */
0xACA8, /* LV # Lo HANGUL SYLLABLE GYEO */
0xACC4, /* LV # Lo HANGUL SYLLABLE GYE */
0xACE0, /* LV # Lo HANGUL SYLLABLE GO */
0xACFC, /* LV # Lo HANGUL SYLLABLE GWA */
0xAD18, /* LV # Lo HANGUL SYLLABLE GWAE */
0xAD34, /* LV # Lo HANGUL SYLLABLE GOE */
0xAD50, /* LV # Lo HANGUL SYLLABLE GYO */
0xAD6C, /* LV # Lo HANGUL SYLLABLE GU */
0xAD88, /* LV # Lo HANGUL SYLLABLE GWEO */
0xADA4, /* LV # Lo HANGUL SYLLABLE GWE */
0xADC0, /* LV # Lo HANGUL SYLLABLE GWI */
0xADDC, /* LV # Lo HANGUL SYLLABLE GYU */
0xADF8, /* LV # Lo HANGUL SYLLABLE GEU */
0xAE14, /* LV # Lo HANGUL SYLLABLE GYI */
0xAE30, /* LV # Lo HANGUL SYLLABLE GI */
0xAE4C, /* LV # Lo HANGUL SYLLABLE GGA */
0xAE68, /* LV # Lo HANGUL SYLLABLE GGAE */
0xAE84, /* LV # Lo HANGUL SYLLABLE GGYA */
0xAEA0, /* LV # Lo HANGUL SYLLABLE GGYAE */
0xAEBC, /* LV # Lo HANGUL SYLLABLE GGEO */
0xAED8, /* LV # Lo HANGUL SYLLABLE GGE */
0xAEF4, /* LV # Lo HANGUL SYLLABLE GGYEO */
0xAF10, /* LV # Lo HANGUL SYLLABLE GGYE */
0xAF2C, /* LV # Lo HANGUL SYLLABLE GGO */
0xAF48, /* LV # Lo HANGUL SYLLABLE GGWA */
0xAF64, /* LV # Lo HANGUL SYLLABLE GGWAE */
0xAF80, /* LV # Lo HANGUL SYLLABLE GGOE */
0xAF9C, /* LV # Lo HANGUL SYLLABLE GGYO */
0xAFB8, /* LV # Lo HANGUL SYLLABLE GGU */
0xAFD4, /* LV # Lo HANGUL SYLLABLE GGWEO */
0xAFF0, /* LV # Lo HANGUL SYLLABLE GGWE */
0xB00C, /* LV # Lo HANGUL SYLLABLE GGWI */
0xB028, /* LV # Lo HANGUL SYLLABLE GGYU */
0xB044, /* LV # Lo HANGUL SYLLABLE GGEU */
0xB060, /* LV # Lo HANGUL SYLLABLE GGYI */
0xB07C, /* LV # Lo HANGUL SYLLABLE GGI */
0xB098, /* LV # Lo HANGUL SYLLABLE NA */
0xB0B4, /* LV # Lo HANGUL SYLLABLE NAE */
0xB0D0, /* LV # Lo HANGUL SYLLABLE NYA */
0xB0EC, /* LV # Lo HANGUL SYLLABLE NYAE */
0xB108, /* LV # Lo HANGUL SYLLABLE NEO */
0xB124, /* LV # Lo HANGUL SYLLABLE NE */
0xB140, /* LV # Lo HANGUL SYLLABLE NYEO */
0xB15C, /* LV # Lo HANGUL SYLLABLE NYE */
0xB178, /* LV # Lo HANGUL SYLLABLE NO */
0xB194, /* LV # Lo HANGUL SYLLABLE NWA */
0xB1B0, /* LV # Lo HANGUL SYLLABLE NWAE */
0xB1CC, /* LV # Lo HANGUL SYLLABLE NOE */
0xB1E8, /* LV # Lo HANGUL SYLLABLE NYO */
0xB204, /* LV # Lo HANGUL SYLLABLE NU */
0xB220, /* LV # Lo HANGUL SYLLABLE NWEO */
0xB23C, /* LV # Lo HANGUL SYLLABLE NWE */
0xB258, /* LV # Lo HANGUL SYLLABLE NWI */
0xB274, /* LV # Lo HANGUL SYLLABLE NYU */
0xB290, /* LV # Lo HANGUL SYLLABLE NEU */
0xB2AC, /* LV # Lo HANGUL SYLLABLE NYI */
0xB2C8, /* LV # Lo HANGUL SYLLABLE NI */
0xB2E4, /* LV # Lo HANGUL SYLLABLE DA */
0xB300, /* LV # Lo HANGUL SYLLABLE DAE */
0xB31C, /* LV # Lo HANGUL SYLLABLE DYA */
0xB338, /* LV # Lo HANGUL SYLLABLE DYAE */
0xB354, /* LV # Lo HANGUL SYLLABLE DEO */
0xB370, /* LV # Lo HANGUL SYLLABLE DE */
0xB38C, /* LV # Lo HANGUL SYLLABLE DYEO */
0xB3A8, /* LV # Lo HANGUL SYLLABLE DYE */
0xB3C4, /* LV # Lo HANGUL SYLLABLE DO */
0xB3E0, /* LV # Lo HANGUL SYLLABLE DWA */
0xB3FC, /* LV # Lo HANGUL SYLLABLE DWAE */
0xB418, /* LV # Lo HANGUL SYLLABLE DOE */
0xB434, /* LV # Lo HANGUL SYLLABLE DYO */
0xB450, /* LV # Lo HANGUL SYLLABLE DU */
0xB46C, /* LV # Lo HANGUL SYLLABLE DWEO */
0xB488, /* LV # Lo HANGUL SYLLABLE DWE */
0xB4A4, /* LV # Lo HANGUL SYLLABLE DWI */
0xB4C0, /* LV # Lo HANGUL SYLLABLE DYU */
0xB4DC, /* LV # Lo HANGUL SYLLABLE DEU */
0xB4F8, /* LV # Lo HANGUL SYLLABLE DYI */
0xB514, /* LV # Lo HANGUL SYLLABLE DI */
0xB530, /* LV # Lo HANGUL SYLLABLE DDA */
0xB54C, /* LV # Lo HANGUL SYLLABLE DDAE */
0xB568, /* LV # Lo HANGUL SYLLABLE DDYA */
0xB584, /* LV # Lo HANGUL SYLLABLE DDYAE */
0xB5A0, /* LV # Lo HANGUL SYLLABLE DDEO */
0xB5BC, /* LV # Lo HANGUL SYLLABLE DDE */
0xB5D8, /* LV # Lo HANGUL SYLLABLE DDYEO */
0xB5F4, /* LV # Lo HANGUL SYLLABLE DDYE */
0xB610, /* LV # Lo HANGUL SYLLABLE DDO */
0xB62C, /* LV # Lo HANGUL SYLLABLE DDWA */
0xB648, /* LV # Lo HANGUL SYLLABLE DDWAE */
0xB664, /* LV # Lo HANGUL SYLLABLE DDOE */
0xB680, /* LV # Lo HANGUL SYLLABLE DDYO */
0xB69C, /* LV # Lo HANGUL SYLLABLE DDU */
0xB6B8, /* LV # Lo HANGUL SYLLABLE DDWEO */
0xB6D4, /* LV # Lo HANGUL SYLLABLE DDWE */
0xB6F0, /* LV # Lo HANGUL SYLLABLE DDWI */
0xB70C, /* LV # Lo HANGUL SYLLABLE DDYU */
0xB728, /* LV # Lo HANGUL SYLLABLE DDEU */
0xB744, /* LV # Lo HANGUL SYLLABLE DDYI */
0xB760, /* LV # Lo HANGUL SYLLABLE DDI */
0xB77C, /* LV # Lo HANGUL SYLLABLE RA */
0xB798, /* LV # Lo HANGUL SYLLABLE RAE */
0xB7B4, /* LV # Lo HANGUL SYLLABLE RYA */
0xB7D0, /* LV # Lo HANGUL SYLLABLE RYAE */
0xB7EC, /* LV # Lo HANGUL SYLLABLE REO */
0xB808, /* LV # Lo HANGUL SYLLABLE RE */
0xB824, /* LV # Lo HANGUL SYLLABLE RYEO */
0xB840, /* LV # Lo HANGUL SYLLABLE RYE */
0xB85C, /* LV # Lo HANGUL SYLLABLE RO */
0xB878, /* LV # Lo HANGUL SYLLABLE RWA */
0xB894, /* LV # Lo HANGUL SYLLABLE RWAE */
0xB8B0, /* LV # Lo HANGUL SYLLABLE ROE */
0xB8CC, /* LV # Lo HANGUL SYLLABLE RYO */
0xB8E8, /* LV # Lo HANGUL SYLLABLE RU */
0xB904, /* LV # Lo HANGUL SYLLABLE RWEO */
0xB920, /* LV # Lo HANGUL SYLLABLE RWE */
0xB93C, /* LV # Lo HANGUL SYLLABLE RWI */
0xB958, /* LV # Lo HANGUL SYLLABLE RYU */
0xB974, /* LV # Lo HANGUL SYLLABLE REU */
0xB990, /* LV # Lo HANGUL SYLLABLE RYI */
0xB9AC, /* LV # Lo HANGUL SYLLABLE RI */
0xB9C8, /* LV # Lo HANGUL SYLLABLE MA */
0xB9E4, /* LV # Lo HANGUL SYLLABLE MAE */
0xBA00, /* LV # Lo HANGUL SYLLABLE MYA */
0xBA1C, /* LV # Lo HANGUL SYLLABLE MYAE */
0xBA38, /* LV # Lo HANGUL SYLLABLE MEO */
0xBA54, /* LV # Lo HANGUL SYLLABLE ME */
0xBA70, /* LV # Lo HANGUL SYLLABLE MYEO */
0xBA8C, /* LV # Lo HANGUL SYLLABLE MYE */
0xBAA8, /* LV # Lo HANGUL SYLLABLE MO */
0xBAC4, /* LV # Lo HANGUL SYLLABLE MWA */
0xBAE0, /* LV # Lo HANGUL SYLLABLE MWAE */
0xBAFC, /* LV # Lo HANGUL SYLLABLE MOE */
0xBB18, /* LV # Lo HANGUL SYLLABLE MYO */
0xBB34, /* LV # Lo HANGUL SYLLABLE MU */
0xBB50, /* LV # Lo HANGUL SYLLABLE MWEO */
0xBB6C, /* LV # Lo HANGUL SYLLABLE MWE */
0xBB88, /* LV # Lo HANGUL SYLLABLE MWI */
0xBBA4, /* LV # Lo HANGUL SYLLABLE MYU */
0xBBC0, /* LV # Lo HANGUL SYLLABLE MEU */
0xBBDC, /* LV # Lo HANGUL SYLLABLE MYI */
0xBBF8, /* LV # Lo HANGUL SYLLABLE MI */
0xBC14, /* LV # Lo HANGUL SYLLABLE BA */
0xBC30, /* LV # Lo HANGUL SYLLABLE BAE */
0xBC4C, /* LV # Lo HANGUL SYLLABLE BYA */
0xBC68, /* LV # Lo HANGUL SYLLABLE BYAE */
0xBC84, /* LV # Lo HANGUL SYLLABLE BEO */
0xBCA0, /* LV # Lo HANGUL SYLLABLE BE */
0xBCBC, /* LV # Lo HANGUL SYLLABLE BYEO */
0xBCD8, /* LV # Lo HANGUL SYLLABLE BYE */
0xBCF4, /* LV # Lo HANGUL SYLLABLE BO */
0xBD10, /* LV # Lo HANGUL SYLLABLE BWA */
0xBD2C, /* LV # Lo HANGUL SYLLABLE BWAE */
0xBD48, /* LV # Lo HANGUL SYLLABLE BOE */
0xBD64, /* LV # Lo HANGUL SYLLABLE BYO */
0xBD80, /* LV # Lo HANGUL SYLLABLE BU */
0xBD9C, /* LV # Lo HANGUL SYLLABLE BWEO */
0xBDB8, /* LV # Lo HANGUL SYLLABLE BWE */
0xBDD4, /* LV # Lo HANGUL SYLLABLE BWI */
0xBDF0, /* LV # Lo HANGUL SYLLABLE BYU */
0xBE0C, /* LV # Lo HANGUL SYLLABLE BEU */
0xBE28, /* LV # Lo HANGUL SYLLABLE BYI */
0xBE44, /* LV # Lo HANGUL SYLLABLE BI */
0xBE60, /* LV # Lo HANGUL SYLLABLE BBA */
0xBE7C, /* LV # Lo HANGUL SYLLABLE BBAE */
0xBE98, /* LV # Lo HANGUL SYLLABLE BBYA */
0xBEB4, /* LV # Lo HANGUL SYLLABLE BBYAE */
0xBED0, /* LV # Lo HANGUL SYLLABLE BBEO */
0xBEEC, /* LV # Lo HANGUL SYLLABLE BBE */
0xBF08, /* LV # Lo HANGUL SYLLABLE BBYEO */
0xBF24, /* LV # Lo HANGUL SYLLABLE BBYE */
0xBF40, /* LV # Lo HANGUL SYLLABLE BBO */
0xBF5C, /* LV # Lo HANGUL SYLLABLE BBWA */
0xBF78, /* LV # Lo HANGUL SYLLABLE BBWAE */
0xBF94, /* LV # Lo HANGUL SYLLABLE BBOE */
0xBFB0, /* LV # Lo HANGUL SYLLABLE BBYO */
0xBFCC, /* LV # Lo HANGUL SYLLABLE BBU */
0xBFE8, /* LV # Lo HANGUL SYLLABLE BBWEO */
0xC004, /* LV # Lo HANGUL SYLLABLE BBWE */
0xC020, /* LV # Lo HANGUL SYLLABLE BBWI */
0xC03C, /* LV # Lo HANGUL SYLLABLE BBYU */
0xC058, /* LV # Lo HANGUL SYLLABLE BBEU */
0xC074, /* LV # Lo HANGUL SYLLABLE BBYI */
0xC090, /* LV # Lo HANGUL SYLLABLE BBI */
0xC0AC, /* LV # Lo HANGUL SYLLABLE SA */
0xC0C8, /* LV # Lo HANGUL SYLLABLE SAE */
0xC0E4, /* LV # Lo HANGUL SYLLABLE SYA */
0xC100, /* LV # Lo HANGUL SYLLABLE SYAE */
0xC11C, /* LV # Lo HANGUL SYLLABLE SEO */
0xC138, /* LV # Lo HANGUL SYLLABLE SE */
0xC154, /* LV # Lo HANGUL SYLLABLE SYEO */
0xC170, /* LV # Lo HANGUL SYLLABLE SYE */
0xC18C, /* LV # Lo HANGUL SYLLABLE SO */
0xC1A8, /* LV # Lo HANGUL SYLLABLE SWA */
0xC1C4, /* LV # Lo HANGUL SYLLABLE SWAE */
0xC1E0, /* LV # Lo HANGUL SYLLABLE SOE */
0xC1FC, /* LV # Lo HANGUL SYLLABLE SYO */
0xC218, /* LV # Lo HANGUL SYLLABLE SU */
0xC234, /* LV # Lo HANGUL SYLLABLE SWEO */
0xC250, /* LV # Lo HANGUL SYLLABLE SWE */
0xC26C, /* LV # Lo HANGUL SYLLABLE SWI */
0xC288, /* LV # Lo HANGUL SYLLABLE SYU */
0xC2A4, /* LV # Lo HANGUL SYLLABLE SEU */
0xC2C0, /* LV # Lo HANGUL SYLLABLE SYI */
0xC2DC, /* LV # Lo HANGUL SYLLABLE SI */
0xC2F8, /* LV # Lo HANGUL SYLLABLE SSA */
0xC314, /* LV # Lo HANGUL SYLLABLE SSAE */
0xC330, /* LV # Lo HANGUL SYLLABLE SSYA */
0xC34C, /* LV # Lo HANGUL SYLLABLE SSYAE */
0xC368, /* LV # Lo HANGUL SYLLABLE SSEO */
0xC384, /* LV # Lo HANGUL SYLLABLE SSE */
0xC3A0, /* LV # Lo HANGUL SYLLABLE SSYEO */
0xC3BC, /* LV # Lo HANGUL SYLLABLE SSYE */
0xC3D8, /* LV # Lo HANGUL SYLLABLE SSO */
0xC3F4, /* LV # Lo HANGUL SYLLABLE SSWA */
0xC410, /* LV # Lo HANGUL SYLLABLE SSWAE */
0xC42C, /* LV # Lo HANGUL SYLLABLE SSOE */
0xC448, /* LV # Lo HANGUL SYLLABLE SSYO */
0xC464, /* LV # Lo HANGUL SYLLABLE SSU */
0xC480, /* LV # Lo HANGUL SYLLABLE SSWEO */
0xC49C, /* LV # Lo HANGUL SYLLABLE SSWE */
0xC4B8, /* LV # Lo HANGUL SYLLABLE SSWI */
0xC4D4, /* LV # Lo HANGUL SYLLABLE SSYU */
0xC4F0, /* LV # Lo HANGUL SYLLABLE SSEU */
0xC50C, /* LV # Lo HANGUL SYLLABLE SSYI */
0xC528, /* LV # Lo HANGUL SYLLABLE SSI */
0xC544, /* LV # Lo HANGUL SYLLABLE A */
0xC560, /* LV # Lo HANGUL SYLLABLE AE */
0xC57C, /* LV # Lo HANGUL SYLLABLE YA */
0xC598, /* LV # Lo HANGUL SYLLABLE YAE */
0xC5B4, /* LV # Lo HANGUL SYLLABLE EO */
0xC5D0, /* LV # Lo HANGUL SYLLABLE E */
0xC5EC, /* LV # Lo HANGUL SYLLABLE YEO */
0xC608, /* LV # Lo HANGUL SYLLABLE YE */
0xC624, /* LV # Lo HANGUL SYLLABLE O */
0xC640, /* LV # Lo HANGUL SYLLABLE WA */
0xC65C, /* LV # Lo HANGUL SYLLABLE WAE */
0xC678, /* LV # Lo HANGUL SYLLABLE OE */
0xC694, /* LV # Lo HANGUL SYLLABLE YO */
0xC6B0, /* LV # Lo HANGUL SYLLABLE U */
0xC6CC, /* LV # Lo HANGUL SYLLABLE WEO */
0xC6E8, /* LV # Lo HANGUL SYLLABLE WE */
0xC704, /* LV # Lo HANGUL SYLLABLE WI */
0xC720, /* LV # Lo HANGUL SYLLABLE YU */
0xC73C, /* LV # Lo HANGUL SYLLABLE EU */
0xC758, /* LV # Lo HANGUL SYLLABLE YI */
0xC774, /* LV # Lo HANGUL SYLLABLE I */
0xC790, /* LV # Lo HANGUL SYLLABLE JA */
0xC7AC, /* LV # Lo HANGUL SYLLABLE JAE */
0xC7C8, /* LV # Lo HANGUL SYLLABLE JYA */
0xC7E4, /* LV # Lo HANGUL SYLLABLE JYAE */
0xC800, /* LV # Lo HANGUL SYLLABLE JEO */
0xC81C, /* LV # Lo HANGUL SYLLABLE JE */
0xC838, /* LV # Lo HANGUL SYLLABLE JYEO */
0xC854, /* LV # Lo HANGUL SYLLABLE JYE */
0xC870, /* LV # Lo HANGUL SYLLABLE JO */
0xC88C, /* LV # Lo HANGUL SYLLABLE JWA */
0xC8A8, /* LV # Lo HANGUL SYLLABLE JWAE */
0xC8C4, /* LV # Lo HANGUL SYLLABLE JOE */
0xC8E0, /* LV # Lo HANGUL SYLLABLE JYO */
0xC8FC, /* LV # Lo HANGUL SYLLABLE JU */
0xC918, /* LV # Lo HANGUL SYLLABLE JWEO */
0xC934, /* LV # Lo HANGUL SYLLABLE JWE */
0xC950, /* LV # Lo HANGUL SYLLABLE JWI */
0xC96C, /* LV # Lo HANGUL SYLLABLE JYU */
0xC988, /* LV # Lo HANGUL SYLLABLE JEU */
0xC9A4, /* LV # Lo HANGUL SYLLABLE JYI */
0xC9C0, /* LV # Lo HANGUL SYLLABLE JI */
0xC9DC, /* LV # Lo HANGUL SYLLABLE JJA */
0xC9F8, /* LV # Lo HANGUL SYLLABLE JJAE */
0xCA14, /* LV # Lo HANGUL SYLLABLE JJYA */
0xCA30, /* LV # Lo HANGUL SYLLABLE JJYAE */
0xCA4C, /* LV # Lo HANGUL SYLLABLE JJEO */
0xCA68, /* LV # Lo HANGUL SYLLABLE JJE */
0xCA84, /* LV # Lo HANGUL SYLLABLE JJYEO */
0xCAA0, /* LV # Lo HANGUL SYLLABLE JJYE */
0xCABC, /* LV # Lo HANGUL SYLLABLE JJO */
0xCAD8, /* LV # Lo HANGUL SYLLABLE JJWA */
0xCAF4, /* LV # Lo HANGUL SYLLABLE JJWAE */
0xCB10, /* LV # Lo HANGUL SYLLABLE JJOE */
0xCB2C, /* LV # Lo HANGUL SYLLABLE JJYO */
0xCB48, /* LV # Lo HANGUL SYLLABLE JJU */
0xCB64, /* LV # Lo HANGUL SYLLABLE JJWEO */
0xCB80, /* LV # Lo HANGUL SYLLABLE JJWE */
0xCB9C, /* LV # Lo HANGUL SYLLABLE JJWI */
0xCBB8, /* LV # Lo HANGUL SYLLABLE JJYU */
0xCBD4, /* LV # Lo HANGUL SYLLABLE JJEU */
0xCBF0, /* LV # Lo HANGUL SYLLABLE JJYI */
0xCC0C, /* LV # Lo HANGUL SYLLABLE JJI */
0xCC28, /* LV # Lo HANGUL SYLLABLE CA */
0xCC44, /* LV # Lo HANGUL SYLLABLE CAE */
0xCC60, /* LV # Lo HANGUL SYLLABLE CYA */
0xCC7C, /* LV # Lo HANGUL SYLLABLE CYAE */
0xCC98, /* LV # Lo HANGUL SYLLABLE CEO */
0xCCB4, /* LV # Lo HANGUL SYLLABLE CE */
0xCCD0, /* LV # Lo HANGUL SYLLABLE CYEO */
0xCCEC, /* LV # Lo HANGUL SYLLABLE CYE */
0xCD08, /* LV # Lo HANGUL SYLLABLE CO */
0xCD24, /* LV # Lo HANGUL SYLLABLE CWA */
0xCD40, /* LV # Lo HANGUL SYLLABLE CWAE */
0xCD5C, /* LV # Lo HANGUL SYLLABLE COE */
0xCD78, /* LV # Lo HANGUL SYLLABLE CYO */
0xCD94, /* LV # Lo HANGUL SYLLABLE CU */
0xCDB0, /* LV # Lo HANGUL SYLLABLE CWEO */
0xCDCC, /* LV # Lo HANGUL SYLLABLE CWE */
0xCDE8, /* LV # Lo HANGUL SYLLABLE CWI */
0xCE04, /* LV # Lo HANGUL SYLLABLE CYU */
0xCE20, /* LV # Lo HANGUL SYLLABLE CEU */
0xCE3C, /* LV # Lo HANGUL SYLLABLE CYI */
0xCE58, /* LV # Lo HANGUL SYLLABLE CI */
0xCE74, /* LV # Lo HANGUL SYLLABLE KA */
0xCE90, /* LV # Lo HANGUL SYLLABLE KAE */
0xCEAC, /* LV # Lo HANGUL SYLLABLE KYA */
0xCEC8, /* LV # Lo HANGUL SYLLABLE KYAE */
0xCEE4, /* LV # Lo HANGUL SYLLABLE KEO */
0xCF00, /* LV # Lo HANGUL SYLLABLE KE */
0xCF1C, /* LV # Lo HANGUL SYLLABLE KYEO */
0xCF38, /* LV # Lo HANGUL SYLLABLE KYE */
0xCF54, /* LV # Lo HANGUL SYLLABLE KO */
0xCF70, /* LV # Lo HANGUL SYLLABLE KWA */
0xCF8C, /* LV # Lo HANGUL SYLLABLE KWAE */
0xCFA8, /* LV # Lo HANGUL SYLLABLE KOE */
0xCFC4, /* LV # Lo HANGUL SYLLABLE KYO */
0xCFE0, /* LV # Lo HANGUL SYLLABLE KU */
0xCFFC, /* LV # Lo HANGUL SYLLABLE KWEO */
0xD018, /* LV # Lo HANGUL SYLLABLE KWE */
0xD034, /* LV # Lo HANGUL SYLLABLE KWI */
0xD050, /* LV # Lo HANGUL SYLLABLE KYU */
0xD06C, /* LV # Lo HANGUL SYLLABLE KEU */
0xD088, /* LV # Lo HANGUL SYLLABLE KYI */
0xD0A4, /* LV # Lo HANGUL SYLLABLE KI */
0xD0C0, /* LV # Lo HANGUL SYLLABLE TA */
0xD0DC, /* LV # Lo HANGUL SYLLABLE TAE */
0xD0F8, /* LV # Lo HANGUL SYLLABLE TYA */
0xD114, /* LV # Lo HANGUL SYLLABLE TYAE */
0xD130, /* LV # Lo HANGUL SYLLABLE TEO */
0xD14C, /* LV # Lo HANGUL SYLLABLE TE */
0xD168, /* LV # Lo HANGUL SYLLABLE TYEO */
0xD184, /* LV # Lo HANGUL SYLLABLE TYE */
0xD1A0, /* LV # Lo HANGUL SYLLABLE TO */
0xD1BC, /* LV # Lo HANGUL SYLLABLE TWA */
0xD1D8, /* LV # Lo HANGUL SYLLABLE TWAE */
0xD1F4, /* LV # Lo HANGUL SYLLABLE TOE */
0xD210, /* LV # Lo HANGUL SYLLABLE TYO */
0xD22C, /* LV # Lo HANGUL SYLLABLE TU */
0xD248, /* LV # Lo HANGUL SYLLABLE TWEO */
0xD264, /* LV # Lo HANGUL SYLLABLE TWE */
0xD280, /* LV # Lo HANGUL SYLLABLE TWI */
0xD29C, /* LV # Lo HANGUL SYLLABLE TYU */
0xD2B8, /* LV # Lo HANGUL SYLLABLE TEU */
0xD2D4, /* LV # Lo HANGUL SYLLABLE TYI */
0xD2F0, /* LV # Lo HANGUL SYLLABLE TI */
0xD30C, /* LV # Lo HANGUL SYLLABLE PA */
0xD328, /* LV # Lo HANGUL SYLLABLE PAE */
0xD344, /* LV # Lo HANGUL SYLLABLE PYA */
0xD360, /* LV # Lo HANGUL SYLLABLE PYAE */
0xD37C, /* LV # Lo HANGUL SYLLABLE PEO */
0xD398, /* LV # Lo HANGUL SYLLABLE PE */
0xD3B4, /* LV # Lo HANGUL SYLLABLE PYEO */
0xD3D0, /* LV # Lo HANGUL SYLLABLE PYE */
0xD3EC, /* LV # Lo HANGUL SYLLABLE PO */
0xD408, /* LV # Lo HANGUL SYLLABLE PWA */
0xD424, /* LV # Lo HANGUL SYLLABLE PWAE */
0xD440, /* LV # Lo HANGUL SYLLABLE POE */
0xD45C, /* LV # Lo HANGUL SYLLABLE PYO */
0xD478, /* LV # Lo HANGUL SYLLABLE PU */
0xD494, /* LV # Lo HANGUL SYLLABLE PWEO */
0xD4B0, /* LV # Lo HANGUL SYLLABLE PWE */
0xD4CC, /* LV # Lo HANGUL SYLLABLE PWI */
0xD4E8, /* LV # Lo HANGUL SYLLABLE PYU */
0xD504, /* LV # Lo HANGUL SYLLABLE PEU */
0xD520, /* LV # Lo HANGUL SYLLABLE PYI */
0xD53C, /* LV # Lo HANGUL SYLLABLE PI */
0xD558, /* LV # Lo HANGUL SYLLABLE HA */
0xD574, /* LV # Lo HANGUL SYLLABLE HAE */
0xD590, /* LV # Lo HANGUL SYLLABLE HYA */
0xD5AC, /* LV # Lo HANGUL SYLLABLE HYAE */
0xD5C8, /* LV # Lo HANGUL SYLLABLE HEO */
0xD5E4, /* LV # Lo HANGUL SYLLABLE HE */
0xD600, /* LV # Lo HANGUL SYLLABLE HYEO */
0xD61C, /* LV # Lo HANGUL SYLLABLE HYE */
0xD638, /* LV # Lo HANGUL SYLLABLE HO */
0xD654, /* LV # Lo HANGUL SYLLABLE HWA */
0xD670, /* LV # Lo HANGUL SYLLABLE HWAE */
0xD68C, /* LV # Lo HANGUL SYLLABLE HOE */
0xD6A8, /* LV # Lo HANGUL SYLLABLE HYO */
0xD6C4, /* LV # Lo HANGUL SYLLABLE HU */
0xD6E0, /* LV # Lo HANGUL SYLLABLE HWEO */
0xD6FC, /* LV # Lo HANGUL SYLLABLE HWE */
0xD718, /* LV # Lo HANGUL SYLLABLE HWI */
0xD734, /* LV # Lo HANGUL SYLLABLE HYU */
0xD750, /* LV # Lo HANGUL SYLLABLE HEU */
0xD76C, /* LV # Lo HANGUL SYLLABLE HYI */
0xD788, /* LV # Lo HANGUL SYLLABLE HI */
0
};
static const uint32_t pr29_13_2[] = {
0x11A8, 0x11A9, 0x11AA, 0x11AB, 0x11AC, 0x11AD, 0x11AE, 0x11AF,
0x11B0, 0x11B1, 0x11B2, 0x11B3, 0x11B4, 0x11B5, 0x11B6, 0x11B7,
0x11B8, 0x11B9, 0x11BA, 0x11BB, 0x11BC, 0x11BD, 0x11BE, 0x11BF,
0x11C0, 0x11C1, 0x11C2, 0
};
typedef struct
{
const uint32_t *first;
const uint32_t *last;
} Pr29;
static const Pr29 pr29[] = {
{&pr29_1_1[0], &pr29_1_2[0]},
{&pr29_2_1[0], &pr29_2_2[0]},
{&pr29_3_1[0], &pr29_3_2[0]},
{&pr29_4_1[0], &pr29_4_2[0]},
{&pr29_5_1[0], &pr29_5_2[0]},
{&pr29_6_1[0], &pr29_6_2[0]},
{&pr29_7_1[0], &pr29_7_2[0]},
{&pr29_8_1[0], &pr29_8_2[0]},
{&pr29_9_1[0], &pr29_9_2[0]},
{&pr29_10_1[0], &pr29_10_2[0]},
{&pr29_11_1[0], &pr29_11_2[0]},
{&pr29_12_1[0], &pr29_12_2[0]},
{&pr29_13_1[0], &pr29_13_2[0]},
{NULL, NULL}
};
static size_t
first_column (uint32_t c)
{
size_t i, j;
for (i = 0; pr29[i].first; i++)
for (j = 0; pr29[i].first[j]; j++)
if (c == pr29[i].first[j])
return i + 1;
return 0;
}
static int
in_last_column_row (uint32_t c, size_t row)
{
size_t i;
for (i = 0; pr29[row - 1].last[i]; i++)
if (c == pr29[row - 1].last[i])
return 1;
return 0;
}
static size_t
combinationclass (uint32_t c)
{
size_t i;
for (i = 0; nzcc[i]; i++)
if (c == nzcc[i])
return i + 1;
return 0;
}
/**
* pr29_4:
* @in: input array with unicode code points.
* @len: length of input array with unicode code points.
*
* Check the input to see if it may be normalized into different
* strings by different NFKC implementations, due to an anomaly in the
* NFKC specifications.
*
* Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
* and %PR29_PROBLEM if the input sequence is a "problem sequence"
* (i.e., may be normalized into different strings by different
* implementations).
**/
int
pr29_4 (const uint32_t * in, size_t len)
{
size_t i, j, k, row;
/*
* The problem sequence are of the form:
*
* first_character intervening_character+ last_character
*
* where the first_character and last_character come from the same
* row in the following table, and there is at least one
* intervening_character with non-zero Canonical Combining
* Class. (The '+' above means one or more occurrences.)
*
*/
for (i = 0; i < len; i++)
if ((row = first_column (in[i])) > 0)
for (j = i + 1; j < len; j++)
if (combinationclass (in[j]))
for (k = j + 1; k < len; j++)
if (in_last_column_row (in[k], row))
return PR29_PROBLEM;
return PR29_SUCCESS;
}
/**
* pr29_4z:
* @in: zero terminated array of Unicode code points.
*
* Check the input to see if it may be normalized into different
* strings by different NFKC implementations, due to an anomaly in the
* NFKC specifications.
*
* Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
* and %PR29_PROBLEM if the input sequence is a "problem sequence"
* (i.e., may be normalized into different strings by different
* implementations).
**/
int
pr29_4z (const uint32_t * in)
{
size_t len;
for (len = 0; in[len]; len++)
;
return pr29_4 (in, len);
}
/**
* pr29_8z:
* @in: zero terminated input UTF-8 string.
*
* Check the input to see if it may be normalized into different
* strings by different NFKC implementations, due to an anomaly in the
* NFKC specifications.
*
* Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
* and %PR29_PROBLEM if the input sequence is a "problem sequence"
* (i.e., may be normalized into different strings by different
* implementations), or %PR29_STRINGPREP_ERROR if there was a
* problem converting the string from UTF-8 to UCS-4.
**/
int
pr29_8z (const char *in)
{
uint32_t *p;
int rc;
p = stringprep_utf8_to_ucs4 (in, -1, NULL);
if (!p)
return PR29_STRINGPREP_ERROR;
rc = pr29_4z (p);
free (p);
return rc;
}
/**
* Pr29_rc:
* @PR29_SUCCESS: Successful operation. This value is guaranteed to
* always be zero, the remaining ones are only guaranteed to hold
* non-zero values, for logical comparison purposes.
* @PR29_PROBLEM: A problem sequence was encountered.
* @PR29_STRINGPREP_ERROR: The character set conversion failed (only
* for pr29_8z()).
*
* Enumerated return codes for pr29_4(), pr29_4z(), pr29_8z(). The
* value 0 is guaranteed to always correspond to success.
*/
|