#include #include "utf8.h" #define YYCTYPE unsigned char bool scan(const YYCTYPE * start, const YYCTYPE * const limit) { __attribute__((unused)) const YYCTYPE * YYMARKER; // silence compiler warnings when YYMARKER is not used # define YYCURSOR start M: /*!re2c re2c:yyfill:enable = 0; M = [\u0300-\u036f\u0483-\u0489\u0591-\u05bd\u05bf-\u05bf\u05c1-\u05c2\u05c4-\u05c5\u05c7-\u05c7\u0610-\u061a\u064b-\u065f\u0670-\u0670\u06d6-\u06dc\u06df-\u06e4\u06e7-\u06e8\u06ea-\u06ed\u0711-\u0711\u0730-\u074a\u07a6-\u07b0\u07eb-\u07f3\u0816-\u0819\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0859-\u085b\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0981-\u0983\u09bc-\u09bc\u09be-\u09c4\u09c7-\u09c8\u09cb-\u09cd\u09d7-\u09d7\u09e2-\u09e3\u0a01-\u0a03\u0a3c-\u0a3c\u0a3e-\u0a42\u0a47-\u0a48\u0a4b-\u0a4d\u0a51-\u0a51\u0a70-\u0a71\u0a75-\u0a75\u0a81-\u0a83\u0abc-\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0b01-\u0b03\u0b3c-\u0b3c\u0b3e-\u0b44\u0b47-\u0b48\u0b4b-\u0b4d\u0b56-\u0b57\u0b62-\u0b63\u0b82-\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7-\u0bd7\u0c01-\u0c03\u0c3e-\u0c44\u0c46-\u0c48\u0c4a-\u0c4d\u0c55-\u0c56\u0c62-\u0c63\u0c82-\u0c83\u0cbc-\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5-\u0cd6\u0ce2-\u0ce3\u0d02-\u0d03\u0d3e-\u0d44\u0d46-\u0d48\u0d4a-\u0d4d\u0d57-\u0d57\u0d62-\u0d63\u0d82-\u0d83\u0dca-\u0dca\u0dcf-\u0dd4\u0dd6-\u0dd6\u0dd8-\u0ddf\u0df2-\u0df3\u0e31-\u0e31\u0e34-\u0e3a\u0e47-\u0e4e\u0eb1-\u0eb1\u0eb4-\u0eb9\u0ebb-\u0ebc\u0ec8-\u0ecd\u0f18-\u0f19\u0f35-\u0f35\u0f37-\u0f37\u0f39-\u0f39\u0f3e-\u0f3f\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6-\u0fc6\u102b-\u103e\u1056-\u1059\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u108f\u109a-\u109d\u135d-\u135f\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17b6-\u17d3\u17dd-\u17dd\u180b-\u180d\u18a9-\u18a9\u1920-\u192b\u1930-\u193b\u19b0-\u19c0\u19c8-\u19c9\u1a17-\u1a1b\u1a55-\u1a5e\u1a60-\u1a7c\u1a7f-\u1a7f\u1b00-\u1b04\u1b34-\u1b44\u1b6b-\u1b73\u1b80-\u1b82\u1ba1-\u1baa\u1be6-\u1bf3\u1c24-\u1c37\u1cd0-\u1cd2\u1cd4-\u1ce8\u1ced-\u1ced\u1cf2-\u1cf2\u1dc0-\u1de6\u1dfc-\u1dff\u20d0-\u20f0\u2cef-\u2cf1\u2d7f-\u2d7f\u2de0-\u2dff\u302a-\u302f\u3099-\u309a\ua66f-\ua672\ua67c-\ua67d\ua6f0-\ua6f1\ua802-\ua802\ua806-\ua806\ua80b-\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8e0-\ua8f1\ua926-\ua92d\ua947-\ua953\ua980-\ua983\ua9b3-\ua9c0\uaa29-\uaa36\uaa43-\uaa43\uaa4c-\uaa4d\uaa7b-\uaa7b\uaab0-\uaab0\uaab2-\uaab4\uaab7-\uaab8\uaabe-\uaabf\uaac1-\uaac1\uabe3-\uabea\uabec-\uabed\ufb1e-\ufb1e\ufe00-\ufe0f\ufe20-\ufe26\U000101fd-\U000101fd\U00010a01-\U00010a03\U00010a05-\U00010a06\U00010a0c-\U00010a0f\U00010a38-\U00010a3a\U00010a3f-\U00010a3f\U00011000-\U00011002\U00011038-\U00011046\U00011080-\U00011082\U000110b0-\U000110ba\U0001d165-\U0001d169\U0001d16d-\U0001d172\U0001d17b-\U0001d182\U0001d185-\U0001d18b\U0001d1aa-\U0001d1ad\U0001d242-\U0001d244\U000e0100-\U000e01ef]; M { goto M; } * { return YYCURSOR == limit; } */ } static const unsigned int chars_M [] = {0x300,0x36f, 0x483,0x489, 0x591,0x5bd, 0x5bf,0x5bf, 0x5c1,0x5c2, 0x5c4,0x5c5, 0x5c7,0x5c7, 0x610,0x61a, 0x64b,0x65f, 0x670,0x670, 0x6d6,0x6dc, 0x6df,0x6e4, 0x6e7,0x6e8, 0x6ea,0x6ed, 0x711,0x711, 0x730,0x74a, 0x7a6,0x7b0, 0x7eb,0x7f3, 0x816,0x819, 0x81b,0x823, 0x825,0x827, 0x829,0x82d, 0x859,0x85b, 0x900,0x903, 0x93a,0x93c, 0x93e,0x94f, 0x951,0x957, 0x962,0x963, 0x981,0x983, 0x9bc,0x9bc, 0x9be,0x9c4, 0x9c7,0x9c8, 0x9cb,0x9cd, 0x9d7,0x9d7, 0x9e2,0x9e3, 0xa01,0xa03, 0xa3c,0xa3c, 0xa3e,0xa42, 0xa47,0xa48, 0xa4b,0xa4d, 0xa51,0xa51, 0xa70,0xa71, 0xa75,0xa75, 0xa81,0xa83, 0xabc,0xabc, 0xabe,0xac5, 0xac7,0xac9, 0xacb,0xacd, 0xae2,0xae3, 0xb01,0xb03, 0xb3c,0xb3c, 0xb3e,0xb44, 0xb47,0xb48, 0xb4b,0xb4d, 0xb56,0xb57, 0xb62,0xb63, 0xb82,0xb82, 0xbbe,0xbc2, 0xbc6,0xbc8, 0xbca,0xbcd, 0xbd7,0xbd7, 0xc01,0xc03, 0xc3e,0xc44, 0xc46,0xc48, 0xc4a,0xc4d, 0xc55,0xc56, 0xc62,0xc63, 0xc82,0xc83, 0xcbc,0xcbc, 0xcbe,0xcc4, 0xcc6,0xcc8, 0xcca,0xccd, 0xcd5,0xcd6, 0xce2,0xce3, 0xd02,0xd03, 0xd3e,0xd44, 0xd46,0xd48, 0xd4a,0xd4d, 0xd57,0xd57, 0xd62,0xd63, 0xd82,0xd83, 0xdca,0xdca, 0xdcf,0xdd4, 0xdd6,0xdd6, 0xdd8,0xddf, 0xdf2,0xdf3, 0xe31,0xe31, 0xe34,0xe3a, 0xe47,0xe4e, 0xeb1,0xeb1, 0xeb4,0xeb9, 0xebb,0xebc, 0xec8,0xecd, 0xf18,0xf19, 0xf35,0xf35, 0xf37,0xf37, 0xf39,0xf39, 0xf3e,0xf3f, 0xf71,0xf84, 0xf86,0xf87, 0xf8d,0xf97, 0xf99,0xfbc, 0xfc6,0xfc6, 0x102b,0x103e, 0x1056,0x1059, 0x105e,0x1060, 0x1062,0x1064, 0x1067,0x106d, 0x1071,0x1074, 0x1082,0x108d, 0x108f,0x108f, 0x109a,0x109d, 0x135d,0x135f, 0x1712,0x1714, 0x1732,0x1734, 0x1752,0x1753, 0x1772,0x1773, 0x17b6,0x17d3, 0x17dd,0x17dd, 0x180b,0x180d, 0x18a9,0x18a9, 0x1920,0x192b, 0x1930,0x193b, 0x19b0,0x19c0, 0x19c8,0x19c9, 0x1a17,0x1a1b, 0x1a55,0x1a5e, 0x1a60,0x1a7c, 0x1a7f,0x1a7f, 0x1b00,0x1b04, 0x1b34,0x1b44, 0x1b6b,0x1b73, 0x1b80,0x1b82, 0x1ba1,0x1baa, 0x1be6,0x1bf3, 0x1c24,0x1c37, 0x1cd0,0x1cd2, 0x1cd4,0x1ce8, 0x1ced,0x1ced, 0x1cf2,0x1cf2, 0x1dc0,0x1de6, 0x1dfc,0x1dff, 0x20d0,0x20f0, 0x2cef,0x2cf1, 0x2d7f,0x2d7f, 0x2de0,0x2dff, 0x302a,0x302f, 0x3099,0x309a, 0xa66f,0xa672, 0xa67c,0xa67d, 0xa6f0,0xa6f1, 0xa802,0xa802, 0xa806,0xa806, 0xa80b,0xa80b, 0xa823,0xa827, 0xa880,0xa881, 0xa8b4,0xa8c4, 0xa8e0,0xa8f1, 0xa926,0xa92d, 0xa947,0xa953, 0xa980,0xa983, 0xa9b3,0xa9c0, 0xaa29,0xaa36, 0xaa43,0xaa43, 0xaa4c,0xaa4d, 0xaa7b,0xaa7b, 0xaab0,0xaab0, 0xaab2,0xaab4, 0xaab7,0xaab8, 0xaabe,0xaabf, 0xaac1,0xaac1, 0xabe3,0xabea, 0xabec,0xabed, 0xfb1e,0xfb1e, 0xfe00,0xfe0f, 0xfe20,0xfe26, 0x101fd,0x101fd, 0x10a01,0x10a03, 0x10a05,0x10a06, 0x10a0c,0x10a0f, 0x10a38,0x10a3a, 0x10a3f,0x10a3f, 0x11000,0x11002, 0x11038,0x11046, 0x11080,0x11082, 0x110b0,0x110ba, 0x1d165,0x1d169, 0x1d16d,0x1d172, 0x1d17b,0x1d182, 0x1d185,0x1d18b, 0x1d1aa,0x1d1ad, 0x1d242,0x1d244, 0xe0100,0xe01ef, 0x0,0x0}; static unsigned int encode_utf8 (const unsigned int * ranges, unsigned int ranges_count, unsigned char * s) { unsigned char * const s_start = s; for (unsigned int i = 0; i < ranges_count - 2; i += 2) for (unsigned int j = ranges[i]; j <= ranges[i + 1]; ++j) s += re2c::utf8::rune_to_bytes (s, j); re2c::utf8::rune_to_bytes (s, ranges[ranges_count - 1]); return s - s_start + 1; } int main () { YYCTYPE * buffer_M = new YYCTYPE [5996]; unsigned int buffer_len = encode_utf8 (chars_M, sizeof (chars_M) / sizeof (unsigned int), buffer_M); if (!scan (reinterpret_cast (buffer_M), reinterpret_cast (buffer_M + buffer_len))) printf("test 'M' failed\n"); delete [] buffer_M; return 0; }