|
|
|
@ -696,7 +696,7 @@ static const pg_unicode_properties unicode_opt_ascii[128] = |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/* table of Unicode codepoint ranges and their categories */ |
|
|
|
|
static const pg_category_range unicode_categories[3302] = |
|
|
|
|
static const pg_category_range unicode_categories[3368] = |
|
|
|
|
{ |
|
|
|
|
{0x000000, 0x00001f, PG_U_CONTROL}, |
|
|
|
|
{0x000020, 0x000020, PG_U_SPACE_SEPARATOR}, |
|
|
|
@ -1408,7 +1408,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x000888, 0x000888, PG_U_MODIFIER_SYMBOL}, |
|
|
|
|
{0x000889, 0x00088e, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x000890, 0x000891, PG_U_FORMAT}, |
|
|
|
|
{0x000898, 0x00089f, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x000897, 0x00089f, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x0008a0, 0x0008c8, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0008c9, 0x0008c9, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x0008ca, 0x0008e1, PG_U_NONSPACING_MARK}, |
|
|
|
@ -1887,12 +1887,13 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x001b42, 0x001b42, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x001b43, 0x001b44, PG_U_SPACING_MARK}, |
|
|
|
|
{0x001b45, 0x001b4c, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x001b4e, 0x001b4f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x001b50, 0x001b59, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x001b5a, 0x001b60, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x001b61, 0x001b6a, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x001b6b, 0x001b73, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x001b74, 0x001b7c, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x001b7d, 0x001b7e, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x001b7d, 0x001b7f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x001b80, 0x001b81, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x001b82, 0x001b82, PG_U_SPACING_MARK}, |
|
|
|
|
{0x001b83, 0x001ba0, PG_U_OTHER_LETTER}, |
|
|
|
@ -1927,6 +1928,8 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x001c78, 0x001c7d, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x001c7e, 0x001c7f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x001c80, 0x001c88, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x001c89, 0x001c89, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x001c8a, 0x001c8a, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x001c90, 0x001cba, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x001cbd, 0x001cbf, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x001cc0, 0x001cc7, PG_U_OTHER_PUNCTUATION}, |
|
|
|
@ -2378,7 +2381,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x00239b, 0x0023b3, PG_U_MATH_SYMBOL}, |
|
|
|
|
{0x0023b4, 0x0023db, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0023dc, 0x0023e1, PG_U_MATH_SYMBOL}, |
|
|
|
|
{0x0023e2, 0x002426, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0023e2, 0x002429, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x002440, 0x00244a, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x002460, 0x00249b, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x00249c, 0x0024e9, PG_U_OTHER_SYMBOL}, |
|
|
|
@ -2719,7 +2722,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x003192, 0x003195, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x003196, 0x00319f, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0031a0, 0x0031bf, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0031c0, 0x0031e3, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0031c0, 0x0031e5, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0031ef, 0x0031ef, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x0031f0, 0x0031ff, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x003200, 0x00321e, PG_U_OTHER_SYMBOL}, |
|
|
|
@ -2983,6 +2986,8 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x00a7c8, 0x00a7c8, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7c9, 0x00a7c9, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7ca, 0x00a7ca, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7cb, 0x00a7cc, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7cd, 0x00a7cd, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7d0, 0x00a7d0, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7d1, 0x00a7d1, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7d3, 0x00a7d3, PG_U_LOWERCASE_LETTER}, |
|
|
|
@ -2991,6 +2996,9 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x00a7d7, 0x00a7d7, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7d8, 0x00a7d8, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7d9, 0x00a7d9, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7da, 0x00a7da, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7db, 0x00a7db, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x00a7dc, 0x00a7dc, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7f2, 0x00a7f4, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x00a7f5, 0x00a7f5, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x00a7f6, 0x00a7f6, PG_U_LOWERCASE_LETTER}, |
|
|
|
@ -3306,6 +3314,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x0105a3, 0x0105b1, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x0105b3, 0x0105b9, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x0105bb, 0x0105bc, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x0105c0, 0x0105f3, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010600, 0x010736, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010740, 0x010755, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010760, 0x010767, PG_U_OTHER_LETTER}, |
|
|
|
@ -3376,12 +3385,23 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x010d00, 0x010d23, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010d24, 0x010d27, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x010d30, 0x010d39, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x010d40, 0x010d49, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x010d4a, 0x010d4d, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010d4e, 0x010d4e, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x010d4f, 0x010d4f, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010d50, 0x010d65, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x010d69, 0x010d6d, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x010d6e, 0x010d6e, PG_U_DASH_PUNCTUATION}, |
|
|
|
|
{0x010d6f, 0x010d6f, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x010d70, 0x010d85, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x010d8e, 0x010d8f, PG_U_MATH_SYMBOL}, |
|
|
|
|
{0x010e60, 0x010e7e, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x010e80, 0x010ea9, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010eab, 0x010eac, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x010ead, 0x010ead, PG_U_DASH_PUNCTUATION}, |
|
|
|
|
{0x010eb0, 0x010eb1, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010efd, 0x010eff, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x010ec2, 0x010ec4, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010efc, 0x010eff, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x010f00, 0x010f1c, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x010f1d, 0x010f26, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x010f27, 0x010f27, PG_U_OTHER_LETTER}, |
|
|
|
@ -3497,6 +3517,26 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x011362, 0x011363, PG_U_SPACING_MARK}, |
|
|
|
|
{0x011366, 0x01136c, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011370, 0x011374, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011380, 0x011389, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01138b, 0x01138b, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01138e, 0x01138e, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011390, 0x0113b5, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0113b7, 0x0113b7, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0113b8, 0x0113ba, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113bb, 0x0113c0, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x0113c2, 0x0113c2, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113c5, 0x0113c5, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113c7, 0x0113ca, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113cc, 0x0113cd, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113ce, 0x0113ce, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x0113cf, 0x0113cf, PG_U_SPACING_MARK}, |
|
|
|
|
{0x0113d0, 0x0113d0, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x0113d1, 0x0113d1, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0113d2, 0x0113d2, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x0113d3, 0x0113d3, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0113d4, 0x0113d5, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x0113d7, 0x0113d8, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x0113e1, 0x0113e2, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011400, 0x011434, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011435, 0x011437, PG_U_SPACING_MARK}, |
|
|
|
|
{0x011438, 0x01143f, PG_U_NONSPACING_MARK}, |
|
|
|
@ -3556,8 +3596,11 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x0116b8, 0x0116b8, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x0116b9, 0x0116b9, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x0116c0, 0x0116c9, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x0116d0, 0x0116e3, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x011700, 0x01171a, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01171d, 0x01171f, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01171d, 0x01171d, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01171e, 0x01171e, PG_U_SPACING_MARK}, |
|
|
|
|
{0x01171f, 0x01171f, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011720, 0x011721, PG_U_SPACING_MARK}, |
|
|
|
|
{0x011722, 0x011725, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011726, 0x011726, PG_U_SPACING_MARK}, |
|
|
|
@ -3627,6 +3670,9 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x011a9e, 0x011aa2, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x011ab0, 0x011af8, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011b00, 0x011b09, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x011bc0, 0x011be0, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011be1, 0x011be1, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x011bf0, 0x011bf9, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x011c00, 0x011c08, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011c0a, 0x011c2e, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011c2f, 0x011c2f, PG_U_SPACING_MARK}, |
|
|
|
@ -3685,6 +3731,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x011f42, 0x011f42, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011f43, 0x011f4f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x011f50, 0x011f59, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x011f5a, 0x011f5a, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x011fb0, 0x011fb0, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x011fc0, 0x011fd4, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x011fd5, 0x011fdc, PG_U_OTHER_SYMBOL}, |
|
|
|
@ -3702,7 +3749,13 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x013440, 0x013440, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x013441, 0x013446, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x013447, 0x013455, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x013460, 0x0143fa, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x014400, 0x014646, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016100, 0x01611d, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01611e, 0x016129, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01612a, 0x01612c, PG_U_SPACING_MARK}, |
|
|
|
|
{0x01612d, 0x01612f, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x016130, 0x016139, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x016800, 0x016a38, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016a40, 0x016a5e, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016a60, 0x016a69, PG_U_DECIMAL_NUMBER}, |
|
|
|
@ -3723,6 +3776,11 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x016b5b, 0x016b61, PG_U_OTHER_NUMBER}, |
|
|
|
|
{0x016b63, 0x016b77, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016b7d, 0x016b8f, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016d40, 0x016d42, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x016d43, 0x016d6a, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x016d6b, 0x016d6c, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x016d6d, 0x016d6f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x016d70, 0x016d79, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x016e40, 0x016e5f, PG_U_UPPERCASE_LETTER}, |
|
|
|
|
{0x016e60, 0x016e7f, PG_U_LOWERCASE_LETTER}, |
|
|
|
|
{0x016e80, 0x016e96, PG_U_OTHER_NUMBER}, |
|
|
|
@ -3740,7 +3798,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x016ff0, 0x016ff1, PG_U_SPACING_MARK}, |
|
|
|
|
{0x017000, 0x0187f7, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x018800, 0x018cd5, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x018d00, 0x018d08, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x018cff, 0x018d08, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01aff0, 0x01aff3, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x01aff5, 0x01affb, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x01affd, 0x01affe, PG_U_MODIFIER_LETTER}, |
|
|
|
@ -3758,6 +3816,9 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x01bc9d, 0x01bc9e, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01bc9f, 0x01bc9f, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x01bca0, 0x01bca3, PG_U_FORMAT}, |
|
|
|
|
{0x01cc00, 0x01ccef, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01ccf0, 0x01ccf9, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x01cd00, 0x01ceb3, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01cf00, 0x01cf2d, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01cf30, 0x01cf46, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01cf50, 0x01cfc3, PG_U_OTHER_SYMBOL}, |
|
|
|
@ -3891,6 +3952,11 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x01e4eb, 0x01e4eb, PG_U_MODIFIER_LETTER}, |
|
|
|
|
{0x01e4ec, 0x01e4ef, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01e4f0, 0x01e4f9, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x01e5d0, 0x01e5ed, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01e5ee, 0x01e5ef, PG_U_NONSPACING_MARK}, |
|
|
|
|
{0x01e5f0, 0x01e5f0, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01e5f1, 0x01e5fa, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x01e5ff, 0x01e5ff, PG_U_OTHER_PUNCTUATION}, |
|
|
|
|
{0x01e7e0, 0x01e7e6, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01e7e8, 0x01e7eb, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x01e7ed, 0x01e7ee, PG_U_OTHER_LETTER}, |
|
|
|
@ -3973,18 +4039,18 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
{0x01f850, 0x01f859, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f860, 0x01f887, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f890, 0x01f8ad, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f8b0, 0x01f8b1, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f8b0, 0x01f8bb, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f8c0, 0x01f8c1, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01f900, 0x01fa53, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa60, 0x01fa6d, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa70, 0x01fa7c, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa80, 0x01fa88, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa90, 0x01fabd, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fabf, 0x01fac5, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01face, 0x01fadb, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fae0, 0x01fae8, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa80, 0x01fa89, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fa8f, 0x01fac6, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01face, 0x01fadc, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fadf, 0x01fae9, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01faf0, 0x01faf8, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fb00, 0x01fb92, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fb94, 0x01fbca, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fb94, 0x01fbef, PG_U_OTHER_SYMBOL}, |
|
|
|
|
{0x01fbf0, 0x01fbf9, PG_U_DECIMAL_NUMBER}, |
|
|
|
|
{0x020000, 0x02a6df, PG_U_OTHER_LETTER}, |
|
|
|
|
{0x02a700, 0x02b739, PG_U_OTHER_LETTER}, |
|
|
|
@ -4003,7 +4069,7 @@ static const pg_category_range unicode_categories[3302] = |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/* table of Unicode codepoint ranges of Alphabetic characters */ |
|
|
|
|
static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
static const pg_unicode_range unicode_alphabetic[1179] = |
|
|
|
|
{ |
|
|
|
|
{0x000041, 0x00005a}, |
|
|
|
|
{0x000061, 0x00007a}, |
|
|
|
@ -4025,6 +4091,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x0002ec, 0x0002ec}, |
|
|
|
|
{0x0002ee, 0x0002ee}, |
|
|
|
|
{0x000345, 0x000345}, |
|
|
|
|
{0x000363, 0x00036f}, |
|
|
|
|
{0x000370, 0x000373}, |
|
|
|
|
{0x000374, 0x000374}, |
|
|
|
|
{0x000376, 0x000377}, |
|
|
|
@ -4088,6 +4155,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x000860, 0x00086a}, |
|
|
|
|
{0x000870, 0x000887}, |
|
|
|
|
{0x000889, 0x00088e}, |
|
|
|
|
{0x000897, 0x000897}, |
|
|
|
|
{0x0008a0, 0x0008c8}, |
|
|
|
|
{0x0008c9, 0x0008c9}, |
|
|
|
|
{0x0008d4, 0x0008df}, |
|
|
|
@ -4455,7 +4523,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x001c4d, 0x001c4f}, |
|
|
|
|
{0x001c5a, 0x001c77}, |
|
|
|
|
{0x001c78, 0x001c7d}, |
|
|
|
|
{0x001c80, 0x001c88}, |
|
|
|
|
{0x001c80, 0x001c8a}, |
|
|
|
|
{0x001c90, 0x001cba}, |
|
|
|
|
{0x001cbd, 0x001cbf}, |
|
|
|
|
{0x001ce9, 0x001cec}, |
|
|
|
@ -4468,7 +4536,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x001d78, 0x001d78}, |
|
|
|
|
{0x001d79, 0x001d9a}, |
|
|
|
|
{0x001d9b, 0x001dbf}, |
|
|
|
|
{0x001de7, 0x001df4}, |
|
|
|
|
{0x001dd3, 0x001df4}, |
|
|
|
|
{0x001e00, 0x001f15}, |
|
|
|
|
{0x001f18, 0x001f1d}, |
|
|
|
|
{0x001f20, 0x001f45}, |
|
|
|
@ -4575,10 +4643,10 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x00a788, 0x00a788}, |
|
|
|
|
{0x00a78b, 0x00a78e}, |
|
|
|
|
{0x00a78f, 0x00a78f}, |
|
|
|
|
{0x00a790, 0x00a7ca}, |
|
|
|
|
{0x00a790, 0x00a7cd}, |
|
|
|
|
{0x00a7d0, 0x00a7d1}, |
|
|
|
|
{0x00a7d3, 0x00a7d3}, |
|
|
|
|
{0x00a7d5, 0x00a7d9}, |
|
|
|
|
{0x00a7d5, 0x00a7dc}, |
|
|
|
|
{0x00a7f2, 0x00a7f4}, |
|
|
|
|
{0x00a7f5, 0x00a7f6}, |
|
|
|
|
{0x00a7f7, 0x00a7f7}, |
|
|
|
@ -4743,6 +4811,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x0105a3, 0x0105b1}, |
|
|
|
|
{0x0105b3, 0x0105b9}, |
|
|
|
|
{0x0105bb, 0x0105bc}, |
|
|
|
|
{0x0105c0, 0x0105f3}, |
|
|
|
|
{0x010600, 0x010736}, |
|
|
|
|
{0x010740, 0x010755}, |
|
|
|
|
{0x010760, 0x010767}, |
|
|
|
@ -4783,9 +4852,18 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x010cc0, 0x010cf2}, |
|
|
|
|
{0x010d00, 0x010d23}, |
|
|
|
|
{0x010d24, 0x010d27}, |
|
|
|
|
{0x010d4a, 0x010d4d}, |
|
|
|
|
{0x010d4e, 0x010d4e}, |
|
|
|
|
{0x010d4f, 0x010d4f}, |
|
|
|
|
{0x010d50, 0x010d65}, |
|
|
|
|
{0x010d69, 0x010d69}, |
|
|
|
|
{0x010d6f, 0x010d6f}, |
|
|
|
|
{0x010d70, 0x010d85}, |
|
|
|
|
{0x010e80, 0x010ea9}, |
|
|
|
|
{0x010eab, 0x010eac}, |
|
|
|
|
{0x010eb0, 0x010eb1}, |
|
|
|
|
{0x010ec2, 0x010ec4}, |
|
|
|
|
{0x010efc, 0x010efc}, |
|
|
|
|
{0x010f00, 0x010f1c}, |
|
|
|
|
{0x010f27, 0x010f27}, |
|
|
|
|
{0x010f30, 0x010f45}, |
|
|
|
@ -4866,6 +4944,19 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x011357, 0x011357}, |
|
|
|
|
{0x01135d, 0x011361}, |
|
|
|
|
{0x011362, 0x011363}, |
|
|
|
|
{0x011380, 0x011389}, |
|
|
|
|
{0x01138b, 0x01138b}, |
|
|
|
|
{0x01138e, 0x01138e}, |
|
|
|
|
{0x011390, 0x0113b5}, |
|
|
|
|
{0x0113b7, 0x0113b7}, |
|
|
|
|
{0x0113b8, 0x0113ba}, |
|
|
|
|
{0x0113bb, 0x0113c0}, |
|
|
|
|
{0x0113c2, 0x0113c2}, |
|
|
|
|
{0x0113c5, 0x0113c5}, |
|
|
|
|
{0x0113c7, 0x0113ca}, |
|
|
|
|
{0x0113cc, 0x0113cd}, |
|
|
|
|
{0x0113d1, 0x0113d1}, |
|
|
|
|
{0x0113d3, 0x0113d3}, |
|
|
|
|
{0x011400, 0x011434}, |
|
|
|
|
{0x011435, 0x011437}, |
|
|
|
|
{0x011438, 0x01143f}, |
|
|
|
@ -4908,7 +4999,9 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x0116b0, 0x0116b5}, |
|
|
|
|
{0x0116b8, 0x0116b8}, |
|
|
|
|
{0x011700, 0x01171a}, |
|
|
|
|
{0x01171d, 0x01171f}, |
|
|
|
|
{0x01171d, 0x01171d}, |
|
|
|
|
{0x01171e, 0x01171e}, |
|
|
|
|
{0x01171f, 0x01171f}, |
|
|
|
|
{0x011720, 0x011721}, |
|
|
|
|
{0x011722, 0x011725}, |
|
|
|
|
{0x011726, 0x011726}, |
|
|
|
@ -4956,6 +5049,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x011a97, 0x011a97}, |
|
|
|
|
{0x011a9d, 0x011a9d}, |
|
|
|
|
{0x011ab0, 0x011af8}, |
|
|
|
|
{0x011bc0, 0x011be0}, |
|
|
|
|
{0x011c00, 0x011c08}, |
|
|
|
|
{0x011c0a, 0x011c2e}, |
|
|
|
|
{0x011c2f, 0x011c2f}, |
|
|
|
@ -5009,7 +5103,12 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x012f90, 0x012ff0}, |
|
|
|
|
{0x013000, 0x01342f}, |
|
|
|
|
{0x013441, 0x013446}, |
|
|
|
|
{0x013460, 0x0143fa}, |
|
|
|
|
{0x014400, 0x014646}, |
|
|
|
|
{0x016100, 0x01611d}, |
|
|
|
|
{0x01611e, 0x016129}, |
|
|
|
|
{0x01612a, 0x01612c}, |
|
|
|
|
{0x01612d, 0x01612e}, |
|
|
|
|
{0x016800, 0x016a38}, |
|
|
|
|
{0x016a40, 0x016a5e}, |
|
|
|
|
{0x016a70, 0x016abe}, |
|
|
|
@ -5018,6 +5117,9 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x016b40, 0x016b43}, |
|
|
|
|
{0x016b63, 0x016b77}, |
|
|
|
|
{0x016b7d, 0x016b8f}, |
|
|
|
|
{0x016d40, 0x016d42}, |
|
|
|
|
{0x016d43, 0x016d6a}, |
|
|
|
|
{0x016d6b, 0x016d6c}, |
|
|
|
|
{0x016e40, 0x016e7f}, |
|
|
|
|
{0x016f00, 0x016f4a}, |
|
|
|
|
{0x016f4f, 0x016f4f}, |
|
|
|
@ -5030,7 +5132,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x016ff0, 0x016ff1}, |
|
|
|
|
{0x017000, 0x0187f7}, |
|
|
|
|
{0x018800, 0x018cd5}, |
|
|
|
|
{0x018d00, 0x018d08}, |
|
|
|
|
{0x018cff, 0x018d08}, |
|
|
|
|
{0x01aff0, 0x01aff3}, |
|
|
|
|
{0x01aff5, 0x01affb}, |
|
|
|
|
{0x01affd, 0x01affe}, |
|
|
|
@ -5093,6 +5195,8 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
{0x01e2c0, 0x01e2eb}, |
|
|
|
|
{0x01e4d0, 0x01e4ea}, |
|
|
|
|
{0x01e4eb, 0x01e4eb}, |
|
|
|
|
{0x01e5d0, 0x01e5ed}, |
|
|
|
|
{0x01e5f0, 0x01e5f0}, |
|
|
|
|
{0x01e7e0, 0x01e7e6}, |
|
|
|
|
{0x01e7e8, 0x01e7eb}, |
|
|
|
|
{0x01e7ed, 0x01e7ee}, |
|
|
|
@ -5149,7 +5253,7 @@ static const pg_unicode_range unicode_alphabetic[1141] = |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/* table of Unicode codepoint ranges of Lowercase characters */ |
|
|
|
|
static const pg_unicode_range unicode_lowercase[686] = |
|
|
|
|
static const pg_unicode_range unicode_lowercase[690] = |
|
|
|
|
{ |
|
|
|
|
{0x000061, 0x00007a}, |
|
|
|
|
{0x0000aa, 0x0000aa}, |
|
|
|
@ -5433,6 +5537,7 @@ static const pg_unicode_range unicode_lowercase[686] = |
|
|
|
|
{0x0010fd, 0x0010ff}, |
|
|
|
|
{0x0013f8, 0x0013fd}, |
|
|
|
|
{0x001c80, 0x001c88}, |
|
|
|
|
{0x001c8a, 0x001c8a}, |
|
|
|
|
{0x001d00, 0x001d2b}, |
|
|
|
|
{0x001d2c, 0x001d6a}, |
|
|
|
|
{0x001d6b, 0x001d77}, |
|
|
|
@ -5774,11 +5879,13 @@ static const pg_unicode_range unicode_lowercase[686] = |
|
|
|
|
{0x00a7c3, 0x00a7c3}, |
|
|
|
|
{0x00a7c8, 0x00a7c8}, |
|
|
|
|
{0x00a7ca, 0x00a7ca}, |
|
|
|
|
{0x00a7cd, 0x00a7cd}, |
|
|
|
|
{0x00a7d1, 0x00a7d1}, |
|
|
|
|
{0x00a7d3, 0x00a7d3}, |
|
|
|
|
{0x00a7d5, 0x00a7d5}, |
|
|
|
|
{0x00a7d7, 0x00a7d7}, |
|
|
|
|
{0x00a7d9, 0x00a7d9}, |
|
|
|
|
{0x00a7db, 0x00a7db}, |
|
|
|
|
{0x00a7f2, 0x00a7f4}, |
|
|
|
|
{0x00a7f6, 0x00a7f6}, |
|
|
|
|
{0x00a7f8, 0x00a7f9}, |
|
|
|
@ -5802,6 +5909,7 @@ static const pg_unicode_range unicode_lowercase[686] = |
|
|
|
|
{0x010787, 0x0107b0}, |
|
|
|
|
{0x0107b2, 0x0107ba}, |
|
|
|
|
{0x010cc0, 0x010cf2}, |
|
|
|
|
{0x010d70, 0x010d85}, |
|
|
|
|
{0x0118c0, 0x0118df}, |
|
|
|
|
{0x016e60, 0x016e7f}, |
|
|
|
|
{0x01d41a, 0x01d433}, |
|
|
|
@ -5840,7 +5948,7 @@ static const pg_unicode_range unicode_lowercase[686] = |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/* table of Unicode codepoint ranges of Uppercase characters */ |
|
|
|
|
static const pg_unicode_range unicode_uppercase[651] = |
|
|
|
|
static const pg_unicode_range unicode_uppercase[656] = |
|
|
|
|
{ |
|
|
|
|
{0x000041, 0x00005a}, |
|
|
|
|
{0x0000c0, 0x0000d6}, |
|
|
|
@ -6118,6 +6226,7 @@ static const pg_unicode_range unicode_uppercase[651] = |
|
|
|
|
{0x0010c7, 0x0010c7}, |
|
|
|
|
{0x0010cd, 0x0010cd}, |
|
|
|
|
{0x0013a0, 0x0013f5}, |
|
|
|
|
{0x001c89, 0x001c89}, |
|
|
|
|
{0x001c90, 0x001cba}, |
|
|
|
|
{0x001cbd, 0x001cbf}, |
|
|
|
|
{0x001e00, 0x001e00}, |
|
|
|
@ -6444,9 +6553,12 @@ static const pg_unicode_range unicode_uppercase[651] = |
|
|
|
|
{0x00a7c2, 0x00a7c2}, |
|
|
|
|
{0x00a7c4, 0x00a7c7}, |
|
|
|
|
{0x00a7c9, 0x00a7c9}, |
|
|
|
|
{0x00a7cb, 0x00a7cc}, |
|
|
|
|
{0x00a7d0, 0x00a7d0}, |
|
|
|
|
{0x00a7d6, 0x00a7d6}, |
|
|
|
|
{0x00a7d8, 0x00a7d8}, |
|
|
|
|
{0x00a7da, 0x00a7da}, |
|
|
|
|
{0x00a7dc, 0x00a7dc}, |
|
|
|
|
{0x00a7f5, 0x00a7f5}, |
|
|
|
|
{0x00ff21, 0x00ff3a}, |
|
|
|
|
{0x010400, 0x010427}, |
|
|
|
@ -6456,6 +6568,7 @@ static const pg_unicode_range unicode_uppercase[651] = |
|
|
|
|
{0x01058c, 0x010592}, |
|
|
|
|
{0x010594, 0x010595}, |
|
|
|
|
{0x010c80, 0x010cb2}, |
|
|
|
|
{0x010d50, 0x010d65}, |
|
|
|
|
{0x0118a0, 0x0118bf}, |
|
|
|
|
{0x016e40, 0x016e5f}, |
|
|
|
|
{0x01d400, 0x01d419}, |
|
|
|
@ -6496,7 +6609,7 @@ static const pg_unicode_range unicode_uppercase[651] = |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/* table of Unicode codepoint ranges of Case_Ignorable characters */ |
|
|
|
|
static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
static const pg_unicode_range unicode_case_ignorable[506] = |
|
|
|
|
{ |
|
|
|
|
{0x000027, 0x000027}, |
|
|
|
|
{0x00002e, 0x00002e}, |
|
|
|
@ -6565,7 +6678,7 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x000859, 0x00085b}, |
|
|
|
|
{0x000888, 0x000888}, |
|
|
|
|
{0x000890, 0x000891}, |
|
|
|
|
{0x000898, 0x00089f}, |
|
|
|
|
{0x000897, 0x00089f}, |
|
|
|
|
{0x0008c9, 0x0008c9}, |
|
|
|
|
{0x0008ca, 0x0008e1}, |
|
|
|
|
{0x0008e2, 0x0008e2}, |
|
|
|
@ -6846,8 +6959,11 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x010a3f, 0x010a3f}, |
|
|
|
|
{0x010ae5, 0x010ae6}, |
|
|
|
|
{0x010d24, 0x010d27}, |
|
|
|
|
{0x010d4e, 0x010d4e}, |
|
|
|
|
{0x010d69, 0x010d6d}, |
|
|
|
|
{0x010d6f, 0x010d6f}, |
|
|
|
|
{0x010eab, 0x010eac}, |
|
|
|
|
{0x010efd, 0x010eff}, |
|
|
|
|
{0x010efc, 0x010eff}, |
|
|
|
|
{0x010f46, 0x010f50}, |
|
|
|
|
{0x010f82, 0x010f85}, |
|
|
|
|
{0x011001, 0x011001}, |
|
|
|
@ -6880,6 +6996,11 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x011340, 0x011340}, |
|
|
|
|
{0x011366, 0x01136c}, |
|
|
|
|
{0x011370, 0x011374}, |
|
|
|
|
{0x0113bb, 0x0113c0}, |
|
|
|
|
{0x0113ce, 0x0113ce}, |
|
|
|
|
{0x0113d0, 0x0113d0}, |
|
|
|
|
{0x0113d2, 0x0113d2}, |
|
|
|
|
{0x0113e1, 0x0113e2}, |
|
|
|
|
{0x011438, 0x01143f}, |
|
|
|
|
{0x011442, 0x011444}, |
|
|
|
|
{0x011446, 0x011446}, |
|
|
|
@ -6899,7 +7020,8 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x0116ad, 0x0116ad}, |
|
|
|
|
{0x0116b0, 0x0116b5}, |
|
|
|
|
{0x0116b7, 0x0116b7}, |
|
|
|
|
{0x01171d, 0x01171f}, |
|
|
|
|
{0x01171d, 0x01171d}, |
|
|
|
|
{0x01171f, 0x01171f}, |
|
|
|
|
{0x011722, 0x011725}, |
|
|
|
|
{0x011727, 0x01172b}, |
|
|
|
|
{0x01182f, 0x011837}, |
|
|
|
@ -6938,12 +7060,17 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x011f36, 0x011f3a}, |
|
|
|
|
{0x011f40, 0x011f40}, |
|
|
|
|
{0x011f42, 0x011f42}, |
|
|
|
|
{0x011f5a, 0x011f5a}, |
|
|
|
|
{0x013430, 0x01343f}, |
|
|
|
|
{0x013440, 0x013440}, |
|
|
|
|
{0x013447, 0x013455}, |
|
|
|
|
{0x01611e, 0x016129}, |
|
|
|
|
{0x01612d, 0x01612f}, |
|
|
|
|
{0x016af0, 0x016af4}, |
|
|
|
|
{0x016b30, 0x016b36}, |
|
|
|
|
{0x016b40, 0x016b43}, |
|
|
|
|
{0x016d40, 0x016d42}, |
|
|
|
|
{0x016d6b, 0x016d6c}, |
|
|
|
|
{0x016f4f, 0x016f4f}, |
|
|
|
|
{0x016f8f, 0x016f92}, |
|
|
|
|
{0x016f93, 0x016f9f}, |
|
|
|
@ -6982,6 +7109,7 @@ static const pg_unicode_range unicode_case_ignorable[491] = |
|
|
|
|
{0x01e2ec, 0x01e2ef}, |
|
|
|
|
{0x01e4eb, 0x01e4eb}, |
|
|
|
|
{0x01e4ec, 0x01e4ef}, |
|
|
|
|
{0x01e5ee, 0x01e5ef}, |
|
|
|
|
{0x01e8d0, 0x01e8d6}, |
|
|
|
|
{0x01e944, 0x01e94a}, |
|
|
|
|
{0x01e94b, 0x01e94b}, |
|
|
|
|