Extend collection of Unicode combining characters beyond the BMP

The former limit of 0xFFFF was perhaps a carryover from an older
hand-coded table. Since commit bab982161 we have enough space in
mbinterval to store larger codepoints, so collect all combining characters.

Discussion: https://www.postgresql.org/message-id/49ad1fa0-174e-c901-b14c-c484b60907f1%40enterprisedb.com
pull/69/merge
John Naylor 4 years ago
parent bab982161e
commit 5bc429aacb
  1. 2
      src/common/unicode/generate-unicode_combining_table.pl
  2. 102
      src/include/common/unicode_combining_table.h

@ -25,8 +25,6 @@ foreach my $line (<ARGV>)
my @fields = split ';', $line;
$codepoint = hex $fields[0];
next if $codepoint > 0xFFFF;
if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
{
# combining character, save for start of range

@ -193,4 +193,106 @@ static const struct mbinterval combining[] = {
{0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F},
{0xFE20, 0xFE2F},
{0x101FD, 0x101FD},
{0x102E0, 0x102E0},
{0x10376, 0x1037A},
{0x10A01, 0x10A0F},
{0x10A38, 0x10A3F},
{0x10AE5, 0x10AE6},
{0x10D24, 0x10D27},
{0x10EAB, 0x10EAC},
{0x10F46, 0x10F50},
{0x11001, 0x11001},
{0x11038, 0x11046},
{0x1107F, 0x11081},
{0x110B3, 0x110B6},
{0x110B9, 0x110BA},
{0x11100, 0x11102},
{0x11127, 0x1112B},
{0x1112D, 0x11134},
{0x11173, 0x11173},
{0x11180, 0x11181},
{0x111B6, 0x111BE},
{0x111C9, 0x111CC},
{0x111CF, 0x111CF},
{0x1122F, 0x11231},
{0x11234, 0x11234},
{0x11236, 0x11237},
{0x1123E, 0x1123E},
{0x112DF, 0x112DF},
{0x112E3, 0x112EA},
{0x11300, 0x11301},
{0x1133B, 0x1133C},
{0x11340, 0x11340},
{0x11366, 0x11374},
{0x11438, 0x1143F},
{0x11442, 0x11444},
{0x11446, 0x11446},
{0x1145E, 0x1145E},
{0x114B3, 0x114B8},
{0x114BA, 0x114BA},
{0x114BF, 0x114C0},
{0x114C2, 0x114C3},
{0x115B2, 0x115B5},
{0x115BC, 0x115BD},
{0x115BF, 0x115C0},
{0x115DC, 0x115DD},
{0x11633, 0x1163A},
{0x1163D, 0x1163D},
{0x1163F, 0x11640},
{0x116AB, 0x116AB},
{0x116AD, 0x116AD},
{0x116B0, 0x116B5},
{0x116B7, 0x116B7},
{0x1171D, 0x1171F},
{0x11722, 0x11725},
{0x11727, 0x1172B},
{0x1182F, 0x11837},
{0x11839, 0x1183A},
{0x1193B, 0x1193C},
{0x1193E, 0x1193E},
{0x11943, 0x11943},
{0x119D4, 0x119DB},
{0x119E0, 0x119E0},
{0x11A01, 0x11A0A},
{0x11A33, 0x11A38},
{0x11A3B, 0x11A3E},
{0x11A47, 0x11A47},
{0x11A51, 0x11A56},
{0x11A59, 0x11A5B},
{0x11A8A, 0x11A96},
{0x11A98, 0x11A99},
{0x11C30, 0x11C3D},
{0x11C3F, 0x11C3F},
{0x11C92, 0x11CA7},
{0x11CAA, 0x11CB0},
{0x11CB2, 0x11CB3},
{0x11CB5, 0x11CB6},
{0x11D31, 0x11D45},
{0x11D47, 0x11D47},
{0x11D90, 0x11D91},
{0x11D95, 0x11D95},
{0x11D97, 0x11D97},
{0x11EF3, 0x11EF4},
{0x16AF0, 0x16AF4},
{0x16B30, 0x16B36},
{0x16F4F, 0x16F4F},
{0x16F8F, 0x16F92},
{0x16FE4, 0x16FE4},
{0x1BC9D, 0x1BC9E},
{0x1D167, 0x1D169},
{0x1D17B, 0x1D182},
{0x1D185, 0x1D18B},
{0x1D1AA, 0x1D1AD},
{0x1D242, 0x1D244},
{0x1DA00, 0x1DA36},
{0x1DA3B, 0x1DA6C},
{0x1DA75, 0x1DA75},
{0x1DA84, 0x1DA84},
{0x1DA9B, 0x1E02A},
{0x1E130, 0x1E136},
{0x1E2EC, 0x1E2EF},
{0x1E8D0, 0x1E8D6},
{0x1E944, 0x1E94A},
{0xE0100, 0xE01EF},
};

Loading…
Cancel
Save