mirror of https://github.com/postgres/postgres
The hardcoded "wide character" set in ucs_wcwidth() was last updated around the Unicode 5.0 era. This led to misalignment when printing emojis and other codepoints that have since been designated wide or full-width. To fix and keep up to date, extend update-unicode to download the list of wide and full-width codepoints from the official sources. In passing, remove some comments about non-spacing characters that haven't been accurate since we removed the former hardcoded logic. Jacob Champion Reported and reviewed by Pavel Stehule Discussion: https://www.postgresql.org/message-id/flat/CAFj8pRCeX21O69YHxmykYySYyprZAqrKWWg0KoGKdjgqcGyygg@mail.gmail.com pull/69/merge
parent
1563ecbc1b
commit
bab982161e
@ -0,0 +1,76 @@ |
|||||||
|
#!/usr/bin/perl
#
# Generate a sorted list of non-overlapping intervals of East Asian Wide (W)
# and East Asian Fullwidth (F) characters, using Unicode data files as input.
# Pass EastAsianWidth.txt as argument.  The output is on stdout.
#
# Input lines are expected to look like "XXXX;W" or "XXXX..YYYY;W", optionally
# followed by a "#" comment.  Consecutive W/F entries whose code points are
# adjacent are merged into a single {first, last} interval.  The merging
# logic only compares each entry with the one before it, so it presumably
# relies on the input being in ascending code point order.
#
# Copyright (c) 2019-2021, PostgreSQL Global Development Group

use strict;
use warnings;

# First code point of the interval currently being accumulated, or undef
# when no interval is open.
my $range_start = undef;

# Bounds of the entry on the current input line.
my ($first, $last);

# Upper bound of the most recently parsed entry; this is the end of the open
# interval at the moment it gets emitted.
my $prev_last;

print
  "/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */\n\n";

print "static const struct mbinterval east_asian_fw[] = {\n";

# Read the input one line at a time rather than slurping it into a list.
while (my $line = <ARGV>)
{
	chomp $line;

	# Strip trailing comments, then any leftover surrounding whitespace
	# (this also removes a stray CR from CRLF line endings).
	$line =~ s/\s*#.*$//;
	$line =~ s/^\s+|\s+$//g;
	next if $line eq '';

	# Tolerate optional padding around the field separator, so the script
	# keeps working if the data file aligns its fields with spaces.
	my ($codepoint, $width) = split /\s*;\s*/, $line;

	if ($codepoint =~ /\.\./)
	{
		($first, $last) = split /\.\./, $codepoint;
	}
	else
	{
		$first = $last = $codepoint;
	}

	($first, $last) = map(hex, ($first, $last));

	if ($width eq 'F' || $width eq 'W')
	{
		# fullwidth/wide characters
		if (!defined($range_start))
		{
			# save for start of range if one hasn't been started yet
			$range_start = $first;
		}
		elsif ($first != $prev_last + 1)
		{
			# ranges aren't contiguous; emit the last and start a new one
			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
			$range_start = $first;
		}

		# otherwise this entry directly extends the open range; nothing to
		# do until the range is closed
	}
	else
	{
		# not wide characters, print out previous range if any
		if (defined($range_start))
		{
			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
			$range_start = undef;
		}
	}
}
continue
{
	# Runs after every iteration, including ones skipped with "next"; on
	# skipped lines $last still holds the previous entry's value.
	$prev_last = $last;
}

# don't forget any ranges at the very end of the database (though there are none
# as of Unicode 13.0)
if (defined($range_start))
{
	printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
}

print "};\n";
@ -0,0 +1,120 @@ |
|||||||
|
/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */ |
||||||
|
|
||||||
|
static const struct mbinterval east_asian_fw[] = { |
||||||
|
{0x1100, 0x115F}, |
||||||
|
{0x231A, 0x231B}, |
||||||
|
{0x2329, 0x232A}, |
||||||
|
{0x23E9, 0x23EC}, |
||||||
|
{0x23F0, 0x23F0}, |
||||||
|
{0x23F3, 0x23F3}, |
||||||
|
{0x25FD, 0x25FE}, |
||||||
|
{0x2614, 0x2615}, |
||||||
|
{0x2648, 0x2653}, |
||||||
|
{0x267F, 0x267F}, |
||||||
|
{0x2693, 0x2693}, |
||||||
|
{0x26A1, 0x26A1}, |
||||||
|
{0x26AA, 0x26AB}, |
||||||
|
{0x26BD, 0x26BE}, |
||||||
|
{0x26C4, 0x26C5}, |
||||||
|
{0x26CE, 0x26CE}, |
||||||
|
{0x26D4, 0x26D4}, |
||||||
|
{0x26EA, 0x26EA}, |
||||||
|
{0x26F2, 0x26F3}, |
||||||
|
{0x26F5, 0x26F5}, |
||||||
|
{0x26FA, 0x26FA}, |
||||||
|
{0x26FD, 0x26FD}, |
||||||
|
{0x2705, 0x2705}, |
||||||
|
{0x270A, 0x270B}, |
||||||
|
{0x2728, 0x2728}, |
||||||
|
{0x274C, 0x274C}, |
||||||
|
{0x274E, 0x274E}, |
||||||
|
{0x2753, 0x2755}, |
||||||
|
{0x2757, 0x2757}, |
||||||
|
{0x2795, 0x2797}, |
||||||
|
{0x27B0, 0x27B0}, |
||||||
|
{0x27BF, 0x27BF}, |
||||||
|
{0x2B1B, 0x2B1C}, |
||||||
|
{0x2B50, 0x2B50}, |
||||||
|
{0x2B55, 0x2B55}, |
||||||
|
{0x2E80, 0x2E99}, |
||||||
|
{0x2E9B, 0x2EF3}, |
||||||
|
{0x2F00, 0x2FD5}, |
||||||
|
{0x2FF0, 0x2FFB}, |
||||||
|
{0x3000, 0x303E}, |
||||||
|
{0x3041, 0x3096}, |
||||||
|
{0x3099, 0x30FF}, |
||||||
|
{0x3105, 0x312F}, |
||||||
|
{0x3131, 0x318E}, |
||||||
|
{0x3190, 0x31E3}, |
||||||
|
{0x31F0, 0x321E}, |
||||||
|
{0x3220, 0x3247}, |
||||||
|
{0x3250, 0x4DBF}, |
||||||
|
{0x4E00, 0xA48C}, |
||||||
|
{0xA490, 0xA4C6}, |
||||||
|
{0xA960, 0xA97C}, |
||||||
|
{0xAC00, 0xD7A3}, |
||||||
|
{0xF900, 0xFAFF}, |
||||||
|
{0xFE10, 0xFE19}, |
||||||
|
{0xFE30, 0xFE52}, |
||||||
|
{0xFE54, 0xFE66}, |
||||||
|
{0xFE68, 0xFE6B}, |
||||||
|
{0xFF01, 0xFF60}, |
||||||
|
{0xFFE0, 0xFFE6}, |
||||||
|
{0x16FE0, 0x16FE4}, |
||||||
|
{0x16FF0, 0x16FF1}, |
||||||
|
{0x17000, 0x187F7}, |
||||||
|
{0x18800, 0x18CD5}, |
||||||
|
{0x18D00, 0x18D08}, |
||||||
|
{0x1B000, 0x1B11E}, |
||||||
|
{0x1B150, 0x1B152}, |
||||||
|
{0x1B164, 0x1B167}, |
||||||
|
{0x1B170, 0x1B2FB}, |
||||||
|
{0x1F004, 0x1F004}, |
||||||
|
{0x1F0CF, 0x1F0CF}, |
||||||
|
{0x1F18E, 0x1F18E}, |
||||||
|
{0x1F191, 0x1F19A}, |
||||||
|
{0x1F200, 0x1F202}, |
||||||
|
{0x1F210, 0x1F23B}, |
||||||
|
{0x1F240, 0x1F248}, |
||||||
|
{0x1F250, 0x1F251}, |
||||||
|
{0x1F260, 0x1F265}, |
||||||
|
{0x1F300, 0x1F320}, |
||||||
|
{0x1F32D, 0x1F335}, |
||||||
|
{0x1F337, 0x1F37C}, |
||||||
|
{0x1F37E, 0x1F393}, |
||||||
|
{0x1F3A0, 0x1F3CA}, |
||||||
|
{0x1F3CF, 0x1F3D3}, |
||||||
|
{0x1F3E0, 0x1F3F0}, |
||||||
|
{0x1F3F4, 0x1F3F4}, |
||||||
|
{0x1F3F8, 0x1F43E}, |
||||||
|
{0x1F440, 0x1F440}, |
||||||
|
{0x1F442, 0x1F4FC}, |
||||||
|
{0x1F4FF, 0x1F53D}, |
||||||
|
{0x1F54B, 0x1F54E}, |
||||||
|
{0x1F550, 0x1F567}, |
||||||
|
{0x1F57A, 0x1F57A}, |
||||||
|
{0x1F595, 0x1F596}, |
||||||
|
{0x1F5A4, 0x1F5A4}, |
||||||
|
{0x1F5FB, 0x1F64F}, |
||||||
|
{0x1F680, 0x1F6C5}, |
||||||
|
{0x1F6CC, 0x1F6CC}, |
||||||
|
{0x1F6D0, 0x1F6D2}, |
||||||
|
{0x1F6D5, 0x1F6D7}, |
||||||
|
{0x1F6EB, 0x1F6EC}, |
||||||
|
{0x1F6F4, 0x1F6FC}, |
||||||
|
{0x1F7E0, 0x1F7EB}, |
||||||
|
{0x1F90C, 0x1F93A}, |
||||||
|
{0x1F93C, 0x1F945}, |
||||||
|
{0x1F947, 0x1F978}, |
||||||
|
{0x1F97A, 0x1F9CB}, |
||||||
|
{0x1F9CD, 0x1F9FF}, |
||||||
|
{0x1FA70, 0x1FA74}, |
||||||
|
{0x1FA78, 0x1FA7A}, |
||||||
|
{0x1FA80, 0x1FA86}, |
||||||
|
{0x1FA90, 0x1FAA8}, |
||||||
|
{0x1FAB0, 0x1FAB6}, |
||||||
|
{0x1FAC0, 0x1FAC2}, |
||||||
|
{0x1FAD0, 0x1FAD6}, |
||||||
|
{0x20000, 0x2FFFD}, |
||||||
|
{0x30000, 0x3FFFD}, |
||||||
|
}; |
Loading…
Reference in new issue