|
|
|
|
@ -49,7 +49,8 @@ while (my $line = <$FH>) |
|
|
|
|
$simple{$code} = { |
|
|
|
|
Simple_Lowercase => ($simple_lowercase || $code), |
|
|
|
|
Simple_Titlecase => ($simple_titlecase || $code), |
|
|
|
|
Simple_Uppercase => ($simple_uppercase || $code) |
|
|
|
|
Simple_Uppercase => ($simple_uppercase || $code), |
|
|
|
|
Simple_Foldcase => $code, |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
@ -87,6 +88,7 @@ while (my $line = <$FH>) |
|
|
|
|
my @lower = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[1])); |
|
|
|
|
my @title = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[2])); |
|
|
|
|
my @upper = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[3])); |
|
|
|
|
my @fold = (); |
|
|
|
|
my @conditions = map { |
|
|
|
|
# supporting negated conditions may require storing a |
|
|
|
|
# mask of relevant conditions for a given rule to differentiate |
|
|
|
|
@ -101,6 +103,7 @@ while (my $line = <$FH>) |
|
|
|
|
push @lower, $code if (scalar @lower == 0); |
|
|
|
|
push @title, $code if (scalar @title == 0); |
|
|
|
|
push @upper, $code if (scalar @upper == 0); |
|
|
|
|
push @fold, $code; |
|
|
|
|
|
|
|
|
|
# none should map to more than 3 codepoints |
|
|
|
|
die "lowercase expansion for 0x$elts[0] exceeds maximum: '$elts[1]'" |
|
|
|
|
@ -114,13 +117,15 @@ while (my $line = <$FH>) |
|
|
|
|
while (scalar @upper < $MAX_CASE_EXPANSION) { push @upper, 0x000000 } |
|
|
|
|
while (scalar @lower < $MAX_CASE_EXPANSION) { push @lower, 0x000000 } |
|
|
|
|
while (scalar @title < $MAX_CASE_EXPANSION) { push @title, 0x000000 } |
|
|
|
|
while (scalar @fold < $MAX_CASE_EXPANSION) { push @fold, 0x000000 } |
|
|
|
|
|
|
|
|
|
# Characters with special mappings may not have simple mappings; |
|
|
|
|
# ensure that an entry exists. |
|
|
|
|
$simple{$code} ||= { |
|
|
|
|
Simple_Lowercase => $code, |
|
|
|
|
Simple_Titlecase => $code, |
|
|
|
|
Simple_Uppercase => $code |
|
|
|
|
Simple_Uppercase => $code, |
|
|
|
|
Simple_Foldcase => $code |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
# Multiple special case rules for a single codepoint could be |
|
|
|
|
@ -135,11 +140,96 @@ while (my $line = <$FH>) |
|
|
|
|
Lowercase => \@lower, |
|
|
|
|
Titlecase => \@title, |
|
|
|
|
Uppercase => \@upper, |
|
|
|
|
Foldcase => \@fold, |
|
|
|
|
Conditions => $cond_str |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
close $FH; |
|
|
|
|
|
|
|
|
|
open($FH, '<', "$output_path/CaseFolding.txt") |
|
|
|
|
or die "Could not open $output_path/CaseFolding.txt: $!."; |
|
|
|
|
while (my $line = <$FH>) |
|
|
|
|
{ |
|
|
|
|
# remove comments |
|
|
|
|
$line =~ s/^(.*?)#.*$/$1/s; |
|
|
|
|
|
|
|
|
|
# ignore empty lines |
|
|
|
|
next unless $line =~ /;/; |
|
|
|
|
|
|
|
|
|
my @elts = split(';', $line); |
|
|
|
|
my $code = hex($elts[0]); |
|
|
|
|
my $status = $elts[1] =~ s/^\s+|\s+$//rg; |
|
|
|
|
|
|
|
|
|
# Codepoint may map to multiple characters when folding. Split |
|
|
|
|
# each mapping on whitespace and extract the hexadecimal into an |
|
|
|
|
# array of codepoints. |
|
|
|
|
my @fold = map { hex $_ } (grep /[0-9A-F]+/, (split /\s+/, $elts[2])); |
|
|
|
|
|
|
|
|
|
die "codepoint $code out of range" if $code > 0x10FFFF; |
|
|
|
|
|
|
|
|
|
# status 'T' unsupported; skip |
|
|
|
|
next if $status eq 'T'; |
|
|
|
|
|
|
|
|
|
# encountered unrecognized status type |
|
|
|
|
die "unsupported status type '$status'" |
|
|
|
|
if $status ne 'S' && $status ne 'C' && $status ne 'F'; |
|
|
|
|
|
|
|
|
|
# initialize simple case mappings if they don't exist |
|
|
|
|
$simple{$code} ||= { |
|
|
|
|
Simple_Lowercase => $code, |
|
|
|
|
Simple_Titlecase => $code, |
|
|
|
|
Simple_Uppercase => $code, |
|
|
|
|
Simple_Foldcase => $code |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
if ($status eq 'S' || $status eq 'C') |
|
|
|
|
{ |
|
|
|
|
die |
|
|
|
|
"Simple case folding for $code has multiple codepoints: '$line' '$elts[2]'" |
|
|
|
|
if scalar @fold != 1; |
|
|
|
|
my $simple_foldcase = $fold[0]; |
|
|
|
|
|
|
|
|
|
die "Simple_Foldcase $code out of range" |
|
|
|
|
if $simple_foldcase > 0x10FFFF; |
|
|
|
|
|
|
|
|
|
$simple{$code}{Simple_Foldcase} = $simple_foldcase; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if ($status eq 'F' || ($status eq 'C' && defined $special{$code})) |
|
|
|
|
{ |
|
|
|
|
while (scalar @fold < $MAX_CASE_EXPANSION) { push @fold, 0x000000 } |
|
|
|
|
|
|
|
|
|
#initialize special case mappings if they don't exist |
|
|
|
|
if (!defined $special{$code}) |
|
|
|
|
{ |
|
|
|
|
my @lower = ($simple{$code}{Simple_Lowercase}); |
|
|
|
|
my @title = ($simple{$code}{Simple_Titlecase}); |
|
|
|
|
my @upper = ($simple{$code}{Simple_Uppercase}); |
|
|
|
|
while (scalar @lower < $MAX_CASE_EXPANSION) |
|
|
|
|
{ |
|
|
|
|
push @lower, 0x000000; |
|
|
|
|
} |
|
|
|
|
while (scalar @title < $MAX_CASE_EXPANSION) |
|
|
|
|
{ |
|
|
|
|
push @title, 0x000000; |
|
|
|
|
} |
|
|
|
|
while (scalar @upper < $MAX_CASE_EXPANSION) |
|
|
|
|
{ |
|
|
|
|
push @upper, 0x000000; |
|
|
|
|
} |
|
|
|
|
$special{$code} = { |
|
|
|
|
Lowercase => \@lower, |
|
|
|
|
Titlecase => \@title, |
|
|
|
|
Uppercase => \@upper, |
|
|
|
|
Conditions => '0' |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
$special{$code}{Foldcase} = \@fold; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
close $FH; |
|
|
|
|
|
|
|
|
|
# assign sequential array indexes to the special mappings |
|
|
|
|
my $special_idx = 0; |
|
|
|
|
foreach my $code (sort { $a <=> $b } (keys %special)) |
|
|
|
|
@ -202,6 +292,7 @@ typedef enum |
|
|
|
|
CaseLower = 0, |
|
|
|
|
CaseTitle = 1, |
|
|
|
|
CaseUpper = 2, |
|
|
|
|
CaseFold = 3, |
|
|
|
|
NCaseKind |
|
|
|
|
} CaseKind; |
|
|
|
|
|
|
|
|
|
@ -232,14 +323,17 @@ foreach my $code (sort { $a <=> $b } (keys %special)) |
|
|
|
|
die if scalar @{ $special{$code}{Lowercase} } != $MAX_CASE_EXPANSION; |
|
|
|
|
die if scalar @{ $special{$code}{Titlecase} } != $MAX_CASE_EXPANSION; |
|
|
|
|
die if scalar @{ $special{$code}{Uppercase} } != $MAX_CASE_EXPANSION; |
|
|
|
|
die if scalar @{ $special{$code}{Foldcase} } != $MAX_CASE_EXPANSION; |
|
|
|
|
my $lower = join ", ", |
|
|
|
|
(map { sprintf "0x%06x", $_ } @{ $special{$code}{Lowercase} }); |
|
|
|
|
my $title = join ", ", |
|
|
|
|
(map { sprintf "0x%06x", $_ } @{ $special{$code}{Titlecase} }); |
|
|
|
|
my $upper = join ", ", |
|
|
|
|
(map { sprintf "0x%06x", $_ } @{ $special{$code}{Uppercase} }); |
|
|
|
|
my $fold = join ", ", |
|
|
|
|
(map { sprintf "0x%06x", $_ } @{ $special{$code}{Foldcase} }); |
|
|
|
|
printf $OT "\t{0x%06x, %s, ", $code, $special{$code}{Conditions}; |
|
|
|
|
printf $OT "{{%s}, {%s}, {%s}}},\n", $lower, $title, $upper; |
|
|
|
|
printf $OT "{{%s}, {%s}, {%s}, {%s}}},\n", $lower, $title, $upper, $fold; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
print $OT "\t{0, 0, {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}}\n"; |
|
|
|
|
@ -260,11 +354,13 @@ for (my $code = 0; $code < 0x80; $code++) |
|
|
|
|
my $lc = ($simple{$code}{Simple_Lowercase} || $code); |
|
|
|
|
my $tc = ($simple{$code}{Simple_Titlecase} || $code); |
|
|
|
|
my $uc = ($simple{$code}{Simple_Uppercase} || $code); |
|
|
|
|
my $fc = ($simple{$code}{Simple_Foldcase} || $code); |
|
|
|
|
|
|
|
|
|
die "unexpected special case for code $code" |
|
|
|
|
if defined $special{$code}; |
|
|
|
|
printf $OT |
|
|
|
|
"\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}, NULL},\n", |
|
|
|
|
$code, $lc, $tc, $uc; |
|
|
|
|
"\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x,[CaseFold] = 0x%06x}, NULL},\n", |
|
|
|
|
$code, $lc, $tc, $uc, $fc; |
|
|
|
|
} |
|
|
|
|
printf $OT "\n"; |
|
|
|
|
|
|
|
|
|
@ -280,8 +376,8 @@ foreach my $code (sort { $a <=> $b } (keys %simple)) |
|
|
|
|
$special_case = sprintf "&special_case[%d]", $special{$code}{Index}; |
|
|
|
|
} |
|
|
|
|
printf $OT |
|
|
|
|
"\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}, %s},\n", |
|
|
|
|
"\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x,[CaseFold] = 0x%06x}, %s},\n", |
|
|
|
|
$code, $map->{Simple_Lowercase}, $map->{Simple_Titlecase}, |
|
|
|
|
$map->{Simple_Uppercase}, $special_case; |
|
|
|
|
$map->{Simple_Uppercase}, $map->{Simple_Foldcase}, $special_case; |
|
|
|
|
} |
|
|
|
|
print $OT "};\n"; |
|
|
|
|
|