@ -9,15 +9,15 @@ use strict;
use Exporter 'import' ;
use Exporter 'import' ;
our @ EXPORT = qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables ) ;
our @ EXPORT =
qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables ) ;
# Constants used in the 'direction' field of the character maps
# Constants used in the 'direction' field of the character maps
use constant {
use constant {
NONE = > 0 ,
NONE = > 0 ,
TO_UNICODE = > 1 ,
TO_UNICODE = > 1 ,
FROM_UNICODE = > 2 ,
FROM_UNICODE = > 2 ,
BOTH = > 3
BOTH = > 3 } ;
} ;
#######################################################################
#######################################################################
# read_source - common routine to read source file
# read_source - common routine to read source file
@ -36,7 +36,7 @@ sub read_source
next if ( /^#/ ) ;
next if ( /^#/ ) ;
chop ;
chop ;
next if ( /^$/ ) ; # Ignore empty lines
next if ( /^$/ ) ; # Ignore empty lines
next if ( /^0x([0-9A-F]+)\s+(#.*)$/ ) ;
next if ( /^0x([0-9A-F]+)\s+(#.*)$/ ) ;
@ -49,13 +49,13 @@ sub read_source
print STDERR "READ ERROR at line $. in $fname: $_\n" ;
print STDERR "READ ERROR at line $. in $fname: $_\n" ;
exit ;
exit ;
}
}
my $ out = { code = > hex ( $ 1 ) ,
my $ out = {
ucs = > hex ( $ 2 ) ,
code = > hex ( $ 1 ) ,
comment = > $ 4 ,
ucs = > hex ( $ 2 ) ,
direction = > BOTH ,
comment = > $ 4 ,
f = > $ fname ,
direction = > BOTH ,
l = > $.
f = > $ fname ,
} ;
l = > $. } ;
# Ignore pure ASCII mappings. PostgreSQL character conversion code
# Ignore pure ASCII mappings. PostgreSQL character conversion code
# never even passes these to the conversion code.
# never even passes these to the conversion code.
@ -92,8 +92,10 @@ sub print_conversion_tables
{
{
my ( $ this_script , $ csname , $ charset ) = @ _ ;
my ( $ this_script , $ csname , $ charset ) = @ _ ;
print_conversion_tables_direction ( $ this_script , $ csname , FROM_UNICODE , $ charset ) ;
print_conversion_tables_direction ( $ this_script , $ csname , FROM_UNICODE ,
print_conversion_tables_direction ( $ this_script , $ csname , TO_UNICODE , $ charset ) ;
$ charset ) ;
print_conversion_tables_direction ( $ this_script , $ csname , TO_UNICODE ,
$ charset ) ;
}
}
#############################################################################
#############################################################################
@ -117,14 +119,14 @@ sub print_conversion_tables_direction
my $ tblname ;
my $ tblname ;
if ( $ direction == TO_UNICODE )
if ( $ direction == TO_UNICODE )
{
{
$ fname = lc ( "${csname}_to_utf8.map" ) ;
$ fname = lc ( "${csname}_to_utf8.map" ) ;
$ tblname = lc ( "${csname}_to_unicode_tree" ) ;
$ tblname = lc ( "${csname}_to_unicode_tree" ) ;
print "- Writing ${csname}=>UTF8 conversion table: $fname\n" ;
print "- Writing ${csname}=>UTF8 conversion table: $fname\n" ;
}
}
else
else
{
{
$ fname = lc ( "utf8_to_${csname}.map" ) ;
$ fname = lc ( "utf8_to_${csname}.map" ) ;
$ tblname = lc ( "${csname}_from_unicode_tree" ) ;
$ tblname = lc ( "${csname}_from_unicode_tree" ) ;
print "- Writing UTF8=>${csname} conversion table: $fname\n" ;
print "- Writing UTF8=>${csname} conversion table: $fname\n" ;
@ -135,24 +137,22 @@ sub print_conversion_tables_direction
print $ out "/* src/backend/utils/mb/Unicode/$fname */\n" ;
print $ out "/* src/backend/utils/mb/Unicode/$fname */\n" ;
print $ out "/* This file is generated by $this_script */\n\n" ;
print $ out "/* This file is generated by $this_script */\n\n" ;
# Collect regular, non-combined, mappings, and create the radix tree from them.
# Collect regular, non-combined, mappings, and create the radix tree from them.
my $ charmap = & make_charmap ( $ out , $ charset , $ direction , 0 ) ;
my $ charmap = & make_charmap ( $ out , $ charset , $ direction , 0 ) ;
print_radix_table ( $ out , $ tblname , $ charmap ) ;
print_radix_table ( $ out , $ tblname , $ charmap ) ;
# Collect combined characters, and create combined character table (if any)
# Collect combined characters, and create combined character table (if any)
my $ charmap_combined = & make_charmap_combined ( $ charset , $ direction ) ;
my $ charmap_combined = & make_charmap_combined ( $ charset , $ direction ) ;
if ( scalar @ { $ charmap_combined } > 0 )
if ( scalar @ { $ charmap_combined } > 0 )
{
{
if ( $ direction == TO_UNICODE )
if ( $ direction == TO_UNICODE )
{
{
print_to_utf8_combined_map ( $ out , $ csname ,
print_to_utf8_combined_map ( $ out , $ csname , $ charmap_combined , 1 ) ;
$ charmap_combined , 1 ) ;
}
}
else
else
{
{
print_from_utf8_combined_map ( $ out , $ csname ,
print_from_utf8_combined_map ( $ out , $ csname , $ charmap_combined , 1 ) ;
$ charmap_combined , 1 ) ;
}
}
}
}
@ -166,14 +166,16 @@ sub print_from_utf8_combined_map
my $ last_comment = "" ;
my $ last_comment = "" ;
printf $ out "\n/* Combined character map */\n" ;
printf $ out "\n/* Combined character map */\n" ;
printf $ out "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {" ,
printf $ out
"static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {" ,
scalar ( @$ table ) ;
scalar ( @$ table ) ;
my $ first = 1 ;
my $ first = 1 ;
foreach my $ i ( sort { $ a - > { utf8 } <=> $ b - > { utf8 } } @$ table )
foreach my $ i ( sort { $ a - > { utf8 } <=> $ b - > { utf8 } } @$ table )
{
{
print ( $ out "," ) if ( ! $ first ) ;
print ( $ out "," ) if ( ! $ first ) ;
$ first = 0 ;
$ first = 0 ;
print $ out "\t/* $last_comment */" if ( $ verbose && $ last_comment ne "" ) ;
print $ out "\t/* $last_comment */"
if ( $ verbose && $ last_comment ne "" ) ;
printf $ out "\n {0x%08x, 0x%08x, 0x%04x}" ,
printf $ out "\n {0x%08x, 0x%08x, 0x%04x}" ,
$ i - > { utf8 } , $ i - > { utf8_second } , $ i - > { code } ;
$ i - > { utf8 } , $ i - > { utf8_second } , $ i - > { code } ;
@ -198,15 +200,17 @@ sub print_to_utf8_combined_map
my $ last_comment = "" ;
my $ last_comment = "" ;
printf $ out "\n/* Combined character map */\n" ;
printf $ out "\n/* Combined character map */\n" ;
printf $ out "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {" ,
printf $ out
"static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {" ,
scalar ( @$ table ) ;
scalar ( @$ table ) ;
my $ first = 1 ;
my $ first = 1 ;
foreach my $ i ( sort { $ a - > { code } <=> $ b - > { code } } @$ table )
foreach my $ i ( sort { $ a - > { code } <=> $ b - > { code } } @$ table )
{
{
print ( $ out "," ) if ( ! $ first ) ;
print ( $ out "," ) if ( ! $ first ) ;
$ first = 0 ;
$ first = 0 ;
print $ out "\t/* $last_comment */" if ( $ verbose && $ last_comment ne "" ) ;
print $ out "\t/* $last_comment */"
if ( $ verbose && $ last_comment ne "" ) ;
printf $ out "\n {0x%04x, 0x%08x, 0x%08x}" ,
printf $ out "\n {0x%04x, 0x%08x, 0x%08x}" ,
$ i - > { code } , $ i - > { utf8 } , $ i - > { utf8_second } ;
$ i - > { code } , $ i - > { utf8 } , $ i - > { utf8_second } ;
@ -214,7 +218,7 @@ sub print_to_utf8_combined_map
if ( $ verbose >= 2 )
if ( $ verbose >= 2 )
{
{
$ last_comment =
$ last_comment =
sprintf ( "%s:%d %s" , $ i - > { f } , $ i - > { l } , $ i - > { comment } ) ;
sprintf ( "%s:%d %s" , $ i - > { f } , $ i - > { l } , $ i - > { comment } ) ;
}
}
elsif ( $ verbose >= 1 )
elsif ( $ verbose >= 1 )
{
{
@ -255,25 +259,25 @@ sub print_radix_table
}
}
elsif ( $ in < 0x10000 )
elsif ( $ in < 0x10000 )
{
{
my $ b1 = $ in >> 8 ;
my $ b1 = $ in >> 8 ;
my $ b2 = $ in & 0xff ;
my $ b2 = $ in & 0xff ;
$ b2map { $ b1 } { $ b2 } = $ out ;
$ b2map { $ b1 } { $ b2 } = $ out ;
}
}
elsif ( $ in < 0x1000000 )
elsif ( $ in < 0x1000000 )
{
{
my $ b1 = $ in >> 16 ;
my $ b1 = $ in >> 16 ;
my $ b2 = ( $ in >> 8 ) & 0xff ;
my $ b2 = ( $ in >> 8 ) & 0xff ;
my $ b3 = $ in & 0xff ;
my $ b3 = $ in & 0xff ;
$ b3map { $ b1 } { $ b2 } { $ b3 } = $ out ;
$ b3map { $ b1 } { $ b2 } { $ b3 } = $ out ;
}
}
elsif ( $ in < 0x100000000 )
elsif ( $ in < 0x100000000 )
{
{
my $ b1 = $ in >> 24 ;
my $ b1 = $ in >> 24 ;
my $ b2 = ( $ in >> 16 ) & 0xff ;
my $ b2 = ( $ in >> 16 ) & 0xff ;
my $ b3 = ( $ in >> 8 ) & 0xff ;
my $ b3 = ( $ in >> 8 ) & 0xff ;
my $ b4 = $ in & 0xff ;
my $ b4 = $ in & 0xff ;
$ b4map { $ b1 } { $ b2 } { $ b3 } { $ b4 } = $ out ;
$ b4map { $ b1 } { $ b2 } { $ b3 } { $ b4 } = $ out ;
}
}
@ -309,10 +313,14 @@ sub print_radix_table
###
###
# Add the segments for the radix trees themselves.
# Add the segments for the radix trees themselves.
push @ segments , build_segments_from_tree ( "Single byte table" , "1-byte" , 1 , \ % b1map ) ;
push @ segments ,
push @ segments , build_segments_from_tree ( "Two byte table" , "2-byte" , 2 , \ % b2map ) ;
build_segments_from_tree ( "Single byte table" , "1-byte" , 1 , \ % b1map ) ;
push @ segments , build_segments_from_tree ( "Three byte table" , "3-byte" , 3 , \ % b3map ) ;
push @ segments ,
push @ segments , build_segments_from_tree ( "Four byte table" , "4-byte" , 4 , \ % b4map ) ;
build_segments_from_tree ( "Two byte table" , "2-byte" , 2 , \ % b2map ) ;
push @ segments ,
build_segments_from_tree ( "Three byte table" , "3-byte" , 3 , \ % b3map ) ;
push @ segments ,
build_segments_from_tree ( "Four byte table" , "4-byte" , 4 , \ % b4map ) ;
###
###
### Find min and max index used in each level of each tree.
### Find min and max index used in each level of each tree.
@ -325,23 +333,24 @@ sub print_radix_table
my % max_idx ;
my % max_idx ;
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
my $ this_min = $ min_idx { $ seg - > { depth } } - > { $ seg - > { level } } ;
my $ this_min = $ min_idx { $ seg - > { depth } } - > { $ seg - > { level } } ;
my $ this_max = $ max_idx { $ seg - > { depth } } - > { $ seg - > { level } } ;
my $ this_max = $ max_idx { $ seg - > { depth } } - > { $ seg - > { level } } ;
foreach my $ i ( keys % { $ seg - > { values } } )
foreach my $ i ( keys % { $ seg - > { values } } )
{
{
$ this_min = $ i if ( ! defined $ this_min || $ i < $ this_min ) ;
$ this_min = $ i if ( ! defined $ this_min || $ i < $ this_min ) ;
$ this_max = $ i if ( ! defined $ this_max || $ i > $ this_max ) ;
$ this_max = $ i if ( ! defined $ this_max || $ i > $ this_max ) ;
}
}
$ min_idx { $ seg - > { depth } } { $ seg - > { level } } = $ this_min ;
$ min_idx { $ seg - > { depth } } { $ seg - > { level } } = $ this_min ;
$ max_idx { $ seg - > { depth } } { $ seg - > { level } } = $ this_max ;
$ max_idx { $ seg - > { depth } } { $ seg - > { level } } = $ this_max ;
}
}
# Copy the mins and max's back to every segment, for convenience.
# Copy the mins and max's back to every segment, for convenience.
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
$ seg - > { min_idx } = $ min_idx { $ seg - > { depth } } { $ seg - > { level } } ;
$ seg - > { min_idx } = $ min_idx { $ seg - > { depth } } { $ seg - > { level } } ;
$ seg - > { max_idx } = $ max_idx { $ seg - > { depth } } { $ seg - > { level } } ;
$ seg - > { max_idx } = $ max_idx { $ seg - > { depth } } { $ seg - > { level } } ;
}
}
###
###
@ -359,11 +368,10 @@ sub print_radix_table
$ widest_range = $ this_range if ( $ this_range > $ widest_range ) ;
$ widest_range = $ this_range if ( $ this_range > $ widest_range ) ;
}
}
unshift @ segments , {
unshift @ segments ,
header = > "Dummy map, for invalid values" ,
{ header = > "Dummy map, for invalid values" ,
min_idx = > 0 ,
min_idx = > 0 ,
max_idx = > $ widest_range
max_idx = > $ widest_range } ;
} ;
###
###
### Eliminate overlapping zeros
### Eliminate overlapping zeros
@ -378,26 +386,34 @@ sub print_radix_table
###
###
for ( my $ j = 0 ; $ j < $# segments - 1 ; $ j + + )
for ( my $ j = 0 ; $ j < $# segments - 1 ; $ j + + )
{
{
my $ seg = $ segments [ $ j ] ;
my $ seg = $ segments [ $ j ] ;
my $ nextseg = $ segments [ $ j + 1 ] ;
my $ nextseg = $ segments [ $ j + 1 ] ;
# Count the number of zero values at the end of this segment.
# Count the number of zero values at the end of this segment.
my $ this_trail_zeros = 0 ;
my $ this_trail_zeros = 0 ;
for ( my $ i = $ seg - > { max_idx } ; $ i >= $ seg - > { min_idx } && ! $ seg - > { values } - > { $ i } ; $ i - - )
for (
my $ i = $ seg - > { max_idx } ;
$ i >= $ seg - > { min_idx } && ! $ seg - > { values } - > { $ i } ;
$ i - - )
{
{
$ this_trail_zeros + + ;
$ this_trail_zeros + + ;
}
}
# Count the number of zeros at the beginning of next segment.
# Count the number of zeros at the beginning of next segment.
my $ next_lead_zeros = 0 ;
my $ next_lead_zeros = 0 ;
for ( my $ i = $ nextseg - > { min_idx } ; $ i <= $ nextseg - > { max_idx } && ! $ nextseg - > { values } - > { $ i } ; $ i + + )
for (
my $ i = $ nextseg - > { min_idx } ;
$ i <= $ nextseg - > { max_idx } && ! $ nextseg - > { values } - > { $ i } ;
$ i + + )
{
{
$ next_lead_zeros + + ;
$ next_lead_zeros + + ;
}
}
# How many zeros in common?
# How many zeros in common?
my $ overlaid_trail_zeros =
my $ overlaid_trail_zeros =
( $ this_trail_zeros > $ next_lead_zeros ) ? $ next_lead_zeros : $ this_trail_zeros ;
( $ this_trail_zeros > $ next_lead_zeros )
? $ next_lead_zeros
: $ this_trail_zeros ;
$ seg - > { overlaid_trail_zeros } = $ overlaid_trail_zeros ;
$ seg - > { overlaid_trail_zeros } = $ overlaid_trail_zeros ;
$ seg - > { max_idx } = $ seg - > { max_idx } - $ overlaid_trail_zeros ;
$ seg - > { max_idx } = $ seg - > { max_idx } - $ overlaid_trail_zeros ;
@ -419,7 +435,7 @@ sub print_radix_table
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
$ seg - > { offset } = $ flatoff ;
$ seg - > { offset } = $ flatoff ;
$ segmap { $ seg - > { label } } = $ flatoff ;
$ segmap { $ seg - > { label } } = $ flatoff ;
$ flatoff += $ seg - > { max_idx } - $ seg - > { min_idx } + 1 ;
$ flatoff += $ seg - > { max_idx } - $ seg - > { min_idx } + 1 ;
}
}
my $ tblsize = $ flatoff ;
my $ tblsize = $ flatoff ;
@ -427,9 +443,9 @@ sub print_radix_table
# Second pass: look up the offset of each label reference in the hash.
# Second pass: look up the offset of each label reference in the hash.
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
while ( my ( $ i , $ val ) = each % { $ seg - > { values } } )
while ( my ( $ i , $ val ) = each % { $ seg - > { values } } )
{
{
if ( ! ( $ val =~ /^[0-9,.E]+$/ ) )
if ( ! ( $ val =~ /^[0-9,.E]+$/ ) )
{
{
my $ segoff = $ segmap { $ val } ;
my $ segoff = $ segmap { $ val } ;
if ( $ segoff )
if ( $ segoff )
@ -482,7 +498,7 @@ sub print_radix_table
my $ max_val = 0 ;
my $ max_val = 0 ;
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
foreach my $ val ( values % { $ seg - > { values } } )
foreach my $ val ( values % { $ seg - > { values } } )
{
{
$ max_val = $ val if ( $ val > $ max_val ) ;
$ max_val = $ val if ( $ val > $ max_val ) ;
}
}
@ -498,17 +514,17 @@ sub print_radix_table
if ( $ max_val <= 0xffff )
if ( $ max_val <= 0xffff )
{
{
$ vals_per_line = 8 ;
$ vals_per_line = 8 ;
$ colwidth = 4 ;
$ colwidth = 4 ;
}
}
elsif ( $ max_val <= 0xffffff )
elsif ( $ max_val <= 0xffffff )
{
{
$ vals_per_line = 4 ;
$ vals_per_line = 4 ;
$ colwidth = 6 ;
$ colwidth = 6 ;
}
}
else
else
{
{
$ vals_per_line = 4 ;
$ vals_per_line = 4 ;
$ colwidth = 8 ;
$ colwidth = 8 ;
}
}
###
###
@ -529,17 +545,20 @@ sub print_radix_table
print $ out " ${tblname}_table,\n" ;
print $ out " ${tblname}_table,\n" ;
}
}
printf $ out "\n" ;
printf $ out "\n" ;
printf $ out " 0x%04x, /* offset of table for 1-byte inputs */\n" , $ b1root ;
printf $ out " 0x%04x, /* offset of table for 1-byte inputs */\n" ,
$ b1root ;
printf $ out " 0x%02x, /* b1_lower */\n" , $ b1_lower ;
printf $ out " 0x%02x, /* b1_lower */\n" , $ b1_lower ;
printf $ out " 0x%02x, /* b1_upper */\n" , $ b1_upper ;
printf $ out " 0x%02x, /* b1_upper */\n" , $ b1_upper ;
printf $ out "\n" ;
printf $ out "\n" ;
printf $ out " 0x%04x, /* offset of table for 2-byte inputs */\n" , $ b2root ;
printf $ out " 0x%04x, /* offset of table for 2-byte inputs */\n" ,
$ b2root ;
printf $ out " 0x%02x, /* b2_1_lower */\n" , $ b2_1_lower ;
printf $ out " 0x%02x, /* b2_1_lower */\n" , $ b2_1_lower ;
printf $ out " 0x%02x, /* b2_1_upper */\n" , $ b2_1_upper ;
printf $ out " 0x%02x, /* b2_1_upper */\n" , $ b2_1_upper ;
printf $ out " 0x%02x, /* b2_2_lower */\n" , $ b2_2_lower ;
printf $ out " 0x%02x, /* b2_2_lower */\n" , $ b2_2_lower ;
printf $ out " 0x%02x, /* b2_2_upper */\n" , $ b2_2_upper ;
printf $ out " 0x%02x, /* b2_2_upper */\n" , $ b2_2_upper ;
printf $ out "\n" ;
printf $ out "\n" ;
printf $ out " 0x%04x, /* offset of table for 3-byte inputs */\n" , $ b3root ;
printf $ out " 0x%04x, /* offset of table for 3-byte inputs */\n" ,
$ b3root ;
printf $ out " 0x%02x, /* b3_1_lower */\n" , $ b3_1_lower ;
printf $ out " 0x%02x, /* b3_1_lower */\n" , $ b3_1_lower ;
printf $ out " 0x%02x, /* b3_1_upper */\n" , $ b3_1_upper ;
printf $ out " 0x%02x, /* b3_1_upper */\n" , $ b3_1_upper ;
printf $ out " 0x%02x, /* b3_2_lower */\n" , $ b3_2_lower ;
printf $ out " 0x%02x, /* b3_2_lower */\n" , $ b3_2_lower ;
@ -547,7 +566,8 @@ sub print_radix_table
printf $ out " 0x%02x, /* b3_3_lower */\n" , $ b3_3_lower ;
printf $ out " 0x%02x, /* b3_3_lower */\n" , $ b3_3_lower ;
printf $ out " 0x%02x, /* b3_3_upper */\n" , $ b3_3_upper ;
printf $ out " 0x%02x, /* b3_3_upper */\n" , $ b3_3_upper ;
printf $ out "\n" ;
printf $ out "\n" ;
printf $ out " 0x%04x, /* offset of table for 4-byte inputs */\n" , $ b4root ;
printf $ out " 0x%04x, /* offset of table for 4-byte inputs */\n" ,
$ b4root ;
printf $ out " 0x%02x, /* b4_1_lower */\n" , $ b4_1_lower ;
printf $ out " 0x%02x, /* b4_1_lower */\n" , $ b4_1_lower ;
printf $ out " 0x%02x, /* b4_1_upper */\n" , $ b4_1_upper ;
printf $ out " 0x%02x, /* b4_1_upper */\n" , $ b4_1_upper ;
printf $ out " 0x%02x, /* b4_2_lower */\n" , $ b4_2_lower ;
printf $ out " 0x%02x, /* b4_2_lower */\n" , $ b4_2_lower ;
@ -561,18 +581,21 @@ sub print_radix_table
print $ out "static const $datatype ${tblname}_table[$tblsize] =\n" ;
print $ out "static const $datatype ${tblname}_table[$tblsize] =\n" ;
print $ out "{" ;
print $ out "{" ;
my $ off = 0 ;
my $ off = 0 ;
foreach my $ seg ( @ segments )
foreach my $ seg ( @ segments )
{
{
printf $ out "\n" ;
printf $ out "\n" ;
printf $ out " /*** %s - offset 0x%05x ***/\n" , $ seg - > { header } , $ off ;
printf $ out " /*** %s - offset 0x%05x ***/\n" , $ seg - > { header } , $ off ;
printf $ out "\n" ;
printf $ out "\n" ;
for ( my $ i = $ seg - > { min_idx } ; $ i <= $ seg - > { max_idx } ; )
for ( my $ i = $ seg - > { min_idx } ; $ i <= $ seg - > { max_idx } ; )
{
{
# Print the next line's worth of values.
# Print the next line's worth of values.
# XXX pad to begin at a nice boundary
# XXX pad to begin at a nice boundary
printf $ out " /* %02x */ " , $ i ;
printf $ out " /* %02x */ " , $ i ;
for ( my $ j = 0 ; $ j < $ vals_per_line && $ i <= $ seg - > { max_idx } ; $ j + + )
for ( my $ j = 0 ;
$ j < $ vals_per_line && $ i <= $ seg - > { max_idx } ; $ j + + )
{
{
my $ val = $ seg - > { values } - > { $ i } ;
my $ val = $ seg - > { values } - > { $ i } ;
@ -588,7 +611,8 @@ sub print_radix_table
}
}
if ( $ seg - > { overlaid_trail_zeros } )
if ( $ seg - > { overlaid_trail_zeros } )
{
{
printf $ out " /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n" ;
printf $ out
" /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n" ;
}
}
}
}
@ -607,13 +631,14 @@ sub build_segments_from_tree
if ( % { $ map } )
if ( % { $ map } )
{
{
@ segments = build_segments_recurse ( $ header , $ rootlabel , "" , 1 , $ depth , $ map ) ;
@ segments =
build_segments_recurse ( $ header , $ rootlabel , "" , 1 , $ depth , $ map ) ;
# Sort the segments into "breadth-first" order. Not strictly required,
# Sort the segments into "breadth-first" order. Not strictly required,
# but makes the maps nicer to read.
# but makes the maps nicer to read.
@ segments = sort { $ a - > { level } cmp $ b - > { level } or
@ segments =
$ a - > { path } cmp $ b - > { path } }
sort { $ a - > { level } cmp $ b - > { level } or $ a - > { path } cmp $ b - > { path } }
@ segments ;
@ segments ;
}
}
return @ segments ;
return @ segments ;
@ -628,14 +653,13 @@ sub build_segments_recurse
if ( $ level == $ depth )
if ( $ level == $ depth )
{
{
push @ segments , {
push @ segments ,
header = > $ header . ", leaf: ${path}xx" ,
{ header = > $ header . ", leaf: ${path}xx" ,
label = > $ label ,
label = > $ label ,
level = > $ level ,
level = > $ level ,
depth = > $ depth ,
depth = > $ depth ,
path = > $ path ,
path = > $ path ,
values = > $ map
values = > $ map } ;
} ;
}
}
else
else
{
{
@ -646,19 +670,19 @@ sub build_segments_recurse
my $ childpath = $ path . sprintf ( "%02x" , $ i ) ;
my $ childpath = $ path . sprintf ( "%02x" , $ i ) ;
my $ childlabel = "$depth-level-$level-$childpath" ;
my $ childlabel = "$depth-level-$level-$childpath" ;
push @ segments , build_segments_recurse ( $ header , $ childlabel , $ childpath ,
push @ segments ,
$ level + 1 , $ depth , $ val ) ;
build_segments_recurse ( $ header , $ childlabel , $ childpath ,
$ level + 1 , $ depth , $ val ) ;
$ children { $ i } = $ childlabel ;
$ children { $ i } = $ childlabel ;
}
}
push @ segments , {
push @ segments ,
header = > $ header . ", byte #$level: ${path}xx" ,
{ header = > $ header . ", byte #$level: ${path}xx" ,
label = > $ label ,
label = > $ label ,
level = > $ level ,
level = > $ level ,
depth = > $ depth ,
depth = > $ depth ,
path = > $ path ,
path = > $ path ,
values = > \ % children
values = > \ % children } ;
} ;
}
}
return @ segments ;
return @ segments ;
}
}
@ -688,29 +712,31 @@ sub make_charmap
my % charmap ;
my % charmap ;
foreach my $ c ( @$ charset )
foreach my $ c ( @$ charset )
{
{
# combined characters are handled elsewhere
# combined characters are handled elsewhere
next if ( defined $ c - > { ucs_second } ) ;
next if ( defined $ c - > { ucs_second } ) ;
next if ( $ c - > { direction } != $ direction && $ c - > { direction } != BOTH ) ;
next if ( $ c - > { direction } != $ direction && $ c - > { direction } != BOTH ) ;
my ( $ src , $ dst ) =
my ( $ src , $ dst ) =
$ direction == TO_UNICODE
$ direction == TO_UNICODE
? ( $ c - > { code } , ucs2utf ( $ c - > { ucs } ) )
? ( $ c - > { code } , ucs2utf ( $ c - > { ucs } ) )
: ( ucs2utf ( $ c - > { ucs } ) , $ c - > { code } ) ;
: ( ucs2utf ( $ c - > { ucs } ) , $ c - > { code } ) ;
# check for duplicate source codes
# check for duplicate source codes
if ( defined $ charmap { $ src } )
if ( defined $ charmap { $ src } )
{
{
printf STDERR
printf STDERR
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n" ,
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n" ,
$ c - > { f } , $ c - > { l } , $ src , $ charmap { $ src } , $ dst ;
$ c - > { f } , $ c - > { l } , $ src , $ charmap { $ src } , $ dst ;
exit ;
exit ;
}
}
$ charmap { $ src } = $ dst ;
$ charmap { $ src } = $ dst ;
if ( $ verbose )
if ( $ verbose )
{
{
printf $ out "0x%04x 0x%04x %s:%d %s\n" , $ src , $ dst , $ c - > { f } , $ c - > { l } , $ c - > { comment } ;
printf $ out "0x%04x 0x%04x %s:%d %s\n" , $ src , $ dst , $ c - > { f } ,
$ c - > { l } , $ c - > { comment } ;
}
}
}
}
if ( $ verbose )
if ( $ verbose )
@ -743,11 +769,13 @@ sub make_charmap_combined
if ( defined $ c - > { ucs_second } )
if ( defined $ c - > { ucs_second } )
{
{
my $ entry = { utf8 = > ucs2utf ( $ c - > { ucs } ) ,
my $ entry = {
utf8_second = > ucs2utf ( $ c - > { ucs_second } ) ,
utf8 = > ucs2utf ( $ c - > { ucs } ) ,
code = > $ c - > { code } ,
utf8_second = > ucs2utf ( $ c - > { ucs_second } ) ,
comment = > $ c - > { comment } ,
code = > $ c - > { code } ,
f = > $ c - > { f } , l = > $ c - > { l } } ;
comment = > $ c - > { comment } ,
f = > $ c - > { f } ,
l = > $ c - > { l } } ;
push @ combined , $ entry ;
push @ combined , $ entry ;
}
}
}
}