|
|
@ -2,7 +2,7 @@ |
|
|
|
# |
|
|
|
# |
|
|
|
# Copyright 2001 by PostgreSQL Global Development Group |
|
|
|
# Copyright 2001 by PostgreSQL Global Development Group |
|
|
|
# |
|
|
|
# |
|
|
|
# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $ |
|
|
|
# $Id: UCS_to_SJIS.pl,v 1.2 2001/02/23 08:44:33 ishii Exp $ |
|
|
|
# |
|
|
|
# |
|
|
|
# Generate UTF-8 <--> SJIS code conversion tables from |
|
|
|
# Generate UTF-8 <--> SJIS code conversion tables from |
|
|
|
# map files provided by Unicode organization. |
|
|
|
# map files provided by Unicode organization. |
|
|
@ -21,7 +21,8 @@ require "ucs2utf.pl"; |
|
|
|
|
|
|
|
|
|
|
|
# first generate UTF-8 --> SJIS table |
|
|
|
# first generate UTF-8 --> SJIS table |
|
|
|
|
|
|
|
|
|
|
|
$in_file = "SHIFTJIS.TXT"; |
|
|
|
$in_file = "CP932.TXT"; |
|
|
|
|
|
|
|
$count = 0; |
|
|
|
|
|
|
|
|
|
|
|
open( FILE, $in_file ) || die( "cannot open $in_file" ); |
|
|
|
open( FILE, $in_file ) || die( "cannot open $in_file" ); |
|
|
|
|
|
|
|
|
|
|
@ -35,15 +36,30 @@ while( <FILE> ){ |
|
|
|
$code = hex($c); |
|
|
|
$code = hex($c); |
|
|
|
if( $code >= 0x80 && $ucs >= 0x100 ){ |
|
|
|
if( $code >= 0x80 && $ucs >= 0x100 ){ |
|
|
|
$utf = &ucs2utf($ucs); |
|
|
|
$utf = &ucs2utf($ucs); |
|
|
|
if( $array{ $utf } ne "" ){ |
|
|
|
if((( $code >= 0xed40 ) |
|
|
|
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs; |
|
|
|
&& ( $code <= 0xeefc )) |
|
|
|
|
|
|
|
|| (( $code >= 0x8754 ) |
|
|
|
|
|
|
|
&&( $code <= 0x875d )) |
|
|
|
|
|
|
|
|| ( $code == 0x878a ) |
|
|
|
|
|
|
|
|| ( $code == 0x8782 ) |
|
|
|
|
|
|
|
|| ( $code == 0x8784 ) |
|
|
|
|
|
|
|
|| ( $code == 0xfa5b ) |
|
|
|
|
|
|
|
|| ( $code == 0xfa54 ) |
|
|
|
|
|
|
|
|| (( $code >= 0x8790 ) |
|
|
|
|
|
|
|
&& ( $code <= 0x8792 )) |
|
|
|
|
|
|
|
|| (( $code >= 0x8795 ) |
|
|
|
|
|
|
|
&& ( $code <= 0x8797 )) |
|
|
|
|
|
|
|
|| (( $code >= 0x879a ) |
|
|
|
|
|
|
|
&& ( $code <= 0x879c ))) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
printf STDERR "Warning: duplicate unicode : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code; |
|
|
|
next; |
|
|
|
next; |
|
|
|
} |
|
|
|
} |
|
|
|
$count++; |
|
|
|
$count++; |
|
|
|
|
|
|
|
|
|
|
|
$array{ $utf } = $code; |
|
|
|
$array{ $utf } = $code; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
close( FILE ); |
|
|
|
close( FILE ); |
|
|
|
|
|
|
|
|
|
|
|
# |
|
|
|
# |
|
|
@ -68,12 +84,13 @@ print FILE "};\n"; |
|
|
|
close(FILE); |
|
|
|
close(FILE); |
|
|
|
|
|
|
|
|
|
|
|
# |
|
|
|
# |
|
|
|
# then generate EUC_JP --> UTF8 table |
|
|
|
# then generate SJIS --> UTF8 table |
|
|
|
# |
|
|
|
# |
|
|
|
|
|
|
|
|
|
|
|
open( FILE, $in_file ) || die( "cannot open $in_file" ); |
|
|
|
open( FILE, $in_file ) || die( "cannot open $in_file" ); |
|
|
|
|
|
|
|
|
|
|
|
reset 'array'; |
|
|
|
reset 'array'; |
|
|
|
|
|
|
|
$count = 0; |
|
|
|
|
|
|
|
|
|
|
|
while( <FILE> ){ |
|
|
|
while( <FILE> ){ |
|
|
|
chop; |
|
|
|
chop; |
|
|
@ -85,10 +102,6 @@ while( <FILE> ){ |
|
|
|
$code = hex($c); |
|
|
|
$code = hex($c); |
|
|
|
if( $code >= 0x80 && $ucs >= 0x100 ){ |
|
|
|
if( $code >= 0x80 && $ucs >= 0x100 ){ |
|
|
|
$utf = &ucs2utf($ucs); |
|
|
|
$utf = &ucs2utf($ucs); |
|
|
|
if( $array{ $code } ne "" ){ |
|
|
|
|
|
|
|
printf STDERR "Warning: duplicate code: %04x\n",$ucs; |
|
|
|
|
|
|
|
next; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
$count++; |
|
|
|
$count++; |
|
|
|
|
|
|
|
|
|
|
|
$array{ $code } = $utf; |
|
|
|
$array{ $code } = $utf; |
|
|
|