mirror of https://github.com/postgres/postgres
This adds a couple of tests that trigger encoding conversion when the input and server encodings do not match in COPY FROM/TO, or when need_transcoding is set to true in the COPY state data. These tests rely on UTF8 <-> LATIN1 for the valid cases, as LATIN1 accepts any bytes, and on UTF8 <-> EUC_JP for some of the invalid cases, where a character cannot be understood, causing a conversion failure. Both the ENCODING option and client_encoding are covered. Test suggested by Andres Freund. Author: Sutou Kouhei Discussion: https://postgr.es/m/20240206222445.hzq22pb2nye7rm67@awork3.anarazel.de pull/194/head
parent
bf9165bb0c
commit
3ad8b840ce
@ -0,0 +1,46 @@ |
||||
-- |
||||
-- Test cases for encoding with COPY commands |
||||
-- |
||||
-- skip test if not UTF8 server encoding |
||||
SELECT getdatabaseencoding() <> 'UTF8' |
||||
AS skip_test \gset |
||||
\if :skip_test |
||||
\quit |
||||
\endif |
||||
-- directory paths are passed to us in environment variables |
||||
\getenv abs_builddir PG_ABS_BUILDDIR |
||||
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv' |
||||
CREATE TABLE copy_encoding_tab (t text); |
||||
-- Valid cases |
||||
-- Use ENCODING option |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8'); |
||||
-- Read UTF8 data as LATIN1: no error |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1'); |
||||
-- Use client_encoding |
||||
SET client_encoding TO UTF8; |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv); |
||||
-- Read UTF8 data as LATIN1: no error |
||||
SET client_encoding TO LATIN1; |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv); |
||||
RESET client_encoding; |
||||
-- Invalid cases |
||||
-- Use ENCODING explicitly |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8'); |
||||
-- Read UTF8 data as EUC_JP: conversion error expected |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP'); |
||||
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81 |
||||
CONTEXT: COPY copy_encoding_tab, line 1 |
||||
-- Use client_encoding |
||||
SET client_encoding TO UTF8; |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv); |
||||
-- Read UTF8 data as EUC_JP: conversion error expected |
||||
SET client_encoding TO EUC_JP; |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv); |
||||
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81 |
||||
CONTEXT: COPY copy_encoding_tab, line 1 |
||||
RESET client_encoding; |
||||
DROP TABLE copy_encoding_tab; |
@ -0,0 +1,8 @@ |
||||
-- |
||||
-- Test cases for encoding with COPY commands |
||||
-- |
||||
-- skip test if not UTF8 server encoding |
||||
SELECT getdatabaseencoding() <> 'UTF8' |
||||
AS skip_test \gset |
||||
\if :skip_test |
||||
\quit |
@ -0,0 +1,53 @@ |
||||
-- |
||||
-- Test cases for encoding with COPY commands |
||||
-- |
||||
|
||||
-- skip test if not UTF8 server encoding |
||||
SELECT getdatabaseencoding() <> 'UTF8' |
||||
AS skip_test \gset |
||||
\if :skip_test |
||||
\quit |
||||
\endif |
||||
|
||||
-- directory paths are passed to us in environment variables |
||||
\getenv abs_builddir PG_ABS_BUILDDIR |
||||
|
||||
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv' |
||||
|
||||
CREATE TABLE copy_encoding_tab (t text); |
||||
|
||||
-- Valid cases |
||||
|
||||
-- Use ENCODING option |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8'); |
||||
-- Read UTF8 data as LATIN1: no error |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1'); |
||||
|
||||
-- Use client_encoding |
||||
SET client_encoding TO UTF8; |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv); |
||||
-- Read UTF8 data as LATIN1: no error |
||||
SET client_encoding TO LATIN1; |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv); |
||||
RESET client_encoding; |
||||
|
||||
-- Invalid cases |
||||
|
||||
-- Use ENCODING explicitly |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8'); |
||||
-- Read UTF8 data as EUC_JP: conversion error expected |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP'); |
||||
|
||||
-- Use client_encoding |
||||
SET client_encoding TO UTF8; |
||||
-- U+3042 HIRAGANA LETTER A |
||||
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv); |
||||
-- Read UTF8 data as EUC_JP: conversion error expected |
||||
SET client_encoding TO EUC_JP; |
||||
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv); |
||||
RESET client_encoding; |
||||
|
||||
DROP TABLE copy_encoding_tab; |
Loading…
Reference in new issue