|
|
|
<!--
|
|
|
|
doc/src/sgml/ref/copy.sgml
|
|
|
|
PostgreSQL documentation
|
|
|
|
-->
|
|
|
|
|
|
|
|
<refentry id="sql-copy">
|
|
|
|
<indexterm zone="sql-copy">
|
|
|
|
<primary>COPY</primary>
|
|
|
|
</indexterm>
|
|
|
|
|
|
|
|
<refmeta>
|
|
|
|
<refentrytitle>COPY</refentrytitle>
|
|
|
|
<manvolnum>7</manvolnum>
|
|
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
|
|
</refmeta>
|
|
|
|
|
|
|
|
<refnamediv>
|
|
|
|
<refname>COPY</refname>
|
|
|
|
<refpurpose>copy data between a file and a table</refpurpose>
|
|
|
|
</refnamediv>
|
|
|
|
|
|
|
|
<refsynopsisdiv>
|
|
|
|
<synopsis>
|
|
|
|
COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
|
|
|
|
[ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
|
|
|
|
[ WHERE <replaceable class="parameter">condition</replaceable> ]
|
|
|
|
|
|
|
|
COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
|
|
|
|
[ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
|
|
|
|
|
|
|
|
<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
|
|
|
|
|
|
|
|
FORMAT <replaceable class="parameter">format_name</replaceable>
|
|
|
|
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
|
|
|
|
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
|
|
|
NULL '<replaceable class="parameter">null_string</replaceable>'
|
|
|
|
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
|
|
|
|
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
|
|
|
|
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
|
|
|
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
|
|
|
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
|
|
|
FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
|
|
|
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
|
|
|
ON_ERROR <replaceable class="parameter">error_action</replaceable>
|
|
|
|
REJECT_LIMIT <replaceable class="parameter">maxerror</replaceable>
|
|
|
|
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
|
|
|
|
LOG_VERBOSITY <replaceable class="parameter">verbosity</replaceable>
|
|
|
|
</synopsis>
|
|
|
|
</refsynopsisdiv>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Description</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY</command> moves data between
|
|
|
|
<productname>PostgreSQL</productname> tables and standard file-system
|
|
|
|
files. <command>COPY TO</command> copies the contents of a table
|
|
|
|
<emphasis>to</emphasis> a file, while <command>COPY FROM</command> copies
|
|
|
|
data <emphasis>from</emphasis> a file to a table (appending the data to
|
|
|
|
whatever is in the table already). <command>COPY TO</command>
|
|
|
|
can also copy the results of a <command>SELECT</command> query.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
If a column list is specified, <command>COPY TO</command> copies only
|
|
|
|
the data in the specified columns to the file. For <command>COPY
|
|
|
|
FROM</command>, each field in the file is inserted, in order, into the
|
|
|
|
specified column. Table columns not specified in the <command>COPY
|
|
|
|
FROM</command> column list will receive their default values.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY</command> with a file name instructs the
|
|
|
|
<productname>PostgreSQL</productname> server to directly read from
|
|
|
|
or write to a file. The file must be accessible by the
|
|
|
|
<productname>PostgreSQL</productname> user (the user ID the server
|
|
|
|
runs as) and the name must be specified from the viewpoint of the
|
|
|
|
server. When <literal>PROGRAM</literal> is specified, the server
|
|
|
|
executes the given command and reads from the standard output of the
|
|
|
|
program, or writes to the standard input of the program. The command
|
|
|
|
must be specified from the viewpoint of the server, and be executable
|
|
|
|
by the <productname>PostgreSQL</productname> user. When
|
|
|
|
<literal>STDIN</literal> or <literal>STDOUT</literal> is
|
This patch implements holdable cursors, following the proposal
(materialization into a tuple store) discussed on pgsql-hackers earlier.
I've updated the documentation and the regression tests.
Notes on the implementation:
- I needed to change the tuple store API slightly -- it assumes that it
won't be used to hold data across transaction boundaries, so the temp
files that it uses for on-disk storage are automatically reclaimed at
end-of-transaction. I added a flag to tuplestore_begin_heap() to control
this behavior. Is changing the tuple store API in this fashion OK?
- in order to store executor results in a tuple store, I added a new
CommandDest. This works well for the most part, with one exception: the
current DestFunction API doesn't provide enough information to allow the
Executor to store results into an arbitrary tuple store (where the
particular tuple store to use is chosen by the call site of
ExecutorRun). To work around this, I've temporarily hacked up a solution
that works, but is not ideal: since the receiveTuple DestFunction is
passed the portal name, we can use that to look up the Portal data
structure for the cursor and then use that to get at the tuple store the
Portal is using. This unnecessarily ties the Portal code with the
tupleReceiver code, but it works...
The proper fix for this is probably to change the DestFunction API --
Tom suggested passing the full QueryDesc to the receiveTuple function.
In that case, callers of ExecutorRun could "subclass" QueryDesc to add
any additional fields that their particular CommandDest needed to get
access to. This approach would work, but I'd like to think about it for
a little bit longer before deciding which route to go. In the mean time,
the code works fine, so I don't think a fix is urgent.
- (semi-related) I added a NO SCROLL keyword to DECLARE CURSOR, and
adjusted the behavior of SCROLL in accordance with the discussion on
-hackers.
- (unrelated) Cleaned up some SGML markup in sql.sgml, copy.sgml
Neil Conway
23 years ago
|
|
|
specified, data is transmitted via the connection between the
|
|
|
|
client and the server.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Each backend running <command>COPY</command> will report its progress
|
|
|
|
in the <structname>pg_stat_progress_copy</structname> view. See
|
|
|
|
<xref linkend="copy-progress-reporting"/> for details.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
By default, <command>COPY</command> will fail if it encounters an error
|
|
|
|
during processing. For use cases where a best-effort attempt at loading
|
|
|
|
the entire file is desired, the <literal>ON_ERROR</literal> clause can
|
|
|
|
be used to specify some other behavior.
|
|
|
|
</para>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Parameters</title>
|
|
|
|
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">table_name</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name (optionally schema-qualified) of an existing table.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">column_name</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
An optional list of columns to be copied. If no column list is
|
|
|
|
specified, all columns of the table except generated columns will be
|
|
|
|
copied.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">query</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
Improve <xref> vs. <command> formatting in the documentation
SQL commands are generally marked up as <command>, except when a link
to a reference page is used using <xref>. But the latter doesn't
create monospace markup, so this looks strange especially when a
paragraph contains a mix of links and non-links.
We considered putting <command> in the <refentrytitle> on the target
side, but that creates some formatting side effects elsewhere.
Generally, it seems safer to solve this on the link source side.
We can't put the <xref> inside the <command>; the DTD doesn't allow
this. DocBook 5 would allow the <command> to have the linkend
attribute itself, but we are not there yet.
So to solve this for now, convert the <xref>s to <link> plus
<command>. This gives the correct look and also gives some more
flexibility what we can put into the link text (e.g., subcommands or
other clauses). In the future, these could then be converted to
DocBook 5 style.
I haven't converted absolutely all xrefs to SQL command reference
pages, only those where we care about the appearance of the link text
or where it was otherwise appropriate to make the appearance match a
bit better. Also in some cases, the links were repetitive, so in
those cases the links were just removed and replaced by a plain
<command>. In cases where we just want the link and don't
specifically care about the generated link text (typically phrased
"for further information see <xref ...>") the xref is kept.
Reported-by: Dagfinn Ilmari Mannsåker <ilmari@ilmari.org>
Discussion: https://www.postgresql.org/message-id/flat/87o8pco34z.fsf@wibble.ilmari.org
5 years ago
|
|
|
A <link linkend="sql-select"><command>SELECT</command></link>,
|
|
|
|
<link linkend="sql-values"><command>VALUES</command></link>,
|
|
|
|
<link linkend="sql-insert"><command>INSERT</command></link>,
|
Add RETURNING support to MERGE.
This allows a RETURNING clause to be appended to a MERGE query, to
return values based on each row inserted, updated, or deleted. As with
plain INSERT, UPDATE, and DELETE commands, the returned values are
based on the new contents of the target table for INSERT and UPDATE
actions, and on its old contents for DELETE actions. Values from the
source relation may also be returned.
As with INSERT/UPDATE/DELETE, the output of MERGE ... RETURNING may be
used as the source relation for other operations such as WITH queries
and COPY commands.
Additionally, a special function merge_action() is provided, which
returns 'INSERT', 'UPDATE', or 'DELETE', depending on the action
executed for each row. The merge_action() function can be used
anywhere in the RETURNING list, including in arbitrary expressions and
subqueries, but it is an error to use it anywhere outside of a MERGE
query's RETURNING list.
Dean Rasheed, reviewed by Isaac Morland, Vik Fearing, Alvaro Herrera,
Gurjeet Singh, Jian He, Jeff Davis, Merlin Moncure, Peter Eisentraut,
and Wolfgang Walther.
Discussion: http://postgr.es/m/CAEZATCWePEGQR5LBn-vD6SfeLZafzEm2Qy_L_Oky2=qw2w3Pzg@mail.gmail.com
2 years ago
|
|
|
<link linkend="sql-update"><command>UPDATE</command></link>,
|
|
|
|
<link linkend="sql-delete"><command>DELETE</command></link>, or
|
|
|
|
<link linkend="sql-merge"><command>MERGE</command></link> command
|
|
|
|
whose results are to be copied. Note that parentheses are required
|
|
|
|
around the query.
|
|
|
|
</para>
|
|
|
|
<para>
|
Add RETURNING support to MERGE.
This allows a RETURNING clause to be appended to a MERGE query, to
return values based on each row inserted, updated, or deleted. As with
plain INSERT, UPDATE, and DELETE commands, the returned values are
based on the new contents of the target table for INSERT and UPDATE
actions, and on its old contents for DELETE actions. Values from the
source relation may also be returned.
As with INSERT/UPDATE/DELETE, the output of MERGE ... RETURNING may be
used as the source relation for other operations such as WITH queries
and COPY commands.
Additionally, a special function merge_action() is provided, which
returns 'INSERT', 'UPDATE', or 'DELETE', depending on the action
executed for each row. The merge_action() function can be used
anywhere in the RETURNING list, including in arbitrary expressions and
subqueries, but it is an error to use it anywhere outside of a MERGE
query's RETURNING list.
Dean Rasheed, reviewed by Isaac Morland, Vik Fearing, Alvaro Herrera,
Gurjeet Singh, Jian He, Jeff Davis, Merlin Moncure, Peter Eisentraut,
and Wolfgang Walther.
Discussion: http://postgr.es/m/CAEZATCWePEGQR5LBn-vD6SfeLZafzEm2Qy_L_Oky2=qw2w3Pzg@mail.gmail.com
2 years ago
|
|
|
For <command>INSERT</command>, <command>UPDATE</command>,
|
|
|
|
<command>DELETE</command>, and <command>MERGE</command> queries, a
|
|
|
|
<literal>RETURNING</literal> clause must be provided, and the target
|
|
|
|
relation must not have a conditional rule, nor an
|
|
|
|
<literal>ALSO</literal> rule, nor an <literal>INSTEAD</literal> rule
|
|
|
|
that expands to multiple statements.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">filename</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The path name of the input or output file. An input file name can be
|
|
|
|
an absolute or relative path, but an output file name must be an absolute
|
|
|
|
path. Windows users might need to use an <literal>E''</literal> string and
|
|
|
|
double any backslashes used in the path name.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
<varlistentry>
|
|
|
|
<term><literal>PROGRAM</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
A command to execute. In <command>COPY FROM</command>, the input is
|
|
|
|
read from the standard output of the command, and in <command>COPY TO</command>,
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
the output is written to the standard input of the command.
|
|
|
|
</para>
|
|
|
|
<para>
|
|
|
|
Note that the command is invoked by the shell, so if you need to pass
|
|
|
|
any arguments that come from an untrusted source, you
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
must be careful to strip or escape any special characters that might
|
|
|
|
have a special meaning for the shell. For security reasons, it is best
|
|
|
|
to use a fixed command string, or at least avoid including any user input
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
in it.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>STDIN</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies that input comes from the client application.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>STDOUT</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies that output goes to the client application.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">boolean</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies whether the selected option should be turned on or off.
|
|
|
|
You can write <literal>TRUE</literal>, <literal>ON</literal>, or
|
|
|
|
<literal>1</literal> to enable the option, and <literal>FALSE</literal>,
|
|
|
|
<literal>OFF</literal>, or <literal>0</literal> to disable it. The
|
|
|
|
<replaceable class="parameter">boolean</replaceable> value can also
|
|
|
|
be omitted, in which case <literal>TRUE</literal> is assumed.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>FORMAT</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Selects the data format to be read or written:
|
|
|
|
<literal>text</literal>,
|
|
|
|
<literal>csv</literal> (Comma Separated Values),
|
|
|
|
or <literal>binary</literal>.
|
|
|
|
The default is <literal>text</literal>.
|
|
|
|
See <xref linkend="sql-copy-file-formats"/> below for details.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>FREEZE</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Requests copying the data with rows already frozen, just as they
|
|
|
|
would be after running the <command>VACUUM FREEZE</command> command.
|
|
|
|
This is intended as a performance option for initial data loading.
|
|
|
|
Rows will be frozen only if the table being loaded has been created
|
|
|
|
or truncated in the current subtransaction, there are no cursors
|
|
|
|
open and there are no older snapshots held by this transaction. It is
|
|
|
|
currently not possible to perform a <command>COPY FREEZE</command> on
|
|
|
|
a partitioned table or foreign table.
|
|
|
|
This option is only allowed in <command>COPY FROM</command>.
|
|
|
|
</para>
|
|
|
|
<para>
|
|
|
|
Note that all other sessions will immediately be able to see the data
|
|
|
|
once it has been successfully loaded. This violates the normal rules
|
|
|
|
of MVCC visibility and users should be aware of the
|
|
|
|
potential problems this might cause.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>DELIMITER</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the character that separates columns within each row
|
|
|
|
(line) of the file. The default is a tab character in text format,
|
|
|
|
and a comma in <literal>CSV</literal> format.
|
|
|
|
This must be a single one-byte character.
|
|
|
|
This option is not allowed when using <literal>binary</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>NULL</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the string that represents a null value. The default is
|
|
|
|
<literal>\N</literal> (backslash-N) in text format, and an unquoted empty
|
|
|
|
string in <literal>CSV</literal> format. You might prefer an
|
|
|
|
empty string even in text format for cases where you don't want to
|
|
|
|
distinguish nulls from empty strings.
|
|
|
|
This option is not allowed when using <literal>binary</literal> format.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
When using <command>COPY FROM</command>, any data item that matches
|
|
|
|
this string will be stored as a null value, so you should make
|
|
|
|
sure that you use the same string as you used with
|
|
|
|
<command>COPY TO</command>.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>DEFAULT</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the string that represents a default value. Each time the string
|
|
|
|
is found in the input file, the default value of the corresponding column
|
|
|
|
will be used.
|
|
|
|
This option is allowed only in <command>COPY FROM</command>, and only when
|
|
|
|
not using <literal>binary</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>HEADER</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies that the file contains a header line with the names of each
|
|
|
|
column in the file. On output, the first line contains the column
|
|
|
|
names from the table. On input, the first line is discarded when this
|
|
|
|
option is set to <literal>true</literal> (or equivalent Boolean value).
|
|
|
|
If this option is set to <literal>MATCH</literal>, the number and names
|
|
|
|
of the columns in the header line must match the actual column names of
|
|
|
|
the table, in order; otherwise an error is raised.
|
|
|
|
This option is not allowed when using <literal>binary</literal> format.
|
|
|
|
The <literal>MATCH</literal> option is only valid for <command>COPY
|
|
|
|
FROM</command> commands.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>QUOTE</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the quoting character to be used when a data value is quoted.
|
|
|
|
The default is double-quote.
|
|
|
|
This must be a single one-byte character.
|
|
|
|
This option is allowed only when using <literal>CSV</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>ESCAPE</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the character that should appear before a
|
|
|
|
data character that matches the <literal>QUOTE</literal> value.
|
|
|
|
The default is the same as the <literal>QUOTE</literal> value (so that
|
|
|
|
the quoting character is doubled if it appears in the data).
|
|
|
|
This must be a single one-byte character.
|
|
|
|
This option is allowed only when using <literal>CSV</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>FORCE_QUOTE</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Forces quoting to be
|
|
|
|
used for all non-<literal>NULL</literal> values in each specified column.
|
|
|
|
<literal>NULL</literal> output is never quoted. If <literal>*</literal> is specified,
|
|
|
|
non-<literal>NULL</literal> values will be quoted in all columns.
|
|
|
|
This option is allowed only in <command>COPY TO</command>, and only when
|
|
|
|
using <literal>CSV</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>FORCE_NOT_NULL</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Do not match the specified columns' values against the null string.
|
|
|
|
In the default case where the null string is empty, this means that
|
|
|
|
empty values will be read as zero-length strings rather than nulls,
|
|
|
|
even when they are not quoted.
|
|
|
|
If <literal>*</literal> is specified, the option will be applied to all columns.
|
|
|
|
This option is allowed only in <command>COPY FROM</command>, and only when
|
|
|
|
using <literal>CSV</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>FORCE_NULL</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Match the specified columns' values against the null string, even
|
|
|
|
if it has been quoted, and if a match is found set the value to
|
|
|
|
<literal>NULL</literal>. In the default case where the null string is empty,
|
|
|
|
this converts a quoted empty string into <literal>NULL</literal>.
|
|
|
|
If <literal>*</literal> is specified, the option will be applied to all columns.
|
|
|
|
This option is allowed only in <command>COPY FROM</command>, and only when
|
|
|
|
using <literal>CSV</literal> format.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2 years ago
|
|
|
<varlistentry>
|
|
|
|
<term><literal>ON_ERROR</literal></term>
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2 years ago
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies how to behave when encountering an error converting a column's
|
|
|
|
input value into its data type.
|
|
|
|
An <replaceable class="parameter">error_action</replaceable> value of
|
|
|
|
<literal>stop</literal> means fail the command, while
|
|
|
|
<literal>ignore</literal> means discard the input row and continue with the next one.
|
|
|
|
The default is <literal>stop</literal>.
|
|
|
|
</para>
|
|
|
|
<para>
|
|
|
|
The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
|
|
|
|
when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
|
|
|
|
</para>
|
|
|
|
<para>
|
|
|
|
A <literal>NOTICE</literal> message containing the ignored row count is
|
|
|
|
emitted at the end of the <command>COPY FROM</command> if at least one
|
|
|
|
row was discarded.  When the <literal>LOG_VERBOSITY</literal> option is set to
|
|
|
|
<literal>verbose</literal>, a <literal>NOTICE</literal> message
|
|
|
|
containing the line of the input file and the column name whose input
|
|
|
|
conversion has failed is emitted for each discarded row.
|
|
|
|
When it is set to <literal>silent</literal>, no message is emitted
|
|
|
|
regarding ignored rows.
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2 years ago
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>REJECT_LIMIT</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the maximum number of errors tolerated while converting a
|
|
|
|
column's input value to its data type, when <literal>ON_ERROR</literal> is
|
|
|
|
set to <literal>ignore</literal>.
|
|
|
|
If the input causes more errors than the specified value, the <command>COPY</command>
|
|
|
|
command fails, even with <literal>ON_ERROR</literal> set to <literal>ignore</literal>.
|
|
|
|
This clause must be used with <literal>ON_ERROR</literal>=<literal>ignore</literal>
|
|
|
|
and <replaceable class="parameter">maxerror</replaceable> must be a positive <type>bigint</type>.
|
|
|
|
If not specified, <literal>ON_ERROR</literal>=<literal>ignore</literal>
|
|
|
|
allows an unlimited number of errors, meaning <command>COPY</command> will
|
|
|
|
skip all erroneous data.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>ENCODING</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies that the file is encoded in the <replaceable
|
|
|
|
class="parameter">encoding_name</replaceable>. If this option is
|
|
|
|
omitted, the current client encoding is used. See the Notes below
|
|
|
|
for more details.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>LOG_VERBOSITY</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Specifies the amount of messages emitted by a <command>COPY</command>
|
|
|
|
command: <literal>default</literal>, <literal>verbose</literal>, or
|
|
|
|
<literal>silent</literal>.
|
|
|
|
If <literal>verbose</literal> is specified, additional messages are
|
|
|
|
emitted during processing.
|
|
|
|
<literal>silent</literal> suppresses both verbose and default messages.
|
|
|
|
</para>
|
|
|
|
<para>
|
|
|
|
This is currently used in the <command>COPY FROM</command> command when
|
|
|
|
the <literal>ON_ERROR</literal> option is set to <literal>ignore</literal>.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><literal>WHERE</literal></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The optional <literal>WHERE</literal> clause has the general form
|
|
|
|
<synopsis>
|
|
|
|
WHERE <replaceable class="parameter">condition</replaceable>
|
|
|
|
</synopsis>
|
|
|
|
where <replaceable class="parameter">condition</replaceable> is
|
|
|
|
any expression that evaluates to a result of type
|
|
|
|
<type>boolean</type>. Any row that does not satisfy this
|
|
|
|
condition will not be inserted into the table.  A row satisfies the
|
|
|
|
condition if it returns true when the actual row values are
|
|
|
|
substituted for any variable references.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Currently, subqueries are not allowed in <literal>WHERE</literal>
|
|
|
|
expressions, and the evaluation does not see any changes made by the
|
|
|
|
<command>COPY</command> itself (this matters when the expression
|
|
|
|
contains calls to <literal>VOLATILE</literal> functions).
|
|
|
|
</para>
|
|
|
|
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
</variablelist>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Outputs</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
On successful completion, a <command>COPY</command> command returns a command
|
|
|
|
tag of the form
|
|
|
|
<screen>
|
|
|
|
COPY <replaceable class="parameter">count</replaceable>
|
|
|
|
</screen>
|
|
|
|
The <replaceable class="parameter">count</replaceable> is the number
|
|
|
|
of rows copied.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
<application>psql</application> will print this command tag only if the command
|
|
|
|
was not <literal>COPY ... TO STDOUT</literal>, or the
|
|
|
|
equivalent <application>psql</application> meta-command
|
|
|
|
<literal>\copy ... to stdout</literal>. This is to prevent confusing the
|
|
|
|
command tag with the data that was just printed.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Notes</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY TO</command> can be used with plain
|
|
|
|
tables and populated materialized views.
|
|
|
|
For example,
|
|
|
|
<literal>COPY <replaceable class="parameter">table</replaceable>
|
|
|
|
TO</literal> copies the same rows as
|
|
|
|
<literal>SELECT * FROM ONLY <replaceable class="parameter">table</replaceable></literal>.
|
|
|
|
However, it doesn't directly support other relation types,
|
|
|
|
such as partitioned tables, inheritance child tables, or views.
|
|
|
|
To copy all rows from such relations, use <literal>COPY (SELECT * FROM
|
|
|
|
<replaceable class="parameter">table</replaceable>) TO</literal>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY FROM</command> can be used with plain, foreign, or
|
|
|
|
partitioned tables or with views that have
|
|
|
|
<literal>INSTEAD OF INSERT</literal> triggers.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
You must have select privilege on the table
|
|
|
|
whose values are read by <command>COPY TO</command>, and
|
|
|
|
insert privilege on the table into which values
|
|
|
|
are inserted by <command>COPY FROM</command>. It is sufficient
|
|
|
|
to have column privileges on the column(s) listed in the command.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
If row-level security is enabled for the table, the relevant
|
|
|
|
<command>SELECT</command> policies will apply to <literal>COPY
|
|
|
|
<replaceable class="parameter">table</replaceable> TO</literal> statements.
|
|
|
|
Currently, <command>COPY FROM</command> is not supported for tables
|
|
|
|
with row-level security. Use equivalent <command>INSERT</command>
|
|
|
|
statements instead.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Files named in a <command>COPY</command> command are read or written
|
|
|
|
directly by the server, not by the client application. Therefore,
|
|
|
|
they must reside on or be accessible to the database server machine,
|
|
|
|
not the client. They must be accessible to and readable or writable
|
|
|
|
by the <productname>PostgreSQL</productname> user (the user ID the
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
server runs as), not the client. Similarly,
|
|
|
|
the command specified with <literal>PROGRAM</literal> is executed directly
|
|
|
|
by the server, not by the client application, and must be executable by the
|
|
|
|
<productname>PostgreSQL</productname> user.
|
|
|
|
<command>COPY</command> naming a file or command is only allowed to
|
|
|
|
database superusers or users who are granted one of the roles
|
|
|
|
<literal>pg_read_server_files</literal>,
|
|
|
|
<literal>pg_write_server_files</literal>,
|
|
|
|
or <literal>pg_execute_server_program</literal>, since it allows reading
|
|
|
|
or writing any file or running a program that the server has privileges to
|
|
|
|
access.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Do not confuse <command>COPY</command> with the
|
|
|
|
<application>psql</application> instruction
|
|
|
|
<command><link linkend="app-psql-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes
|
|
|
|
<command>COPY FROM STDIN</command> or <command>COPY TO
|
|
|
|
STDOUT</command>, and then fetches/stores the data in a file
|
|
|
|
accessible to the <application>psql</application> client. Thus,
|
|
|
|
file accessibility and access rights depend on the client rather
|
|
|
|
than the server when <command>\copy</command> is used.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
It is recommended that the file name used in <command>COPY</command>
|
|
|
|
always be specified as an absolute path. This is enforced by the
|
|
|
|
server in the case of <command>COPY TO</command>, but for
|
|
|
|
<command>COPY FROM</command> you do have the option of reading from
|
|
|
|
a file specified by a relative path. The path will be interpreted
|
|
|
|
relative to the working directory of the server process (normally
|
|
|
|
the cluster's data directory), not the client's working directory.
|
|
|
|
</para>
|
|
|
|
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
<para>
|
|
|
|
Executing a command with <literal>PROGRAM</literal> might be restricted
|
|
|
|
by the operating system's access control mechanisms, such as SELinux.
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY FROM</command> will invoke any triggers and check
|
|
|
|
constraints on the destination table. However, it will not invoke rules.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
For identity columns, the <command>COPY FROM</command> command will always
|
|
|
|
write the column values provided in the input data, like
|
|
|
|
the <command>INSERT</command> option <literal>OVERRIDING SYSTEM
|
|
|
|
VALUE</literal>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY</command> input and output is affected by
|
|
|
|
<varname>DateStyle</varname>. To ensure portability to other
|
|
|
|
<productname>PostgreSQL</productname> installations that might use
|
|
|
|
non-default <varname>DateStyle</varname> settings,
|
|
|
|
<varname>DateStyle</varname> should be set to <literal>ISO</literal> before
|
|
|
|
using <command>COPY TO</command>. It is also a good idea to avoid dumping
|
|
|
|
data with <varname>IntervalStyle</varname> set to
|
|
|
|
<literal>sql_standard</literal>, because negative interval values might be
|
|
|
|
misinterpreted by a server that has a different setting for
|
|
|
|
<varname>IntervalStyle</varname>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Input data is interpreted according to the <literal>ENCODING</literal>
|
|
|
|
option or the current client encoding, and output data is encoded
|
|
|
|
in <literal>ENCODING</literal> or the current client encoding, even
|
|
|
|
if the data does not pass through the client but is read from or
|
|
|
|
written to a file directly by the server.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The <command>COPY FROM</command> command physically inserts input rows
|
|
|
|
into the table as it progresses. If the command fails, these rows are
|
|
|
|
left in a deleted state; these rows will not be visible, but still
|
|
|
|
occupy disk space. This might amount to considerable
|
|
|
|
wasted disk space if the failure happened well into a large copy
|
|
|
|
operation. <command>VACUUM</command> should be used to recover the
|
|
|
|
wasted space.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<literal>FORCE_NULL</literal> and <literal>FORCE_NOT_NULL</literal> can be used
|
|
|
|
simultaneously on the same column. This results in converting quoted
|
|
|
|
null strings to null values and unquoted null strings to empty strings.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1 id="sql-copy-file-formats" xreflabel="File Formats">
|
|
|
|
<title>File Formats</title>
|
|
|
|
|
|
|
|
<refsect2 id="sql-copy-text-format" xreflabel="Text Format">
|
|
|
|
<title>Text Format</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
When the <literal>text</literal> format is used,
|
|
|
|
the data read or written is a text file with one line per table row.
|
|
|
|
Columns in a row are separated by the delimiter character.
|
|
|
|
The column values themselves are strings generated by the
|
|
|
|
output function, or acceptable to the input function, of each
|
|
|
|
attribute's data type. The specified null string is used in
|
|
|
|
place of columns that are null.
|
|
|
|
<command>COPY FROM</command> will raise an error if any line of the
|
|
|
|
input file contains more or fewer columns than are expected.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
End of data can be represented by a line containing just
|
|
|
|
backslash-period (<literal>\.</literal>). An end-of-data marker is
|
|
|
|
not necessary when reading from a file, since the end of file
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
serves perfectly well; in that context this provision exists only for
|
|
|
|
backward compatibility. However, <application>psql</application>
|
|
|
|
uses <literal>\.</literal> to terminate a <literal>COPY FROM
|
|
|
|
STDIN</literal> operation (that is, reading
|
|
|
|
in-line <command>COPY</command> data in an SQL script). In that
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
context the rule is needed to be able to end the operation before the
|
|
|
|
end of the script.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Backslash characters (<literal>\</literal>) can be used in the
|
|
|
|
<command>COPY</command> data to quote data characters that might
|
|
|
|
otherwise be taken as row or column delimiters. In particular, the
|
|
|
|
following characters <emphasis>must</emphasis> be preceded by a backslash if
|
|
|
|
they appear as part of a column value: backslash itself,
|
|
|
|
newline, carriage return, and the current delimiter character.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The specified null string is sent by <command>COPY TO</command> without
|
|
|
|
adding any backslashes; conversely, <command>COPY FROM</command> matches
|
|
|
|
the input against the null string before removing backslashes. Therefore,
|
|
|
|
a null string such as <literal>\N</literal> cannot be confused with
|
|
|
|
the actual data value <literal>\N</literal> (which would be represented
|
|
|
|
as <literal>\\N</literal>).
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The following special backslash sequences are recognized by
|
|
|
|
<command>COPY FROM</command>:
|
|
|
|
|
|
|
|
<informaltable>
|
|
|
|
<tgroup cols="2">
|
|
|
|
<thead>
|
|
|
|
<row>
|
|
|
|
<entry>Sequence</entry>
|
|
|
|
<entry>Represents</entry>
|
|
|
|
</row>
|
|
|
|
</thead>
|
|
|
|
|
|
|
|
<tbody>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\b</literal></entry>
|
|
|
|
<entry>Backspace (ASCII 8)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\f</literal></entry>
|
|
|
|
<entry>Form feed (ASCII 12)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\n</literal></entry>
|
|
|
|
<entry>Newline (ASCII 10)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\r</literal></entry>
|
|
|
|
<entry>Carriage return (ASCII 13)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\t</literal></entry>
|
|
|
|
<entry>Tab (ASCII 9)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\v</literal></entry>
|
|
|
|
<entry>Vertical tab (ASCII 11)</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\</literal><replaceable>digits</replaceable></entry>
|
|
|
|
<entry>Backslash followed by one to three octal digits specifies
|
|
|
|
the byte with that numeric code</entry>
|
|
|
|
</row>
|
|
|
|
<row>
|
|
|
|
<entry><literal>\x</literal><replaceable>digits</replaceable></entry>
|
|
|
|
<entry>Backslash <literal>x</literal> followed by one or two hex digits specifies
|
|
|
|
the byte with that numeric code</entry>
|
|
|
|
</row>
|
|
|
|
</tbody>
|
|
|
|
</tgroup>
|
|
|
|
</informaltable>
|
|
|
|
|
|
|
|
Presently, <command>COPY TO</command> will never emit an octal or
|
|
|
|
hex-digit backslash sequence, but it does use the other sequences
|
|
|
|
listed above for those control characters.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Any other backslashed character that is not mentioned in the above table
|
|
|
|
will be taken to represent itself. However, beware of adding backslashes
|
|
|
|
unnecessarily, since that might accidentally produce a string matching the
|
|
|
|
end-of-data marker (<literal>\.</literal>) or the null string (<literal>\N</literal> by
|
|
|
|
default). These strings will be recognized before any other backslash
|
|
|
|
processing is done.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
It is strongly recommended that applications generating <command>COPY</command> data convert
|
|
|
|
data newlines and carriage returns to the <literal>\n</literal> and
|
|
|
|
<literal>\r</literal> sequences respectively. At present it is
|
|
|
|
possible to represent a data carriage return by a backslash and carriage
|
|
|
|
return, and to represent a data newline by a backslash and newline.
|
|
|
|
However, these representations might not be accepted in future releases.
|
|
|
|
They are also highly vulnerable to corruption if the <command>COPY</command> file is
|
|
|
|
transferred across different machines (for example, from Unix to Windows
|
|
|
|
or vice versa).
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
All backslash sequences are interpreted after encoding conversion.
|
|
|
|
The bytes specified with the octal and hex-digit backslash sequences must
|
|
|
|
form valid characters in the database encoding.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
<command>COPY TO</command> will terminate each row with a Unix-style
|
|
|
|
newline (<quote><literal>\n</literal></quote>). Servers running on Microsoft Windows instead
|
|
|
|
output carriage return/newline (<quote><literal>\r\n</literal></quote>), but only for
|
|
|
|
<command>COPY</command> to a server file; for consistency across platforms,
|
|
|
|
<command>COPY TO STDOUT</command> always sends <quote><literal>\n</literal></quote>
|
|
|
|
regardless of server platform.
|
|
|
|
<command>COPY FROM</command> can handle lines ending with newlines,
|
|
|
|
carriage returns, or carriage return/newlines. To reduce the risk of
|
|
|
|
error due to un-backslashed newlines or carriage returns that were
|
|
|
|
meant as data, <command>COPY FROM</command> will complain if the line
|
|
|
|
endings in the input are not all alike.
|
|
|
|
</para>
|
|
|
|
</refsect2>
|
|
|
|
|
|
|
|
<refsect2 id="sql-copy-csv-format" xreflabel="CSV Format">
|
|
|
|
<title>CSV Format</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
This format option is used for importing and exporting the
|
|
|
|
Comma-Separated Value (<literal>CSV</literal>) file format used by many other
|
|
|
|
programs, such as spreadsheets. Instead of the escaping rules used by
|
|
|
|
<productname>PostgreSQL</productname>'s standard text format, it
|
|
|
|
produces and recognizes the common <literal>CSV</literal> escaping mechanism.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The values in each record are separated by the <literal>DELIMITER</literal>
|
|
|
|
character. If the value contains the delimiter character, the
|
|
|
|
<literal>QUOTE</literal> character, the <literal>NULL</literal> string, a carriage
|
|
|
|
return, or line feed character, then the whole value is prefixed and
|
|
|
|
suffixed by the <literal>QUOTE</literal> character, and any occurrence
|
|
|
|
within the value of a <literal>QUOTE</literal> character or the
|
|
|
|
<literal>ESCAPE</literal> character is preceded by the escape character.
|
|
|
|
You can also use <literal>FORCE_QUOTE</literal> to force quotes when outputting
|
|
|
|
non-<literal>NULL</literal> values in specific columns.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The <literal>CSV</literal> format has no standard way to distinguish a
|
|
|
|
<literal>NULL</literal> value from an empty string.
|
|
|
|
<productname>PostgreSQL</productname>'s <command>COPY</command> handles this by quoting.
|
|
|
|
A <literal>NULL</literal> is output as the <literal>NULL</literal> parameter string
|
|
|
|
and is not quoted, while a non-<literal>NULL</literal> value matching the
|
|
|
|
<literal>NULL</literal> parameter string is quoted. For example, with the
|
|
|
|
default settings, a <literal>NULL</literal> is written as an unquoted empty
|
|
|
|
string, while an empty string data value is written with double quotes
|
|
|
|
(<literal>""</literal>). Reading values follows similar rules. You can
|
|
|
|
use <literal>FORCE_NOT_NULL</literal> to prevent <literal>NULL</literal> input
|
|
|
|
comparisons for specific columns. You can also use
|
|
|
|
<literal>FORCE_NULL</literal> to convert quoted null string data values to
|
|
|
|
<literal>NULL</literal>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Because backslash is not a special character in the <literal>CSV</literal>
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
format, the end-of-data marker used in text mode (<literal>\.</literal>)
|
|
|
|
is not normally treated as special when reading <literal>CSV</literal>
|
|
|
|
data. An exception is that <application>psql</application> will terminate
|
|
|
|
a <literal>COPY FROM STDIN</literal> operation (that is, reading
|
|
|
|
in-line <command>COPY</command> data in an SQL script) at a line containing
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
only <literal>\.</literal>, whether it is text or <literal>CSV</literal>
|
|
|
|
mode.
|
|
|
|
</para>
|
|
|
|
|
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should
not be treating \. as special either. The server historically did
this to keep CSV and TEXT modes more alike and to support V2 protocol;
but V2 protocol is long dead, and the inconsistency with CSV standards
is annoying. Remove that behavior in CopyReadLineText, and make some
minor consequent code simplifications.
On the client side, we need to fix psql so that it does not check
for \. except when reading data from STDIN (that is, the script
source). We must do that regardless of TEXT/CSV mode or there is
no way to end the COPY short of script EOF. Also, be careful
not to send the \. to the server in that case.
This is a small compatibility break in that other applications
beside psql may need similar adjustment. Also, using an older
version of psql with a v18 server may result in misbehavior
during CSV-mode COPY IN.
Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself
Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
12 months ago
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
<productname>PostgreSQL</productname> versions before v18 always
|
|
|
|
recognized unquoted <literal>\.</literal> as an end-of-data marker,
|
|
|
|
even when reading from a separate file. For compatibility with older
|
|
|
|
versions, <command>COPY TO</command> will quote <literal>\.</literal>
|
|
|
|
when it's alone on a line, even though this is no longer necessary.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
In <literal>CSV</literal> format, all characters are significant. A quoted value
|
|
|
|
surrounded by white space, or any characters other than
|
|
|
|
<literal>DELIMITER</literal>, will include those characters. This can cause
|
|
|
|
errors if you import data from a system that pads <literal>CSV</literal>
|
|
|
|
lines with white space out to some fixed width. If such a situation
|
|
|
|
arises, you might need to preprocess the <literal>CSV</literal> file to remove
|
|
|
|
the trailing white space, before importing the data into
|
|
|
|
<productname>PostgreSQL</productname>.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
<literal>CSV</literal> format will both recognize and produce <literal>CSV</literal> files with quoted
|
|
|
|
values containing embedded carriage returns and line feeds. Thus
|
|
|
|
the files are not strictly one line per table row like text-format
|
|
|
|
files.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
Many programs produce strange and occasionally perverse <literal>CSV</literal> files,
|
|
|
|
so the file format is more a convention than a standard. Thus you
|
|
|
|
might encounter some files that cannot be imported using this
|
|
|
|
mechanism, and <command>COPY</command> might produce files that other
|
|
|
|
programs cannot process.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
</refsect2>
|
|
|
|
|
|
|
|
<refsect2 id="sql-copy-binary-format" xreflabel="Binary Format">
|
|
|
|
<title>Binary Format</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The <literal>binary</literal> format option causes all data to be
|
|
|
|
stored/read as binary format rather than as text. It is
|
|
|
|
somewhat faster than the text and <literal>CSV</literal> formats,
|
|
|
|
but a binary-format file is less portable across machine architectures and
|
|
|
|
<productname>PostgreSQL</productname> versions.
|
|
|
|
Also, the binary format is very data type specific; for example
|
|
|
|
it will not work to output binary data from a <type>smallint</type> column
|
|
|
|
and read it into an <type>integer</type> column, even though that would work
|
|
|
|
fine in text format.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The <literal>binary</literal> file format consists
|
|
|
|
of a file header, zero or more tuples containing the row data, and
|
|
|
|
a file trailer. Headers and data are in network byte order.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<note>
|
|
|
|
<para>
|
|
|
|
<productname>PostgreSQL</productname> releases before 7.4 used a
|
|
|
|
different binary file format.
|
|
|
|
</para>
|
|
|
|
</note>
|
|
|
|
|
|
|
|
<refsect3>
|
|
|
|
<title>File Header</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The file header consists of 19 bytes of fixed fields, followed
|
|
|
|
by a variable-length header extension area. The fixed fields are:
|
|
|
|
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term>Signature</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
11-byte sequence <literal>PGCOPY\n\377\r\n\0</literal> — note that the zero byte
|
|
|
|
is a required part of the signature. (The signature is designed to allow
|
|
|
|
easy identification of files that have been munged by a non-8-bit-clean
|
|
|
|
transfer. This signature will be changed by end-of-line-translation
|
|
|
|
filters, dropped zero bytes, dropped high bits, or parity changes.)
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>Flags field</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
32-bit integer bit mask to denote important aspects of the file format. Bits
|
|
|
|
are numbered from 0 (<acronym>LSB</acronym>) to 31 (<acronym>MSB</acronym>). Note that
|
|
|
|
this field is stored in network byte order (most significant byte first),
|
|
|
|
as are all the integer fields used in the file format. Bits
|
|
|
|
16–31 are reserved to denote critical file format issues; a reader
|
|
|
|
should abort if it finds an unexpected bit set in this range. Bits 0–15
|
|
|
|
are reserved to signal backwards-compatible format issues; a reader
|
|
|
|
should simply ignore any unexpected bits set in this range. Currently
|
|
|
|
only one flag bit is defined, and the rest must be zero:
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term>Bit 16</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
7 years ago
|
|
|
If 1, OIDs are included in the data; if 0, not. OID system columns
|
|
|
|
are not supported in <productname>PostgreSQL</productname>
|
|
|
|
anymore, but the format still contains the indicator.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
</variablelist></para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>Header extension area length</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
32-bit integer, length in bytes of the remainder of the header, not including itself.
|
|
|
|
Currently, this is zero, and the first tuple follows
|
|
|
|
immediately. Future changes to the format might allow additional data
|
|
|
|
to be present in the header. A reader should silently skip over any header
|
|
|
|
extension data it does not know what to do with.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
</variablelist>
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The header extension area is envisioned to contain a sequence of
|
|
|
|
self-identifying chunks. The flags field is not intended to tell readers
|
|
|
|
what is in the extension area. Specific design of header extension contents
|
|
|
|
is left for a later release.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
This design allows for both backwards-compatible header additions (add
|
|
|
|
header extension chunks, or set low-order flag bits) and
|
|
|
|
non-backwards-compatible changes (set high-order flag bits to signal such
|
|
|
|
changes, and add supporting data to the extension area if needed).
|
|
|
|
</para>
|
|
|
|
</refsect3>
|
|
|
|
|
|
|
|
<refsect3>
|
|
|
|
<title>Tuples</title>
|
|
|
|
<para>
|
|
|
|
Each tuple begins with a 16-bit integer count of the number of fields in the
|
|
|
|
tuple. (Presently, all tuples in a table will have the same count, but that
|
|
|
|
might not always be true.) Then, repeated for each field in the tuple, there
|
|
|
|
is a 32-bit length word followed by that many bytes of field data. (The
|
|
|
|
length word does not include itself, and can be zero.) As a special case,
|
|
|
|
-1 indicates a NULL field value. No value bytes follow in the NULL case.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
There is no alignment padding or any other extra data between fields.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Presently, all data values in a binary-format file are
|
|
|
|
assumed to be in binary format (format code one). It is anticipated that a
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
19 years ago
|
|
|
future extension might add a header field that allows per-column format codes
|
|
|
|
to be specified.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To determine the appropriate binary format for the actual tuple data you
|
|
|
|
should consult the <productname>PostgreSQL</productname> source, in
|
|
|
|
particular the <function>*send</function> and <function>*recv</function> functions for
|
|
|
|
each column's data type (typically these functions are found in the
|
|
|
|
<filename>src/backend/utils/adt/</filename> directory of the source
|
|
|
|
distribution).
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
If OIDs are included in the file, the OID field immediately follows the
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
7 years ago
|
|
|
field-count word. It is a normal field except that it's not included in the
|
|
|
|
field-count. Note that OID system columns are not supported in current
|
|
|
|
versions of <productname>PostgreSQL</productname>.
|
|
|
|
</para>
|
|
|
|
</refsect3>
|
|
|
|
|
|
|
|
<refsect3>
|
|
|
|
<title>File Trailer</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The file trailer consists of a 16-bit integer word containing -1. This
|
|
|
|
is easily distinguished from a tuple's field-count word.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
A reader should report an error if a field-count word is neither -1
|
|
|
|
nor the expected number of columns. This provides an extra
|
|
|
|
check against somehow getting out of sync with the data.
|
|
|
|
</para>
|
|
|
|
</refsect3>
|
|
|
|
</refsect2>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Examples</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The following example copies a table to the client
|
|
|
|
using the vertical bar (<literal>|</literal>) as the field delimiter:
|
|
|
|
<programlisting>
|
|
|
|
COPY country TO STDOUT (DELIMITER '|');
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To copy data from a file into the <literal>country</literal> table:
|
|
|
|
<programlisting>
|
|
|
|
COPY country FROM '/usr1/proj/bray/sql/country_data';
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To copy into a file just the countries whose names start with 'A':
|
|
|
|
<programlisting>
|
|
|
|
COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
13 years ago
|
|
|
<para>
|
|
|
|
To copy into a compressed file, you can pipe the output through an external
|
|
|
|
compression program:
|
|
|
|
<programlisting>
|
|
|
|
COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz';
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Here is a sample of data suitable for copying into a table from
|
|
|
|
<literal>STDIN</literal>:
|
|
|
|
<programlisting>
|
|
|
|
AF AFGHANISTAN
|
|
|
|
AL ALBANIA
|
|
|
|
DZ ALGERIA
|
|
|
|
ZM ZAMBIA
|
|
|
|
ZW ZIMBABWE
|
|
|
|
</programlisting>
|
|
|
|
Note that the white space on each line is actually a tab character.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The following is the same data, output in binary format.
|
|
|
|
The data is shown after filtering through the
|
|
|
|
Unix utility <command>od -c</command>. The table has three columns;
|
|
|
|
the first has type <type>char(2)</type>, the second has type <type>text</type>,
|
|
|
|
and the third has type <type>integer</type>. All the rows have a null value
|
|
|
|
in the third column.
|
|
|
|
<programlisting>
|
|
|
|
0000000 P G C O P Y \n 377 \r \n \0 \0 \0 \0 \0 \0
|
|
|
|
0000020 \0 \0 \0 \0 003 \0 \0 \0 002 A F \0 \0 \0 013 A
|
|
|
|
0000040 F G H A N I S T A N 377 377 377 377 \0 003
|
|
|
|
0000060 \0 \0 \0 002 A L \0 \0 \0 007 A L B A N I
|
|
|
|
0000100 A 377 377 377 377 \0 003 \0 \0 \0 002 D Z \0 \0 \0
|
|
|
|
0000120 007 A L G E R I A 377 377 377 377 \0 003 \0 \0
|
|
|
|
0000140 \0 002 Z M \0 \0 \0 006 Z A M B I A 377 377
|
|
|
|
0000160 377 377 \0 003 \0 \0 \0 002 Z W \0 \0 \0 \b Z I
|
|
|
|
0000200 M B A B W E 377 377 377 377 377 377
|
|
|
|
</programlisting></para>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>Compatibility</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
There is no <command>COPY</command> statement in the SQL standard.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The following syntax was used before <productname>PostgreSQL</productname>
|
|
|
|
version 9.0 and is still supported:
|
|
|
|
|
|
|
|
<synopsis>
|
|
|
|
COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
|
|
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
|
|
|
|
[ [ WITH ]
|
|
|
|
[ BINARY ]
|
|
|
|
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
|
|
|
|
[ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ]
|
|
|
|
[ CSV [ HEADER ]
|
|
|
|
[ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
|
|
|
|
[ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
|
|
|
|
[ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ]
|
|
|
|
|
|
|
|
COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
|
|
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
|
|
|
|
[ [ WITH ]
|
|
|
|
[ BINARY ]
|
|
|
|
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
|
|
|
|
[ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ]
|
|
|
|
[ CSV [ HEADER ]
|
|
|
|
[ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
|
|
|
|
[ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
|
|
|
|
[ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ]
|
|
|
|
</synopsis>
|
|
|
|
|
|
|
|
Note that in this syntax, <literal>BINARY</literal> and <literal>CSV</literal> are
|
|
|
|
treated as independent keywords, not as arguments of a <literal>FORMAT</literal>
|
|
|
|
option.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The following syntax was used before <productname>PostgreSQL</productname>
|
|
|
|
version 7.3 and is still supported:
|
|
|
|
|
|
|
|
<synopsis>
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
7 years ago
|
|
|
COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable>
|
|
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
|
|
|
|
[ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
|
|
|
|
[ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
|
|
|
|
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load a binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for,
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide the oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get this merged
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
7 years ago
|
|
|
COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable>
|
|
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
|
|
|
|
[ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
|
|
|
|
[ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
|
|
|
|
</synopsis></para>
|
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1>
|
|
|
|
<title>See Also</title>
|
|
|
|
|
|
|
|
<simplelist type="inline">
|
|
|
|
<member><xref linkend="copy-progress-reporting"/></member>
|
|
|
|
</simplelist>
|
|
|
|
</refsect1>
|
|
|
|
</refentry>
|