diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index b9413d48925..f493ddb371d 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -44,6 +44,7 @@ COPY { table_name [ ( column_name [, ...] ) | * } FORCE_NULL { ( column_name [, ...] ) | * } ON_ERROR error_action + REJECT_LIMIT maxerror ENCODING 'encoding_name' LOG_VERBOSITY verbosity @@ -413,6 +414,24 @@ COPY { table_name [ ( + + REJECT_LIMIT + + + Specifies the maximum number of errors tolerated while converting a + column's input value to its data type, when ON_ERROR is + set to ignore. + If the input causes more errors than the specified value, the COPY + command fails, even with ON_ERROR set to ignore. + This clause must be used with ON_ERROR=ignore + and maxerror must be a positive bigint. + If not specified, ON_ERROR=ignore + allows an unlimited number of errors, meaning COPY will + skip all erroneous data. + + + + ENCODING diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 03eb7a4ebac..befab92074e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -418,6 +418,23 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) return COPY_ON_ERROR_STOP; /* keep compiler quiet */ } +/* + * Extract REJECT_LIMIT value from a DefElem. + */ +static int64 +defGetCopyRejectLimitOption(DefElem *def) +{ + int64 reject_limit = defGetInt64(def); + + if (reject_limit <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("REJECT_LIMIT (%lld) must be greater than zero", + (long long) reject_limit))); + + return reject_limit; +} + /* * Extract a CopyLogVerbosityChoice value from a DefElem. 
*/ @@ -472,6 +489,7 @@ ProcessCopyOptions(ParseState *pstate, bool header_specified = false; bool on_error_specified = false; bool log_verbosity_specified = false; + bool reject_limit_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -638,6 +656,13 @@ ProcessCopyOptions(ParseState *pstate, log_verbosity_specified = true; opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate); } + else if (strcmp(defel->defname, "reject_limit") == 0) + { + if (reject_limit_specified) + errorConflictingDefElem(defel, pstate); + reject_limit_specified = true; + opts_out->reject_limit = defGetCopyRejectLimitOption(defel); + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -874,6 +899,14 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("NULL specification and DEFAULT specification cannot be the same"))); } + /* REJECT_LIMIT may only be specified together with ON_ERROR */ + if (opts_out->reject_limit && !opts_out->on_error) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: first and second %s are the names of COPY options, e.g. 
IGNORE */ + errmsg("COPY %s requires %s to be set to %s", + "REJECT_LIMIT", "ON_ERROR", "IGNORE"))); } /* diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 9139a407858..07cbd5d22b8 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1018,6 +1018,13 @@ CopyFrom(CopyFromState cstate) pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, cstate->num_errors); + if (cstate->opts.reject_limit > 0 && \ + cstate->num_errors > cstate->opts.reject_limit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility", + (long long) cstate->opts.reject_limit))); + /* Repeat NextCopyFrom() until no soft error occurs */ continue; } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 6f64d97fdd9..4002a7f5382 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -85,6 +85,7 @@ typedef struct CopyFormatOptions bool convert_selectively; /* do selective binary conversion? 
*/ CopyOnErrorChoice on_error; /* what to do when error happened */ CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */ + int64 reject_limit; /* maximum tolerable number of errors */ List *convert_select; /* list of column names (can be NIL) */ } CopyFormatOptions; diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 4e752977b53..ab449fa7b80 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -116,6 +116,10 @@ COPY x to stdout (log_verbosity unsupported); ERROR: COPY LOG_VERBOSITY "unsupported" not recognized LINE 1: COPY x to stdout (log_verbosity unsupported); ^ +COPY x from stdin with (reject_limit 1); +ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE +COPY x from stdin with (on_error ignore, reject_limit 0); +ERROR: REJECT_LIMIT (0) must be greater than zero -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once @@ -791,6 +795,12 @@ CONTEXT: COPY check_ign_err, line 1: "1 {1}" COPY check_ign_err FROM STDIN WITH (on_error ignore); ERROR: extra data after last expected column CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc" +-- tests for reject_limit option +COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 3); +ERROR: skipped more than REJECT_LIMIT (3) rows due to data type incompatibility +CONTEXT: COPY check_ign_err, line 5, column n: "" +COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4); +NOTICE: 4 rows were skipped due to data type incompatibility -- clean up DROP TABLE forcetest; DROP TABLE vistest; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index fa6aa17344a..1aa0e41b681 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -82,6 +82,8 @@ COPY x to stdout (format TEXT, force_null(a)); COPY x to stdin (format CSV, force_null(a)); COPY x to stdin (format BINARY, 
on_error unsupported); COPY x to stdout (log_verbosity unsupported); +COPY x from stdin with (reject_limit 1); +COPY x from stdin with (on_error ignore, reject_limit 0); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; @@ -561,6 +563,25 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore); 1 {1} 3 abc \. +-- tests for reject_limit option +COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 3); +6 {6} 6 +a {7} 7 +8 {8} 8888888888 +9 {a, 9} 9 + +10 {10} 10 +\. + +COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4); +6 {6} 6 +a {7} 7 +8 {8} 8888888888 +9 {a, 9} 9 + +10 {10} 10 +\. + -- clean up DROP TABLE forcetest; DROP TABLE vistest;