Improve efficiency of attribute scanning in CopyReadAttributesCSV.

The loop is split into two parts, one for scanning inside quotes and one for scanning outside quotes, saving some instructions in both parts.

Heikki Linnakangas
REL8_5_ALPHA1_BRANCH
Andrew Dunstan 18 years ago
parent 9c767ad57b
commit 95c238d941
  1. 114
      src/backend/commands/copy.c

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.295 2008/01/01 19:45:48 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.296 2008/03/08 01:16:26 adunstan Exp $
*
*-------------------------------------------------------------------------
*/
@ -2913,7 +2913,6 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
for (;;)
{
bool found_delim = false;
bool in_quote = false;
bool saw_quote = false;
char *start_ptr;
char *end_ptr;
@ -2929,72 +2928,87 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
start_ptr = cur_ptr;
fieldvals[fieldno] = output_ptr;
/* Scan data for field */
/* Scan data for field,
*
* The loop starts in "not quote" mode and then toggles between
* that and "in quote" mode.
* The loop exits normally if it is in "not quote" mode and a
* delimiter or line end is seen.
*/
for (;;)
{
char c;
end_ptr = cur_ptr;
if (cur_ptr >= line_end_ptr)
break;
c = *cur_ptr++;
/* unquoted field delimiter */
if (c == delimc && !in_quote)
{
found_delim = true;
break;
}
/* start of quoted field (or part of field) */
if (c == quotec && !in_quote)
/* Not in quote */
for (;;)
{
saw_quote = true;
in_quote = true;
continue;
end_ptr = cur_ptr;
if (cur_ptr >= line_end_ptr)
goto endfield;
c = *cur_ptr++;
/* unquoted field delimiter */
if (c == delimc)
{
found_delim = true;
goto endfield;
}
/* start of quoted field (or part of field) */
if (c == quotec)
{
saw_quote = true;
break;
}
/* Add c to output string */
*output_ptr++ = c;
}
/* escape within a quoted field */
if (c == escapec && in_quote)
/* In quote */
for (;;)
{
/*
* peek at the next char if available, and escape it if it is
* an escape char or a quote char
*/
if (cur_ptr < line_end_ptr)
{
char nextc = *cur_ptr;
end_ptr = cur_ptr;
if (cur_ptr >= line_end_ptr)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unterminated CSV quoted field")));
c = *cur_ptr++;
if (nextc == escapec || nextc == quotec)
/* escape within a quoted field */
if (c == escapec)
{
/*
* peek at the next char if available, and escape it if it is
* an escape char or a quote char
*/
if (cur_ptr < line_end_ptr)
{
*output_ptr++ = nextc;
cur_ptr++;
continue;
char nextc = *cur_ptr;
if (nextc == escapec || nextc == quotec)
{
*output_ptr++ = nextc;
cur_ptr++;
continue;
}
}
}
}
/*
* end of quoted field. Must do this test after testing for escape
* in case quote char and escape char are the same (which is the
* common case).
*/
if (c == quotec)
break;
/*
* end of quoted field. Must do this test after testing for escape
* in case quote char and escape char are the same (which is the
* common case).
*/
if (c == quotec && in_quote)
{
in_quote = false;
continue;
/* Add c to output string */
*output_ptr++ = c;
}
/* Add c to output string */
*output_ptr++ = c;
}
endfield:
/* Terminate attribute value in output area */
*output_ptr++ = '\0';
/* Shouldn't still be in quote mode */
if (in_quote)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unterminated CSV quoted field")));
/* Check whether raw input matched null marker */
input_len = end_ptr - start_ptr;
if (!saw_quote && input_len == cstate->null_print_len &&

Loading…
Cancel
Save