@ -3,6 +3,50 @@
* copyfromparse . c
* Parse CSV / text / binary format for COPY FROM .
*
* This file contains routines to parse the text , CSV and binary input
* formats . The main entry point is NextCopyFrom ( ) , which parses the
* next input line and returns it as Datums .
*
* In text / CSV mode , the parsing happens in multiple stages :
*
* [ data source ] - - > raw_buf - - > input_buf - - > line_buf - - > attribute_buf
* 1. 2. 3. 4.
*
* 1. CopyLoadRawBuf ( ) reads raw data from the input file or client , and
* places it into ' raw_buf ' .
*
* 2. CopyConvertBuf ( ) calls the encoding conversion function to convert
* the data in ' raw_buf ' from client to server encoding , placing the
* converted result in ' input_buf ' .
*
* 3. CopyReadLine ( ) parses the data in ' input_buf ' , one line at a time .
* It is responsible for finding the next newline marker , taking quote and
* escape characters into account according to the COPY options . The line
* is copied into ' line_buf ' , with quotes and escape characters still
* intact .
*
* 4. CopyReadAttributesText / CSV ( ) function takes the input line from
* ' line_buf ' , and splits it into fields , unescaping the data as required .
* The fields are stored in ' attribute_buf ' , and ' raw_fields ' array holds
* pointers to each field .
*
* If encoding conversion is not required , a shortcut is taken in step 2 to
* avoid copying the data unnecessarily . The ' input_buf ' pointer is set to
* point directly to ' raw_buf ' , so that CopyLoadRawBuf ( ) loads the raw data
* directly into ' input_buf ' . CopyConvertBuf ( ) then merely validates that
* the data is valid in the current encoding .
*
* In binary mode , the pipeline is much simpler . Input is loaded into
* ' raw_buf ' , and encoding conversion is done in the datatype - specific
* receive functions , if required . ' input_buf ' and ' line_buf ' are not used ,
* but ' attribute_buf ' is used as a temporary buffer to hold one attribute ' s
* data when it ' s passed to the receive function .
*
* ' raw_buf ' is always 64 kB in size ( RAW_BUF_SIZE ) . ' input_buf ' is also
* 64 kB ( INPUT_BUF_SIZE ) , if encoding conversion is required . ' line_buf '
* and ' attribute_buf ' are expanded on demand , to hold the longest line
* encountered so far .
*
* Portions Copyright ( c ) 1996 - 2021 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
@ -35,7 +79,7 @@
# define OCTVALUE(c) ((c) - '0')
/*
* These macros centralize code used to process line_buf and raw _buf buffers .
* These macros centralize code used to process line_buf and input _buf buffers .
* They are macros because they often do continue / break control and to avoid
* function call overhead in tight COPY loops .
*
@ -53,9 +97,9 @@
# define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if ( 1 ) \
{ \
if ( raw _buf_ptr + ( extralen ) > = copy_buf_len & & ! hit_eof ) \
if ( input _buf_ptr + ( extralen ) > = copy_buf_len & & ! hit_eof ) \
{ \
raw _buf_ptr = prev_raw_ptr ; /* undo fetch */ \
input _buf_ptr = prev_raw_ptr ; /* undo fetch */ \
need_data = true ; \
continue ; \
} \
@ -65,10 +109,10 @@ if (1) \
# define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if ( 1 ) \
{ \
if ( raw _buf_ptr + ( extralen ) > = copy_buf_len & & hit_eof ) \
if ( input _buf_ptr + ( extralen ) > = copy_buf_len & & hit_eof ) \
{ \
if ( extralen ) \
raw _buf_ptr = copy_buf_len ; /* consume the partial character */ \
input _buf_ptr = copy_buf_len ; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true ; \
break ; \
@ -77,17 +121,17 @@ if (1) \
/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in input_buf.
 *
 * "Approved" data is the span from cstate->input_buf_index up to (but not
 * including) the local scan position input_buf_ptr.  After the append,
 * input_buf_index is advanced to input_buf_ptr so the same bytes are never
 * appended twice.  Expects 'cstate' and 'input_buf_ptr' to be in scope at
 * the expansion site.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (input_buf_ptr > cstate->input_buf_index) \
	{ \
		appendBinaryStringInfo(&cstate->line_buf, \
							   cstate->input_buf + cstate->input_buf_index, \
							   input_buf_ptr - cstate->input_buf_index); \
		cstate->input_buf_index = input_buf_ptr; \
	} \
} else ((void) 0)
@ -95,7 +139,7 @@ if (1) \
/*
 * Rewind the scan position to just past the character fetched at
 * prev_raw_ptr and jump to the not_end_of_copy label.  Expects
 * 'input_buf_ptr', 'prev_raw_ptr' and the label to be in scope at the
 * expansion site.
 */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	input_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)
@ -118,7 +162,7 @@ static int CopyGetData(CopyFromState cstate, void *databuf,
int minread , int maxread ) ;
static inline bool CopyGetInt32 ( CopyFromState cstate , int32 * val ) ;
static inline bool CopyGetInt16 ( CopyFromState cstate , int16 * val ) ;
static bool CopyLoadRaw Buf( CopyFromState cstate ) ;
static void CopyLoadInput Buf( CopyFromState cstate ) ;
static int CopyReadBinaryData ( CopyFromState cstate , char * dest , int nbytes ) ;
void
@ -210,10 +254,10 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
( errcode_for_file_access ( ) ,
errmsg ( " could not read from COPY file: %m " ) ) ) ;
if ( bytesread = = 0 )
cstate - > reached_eof = true ;
cstate - > raw_r eached_eof = true ;
break ;
case COPY_FRONTEND :
while ( maxread > 0 & & bytesread < minread & & ! cstate - > reached_eof )
while ( maxread > 0 & & bytesread < minread & & ! cstate - > raw_r eached_eof )
{
int avail ;
@ -241,7 +285,7 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
break ;
case ' c ' : /* CopyDone */
/* COPY IN correctly terminated by frontend */
cstate - > reached_eof = true ;
cstate - > raw_r eached_eof = true ;
return bytesread ;
case ' f ' : /* CopyFail */
ereport ( ERROR ,
@ -327,34 +371,303 @@ CopyGetInt16(CopyFromState cstate, int16 *val)
/*
 * Perform encoding conversion on data in 'raw_buf', writing the converted
 * data into 'input_buf'.
 *
 * On entry, there should be some data to convert in 'raw_buf'.  If there is
 * none and the raw source has hit EOF, input_reached_eof is set instead.
 *
 * Results are reported through cstate:
 *	- input_buf_len grows by the number of bytes made available;
 *	- raw_buf_index advances by the number of raw bytes consumed (transcoding
 *	  case only);
 *	- input_reached_eof is set when no raw data is left and none is coming;
 *	- input_reached_error is set when nothing could be verified/converted and
 *	  that cannot be explained by a character merely being split across
 *	  buffer loads.  The error itself is not raised here.
 */
static void
CopyConvertBuf(CopyFromState cstate)
{
	/*
	 * If no transcoding is needed, no encoding conversion is performed.
	 * However, we still need to verify that the input is valid for the file
	 * encoding.
	 */
	if (!cstate->need_transcoding)
	{
		/*
		 * When conversion is not required, input_buf and raw_buf are the
		 * same.  raw_buf_len is the total number of bytes in the buffer, and
		 * input_buf_len tracks how many of those bytes have already been
		 * verified.
		 */
		int			preverifiedlen = cstate->input_buf_len;
		int			unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
		int			nverified;

		if (unverifiedlen == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * Verify the new data, including any residual unverified bytes from
		 * previous round.
		 */
		nverified = pg_encoding_verifymbstr(cstate->file_encoding,
											cstate->raw_buf + preverifiedlen,
											unverifiedlen);
		if (nverified == 0)
		{
			/*
			 * Could not verify anything.
			 *
			 * If there is no more raw input data coming, it means that there
			 * was an incomplete multi-byte sequence at the end.  Also, if
			 * there's "enough" input left, we should be able to verify at
			 * least one character, and a failure to do so means that we've
			 * hit an invalid byte sequence.
			 *
			 * "Enough" must be measured in the encoding the data is verified
			 * in, i.e. file_encoding.  Using the database encoding's maximum
			 * length here would be wrong whenever the two differ (presumably
			 * possible when one side is SQL_ASCII -- confirm against where
			 * need_transcoding is set): a character split across a buffer
			 * boundary would then be misreported as invalid.
			 */
			if (cstate->raw_reached_eof ||
				unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
				cstate->input_reached_error = true;
			return;
		}
		cstate->input_buf_len += nverified;
	}
	else
	{
		/*
		 * Encoding conversion is needed.
		 */
		int			nbytes;
		unsigned char *src;
		int			srclen;
		unsigned char *dst;
		int			dstlen;
		int			convertedlen;

		if (RAW_BUF_BYTES(cstate) == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * First, copy down any unprocessed data.
		 */
		nbytes = INPUT_BUF_BYTES(cstate);
		if (nbytes > 0 && cstate->input_buf_index > 0)
			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
					nbytes);
		cstate->input_buf_index = 0;
		cstate->input_buf_len = nbytes;
		cstate->input_buf[nbytes] = '\0';

		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
		/* the +1 leaves room for the terminator written after the output */
		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

		/*
		 * Do the conversion.  This might stop short, if there is an invalid
		 * byte sequence in the input.  We'll convert as much as we can in
		 * that case.
		 *
		 * Note: Even if we hit an invalid byte sequence, we don't report the
		 * error until all the valid bytes have been consumed.  The input
		 * might contain an end-of-input marker (\.), and we don't want to
		 * report an error if the invalid byte sequence is after the
		 * end-of-input marker.  We might unnecessarily convert some data
		 * after the end-of-input marker as long as it's valid for the
		 * encoding, but that's harmless.
		 */
		convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
													 cstate->file_encoding,
													 GetDatabaseEncoding(),
													 src, srclen,
													 dst, dstlen,
													 true);
		if (convertedlen == 0)
		{
			/*
			 * Could not convert anything.  If there is no more raw input data
			 * coming, it means that there was an incomplete multi-byte
			 * sequence at the end.  Also, if there is plenty of input left,
			 * we should be able to convert at least one character, so a
			 * failure to do so must mean that we've hit a byte sequence
			 * that's invalid.
			 */
			if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
				cstate->input_reached_error = true;
			return;
		}

		/* convertedlen counts consumed source bytes, not produced bytes */
		cstate->raw_buf_index += convertedlen;
		cstate->input_buf_len += strlen((char *) dst);
	}
}
/*
 * Raise the encoding/conversion error previously flagged in cstate.
 *
 * Must only be called once input_reached_error has been set and while
 * raw_buf still holds the offending bytes (both are asserted).
 */
static void
CopyConversionError(CopyFromState cstate)
{
	unsigned char *bad_bytes;
	int			bad_len;
	unsigned char *out;
	int			out_room;

	Assert(cstate->raw_buf_len > 0);
	Assert(cstate->input_reached_error);

	if (!cstate->need_transcoding)
	{
		/*
		 * Verification-only mode: bytes before input_buf_len passed
		 * verification, so the invalid or incomplete character starts at
		 * input_buf_len.
		 */
		report_invalid_encoding(cstate->file_encoding,
								cstate->raw_buf + cstate->input_buf_len,
								cstate->raw_buf_len - cstate->input_buf_len);
		return;
	}

	/*
	 * Transcoding mode: the invalid or untranslatable character is at
	 * raw_buf_index.  Rather than composing a message here, re-run the
	 * conversion routine on the same bytes with noError = false so that it
	 * throws the error itself; it can give a more specific message than we
	 * could.
	 */
	bad_bytes = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
	bad_len = cstate->raw_buf_len - cstate->raw_buf_index;
	out = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
	out_room = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

	(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
										 cstate->file_encoding,
										 GetDatabaseEncoding(),
										 bad_bytes, bad_len,
										 out, out_room,
										 false);

	/*
	 * The call above is expected to have reported an error already, so
	 * getting here indicates something is inconsistent.
	 */
	elog(ERROR, "encoding conversion failed without error");
}
/*
 * Load more data from data source to raw_buf.
 *
 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
 * beginning of the buffer, and we load new data after that.
 *
 * Nothing is returned; if the read yields zero bytes, raw_reached_eof is
 * set.  bytes_processed and the COPY progress counter are advanced by the
 * number of bytes read.
 */
static void
CopyLoadRawBuf(CopyFromState cstate)
{
	int			nbytes;
	int			inbytes;

	/*
	 * In text mode, if encoding conversion is not required, raw_buf and
	 * input_buf point to the same buffer.  Their len/index better agree, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->raw_buf_index == cstate->input_buf_index);
		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
	}

	/*
	 * Copy down the unprocessed data if any.  The memmove is skipped when
	 * the data already starts at offset zero.
	 */
	nbytes = RAW_BUF_BYTES(cstate);
	if (nbytes > 0 && cstate->raw_buf_index > 0)
		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
				nbytes);
	cstate->raw_buf_len -= cstate->raw_buf_index;
	cstate->raw_buf_index = 0;

	/*
	 * If raw_buf and input_buf are in fact the same buffer, adjust the
	 * input_buf variables, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		cstate->input_buf_len -= cstate->input_buf_index;
		cstate->input_buf_index = 0;
	}

	/* Load more data, appending after what is already buffered */
	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
	nbytes += inbytes;
	cstate->raw_buf[nbytes] = '\0';
	cstate->raw_buf_len = nbytes;

	cstate->bytes_processed += inbytes;
	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

	if (inbytes == 0)
		cstate->raw_reached_eof = true;
}
/*
 * CopyLoadInputBuf makes more data available in input_buf.
 *
 * It returns once input_buf holds more unread bytes than it did on entry,
 * or once input_reached_eof has been set.  A pending conversion error is
 * reported via CopyConversionError() instead of returning.
 *
 * Unprocessed bytes already in the buffer (INPUT_BUF_BYTES(cstate) > 0) are
 * kept; the helpers called here shift them to the start of the buffer
 * before new data is appended.
 */
static void
CopyLoadInputBuf(CopyFromState cstate)
{
	/* amount of unread input on entry; we need to end up with more */
	int			avail_on_entry = INPUT_BUF_BYTES(cstate);

	/*
	 * input_buf_index reflects how much the caller has consumed.  When
	 * input_buf and raw_buf are one and the same memory area, carry that
	 * position over to raw_buf_index so both views agree.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->input_buf_index >= cstate->raw_buf_index);
		cstate->raw_buf_index = cstate->input_buf_index;
	}

	/* Start by converting whatever raw data is already buffered */
	CopyConvertBuf(cstate);

	/* Keep going until the conversion step yields new input bytes */
	while (INPUT_BUF_BYTES(cstate) <= avail_on_entry)
	{
		/*
		 * An invalid byte sequence, or an incomplete multi-byte character
		 * with no more raw input to complete it: report the conversion
		 * error.
		 */
		if (cstate->input_reached_error)
			CopyConversionError(cstate);

		/* All raw input has been read and converted; nothing more to do */
		if (cstate->input_reached_eof)
			return;

		/* Fetch more raw data, then try converting again */
		Assert(!cstate->raw_reached_eof);
		CopyLoadRawBuf(cstate);
		CopyConvertBuf(cstate);
	}
}
/*
@ -389,7 +702,8 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
/* Load more data if buffer is empty. */
if ( RAW_BUF_BYTES ( cstate ) = = 0 )
{
if ( ! CopyLoadRawBuf ( cstate ) )
CopyLoadRawBuf ( cstate ) ;
if ( cstate - > raw_reached_eof )
break ; /* EOF */
}
@ -645,8 +959,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
}
/*
* Read the next input line and stash it in line_buf , with conversion to
* server encoding .
* Read the next input line and stash it in line_buf .
*
* Result is true if read was terminated by EOF , false if terminated
* by newline . The terminating newline or EOF marker is not included
@ -658,10 +971,7 @@ CopyReadLine(CopyFromState cstate)
bool result ;
resetStringInfo ( & cstate - > line_buf ) ;
cstate - > line_buf_valid = true ;
/* Mark that encoding conversion hasn't occurred yet */
cstate - > line_buf_converted = false ;
cstate - > line_buf_valid = false ;
/* Parse data and transfer into line_buf */
result = CopyReadLineText ( cstate ) ;
@ -675,10 +985,17 @@ CopyReadLine(CopyFromState cstate)
*/
if ( cstate - > copy_src = = COPY_FRONTEND )
{
int inbytes ;
do
{
cstate - > raw_buf_index = cstate - > raw_buf_len ;
} while ( CopyLoadRawBuf ( cstate ) ) ;
inbytes = CopyGetData ( cstate , cstate - > input_buf ,
1 , INPUT_BUF_SIZE ) ;
} while ( inbytes > 0 ) ;
cstate - > input_buf_index = 0 ;
cstate - > input_buf_len = 0 ;
cstate - > raw_buf_index = 0 ;
cstate - > raw_buf_len = 0 ;
}
}
else
@ -715,25 +1032,8 @@ CopyReadLine(CopyFromState cstate)
}
}
/* Done reading the line. Convert it to server encoding. */
if ( cstate - > need_transcoding )
{
char * cvt ;
cvt = pg_any_to_server ( cstate - > line_buf . data ,
cstate - > line_buf . len ,
cstate - > file_encoding ) ;
if ( cvt ! = cstate - > line_buf . data )
{
/* transfer converted data back to line_buf */
resetStringInfo ( & cstate - > line_buf ) ;
appendBinaryStringInfo ( & cstate - > line_buf , cvt , strlen ( cvt ) ) ;
pfree ( cvt ) ;
}
}
/* Now it's safe to use the buffer in error messages */
cstate - > line_buf_converte d = true ;
cstate - > line_buf_valid = true ;
return result ;
}
@ -744,13 +1044,12 @@ CopyReadLine(CopyFromState cstate)
static bool
CopyReadLineText ( CopyFromState cstate )
{
char * copy_raw _buf ;
int raw _buf_ptr;
char * copy_input _buf ;
int input _buf_ptr;
int copy_buf_len ;
bool need_data = false ;
bool hit_eof = false ;
bool result = false ;
char mblen_str [ 2 ] ;
/* CSV variables */
bool first_char_in_line = true ;
@ -768,8 +1067,6 @@ CopyReadLineText(CopyFromState cstate)
escapec = ' \0 ' ;
}
mblen_str [ 1 ] = ' \0 ' ;
/*
* The objective of this loop is to transfer the entire next input line
* into line_buf . Hence , we only care for detecting newlines ( \ r and / or
@ -782,18 +1079,25 @@ CopyReadLineText(CopyFromState cstate)
* These four characters , and the CSV escape and quote characters , are
* assumed the same in frontend and backend encodings .
*
* For speed , we try to move data from raw_buf to line_buf in chunks
* rather than one character at a time . raw_buf_ptr points to the next
* character to examine ; any characters from raw_buf_index to raw_buf_ptr
* have been determined to be part of the line , but not yet transferred to
* line_buf .
* The input has already been converted to the database encoding . All
* supported server encodings have the property that all bytes in a
* multi - byte sequence have the high bit set , so a multibyte character
* cannot contain any newline or escape characters embedded in the
* multibyte sequence . Therefore , we can process the input byte - by - byte ,
* regardless of the encoding .
*
* For speed , we try to move data from input_buf to line_buf in chunks
* rather than one character at a time . input_buf_ptr points to the next
* character to examine ; any characters from input_buf_index to
* input_buf_ptr have been determined to be part of the line , but not yet
* transferred to line_buf .
*
* For a little extra speed within the loop , we copy raw_buf and
* raw_buf_len into local variables .
* For a little extra speed within the loop , we copy input _buf and
* input _buf_len into local variables .
*/
copy_raw_buf = cstate - > raw_buf ;
raw_buf_ptr = cstate - > raw_buf_index ;
copy_buf_len = cstate - > raw_buf_len ;
copy_input _buf = cstate - > input _buf;
input _buf_ptr = cstate - > input _buf_index;
copy_buf_len = cstate - > input _buf_len;
for ( ; ; )
{
@ -810,24 +1114,21 @@ CopyReadLineText(CopyFromState cstate)
* cstate - > copy_src ! = COPY_OLD_FE , but it hardly seems worth it ,
* considering the size of the buffer .
*/
if ( raw _buf_ptr > = copy_buf_len | | need_data )
if ( input _buf_ptr > = copy_buf_len | | need_data )
{
REFILL_LINEBUF ;
/*
* Try to read some more data . This will certainly reset
* raw_buf_index to zero , and raw_buf_ptr must go with it .
*/
if ( ! CopyLoadRawBuf ( cstate ) )
hit_eof = true ;
raw_buf_ptr = 0 ;
copy_buf_len = cstate - > raw_buf_len ;
CopyLoadInputBuf ( cstate ) ;
/* update our local variables */
hit_eof = cstate - > input_reached_eof ;
input_buf_ptr = cstate - > input_buf_index ;
copy_buf_len = cstate - > input_buf_len ;
/*
* If we are completely out of data , break out of the loop ,
* reporting EOF .
*/
if ( copy_buf_len < = 0 )
if ( INPUT_BUF_BYTES ( cstate ) < = 0 )
{
result = true ;
break ;
@ -836,8 +1137,8 @@ CopyReadLineText(CopyFromState cstate)
}
/* OK to fetch a character */
prev_raw_ptr = raw _buf_ptr;
c = copy_raw_buf [ raw _buf_ptr + + ] ;
prev_raw_ptr = input _buf_ptr;
c = copy_input_buf [ input _buf_ptr + + ] ;
if ( cstate - > opts . csv_mode )
{
@ -891,16 +1192,16 @@ CopyReadLineText(CopyFromState cstate)
* If need more data , go back to loop top to load it .
*
* Note that if we are at EOF , c will wind up as ' \0 ' because
* of the guaranteed pad of raw _buf.
* of the guaranteed pad of input _buf.
*/
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE ( 0 ) ;
/* get next char */
c = copy_raw_buf [ raw _buf_ptr ] ;
c = copy_input_buf [ input _buf_ptr ] ;
if ( c = = ' \n ' )
{
raw _buf_ptr+ + ; /* eat newline */
input _buf_ptr+ + ; /* eat newline */
cstate - > eol_type = EOL_CRNL ; /* in case not set yet */
}
else
@ -967,14 +1268,14 @@ CopyReadLineText(CopyFromState cstate)
/* -----
* get next character
* Note : we do not change c so if it isn ' t \ . , we can fall
* through and continue processing for file encoding .
* through and continue processing .
* - - - - -
*/
c2 = copy_raw_buf [ raw _buf_ptr ] ;
c2 = copy_input_buf [ input _buf_ptr ] ;
if ( c2 = = ' . ' )
{
raw _buf_ptr+ + ; /* consume the '.' */
input _buf_ptr+ + ; /* consume the '.' */
/*
* Note : if we loop back for more data here , it does not
@ -986,7 +1287,7 @@ CopyReadLineText(CopyFromState cstate)
/* Get the next character */
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE ( 0 ) ;
/* if hit_eof, c2 will become '\0' */
c2 = copy_raw_buf [ raw _buf_ptr + + ] ;
c2 = copy_input_buf [ input _buf_ptr + + ] ;
if ( c2 = = ' \n ' )
{
@ -1011,7 +1312,7 @@ CopyReadLineText(CopyFromState cstate)
/* Get the next character */
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE ( 0 ) ;
/* if hit_eof, c2 will become '\0' */
c2 = copy_raw_buf [ raw _buf_ptr + + ] ;
c2 = copy_input_buf [ input _buf_ptr + + ] ;
if ( c2 ! = ' \r ' & & c2 ! = ' \n ' )
{
@ -1036,11 +1337,11 @@ CopyReadLineText(CopyFromState cstate)
* Transfer only the data before the \ . into line_buf , then
* discard the data and the \ . sequence .
*/
if ( prev_raw_ptr > cstate - > raw _buf_index)
if ( prev_raw_ptr > cstate - > input _buf_index)
appendBinaryStringInfo ( & cstate - > line_buf ,
cstate - > raw _buf + cstate - > raw _buf_index,
prev_raw_ptr - cstate - > raw _buf_index) ;
cstate - > raw_buf_index = raw _buf_ptr;
cstate - > input _buf + cstate - > input _buf_index,
prev_raw_ptr - cstate - > input _buf_index) ;
cstate - > input_buf_index = input _buf_ptr;
result = true ; /* report EOF */
break ;
}
@ -1056,15 +1357,8 @@ CopyReadLineText(CopyFromState cstate)
* backslashes are not special , so we want to process the
* character after the backslash just like a normal character ,
* so we don ' t increment in those cases .
*
* Set ' c ' to skip whole character correctly in multi - byte
* encodings . If we don ' t have the whole character in the
* buffer yet , we might loop back to process it , after all ,
* but that ' s OK because multi - byte characters cannot have any
* special meaning .
*/
raw_buf_ptr + + ;
c = c2 ;
input_buf_ptr + + ;
}
}
@ -1075,30 +1369,6 @@ CopyReadLineText(CopyFromState cstate)
* value , while in non - CSV mode , \ . cannot be a data value .
*/
not_end_of_copy :
/*
* Process all bytes of a multi - byte character as a group .
*
* We only support multi - byte sequences where the first byte has the
* high - bit set , so as an optimization we can avoid this block
* entirely if it is not set .
*/
if ( cstate - > encoding_embeds_ascii & & IS_HIGHBIT_SET ( c ) )
{
int mblen ;
/*
* It is enough to look at the first byte in all our encodings , to
* get the length . ( GB18030 is a bit special , but still works for
* our purposes ; see comment in pg_gb18030_mblen ( ) )
*/
mblen_str [ 0 ] = c ;
mblen = pg_encoding_mblen ( cstate - > file_encoding , mblen_str ) ;
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE ( mblen - 1 ) ;
IF_NEED_REFILL_AND_EOF_BREAK ( mblen - 1 ) ;
raw_buf_ptr + = mblen - 1 ;
}
first_char_in_line = false ;
} /* end of outer loop */