@ -667,6 +667,14 @@ json_lex(JsonLexContext *lex)
/*
* The next token in the input stream is known to be a string ; lex it .
*
* If lex - > strval isn ' t NULL , fill it with the decoded string .
* Set lex - > token_terminator to the end of the decoded input , and in
* success cases , transfer its previous value to lex - > prev_token_terminator .
* Return JSON_SUCCESS or an error code .
*
* Note : be careful that all error exits advance lex - > token_terminator
* to the point after the character we detected the error on .
*/
static inline JsonParseErrorType
json_lex_string ( JsonLexContext * lex )
@ -675,6 +683,19 @@ json_lex_string(JsonLexContext *lex)
int len ;
int hi_surrogate = - 1 ;
/* Convenience macros for error exits */
# define FAIL_AT_CHAR_START(code) \
do { \
lex - > token_terminator = s ; \
return code ; \
} while ( 0 )
# define FAIL_AT_CHAR_END(code) \
do { \
lex - > token_terminator = \
s + pg_encoding_mblen_bounded ( lex - > input_encoding , s ) ; \
return code ; \
} while ( 0 )
if ( lex - > strval ! = NULL )
resetStringInfo ( lex - > strval ) ;
@ -687,18 +708,14 @@ json_lex_string(JsonLexContext *lex)
len + + ;
/* Premature end of the string. */
if ( len > = lex - > input_length )
{
lex - > token_terminator = s ;
return JSON_INVALID_TOKEN ;
}
FAIL_AT_CHAR_START ( JSON_INVALID_TOKEN ) ;
else if ( * s = = ' " ' )
break ;
else if ( ( unsigned char ) * s < 32 )
{
/* Per RFC4627, these characters MUST be escaped. */
/* Since *s isn't printable, exclude it from the context string */
lex - > token_terminator = s ;
return JSON_ESCAPING_REQUIRED ;
FAIL_AT_CHAR_START ( JSON_ESCAPING_REQUIRED ) ;
}
else if ( * s = = ' \\ ' )
{
@ -706,10 +723,7 @@ json_lex_string(JsonLexContext *lex)
s + + ;
len + + ;
if ( len > = lex - > input_length )
{
lex - > token_terminator = s ;
return JSON_INVALID_TOKEN ;
}
FAIL_AT_CHAR_START ( JSON_INVALID_TOKEN ) ;
else if ( * s = = ' u ' )
{
int i ;
@ -720,10 +734,7 @@ json_lex_string(JsonLexContext *lex)
s + + ;
len + + ;
if ( len > = lex - > input_length )
{
lex - > token_terminator = s ;
return JSON_INVALID_TOKEN ;
}
FAIL_AT_CHAR_START ( JSON_INVALID_TOKEN ) ;
else if ( * s > = ' 0 ' & & * s < = ' 9 ' )
ch = ( ch * 16 ) + ( * s - ' 0 ' ) ;
else if ( * s > = ' a ' & & * s < = ' f ' )
@ -731,10 +742,7 @@ json_lex_string(JsonLexContext *lex)
else if ( * s > = ' A ' & & * s < = ' F ' )
ch = ( ch * 16 ) + ( * s - ' A ' ) + 10 ;
else
{
lex - > token_terminator = s + pg_encoding_mblen_bounded ( lex - > input_encoding , s ) ;
return JSON_UNICODE_ESCAPE_FORMAT ;
}
FAIL_AT_CHAR_END ( JSON_UNICODE_ESCAPE_FORMAT ) ;
}
if ( lex - > strval ! = NULL )
{
@ -744,20 +752,20 @@ json_lex_string(JsonLexContext *lex)
if ( is_utf16_surrogate_first ( ch ) )
{
if ( hi_surrogate ! = - 1 )
return JSON_UNICODE_HIGH_SURROGATE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_HIGH_SURROGATE ) ;
hi_surrogate = ch ;
continue ;
}
else if ( is_utf16_surrogate_second ( ch ) )
{
if ( hi_surrogate = = - 1 )
return JSON_UNICODE_LOW_SURROGATE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
ch = surrogate_pair_to_codepoint ( hi_surrogate , ch ) ;
hi_surrogate = - 1 ;
}
if ( hi_surrogate ! = - 1 )
return JSON_UNICODE_LOW_SURROGATE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
/*
* Reject invalid cases . We can ' t have a value above
@ -767,7 +775,7 @@ json_lex_string(JsonLexContext *lex)
if ( ch = = 0 )
{
/* We can't allow this, since our TEXT type doesn't */
return JSON_UNICODE_CODE_POINT_ZERO ;
FAIL_AT_CHAR_END ( JSON_UNICODE_CODE_POINT_ZERO ) ;
}
/*
@ -804,14 +812,14 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar ( lex - > strval , ( char ) ch ) ;
}
else
return JSON_UNICODE_HIGH_ESCAPE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_HIGH_ESCAPE ) ;
# endif /* FRONTEND */
}
}
else if ( lex - > strval ! = NULL )
{
if ( hi_surrogate ! = - 1 )
return JSON_UNICODE_LOW_SURROGATE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
switch ( * s )
{
@ -836,10 +844,14 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar ( lex - > strval , ' \t ' ) ;
break ;
default :
/* Not a valid string escape, so signal error. */
/*
* Not a valid string escape , so signal error . We
* adjust token_start so that just the escape sequence
* is reported , not the whole string .
*/
lex - > token_start = s ;
lex - > token_terminator = s + pg_encoding_mblen_bounded ( lex - > input_encoding , s ) ;
return JSON_ESCAPING_INVALID ;
FAIL_AT_CHAR_END ( JSON_ESCAPING_INVALID ) ;
}
}
else if ( strchr ( " \" \\ /bfnrt " , * s ) = = NULL )
@ -852,26 +864,31 @@ json_lex_string(JsonLexContext *lex)
* shown it ' s not a performance win .
*/
lex - > token_start = s ;
lex - > token_terminator = s + pg_encoding_mblen_bounded ( lex - > input_encoding , s ) ;
return JSON_ESCAPING_INVALID ;
FAIL_AT_CHAR_END ( JSON_ESCAPING_INVALID ) ;
}
}
else if ( lex - > strval ! = NULL )
{
if ( hi_surrogate ! = - 1 )
return JSON_UNICODE_LOW_SURROGATE ;
FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
appendStringInfoChar ( lex - > strval , * s ) ;
}
}
if ( hi_surrogate ! = - 1 )
{
lex - > token_terminator = s + 1 ;
return JSON_UNICODE_LOW_SURROGATE ;
}
/* Hooray, we found the end of the string! */
lex - > prev_token_terminator = lex - > token_terminator ;
lex - > token_terminator = s + 1 ;
return JSON_SUCCESS ;
# undef FAIL_AT_CHAR_START
# undef FAIL_AT_CHAR_END
}
/*