Replace the data structure used for keyword lookup.

Previously, ScanKeywordLookup was passed an array of string pointers. This had some performance deficiencies: the strings themselves might be scattered all over the place depending on the compiler (and some quick checking shows that at least with gcc-on-Linux, they indeed weren't reliably close together). That led to very cache-unfriendly behavior as the binary search touched strings in many different pages. Also, depending on the platform, the string pointers might need to be adjusted at program start, so that they couldn't be simple constant data. And the ScanKeyword struct had been designed with an eye to 32-bit machines originally; on 64-bit it requires 16 bytes per keyword, making it even more cache-unfriendly.

Redesign so that the keyword strings themselves are allocated consecutively (as part of one big char-string constant), thereby eliminating the touch-lots-of-unrelated-pages syndrome. And get rid of the ScanKeyword array in favor of three separate arrays: uint16 offsets into the keyword array, uint16 token codes, and uint8 keyword categories. That reduces the overhead per keyword to 5 bytes instead of 16 (even less in programs that only need one of the token codes and categories); moreover, the binary search only touches the offsets array, further reducing its cache footprint. This also lets us put the token codes somewhere else than the keyword strings are, which avoids some unpleasant build dependencies.

While we're at it, wrap the data used by ScanKeywordLookup into a struct that can be treated as an opaque type by most callers. That doesn't change things much right now, but it will make it less painful to switch to a hash-based lookup method, as is being discussed in the mailing list thread.

Most of the change here is associated with adding a generator script that can build the new data structure from the same list-of-PG_KEYWORD header representation we used before. The PG_KEYWORD lists that plpgsql and ecpg used to embed in their scanner .c files have to be moved into headers, and the Makefiles have to be taught to invoke the generator script. This work is also necessary if we're to consider hash-based lookup, since the generator script is what would be responsible for constructing a hash table.

Aside from saving a few kilobytes in each program that includes the keyword table, this seems to speed up raw parsing (flex+bison) by a few percent. So it's worth doing even as it stands, though we think we can gain even more with a follow-on patch to switch to hash-based lookup.

John Naylor, with further hacking by me

Discussion: https://postgr.es/m/CAJVSVGXdFVU2sgym89XPL=Lv1zOS5=EHHQ8XWNzFL=mTXkKMLw@mail.gmail.com
7 years ago · afb0d0712f
parent c5c7fa261f
commit afb0d0712f
32 changed files with 843 additions and 439 deletions
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@ -3075,8 +3075,8 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
 	/* initialize the flex scanner --- should match raw_parser() */
 	yyscanner = scanner_init(query,
 							 &yyextra,
-							 ScanKeywords,
-							 NumScanKeywords);
+							 &ScanKeywords,
+							 ScanKeywordTokens);

 	/* we don't want to re-emit any escape string warnings */
 	yyextra.escape_string_warning = false;
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@ -41,7 +41,7 @@ raw_parser(const char *str)

 	/* initialize the flex scanner */
 	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
-							 ScanKeywords, NumScanKeywords);
+							 &ScanKeywords, ScanKeywordTokens);

 	/* base_yylex() only needs this much initialization */
 	yyextra.have_lookahead = false;
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@ -66,6 +66,21 @@ int			backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
 bool		escape_string_warning = true;
 bool		standard_conforming_strings = true;

+/*
+ * Constant data exported from this file.  This array maps from the
+ * zero-based keyword numbers returned by ScanKeywordLookup to the
+ * Bison token numbers needed by gram.y.  This is exported because
+ * callers need to pass it to scanner_init, if they are using the
+ * standard keyword list ScanKeywords.
+ */
+#define PG_KEYWORD(kwname, value, category) value,
+
+const uint16 ScanKeywordTokens[] = {
+#include "parser/kwlist.h"
+};
+
+#undef PG_KEYWORD
+
 /*
 * Set the type of YYSTYPE.
 */
@ -504,18 +519,18 @@ other			.
 					 * We will pass this along as a normal character string,
 					 * but preceded with an internally-generated "NCHAR".
 					 */
-					const ScanKeyword *keyword;
+					int		kwnum;

 					SET_YYLLOC();
 					yyless(1);	/* eat only 'n' this time */

-					keyword = ScanKeywordLookup("nchar",
-												yyextra->keywords,
-												yyextra->num_keywords);
-					if (keyword != NULL)
+					kwnum = ScanKeywordLookup("nchar",
+											  yyextra->keywordlist);
+					if (kwnum >= 0)
 					{
-						yylval->keyword = keyword->name;
-						return keyword->value;
+						yylval->keyword = GetScanKeyword(kwnum,
+														 yyextra->keywordlist);
+						return yyextra->keyword_tokens[kwnum];
 					}
 					else
 					{
@ -1021,19 +1036,19 @@ other			.


 {identifier}	{
-					const ScanKeyword *keyword;
+					int			kwnum;
 					char	   *ident;

 					SET_YYLLOC();

 					/* Is it a keyword? */
-					keyword = ScanKeywordLookup(yytext,
-												yyextra->keywords,
-												yyextra->num_keywords);
-					if (keyword != NULL)
+					kwnum = ScanKeywordLookup(yytext,
+											  yyextra->keywordlist);
+					if (kwnum >= 0)
 					{
-						yylval->keyword = keyword->name;
-						return keyword->value;
+						yylval->keyword = GetScanKeyword(kwnum,
+														 yyextra->keywordlist);
+						return yyextra->keyword_tokens[kwnum];
 					}

 					/*
@ -1142,8 +1157,8 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner)
 core_yyscan_t
 scanner_init(const char *str,
 			 core_yy_extra_type *yyext,
-			 const ScanKeyword *keywords,
-			 int num_keywords)
+			 const ScanKeywordList *keywordlist,
+			 const uint16 *keyword_tokens)
 {
 	Size		slen = strlen(str);
 	yyscan_t	scanner;
@ -1153,8 +1168,8 @@ scanner_init(const char *str,

 	core_yyset_extra(yyext, scanner);

-	yyext->keywords = keywords;
-	yyext->num_keywords = num_keywords;
+	yyext->keywordlist = keywordlist;
+	yyext->keyword_tokens = keyword_tokens;

 	yyext->backslash_quote = backslash_quote;
 	yyext->escape_string_warning = escape_string_warning;
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@ -417,15 +417,17 @@ pg_get_keywords(PG_FUNCTION_ARGS)

 	funcctx = SRF_PERCALL_SETUP();

-	if (funcctx->call_cntr < NumScanKeywords)
+	if (funcctx->call_cntr < ScanKeywords.num_keywords)
 	{
 		char	   *values[3];
 		HeapTuple	tuple;

 		/* cast-away-const is ugly but alternatives aren't much better */
-		values[0] = unconstify(char *, ScanKeywords[funcctx->call_cntr].name);
+		values[0] = unconstify(char *,
+							   GetScanKeyword(funcctx->call_cntr,
+											  &ScanKeywords));

-		switch (ScanKeywords[funcctx->call_cntr].category)
+		switch (ScanKeywordCategories[funcctx->call_cntr])
 		{
 			case UNRESERVED_KEYWORD:
 				values[1] = "U";
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@ -10601,11 +10601,9 @@ quote_identifier(const char *ident)
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
-		const ScanKeyword *keyword = ScanKeywordLookup(ident,
-													   ScanKeywords,
-													   NumScanKeywords);
+		int			kwnum = ScanKeywordLookup(ident, &ScanKeywords);

-		if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
+		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
 			safe = false;
 	}

--- a/src/common/.gitignore
+++ b/src/common/.gitignore
@ -0,0 +1 @@
+/kwlist_d.h
--- a/src/common/Makefile
+++ b/src/common/Makefile
@ -41,11 +41,11 @@ override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\""
 override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\""
 override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\""

-override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+override CPPFLAGS := -DFRONTEND -I. -I$(top_srcdir)/src/common $(CPPFLAGS)
 LIBS += $(PTHREAD_LIBS)

 OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o file_perm.o \
-	ip.o keywords.o link-canary.o md5.o pg_lzcompress.o \
+	ip.o keywords.o kwlookup.o link-canary.o md5.o pg_lzcompress.o \
 	pgfnames.o psprintf.o relpath.o \
 	rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \
 	username.o wait_error.o
@ -65,6 +65,8 @@ OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)

 all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a

+distprep: kwlist_d.h
+
 # libpgcommon is needed by some contrib
 install: all installdirs
 	$(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a'
@ -115,16 +117,18 @@ libpgcommon_srv.a: $(OBJS_SRV)
 %_srv.o: %.c %.o
 	$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@

-# Dependencies of keywords.o need to be managed explicitly to make sure
-# that you don't get broken parsing code, even in a non-enable-depend build.
-# Note that gram.h isn't required for the frontend versions of keywords.o.
-$(top_builddir)/src/include/parser/gram.h: $(top_srcdir)/src/backend/parser/gram.y
-	$(MAKE) -C $(top_builddir)/src/backend $(top_builddir)/src/include/parser/gram.h
+# generate SQL keyword lookup table to be included into keywords*.o.
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl
+	$(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $<

-keywords.o: $(top_srcdir)/src/include/parser/kwlist.h
-keywords_shlib.o: $(top_srcdir)/src/include/parser/kwlist.h
-keywords_srv.o: $(top_builddir)/src/include/parser/gram.h $(top_srcdir)/src/include/parser/kwlist.h
+# Dependencies of keywords*.o need to be managed explicitly to make sure
+# that you don't get broken parsing code, even in a non-enable-depend build.
+keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h

-clean distclean maintainer-clean:
+# kwlist_d.h is in the distribution tarball, so it is not cleaned here.
+clean distclean:
 	rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
 	rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV)
+
+maintainer-clean: distclean
+	rm -f kwlist_d.h
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
 *
 * keywords.c
- *	  lexical token lookup for key words in PostgreSQL
+ *	  PostgreSQL's list of SQL keywords
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
@ -13,102 +13,21 @@
 *
 *-------------------------------------------------------------------------
 */
-#ifndef FRONTEND
-#include "postgres.h"
-#else
-#include "postgres_fe.h"
-#endif
+#include "c.h"

-#ifndef FRONTEND
-
-#include "parser/gramparse.h"
+#include "common/keywords.h"

-#define PG_KEYWORD(a,b,c) {a,b,c},

-#else
+/* ScanKeywordList lookup data for SQL keywords */

-#include "common/keywords.h"
-
-/*
- * We don't need the token number for frontend uses, so leave it out to avoid
- * requiring backend headers that won't compile cleanly here.
- */
-#define PG_KEYWORD(a,b,c) {a,0,c},
+#include "kwlist_d.h"

-#endif							/* FRONTEND */
+/* Keyword categories for SQL keywords */

+#define PG_KEYWORD(kwname, value, category) category,

-const ScanKeyword ScanKeywords[] = {
+const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = {
 #include "parser/kwlist.h"
 };

-const int	NumScanKeywords = lengthof(ScanKeywords);
-
-
-/*
- * ScanKeywordLookup - see if a given word is a keyword
- *
- * The table to be searched is passed explicitly, so that this can be used
- * to search keyword lists other than the standard list appearing above.
- *
- * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
- *
- * The match is done case-insensitively.  Note that we deliberately use a
- * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
- * even if we are in a locale where tolower() would produce more or different
- * translations.  This is to conform to the SQL99 spec, which says that
- * keywords are to be matched in this way even though non-keyword identifiers
- * receive a different case-normalization mapping.
- */
-const ScanKeyword *
-ScanKeywordLookup(const char *text,
-				  const ScanKeyword *keywords,
-				  int num_keywords)
-{
-	int			len,
-				i;
-	char		word[NAMEDATALEN];
-	const ScanKeyword *low;
-	const ScanKeyword *high;
-
-	len = strlen(text);
-	/* We assume all keywords are shorter than NAMEDATALEN. */
-	if (len >= NAMEDATALEN)
-		return NULL;
-
-	/*
-	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
-	 * produce the wrong translation in some locales (eg, Turkish).
-	 */
-	for (i = 0; i < len; i++)
-	{
-		char		ch = text[i];
-
-		if (ch >= 'A' && ch <= 'Z')
-			ch += 'a' - 'A';
-		word[i] = ch;
-	}
-	word[len] = '\0';
-
-	/*
-	 * Now do a binary search using plain strcmp() comparison.
-	 */
-	low = keywords;
-	high = keywords + (num_keywords - 1);
-	while (low <= high)
-	{
-		const ScanKeyword *middle;
-		int			difference;
-
-		middle = low + (high - low) / 2;
-		difference = strcmp(middle->name, word);
-		if (difference == 0)
-			return middle;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
-	}
-
-	return NULL;
-}
+#undef PG_KEYWORD
--- a/src/common/kwlookup.c
+++ b/src/common/kwlookup.c
@ -0,0 +1,94 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.c
+ *	  Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/common/kwlookup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/kwlookup.h"
+
+
+/*
+ * ScanKeywordLookup - see if a given word is a keyword
+ *
+ * The list of keywords to be matched against is passed as a ScanKeywordList.
+ *
+ * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
+ * Callers typically use the keyword number to index into information
+ * arrays, but that is no concern of this code.
+ *
+ * The match is done case-insensitively.  Note that we deliberately use a
+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
+ * even if we are in a locale where tolower() would produce more or different
+ * translations.  This is to conform to the SQL99 spec, which says that
+ * keywords are to be matched in this way even though non-keyword identifiers
+ * receive a different case-normalization mapping.
+ */
+int
+ScanKeywordLookup(const char *text,
+				  const ScanKeywordList *keywords)
+{
+	int			len,
+				i;
+	char		word[NAMEDATALEN];
+	const char *kw_string;
+	const uint16 *kw_offsets;
+	const uint16 *low;
+	const uint16 *high;
+
+	len = strlen(text);
+
+	if (len > keywords->max_kw_len)
+		return -1;				/* too long to be any keyword */
+
+	/* We assume all keywords are shorter than NAMEDATALEN. */
+	Assert(len < NAMEDATALEN);
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+	 * produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (i = 0; i < len; i++)
+	{
+		char		ch = text[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		word[i] = ch;
+	}
+	word[len] = '\0';
+
+	/*
+	 * Now do a binary search using plain strcmp() comparison.
+	 */
+	kw_string = keywords->kw_string;
+	kw_offsets = keywords->kw_offsets;
+	low = kw_offsets;
+	high = kw_offsets + (keywords->num_keywords - 1);
+	while (low <= high)
+	{
+		const uint16 *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(kw_string + *middle, word);
+		if (difference == 0)
+			return middle - kw_offsets;
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return -1;
+}
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@ -104,11 +104,9 @@ fmtId(const char *rawid)
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
-		const ScanKeyword *keyword = ScanKeywordLookup(rawid,
-													   ScanKeywords,
-													   NumScanKeywords);
+		int			kwnum = ScanKeywordLookup(rawid, &ScanKeywords);

-		if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
+		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
 			need_quotes = true;
 	}

--- a/src/include/common/keywords.h
+++ b/src/include/common/keywords.h
@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
 *
 * keywords.h
- *	  lexical token lookup for key words in PostgreSQL
+ *	  PostgreSQL's list of SQL keywords
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
@ -14,31 +14,20 @@
 #ifndef KEYWORDS_H
 #define KEYWORDS_H

+#include "common/kwlookup.h"
+
 /* Keyword categories --- should match lists in gram.y */
 #define UNRESERVED_KEYWORD		0
 #define COL_NAME_KEYWORD		1
 #define TYPE_FUNC_NAME_KEYWORD	2
 #define RESERVED_KEYWORD		3

-
-typedef struct ScanKeyword
-{
-	const char *name;			/* in lower case */
-	int16		value;			/* grammar's token code */
-	int16		category;		/* see codes above */
-} ScanKeyword;
-
 #ifndef FRONTEND
-extern PGDLLIMPORT const ScanKeyword ScanKeywords[];
-extern PGDLLIMPORT const int NumScanKeywords;
+extern PGDLLIMPORT const ScanKeywordList ScanKeywords;
+extern PGDLLIMPORT const uint8 ScanKeywordCategories[];
 #else
-extern const ScanKeyword ScanKeywords[];
-extern const int NumScanKeywords;
+extern const ScanKeywordList ScanKeywords;
+extern const uint8 ScanKeywordCategories[];
 #endif

-
-extern const ScanKeyword *ScanKeywordLookup(const char *text,
-				  const ScanKeyword *keywords,
-				  int num_keywords);
-
 #endif							/* KEYWORDS_H */
--- a/src/include/common/kwlookup.h
+++ b/src/include/common/kwlookup.h
@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.h
+ *	  Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/kwlookup.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef KWLOOKUP_H
+#define KWLOOKUP_H
+
+/*
+ * This struct contains the data needed by ScanKeywordLookup to perform a
+ * search within a set of keywords.  The contents are typically generated by
+ * src/tools/gen_keywordlist.pl from a header containing PG_KEYWORD macros.
+ */
+typedef struct ScanKeywordList
+{
+	const char *kw_string;		/* all keywords in order, separated by \0 */
+	const uint16 *kw_offsets;	/* offsets to the start of each keyword */
+	int			num_keywords;	/* number of keywords */
+	int			max_kw_len;		/* length of longest keyword */
+} ScanKeywordList;
+
+
+extern int	ScanKeywordLookup(const char *text, const ScanKeywordList *keywords);
+
+/* Code that wants to retrieve the text of the N'th keyword should use this. */
+static inline const char *
+GetScanKeyword(int n, const ScanKeywordList *keywords)
+{
+	return keywords->kw_string + keywords->kw_offsets[n];
+}
+
+#endif							/* KWLOOKUP_H */
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@ -2,7 +2,7 @@
 *
 * kwlist.h
 *
- * The keyword list is kept in its own source file for possible use by
+ * The keyword lists are kept in their own source files for use by
 * automatic tools.  The exact representation of a keyword is determined
 * by the PG_KEYWORD macro, which is not defined in this file; it can
 * be defined by the caller for special purposes.
--- a/src/include/parser/scanner.h
+++ b/src/include/parser/scanner.h
@ -73,10 +73,10 @@ typedef struct core_yy_extra_type
 	Size		scanbuflen;

 	/*
-	 * The keyword list to use.
+	 * The keyword list to use, and the associated grammar token codes.
 	 */
-	const ScanKeyword *keywords;
-	int			num_keywords;
+	const ScanKeywordList *keywordlist;
+	const uint16 *keyword_tokens;

 	/*
 	 * Scanner settings to use.  These are initialized from the corresponding
@ -116,11 +116,14 @@ typedef struct core_yy_extra_type
 typedef void *core_yyscan_t;


+/* Constant data exported from parser/scan.l */
+extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
+
 /* Entry points in parser/scan.l */
 extern core_yyscan_t scanner_init(const char *str,
 			 core_yy_extra_type *yyext,
-			 const ScanKeyword *keywords,
-			 int num_keywords);
+			 const ScanKeywordList *keywordlist,
+			 const uint16 *keyword_tokens);
 extern void scanner_finish(core_yyscan_t yyscanner);
 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
 		   core_yyscan_t yyscanner);
--- a/src/interfaces/ecpg/preproc/.gitignore
+++ b/src/interfaces/ecpg/preproc/.gitignore
@ -2,5 +2,7 @@
 /preproc.c
 /preproc.h
 /pgc.c
+/c_kwlist_d.h
+/ecpg_kwlist_d.h
 /typename.c
 /ecpg
--- a/src/interfaces/ecpg/preproc/Makefile
+++ b/src/interfaces/ecpg/preproc/Makefile
@ -28,6 +28,8 @@ OBJS=	preproc.o pgc.o type.o ecpg.o output.o parser.o \
 	keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o \
 	$(WIN32RES)

+GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl
+
 # Suppress parallel build to avoid a bug in GNU make 3.82
 # (see comments in ../Makefile)
 ifeq ($(MAKE_VERSION),3.82)
@ -53,9 +55,20 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.
 	$(PERL) $(srcdir)/parse.pl $(srcdir) < $< > $@
 	$(PERL) $(srcdir)/check_rules.pl $(srcdir) $<

+# generate keyword headers
+c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST)
+	$(PERL) $(GEN_KEYWORDLIST) --varname ScanCKeywords $<
+
+ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST)
+	$(PERL) $(GEN_KEYWORDLIST) --varname ScanECPGKeywords $<
+
+# Force these dependencies to be known even without dependency info built:
 ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h
+ecpg_keywords.o: ecpg_kwlist_d.h
+c_keywords.o: c_kwlist_d.h
+keywords.o: $(top_srcdir)/src/include/parser/kwlist.h

-distprep: preproc.y preproc.c preproc.h pgc.c
+distprep: preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h

 install: all installdirs
 	$(INSTALL_PROGRAM) ecpg$(X) '$(DESTDIR)$(bindir)'
@ -66,12 +79,11 @@ installdirs:
 uninstall:
 	rm -f '$(DESTDIR)$(bindir)/ecpg$(X)'

+# preproc.y, preproc.c, preproc.h, pgc.c, c_kwlist_d.h, and ecpg_kwlist_d.h
+# are in the distribution tarball, so they are not cleaned here.
 clean distclean:
 	rm -f *.o ecpg$(X)
 	rm -f typename.c

-# `make distclean' must not remove preproc.y, preproc.c, preproc.h, or pgc.c
-# since we want to ship those files in the distribution for people with
-# inadequate tools.  Instead, `make maintainer-clean' will remove them.
 maintainer-clean: distclean
-	rm -f preproc.y preproc.c preproc.h pgc.c
+	rm -f preproc.y preproc.c preproc.h pgc.c c_kwlist_d.h ecpg_kwlist_d.h
--- a/src/interfaces/ecpg/preproc/c_keywords.c
+++ b/src/interfaces/ecpg/preproc/c_keywords.c
@ -14,72 +14,57 @@
 #include "preproc_extern.h"
 #include "preproc.h"

-/*
- * List of (keyword-name, keyword-token-value) pairs.
- *
- * !!WARNING!!: This list must be sorted, because binary
- *		 search is used to locate entries.
- */
-static const ScanKeyword ScanCKeywords[] = {
-	/* name, value, category */
+/* ScanKeywordList lookup data for C keywords */
+#include "c_kwlist_d.h"

-	/*
-	 * category is not needed in ecpg, it is only here so we can share the
-	 * data structure with the backend
-	 */
-	{"VARCHAR", VARCHAR, 0},
-	{"auto", S_AUTO, 0},
-	{"bool", SQL_BOOL, 0},
-	{"char", CHAR_P, 0},
-	{"const", S_CONST, 0},
-	{"enum", ENUM_P, 0},
-	{"extern", S_EXTERN, 0},
-	{"float", FLOAT_P, 0},
-	{"hour", HOUR_P, 0},
-	{"int", INT_P, 0},
-	{"long", SQL_LONG, 0},
-	{"minute", MINUTE_P, 0},
-	{"month", MONTH_P, 0},
-	{"register", S_REGISTER, 0},
-	{"second", SECOND_P, 0},
-	{"short", SQL_SHORT, 0},
-	{"signed", SQL_SIGNED, 0},
-	{"static", S_STATIC, 0},
-	{"struct", SQL_STRUCT, 0},
-	{"to", TO, 0},
-	{"typedef", S_TYPEDEF, 0},
-	{"union", UNION, 0},
-	{"unsigned", SQL_UNSIGNED, 0},
-	{"varchar", VARCHAR, 0},
-	{"volatile", S_VOLATILE, 0},
-	{"year", YEAR_P, 0},
+/* Token codes for C keywords */
+#define PG_KEYWORD(kwname, value) value,
+
+static const uint16 ScanCKeywordTokens[] = {
+#include "c_kwlist.h"
 };

+#undef PG_KEYWORD
+

 /*
+ * ScanCKeywordLookup - see if a given word is a keyword
+ *
+ * Returns the token value of the keyword, or -1 if no match.
+ *
 * Do a binary search using plain strcmp() comparison.  This is much like
 * ScanKeywordLookup(), except we want case-sensitive matching.
 */
-const ScanKeyword *
+int
 ScanCKeywordLookup(const char *text)
 {
-	const ScanKeyword *low = &ScanCKeywords[0];
-	const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
+	const char *kw_string;
+	const uint16 *kw_offsets;
+	const uint16 *low;
+	const uint16 *high;
+
+	if (strlen(text) > ScanCKeywords.max_kw_len)
+		return -1;				/* too long to be any keyword */
+
+	kw_string = ScanCKeywords.kw_string;
+	kw_offsets = ScanCKeywords.kw_offsets;
+	low = kw_offsets;
+	high = kw_offsets + (ScanCKeywords.num_keywords - 1);

 	while (low <= high)
 	{
-		const ScanKeyword *middle;
+		const uint16 *middle;
 		int			difference;

 		middle = low + (high - low) / 2;
-		difference = strcmp(middle->name, text);
+		difference = strcmp(kw_string + *middle, text);
 		if (difference == 0)
-			return middle;
+			return ScanCKeywordTokens[middle - kw_offsets];
 		else if (difference < 0)
 			low = middle + 1;
 		else
 			high = middle - 1;
 	}

-	return NULL;
+	return -1;
 }
--- a/src/interfaces/ecpg/preproc/c_kwlist.h
+++ b/src/interfaces/ecpg/preproc/c_kwlist.h
@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * c_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/interfaces/ecpg/preproc/c_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef C_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *		 search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("VARCHAR", VARCHAR)
+PG_KEYWORD("auto", S_AUTO)
+PG_KEYWORD("bool", SQL_BOOL)
+PG_KEYWORD("char", CHAR_P)
+PG_KEYWORD("const", S_CONST)
+PG_KEYWORD("enum", ENUM_P)
+PG_KEYWORD("extern", S_EXTERN)
+PG_KEYWORD("float", FLOAT_P)
+PG_KEYWORD("hour", HOUR_P)
+PG_KEYWORD("int", INT_P)
+PG_KEYWORD("long", SQL_LONG)
+PG_KEYWORD("minute", MINUTE_P)
+PG_KEYWORD("month", MONTH_P)
+PG_KEYWORD("register", S_REGISTER)
+PG_KEYWORD("second", SECOND_P)
+PG_KEYWORD("short", SQL_SHORT)
+PG_KEYWORD("signed", SQL_SIGNED)
+PG_KEYWORD("static", S_STATIC)
+PG_KEYWORD("struct", SQL_STRUCT)
+PG_KEYWORD("to", TO)
+PG_KEYWORD("typedef", S_TYPEDEF)
+PG_KEYWORD("union", UNION)
+PG_KEYWORD("unsigned", SQL_UNSIGNED)
+PG_KEYWORD("varchar", VARCHAR)
+PG_KEYWORD("volatile", S_VOLATILE)
+PG_KEYWORD("year", YEAR_P)
--- a/src/interfaces/ecpg/preproc/ecpg_keywords.c
+++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c
@ -16,82 +16,40 @@
 #include "preproc_extern.h"
 #include "preproc.h"

-/*
- * List of (keyword-name, keyword-token-value) pairs.
- *
- * !!WARNING!!: This list must be sorted, because binary
- *		 search is used to locate entries.
- */
-static const ScanKeyword ECPGScanKeywords[] = {
-	/* name, value, category */
+/* ScanKeywordList lookup data for ECPG keywords */
+#include "ecpg_kwlist_d.h"
+
+/* Token codes for ECPG keywords */
+#define PG_KEYWORD(kwname, value) value,

-	/*
-	 * category is not needed in ecpg, it is only here so we can share the
-	 * data structure with the backend
-	 */
-	{"allocate", SQL_ALLOCATE, 0},
-	{"autocommit", SQL_AUTOCOMMIT, 0},
-	{"bool", SQL_BOOL, 0},
-	{"break", SQL_BREAK, 0},
-	{"cardinality", SQL_CARDINALITY, 0},
-	{"connect", SQL_CONNECT, 0},
-	{"count", SQL_COUNT, 0},
-	{"datetime_interval_code", SQL_DATETIME_INTERVAL_CODE, 0},
-	{"datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION, 0},
-	{"describe", SQL_DESCRIBE, 0},
-	{"descriptor", SQL_DESCRIPTOR, 0},
-	{"disconnect", SQL_DISCONNECT, 0},
-	{"found", SQL_FOUND, 0},
-	{"free", SQL_FREE, 0},
-	{"get", SQL_GET, 0},
-	{"go", SQL_GO, 0},
-	{"goto", SQL_GOTO, 0},
-	{"identified", SQL_IDENTIFIED, 0},
-	{"indicator", SQL_INDICATOR, 0},
-	{"key_member", SQL_KEY_MEMBER, 0},
-	{"length", SQL_LENGTH, 0},
-	{"long", SQL_LONG, 0},
-	{"nullable", SQL_NULLABLE, 0},
-	{"octet_length", SQL_OCTET_LENGTH, 0},
-	{"open", SQL_OPEN, 0},
-	{"output", SQL_OUTPUT, 0},
-	{"reference", SQL_REFERENCE, 0},
-	{"returned_length", SQL_RETURNED_LENGTH, 0},
-	{"returned_octet_length", SQL_RETURNED_OCTET_LENGTH, 0},
-	{"scale", SQL_SCALE, 0},
-	{"section", SQL_SECTION, 0},
-	{"short", SQL_SHORT, 0},
-	{"signed", SQL_SIGNED, 0},
-	{"sqlerror", SQL_SQLERROR, 0},
-	{"sqlprint", SQL_SQLPRINT, 0},
-	{"sqlwarning", SQL_SQLWARNING, 0},
-	{"stop", SQL_STOP, 0},
-	{"struct", SQL_STRUCT, 0},
-	{"unsigned", SQL_UNSIGNED, 0},
-	{"var", SQL_VAR, 0},
-	{"whenever", SQL_WHENEVER, 0},
+static const uint16 ECPGScanKeywordTokens[] = {
+#include "ecpg_kwlist.h"
 };

+#undef PG_KEYWORD
+
+
 /*
 * ScanECPGKeywordLookup - see if a given word is a keyword
 *
- * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
+ * Returns the token value of the keyword, or -1 if no match.
+ *
 * Keywords are matched using the same case-folding rules as in the backend.
 */
-const ScanKeyword *
+int
 ScanECPGKeywordLookup(const char *text)
 {
-	const ScanKeyword *res;
+	int			kwnum;

 	/* First check SQL symbols defined by the backend. */
-	res = ScanKeywordLookup(text, SQLScanKeywords, NumSQLScanKeywords);
-	if (res)
-		return res;
+	kwnum = ScanKeywordLookup(text, &ScanKeywords);
+	if (kwnum >= 0)
+		return SQLScanKeywordTokens[kwnum];

 	/* Try ECPG-specific keywords. */
-	res = ScanKeywordLookup(text, ECPGScanKeywords, lengthof(ECPGScanKeywords));
-	if (res)
-		return res;
+	kwnum = ScanKeywordLookup(text, &ScanECPGKeywords);
+	if (kwnum >= 0)
+		return ECPGScanKeywordTokens[kwnum];

-	return NULL;
+	return -1;
 }
--- a/src/interfaces/ecpg/preproc/ecpg_kwlist.h
+++ b/src/interfaces/ecpg/preproc/ecpg_kwlist.h
@ -0,0 +1,68 @@
+/*-------------------------------------------------------------------------
+ *
+ * ecpg_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/interfaces/ecpg/preproc/ecpg_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef ECPG_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *		 search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("allocate", SQL_ALLOCATE)
+PG_KEYWORD("autocommit", SQL_AUTOCOMMIT)
+PG_KEYWORD("bool", SQL_BOOL)
+PG_KEYWORD("break", SQL_BREAK)
+PG_KEYWORD("cardinality", SQL_CARDINALITY)
+PG_KEYWORD("connect", SQL_CONNECT)
+PG_KEYWORD("count", SQL_COUNT)
+PG_KEYWORD("datetime_interval_code", SQL_DATETIME_INTERVAL_CODE)
+PG_KEYWORD("datetime_interval_precision", SQL_DATETIME_INTERVAL_PRECISION)
+PG_KEYWORD("describe", SQL_DESCRIBE)
+PG_KEYWORD("descriptor", SQL_DESCRIPTOR)
+PG_KEYWORD("disconnect", SQL_DISCONNECT)
+PG_KEYWORD("found", SQL_FOUND)
+PG_KEYWORD("free", SQL_FREE)
+PG_KEYWORD("get", SQL_GET)
+PG_KEYWORD("go", SQL_GO)
+PG_KEYWORD("goto", SQL_GOTO)
+PG_KEYWORD("identified", SQL_IDENTIFIED)
+PG_KEYWORD("indicator", SQL_INDICATOR)
+PG_KEYWORD("key_member", SQL_KEY_MEMBER)
+PG_KEYWORD("length", SQL_LENGTH)
+PG_KEYWORD("long", SQL_LONG)
+PG_KEYWORD("nullable", SQL_NULLABLE)
+PG_KEYWORD("octet_length", SQL_OCTET_LENGTH)
+PG_KEYWORD("open", SQL_OPEN)
+PG_KEYWORD("output", SQL_OUTPUT)
+PG_KEYWORD("reference", SQL_REFERENCE)
+PG_KEYWORD("returned_length", SQL_RETURNED_LENGTH)
+PG_KEYWORD("returned_octet_length", SQL_RETURNED_OCTET_LENGTH)
+PG_KEYWORD("scale", SQL_SCALE)
+PG_KEYWORD("section", SQL_SECTION)
+PG_KEYWORD("short", SQL_SHORT)
+PG_KEYWORD("signed", SQL_SIGNED)
+PG_KEYWORD("sqlerror", SQL_SQLERROR)
+PG_KEYWORD("sqlprint", SQL_SQLPRINT)
+PG_KEYWORD("sqlwarning", SQL_SQLWARNING)
+PG_KEYWORD("stop", SQL_STOP)
+PG_KEYWORD("struct", SQL_STRUCT)
+PG_KEYWORD("unsigned", SQL_UNSIGNED)
+PG_KEYWORD("var", SQL_VAR)
+PG_KEYWORD("whenever", SQL_WHENEVER)
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@ -17,24 +17,22 @@

 /*
 * This is much trickier than it looks.  We are #include'ing kwlist.h
- * but the "value" numbers that go into the table are from preproc.h
- * not the backend's gram.h.  Therefore this table will recognize all
- * keywords known to the backend, but will supply the token numbers used
+ * but the token numbers that go into the table are from preproc.h
+ * not the backend's gram.h.  Therefore this token table will match
+ * the ScanKeywords table supplied from common/keywords.c, including all
+ * keywords known to the backend, but it will supply the token numbers used
 * by ecpg's grammar, which is what we need.  The ecpg grammar must
 * define all the same token names the backend does, else we'll get
 * undefined-symbol failures in this compile.
 */

-#include "common/keywords.h"
-
 #include "preproc_extern.h"
 #include "preproc.h"

+#define PG_KEYWORD(kwname, value, category) value,

-#define PG_KEYWORD(a,b,c) {a,b,c},
-
-const ScanKeyword SQLScanKeywords[] = {
+const uint16 SQLScanKeywordTokens[] = {
 #include "parser/kwlist.h"
 };

-const int	NumSQLScanKeywords = lengthof(SQLScanKeywords);
+#undef PG_KEYWORD
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@ -920,19 +920,19 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				}

 {identifier}	{
-					const ScanKeyword  *keyword;
-
 					if (!isdefine())
 					{
+						int		kwvalue;
+
 						/* Is it an SQL/ECPG keyword? */
-						keyword = ScanECPGKeywordLookup(yytext);
-						if (keyword != NULL)
-							return keyword->value;
+						kwvalue = ScanECPGKeywordLookup(yytext);
+						if (kwvalue >= 0)
+							return kwvalue;

 						/* Is it a C keyword? */
-						keyword = ScanCKeywordLookup(yytext);
-						if (keyword != NULL)
-							return keyword->value;
+						kwvalue = ScanCKeywordLookup(yytext);
+						if (kwvalue >= 0)
+							return kwvalue;

 						/*
 						 * None of the above.  Return it as an identifier.
@ -1010,12 +1010,11 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						return CPP_LINE;
 					}
 <C>{identifier}		{
-						const ScanKeyword		*keyword;
-
 						/*
 						 * Try to detect a function name:
 						 * look for identifiers at the global scope
-						 * keep the last identifier before the first '(' and '{' */
+						 * keep the last identifier before the first '(' and '{'
+						 */
 						if (braces_open == 0 && parenths_open == 0)
 						{
 							if (current_function)
@ -1026,9 +1025,11 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						/* however, some defines have to be taken care of for compatibility */
 						if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
 						{
-							keyword = ScanCKeywordLookup(yytext);
-							if (keyword != NULL)
-								return keyword->value;
+							int		kwvalue;
+
+							kwvalue = ScanCKeywordLookup(yytext);
+							if (kwvalue >= 0)
+								return kwvalue;
 							else
 							{
 								base_yylval.str = mm_strdup(yytext);
--- a/src/interfaces/ecpg/preproc/preproc_extern.h
+++ b/src/interfaces/ecpg/preproc/preproc_extern.h
@ -59,8 +59,7 @@ extern struct when when_error,
 extern struct ECPGstruct_member *struct_member_list[STRUCT_DEPTH];

 /* Globals from keywords.c */
-extern const ScanKeyword SQLScanKeywords[];
-extern const int NumSQLScanKeywords;
+extern const uint16 SQLScanKeywordTokens[];

 /* functions */

@ -102,8 +101,8 @@ extern void check_indicator(struct ECPGtype *);
 extern void remove_typedefs(int);
 extern void remove_variables(int);
 extern struct variable *new_variable(const char *, struct ECPGtype *, int);
-extern const ScanKeyword *ScanCKeywordLookup(const char *);
-extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
+extern int	ScanCKeywordLookup(const char *text);
+extern int	ScanECPGKeywordLookup(const char *text);
 extern void parser_init(void);
 extern int	filtered_base_yylex(void);

--- a/src/pl/plpgsql/src/.gitignore
+++ b/src/pl/plpgsql/src/.gitignore
@ -1,5 +1,7 @@
 /pl_gram.c
 /pl_gram.h
+/pl_reserved_kwlist_d.h
+/pl_unreserved_kwlist_d.h
 /plerrcodes.h
 /log/
 /results/
--- a/src/pl/plpgsql/src/Makefile
+++ b/src/pl/plpgsql/src/Makefile
@ -29,6 +29,8 @@ REGRESS_OPTS = --dbname=$(PL_TESTDB)
 REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record \
 	plpgsql_cache plpgsql_transaction plpgsql_varprops

+GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl
+
 all: all-lib

 # Shared library stuff
@ -61,6 +63,7 @@ uninstall-headers:

 # Force these dependencies to be known even without dependency info built:
 pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h plerrcodes.h
+pl_scanner.o: pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h

 # See notes in src/backend/parser/Makefile about the following two rules
 pl_gram.h: pl_gram.c
@ -72,6 +75,13 @@ pl_gram.c: BISONFLAGS += -d
 plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.pl
 	$(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@

+# generate keyword headers for the scanner
+pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST)
+	$(PERL) $(GEN_KEYWORDLIST) --varname ReservedPLKeywords $<
+
+pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST)
+	$(PERL) $(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $<
+

 check: submake
 	$(pg_regress_check) $(REGRESS_OPTS) $(REGRESS)
@ -84,13 +94,14 @@ submake:
 	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X)


-distprep: pl_gram.h pl_gram.c plerrcodes.h
+distprep: pl_gram.h pl_gram.c plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h

-# pl_gram.c, pl_gram.h and plerrcodes.h are in the distribution tarball,
-# so they are not cleaned here.
+# pl_gram.c, pl_gram.h, plerrcodes.h, pl_reserved_kwlist_d.h, and
+# pl_unreserved_kwlist_d.h are in the distribution tarball, so they
+# are not cleaned here.
 clean distclean: clean-lib
 	rm -f $(OBJS)
 	rm -rf $(pg_regress_clean_files)

 maintainer-clean: distclean
-	rm -f pl_gram.c pl_gram.h plerrcodes.h
+	rm -f pl_gram.c pl_gram.h plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h
--- a/src/pl/plpgsql/src/pl_reserved_kwlist.h
+++ b/src/pl/plpgsql/src/pl_reserved_kwlist.h
@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_reserved_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/pl/plpgsql/src/pl_reserved_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef PL_RESERVED_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * Be careful not to put a word in both this list and pl_unreserved_kwlist.h.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *		 search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("all", K_ALL)
+PG_KEYWORD("begin", K_BEGIN)
+PG_KEYWORD("by", K_BY)
+PG_KEYWORD("case", K_CASE)
+PG_KEYWORD("declare", K_DECLARE)
+PG_KEYWORD("else", K_ELSE)
+PG_KEYWORD("end", K_END)
+PG_KEYWORD("execute", K_EXECUTE)
+PG_KEYWORD("for", K_FOR)
+PG_KEYWORD("foreach", K_FOREACH)
+PG_KEYWORD("from", K_FROM)
+PG_KEYWORD("if", K_IF)
+PG_KEYWORD("in", K_IN)
+PG_KEYWORD("into", K_INTO)
+PG_KEYWORD("loop", K_LOOP)
+PG_KEYWORD("not", K_NOT)
+PG_KEYWORD("null", K_NULL)
+PG_KEYWORD("or", K_OR)
+PG_KEYWORD("strict", K_STRICT)
+PG_KEYWORD("then", K_THEN)
+PG_KEYWORD("to", K_TO)
+PG_KEYWORD("using", K_USING)
+PG_KEYWORD("when", K_WHEN)
+PG_KEYWORD("while", K_WHILE)
--- a/src/pl/plpgsql/src/pl_scanner.c
+++ b/src/pl/plpgsql/src/pl_scanner.c
@ -22,16 +22,15 @@
 #include "pl_gram.h"			/* must be after parser/scanner.h */


-#define PG_KEYWORD(a,b,c) {a,b,c},
-
-
 /* Klugy flag to tell scanner how to look up identifiers */
 IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;

 /*
 * A word about keywords:
 *
- * We keep reserved and unreserved keywords in separate arrays.  The
+ * We keep reserved and unreserved keywords in separate headers.  Be careful
+ * not to put the same word in both headers.  Also be sure that pl_gram.y's
+ * unreserved_keyword production agrees with the unreserved header.  The
 * reserved keywords are passed to the core scanner, so they will be
 * recognized before (and instead of) any variable name.  Unreserved words
 * are checked for separately, usually after determining that the identifier
@ -57,130 +56,22 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
 */

-/*
- * Lists of keyword (name, token-value, category) entries.
- *
- * !!WARNING!!: These lists must be sorted by ASCII name, because binary
- *		 search is used to locate entries.
- *
- * Be careful not to put the same word in both lists.  Also be sure that
- * pl_gram.y's unreserved_keyword production agrees with the second list.
- */
+/* ScanKeywordList lookup data for PL/pgSQL keywords */
+#include "pl_reserved_kwlist_d.h"
+#include "pl_unreserved_kwlist_d.h"
+
+/* Token codes for PL/pgSQL keywords */
+#define PG_KEYWORD(kwname, value) value,

-static const ScanKeyword reserved_keywords[] = {
-	PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
-	PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
-	PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
-	PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
-	PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
-	PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
-	PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
-	PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
-	PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
-	PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
-	PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
-	PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
-	PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
-	PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
-	PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
-	PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
-	PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
-	PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
-	PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
-	PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
-	PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
-	PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
-	PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
-	PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
+static const uint16 ReservedPLKeywordTokens[] = {
+#include "pl_reserved_kwlist.h"
 };

-static const int num_reserved_keywords = lengthof(reserved_keywords);
-
-static const ScanKeyword unreserved_keywords[] = {
-	PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
-	PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
-	PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
-	PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
-	PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
-	PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
-	PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
-	PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
-	PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
-	PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
-	PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
-	PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
-	PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
-	PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
-	PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD)
-	PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
-	PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
-	PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
+static const uint16 UnreservedPLKeywordTokens[] = {
+#include "pl_unreserved_kwlist.h"
 };

-static const int num_unreserved_keywords = lengthof(unreserved_keywords);
+#undef PG_KEYWORD

 /*
 * This macro must recognize all tokens that can immediately precede a
@ -256,7 +147,7 @@ plpgsql_yylex(void)
 {
 	int			tok1;
 	TokenAuxData aux1;
-	const ScanKeyword *kw;
+	int			kwnum;

 	tok1 = internal_yylex(&aux1);
 	if (tok1 == IDENT || tok1 == PARAM)
@ -333,12 +224,12 @@ plpgsql_yylex(void)
 									   &aux1.lval.word))
 					tok1 = T_DATUM;
 				else if (!aux1.lval.word.quoted &&
-						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
-												 unreserved_keywords,
-												 num_unreserved_keywords)))
+						 (kwnum = ScanKeywordLookup(aux1.lval.word.ident,
+													&UnreservedPLKeywords)) >= 0)
 				{
-					aux1.lval.keyword = kw->name;
-					tok1 = kw->value;
+					aux1.lval.keyword = GetScanKeyword(kwnum,
+													   &UnreservedPLKeywords);
+					tok1 = UnreservedPLKeywordTokens[kwnum];
 				}
 				else
 					tok1 = T_WORD;
@ -375,12 +266,12 @@ plpgsql_yylex(void)
 								   &aux1.lval.word))
 				tok1 = T_DATUM;
 			else if (!aux1.lval.word.quoted &&
-					 (kw = ScanKeywordLookup(aux1.lval.word.ident,
-											 unreserved_keywords,
-											 num_unreserved_keywords)))
+					 (kwnum = ScanKeywordLookup(aux1.lval.word.ident,
+												&UnreservedPLKeywords)) >= 0)
 			{
-				aux1.lval.keyword = kw->name;
-				tok1 = kw->value;
+				aux1.lval.keyword = GetScanKeyword(kwnum,
+												   &UnreservedPLKeywords);
+				tok1 = UnreservedPLKeywordTokens[kwnum];
 			}
 			else
 				tok1 = T_WORD;
@ -497,9 +388,9 @@ plpgsql_token_is_unreserved_keyword(int token)
 {
 	int			i;

-	for (i = 0; i < num_unreserved_keywords; i++)
+	for (i = 0; i < lengthof(UnreservedPLKeywordTokens); i++)
 	{
-		if (unreserved_keywords[i].value == token)
+		if (UnreservedPLKeywordTokens[i] == token)
 			return true;
 	}
 	return false;
@ -696,7 +587,7 @@ plpgsql_scanner_init(const char *str)
 {
 	/* Start up the core scanner */
 	yyscanner = scanner_init(str, &core_yy,
-							 reserved_keywords, num_reserved_keywords);
+							 &ReservedPLKeywords, ReservedPLKeywordTokens);

 	/*
 	 * scanorig points to the original string, which unlike the scanner's
--- a/src/pl/plpgsql/src/pl_unreserved_kwlist.h
+++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h
@ -0,0 +1,111 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_unreserved_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by
+ * automatic tools.  The exact representation of a keyword is determined
+ * by the PG_KEYWORD macro, which is not defined in this file; it can
+ * be defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/pl/plpgsql/src/pl_unreserved_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* There is deliberately not an #ifndef PL_UNRESERVED_KWLIST_H here. */
+
+/*
+ * List of (keyword-name, keyword-token-value) pairs.
+ *
+ * Do not put a word in both this list and pl_reserved_kwlist.h.  Also be sure
+ * that pl_gram.y's unreserved_keyword production agrees with this list.
+ *
+ * !!WARNING!!: This list must be sorted by ASCII name, because binary
+ *		 search is used to locate entries.
+ */
+
+/* name, value */
+PG_KEYWORD("absolute", K_ABSOLUTE)
+PG_KEYWORD("alias", K_ALIAS)
+PG_KEYWORD("array", K_ARRAY)
+PG_KEYWORD("assert", K_ASSERT)
+PG_KEYWORD("backward", K_BACKWARD)
+PG_KEYWORD("call", K_CALL)
+PG_KEYWORD("close", K_CLOSE)
+PG_KEYWORD("collate", K_COLLATE)
+PG_KEYWORD("column", K_COLUMN)
+PG_KEYWORD("column_name", K_COLUMN_NAME)
+PG_KEYWORD("commit", K_COMMIT)
+PG_KEYWORD("constant", K_CONSTANT)
+PG_KEYWORD("constraint", K_CONSTRAINT)
+PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME)
+PG_KEYWORD("continue", K_CONTINUE)
+PG_KEYWORD("current", K_CURRENT)
+PG_KEYWORD("cursor", K_CURSOR)
+PG_KEYWORD("datatype", K_DATATYPE)
+PG_KEYWORD("debug", K_DEBUG)
+PG_KEYWORD("default", K_DEFAULT)
+PG_KEYWORD("detail", K_DETAIL)
+PG_KEYWORD("diagnostics", K_DIAGNOSTICS)
+PG_KEYWORD("do", K_DO)
+PG_KEYWORD("dump", K_DUMP)
+PG_KEYWORD("elseif", K_ELSIF)
+PG_KEYWORD("elsif", K_ELSIF)
+PG_KEYWORD("errcode", K_ERRCODE)
+PG_KEYWORD("error", K_ERROR)
+PG_KEYWORD("exception", K_EXCEPTION)
+PG_KEYWORD("exit", K_EXIT)
+PG_KEYWORD("fetch", K_FETCH)
+PG_KEYWORD("first", K_FIRST)
+PG_KEYWORD("forward", K_FORWARD)
+PG_KEYWORD("get", K_GET)
+PG_KEYWORD("hint", K_HINT)
+PG_KEYWORD("import", K_IMPORT)
+PG_KEYWORD("info", K_INFO)
+PG_KEYWORD("insert", K_INSERT)
+PG_KEYWORD("is", K_IS)
+PG_KEYWORD("last", K_LAST)
+PG_KEYWORD("log", K_LOG)
+PG_KEYWORD("message", K_MESSAGE)
+PG_KEYWORD("message_text", K_MESSAGE_TEXT)
+PG_KEYWORD("move", K_MOVE)
+PG_KEYWORD("next", K_NEXT)
+PG_KEYWORD("no", K_NO)
+PG_KEYWORD("notice", K_NOTICE)
+PG_KEYWORD("open", K_OPEN)
+PG_KEYWORD("option", K_OPTION)
+PG_KEYWORD("perform", K_PERFORM)
+PG_KEYWORD("pg_context", K_PG_CONTEXT)
+PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME)
+PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT)
+PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL)
+PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT)
+PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS)
+PG_KEYWORD("prior", K_PRIOR)
+PG_KEYWORD("query", K_QUERY)
+PG_KEYWORD("raise", K_RAISE)
+PG_KEYWORD("relative", K_RELATIVE)
+PG_KEYWORD("reset", K_RESET)
+PG_KEYWORD("return", K_RETURN)
+PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE)
+PG_KEYWORD("reverse", K_REVERSE)
+PG_KEYWORD("rollback", K_ROLLBACK)
+PG_KEYWORD("row_count", K_ROW_COUNT)
+PG_KEYWORD("rowtype", K_ROWTYPE)
+PG_KEYWORD("schema", K_SCHEMA)
+PG_KEYWORD("schema_name", K_SCHEMA_NAME)
+PG_KEYWORD("scroll", K_SCROLL)
+PG_KEYWORD("set", K_SET)
+PG_KEYWORD("slice", K_SLICE)
+PG_KEYWORD("sqlstate", K_SQLSTATE)
+PG_KEYWORD("stacked", K_STACKED)
+PG_KEYWORD("table", K_TABLE)
+PG_KEYWORD("table_name", K_TABLE_NAME)
+PG_KEYWORD("type", K_TYPE)
+PG_KEYWORD("use_column", K_USE_COLUMN)
+PG_KEYWORD("use_variable", K_USE_VARIABLE)
+PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT)
+PG_KEYWORD("warning", K_WARNING)
--- a/src/tools/gen_keywordlist.pl
+++ b/src/tools/gen_keywordlist.pl
@ -0,0 +1,176 @@
+#----------------------------------------------------------------------
+#
+# gen_keywordlist.pl
+#	Perl script that transforms a list of keywords into a ScanKeywordList
+#	data structure that can be passed to ScanKeywordLookup().
+#
+# The input is a C header file containing a series of macro calls
+#	PG_KEYWORD("keyword", ...)
+# Lines not starting with PG_KEYWORD are ignored.  The keywords are
+# implicitly numbered 0..N-1 in order of appearance in the header file.
+# Currently, the keywords are required to appear in ASCII order.
+#
+# The output is a C header file that defines a "const ScanKeywordList"
+# variable named according to the -v switch ("ScanKeywords" by default).
+# The variable is marked "static" unless the -e switch is given.
+#
+# The input is read and checked in full before the output file is opened,
+# so a rejected keyword list does not leave a partial header behind.
+#
+#
+# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/tools/gen_keywordlist.pl
+#
+#----------------------------------------------------------------------
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $output_path = '';
+my $extern = 0;
+my $varname = 'ScanKeywords';
+
+GetOptions(
+	'output:s' => \$output_path,
+	'extern'   => \$extern,
+	'varname:s' => \$varname) || usage();
+
+my $kw_input_file = shift @ARGV || die "No input file.\n";
+
+# Make sure output_path ends in a slash if needed.
+if ($output_path ne '' && substr($output_path, -1) ne '/')
+{
+	$output_path .= '/';
+}
+
+$kw_input_file =~ /(\w+)\.h$/ || die "Input file must be named something.h.\n";
+my $base_filename = $1 . '_d';
+my $kw_def_file = $output_path . $base_filename . '.h';
+
+# Parse input file for keyword names.  This is done before the output file
+# is opened, so that bad input cannot leave a truncated header behind.
+open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";
+
+my @keywords;
+while (<$kif>)
+{
+	if (/^PG_KEYWORD\("(\w+)"/)
+	{
+		push @keywords, $1;
+	}
+}
+
+close($kif);
+
+# An empty list would produce an empty (invalid) C array initializer.
+die "No PG_KEYWORD entries found in $kw_input_file\n" if !@keywords;
+
+# Error out if the keyword names are not in ASCII order.
+for my $i (0..$#keywords - 1)
+{
+	die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n|
+	  if ($keywords[$i] cmp $keywords[$i + 1]) >= 0;
+}
+
+# Compute each keyword's offset into the keyword string, and the max keyword
+# length.  The offsets are emitted as uint16, so they must fit in 16 bits.
+my @offsets;
+my $offset = 0;
+my $max_len = 0;
+foreach my $name (@keywords)
+{
+	my $this_length = length($name);
+
+	die qq|Offset of keyword "$name" in $kw_input_file does not fit in uint16\n|
+	  if $offset > 0xFFFF;
+
+	push @offsets, $offset;
+
+	# Calculate the cumulative offset of the next keyword,
+	# taking into account the null terminator.
+	$offset += $this_length + 1;
+
+	# Update max keyword length.
+	$max_len = $this_length if $max_len < $this_length;
+}
+
+# All checks passed; now create the output file.
+open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";
+
+# Opening boilerplate for keyword definition header.
+printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
+/*-------------------------------------------------------------------------
+ *
+ * %s.h
+ *    List of keywords represented as a ScanKeywordList.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ *  ******************************
+ *  *** DO NOT EDIT THIS FILE! ***
+ *  ******************************
+ *
+ *  It has been GENERATED by src/tools/gen_keywordlist.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef %s_H
+#define %s_H
+
+#include "common/kwlookup.h"
+
+EOM
+
+# Emit the string containing all the keywords.
+
+printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
+print $kwdef join qq|\\0"\n\t"|, @keywords;
+print $kwdef qq|";\n\n|;
+
+# Emit an array of numerical offsets which will be used to index into the
+# keyword string.
+
+printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;
+print $kwdef "\t$_,\n" foreach @offsets;
+print $kwdef "};\n\n";
+
+# Emit a macro defining the number of keywords.
+# (In some places it's useful to have access to that as a constant.)
+
+printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;
+
+# Emit the struct that wraps all this lookup info into one variable.
+
+print $kwdef "static " if !$extern;
+printf $kwdef "const ScanKeywordList %s = {\n", $varname;
+printf $kwdef qq|\t%s_kw_string,\n|, $varname;
+printf $kwdef qq|\t%s_kw_offsets,\n|, $varname;
+printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
+printf $kwdef qq|\t%d\n|, $max_len;
+print $kwdef "};\n\n";
+
+printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;
+
+# Buffered write errors (e.g. a full disk) only show up at close time.
+close($kwdef) || die "$kw_def_file: $!\n";
+
+
+sub usage
+{
+	die <<EOM;
+Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] input_file
+    --output   Output directory (default '.')
+    --varname  Name for ScanKeywordList variable (default 'ScanKeywords')
+    --extern   Allow the ScanKeywordList variable to be globally visible
+
+gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
+The output filename is derived from the input file by inserting _d,
+for example kwlist_d.h is produced from kwlist.h.
+EOM
+}
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@ -118,7 +118,7 @@ sub mkvcbuild

 	our @pgcommonallfiles = qw(
 	  base64.c config_info.c controldata_utils.c exec.c file_perm.c ip.c
-	  keywords.c link-canary.c md5.c
+	  keywords.c kwlookup.c link-canary.c md5.c
 	  pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c unicode_norm.c username.c
 	  wait_error.c);
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@ -409,6 +409,42 @@ sub GenerateFiles
 		chdir('../../..');
 	}

+	if (IsNewer(
+			'src/common/kwlist_d.h',
+			'src/include/parser/kwlist.h'))
+	{
+		print "Generating kwlist_d.h...\n";
+		system('perl src/tools/gen_keywordlist.pl --extern -o src/common src/include/parser/kwlist.h');
+	}
+
+	if (IsNewer(
+			'src/pl/plpgsql/src/pl_reserved_kwlist_d.h',
+			'src/pl/plpgsql/src/pl_reserved_kwlist.h')
+		|| IsNewer(
+			'src/pl/plpgsql/src/pl_unreserved_kwlist_d.h',
+			'src/pl/plpgsql/src/pl_unreserved_kwlist.h'))
+	{
+		print "Generating pl_reserved_kwlist_d.h and pl_unreserved_kwlist_d.h...\n";
+		chdir('src/pl/plpgsql/src');
+		system('perl ../../../tools/gen_keywordlist.pl --varname ReservedPLKeywords pl_reserved_kwlist.h');
+		system('perl ../../../tools/gen_keywordlist.pl --varname UnreservedPLKeywords pl_unreserved_kwlist.h');
+		chdir('../../../..');
+	}
+
+	if (IsNewer(
+			'src/interfaces/ecpg/preproc/c_kwlist_d.h',
+			'src/interfaces/ecpg/preproc/c_kwlist.h')
+		|| IsNewer(
+			'src/interfaces/ecpg/preproc/ecpg_kwlist_d.h',
+			'src/interfaces/ecpg/preproc/ecpg_kwlist.h'))
+	{
+		print "Generating c_kwlist_d.h and ecpg_kwlist_d.h...\n";
+		chdir('src/interfaces/ecpg/preproc');
+		system('perl ../../../tools/gen_keywordlist.pl --varname ScanCKeywords c_kwlist.h');
+		system('perl ../../../tools/gen_keywordlist.pl --varname ScanECPGKeywords ecpg_kwlist.h');
+		chdir('../../../..');
+	}
+
 	if (IsNewer(
 			'src/interfaces/ecpg/preproc/preproc.y',
 			'src/backend/parser/gram.y'))
--- a/src/tools/msvc/clean.bat
+++ b/src/tools/msvc/clean.bat
@ -64,6 +64,11 @@ if %DIST%==1 if exist src\pl\tcl\pltclerrcodes.h del /q src\pl\tcl\pltclerrcodes
 if %DIST%==1 if exist src\backend\utils\sort\qsort_tuple.c del /q src\backend\utils\sort\qsort_tuple.c
 if %DIST%==1 if exist src\bin\psql\sql_help.c del /q src\bin\psql\sql_help.c
 if %DIST%==1 if exist src\bin\psql\sql_help.h del /q src\bin\psql\sql_help.h
+if %DIST%==1 if exist src\common\kwlist_d.h del /q src\common\kwlist_d.h
+if %DIST%==1 if exist src\pl\plpgsql\src\pl_reserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_reserved_kwlist_d.h
+if %DIST%==1 if exist src\pl\plpgsql\src\pl_unreserved_kwlist_d.h del /q src\pl\plpgsql\src\pl_unreserved_kwlist_d.h
+if %DIST%==1 if exist src\interfaces\ecpg\preproc\c_kwlist_d.h del /q src\interfaces\ecpg\preproc\c_kwlist_d.h
+if %DIST%==1 if exist src\interfaces\ecpg\preproc\ecpg_kwlist_d.h del /q src\interfaces\ecpg\preproc\ecpg_kwlist_d.h
 if %DIST%==1 if exist src\interfaces\ecpg\preproc\preproc.y del /q src\interfaces\ecpg\preproc\preproc.y
 if %DIST%==1 if exist src\backend\catalog\postgres.bki del /q src\backend\catalog\postgres.bki
 if %DIST%==1 if exist src\backend\catalog\postgres.description del /q src\backend\catalog\postgres.description