Improve contrib/pg_stat_statements' handling of garbage collection failure.

If we can't read the query texts file (whether because out-of-memory, or for some other reason), give up and reset the file to empty, discarding all stored query texts, though not the statistics per se. We used to leave things alone and hope for better luck next time, but the problem is that the file is only going to get bigger and even harder to slurp into memory. Better to do something that will get us out of trouble. Likewise reset the file to empty for any other failure within gc_qtexts(). The previous behavior after a write error was to discard query texts but not do anything to truncate the file, which is just weird. Also, increase the maximum supported file size from MaxAllocSize to MaxAllocHugeSize; this makes it more likely we'll be able to do a garbage collection successfully. Also, fix recalculation of mean_query_len within entry_dealloc() to match the calculation in gc_qtexts(). The previous coding overlooked the possibility of dropped texts (query_len == -1) and would underestimate the mean of the remaining entries in such cases, thus possibly causing excess garbage collection cycles. In passing, add some errdetail to the log entry that complains about insufficient memory to read the query texts file, which after all was Jim Nasby's original complaint. Back-patch to 9.4 where the current handling of query texts was introduced. Peter Geoghegan, rather editorialized upon by me
10 years ago · 8bbe4cbd9b
parent 86b1e6784b
commit 8bbe4cbd9b
1 changed files with 75 additions and 18 deletions
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@ -170,7 +170,7 @@ typedef struct pgssEntry
 	pgssHashKey key;			/* hash key of entry - MUST BE FIRST */
 	Counters	counters;		/* the statistics for this query */
 	Size		query_offset;	/* query text offset in external file */
-	int			query_len;		/* # of valid bytes in query string */
+	int			query_len;		/* # of valid bytes in query string, or -1 */
 	int			encoding;		/* query text encoding */
 	slock_t		mutex;			/* protects the counters only */
 } pgssEntry;
@ -1705,7 +1705,8 @@ entry_cmp(const void *lhs, const void *rhs)
 }
 /*
- * Deallocate least used entries.
+ * Deallocate least-used entries.
 *
 * Caller must hold an exclusive lock on pgss->lock.
 */
 static void
@ -1716,17 +1717,27 @@ entry_dealloc(void)
 	pgssEntry  *entry;
 	int			nvictims;
 	int			i;
-	Size		totlen = 0;
+	Size		tottextlen;
 	int			nvalidtexts;
 	/*
 	 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
 	 * While we're scanning the table, apply the decay factor to the usage
-	 * values.
+	 * values, and update the mean query length.
 	 *
 	 * Note that the mean query length is almost immediately obsolete, since
 	 * we compute it before not after discarding the least-used entries.
 	 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
 	 * making two passes to get a more current result.  Likewise, the new
 	 * cur_median_usage includes the entries we're about to zap.
 	 */
 	entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
 	i = 0;
 	tottextlen = 0;
 	nvalidtexts = 0;
 	hash_seq_init(&hash_seq, pgss_hash);
 	while ((entry = hash_seq_search(&hash_seq)) != NULL)
 	{
@ -1736,20 +1747,27 @@ entry_dealloc(void)
 			entry->counters.usage *= STICKY_DECREASE_FACTOR;
 		else
 			entry->counters.usage *= USAGE_DECREASE_FACTOR;
-		/* Accumulate total size, too. */
+		/* In the mean length computation, ignore dropped texts. */
-		totlen += entry->query_len + 1;
+		if (entry->query_len >= 0)
 		{
 			tottextlen += entry->query_len + 1;
 			nvalidtexts++;
 		}
 	}
 	/* Sort into increasing order by usage */
 	qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
 	if (i > 0)
 	{
 	/* Record the (approximate) median usage */
 	if (i > 0)
 		pgss->cur_median_usage = entries[i / 2]->counters.usage;
 	/* Record the mean query length */
-		pgss->mean_query_len = totlen / i;
+	if (nvalidtexts > 0)
-	}
+		pgss->mean_query_len = tottextlen / nvalidtexts;
 	else
 		pgss->mean_query_len = ASSUMED_LENGTH_INIT;
 	/* Now zap an appropriate fraction of lowest-usage entries */
 	nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
 	nvictims = Min(nvictims, i);
@ -1892,7 +1910,7 @@ qtext_load_file(Size *buffer_size)
 	}
 	/* Allocate buffer; beware that off_t might be wider than size_t */
-	if (stat.st_size <= MaxAllocSize)
+	if (stat.st_size <= MaxAllocHugeSize)
 		buf = (char *) malloc(stat.st_size);
 	else
 		buf = NULL;
@ -1900,7 +1918,9 @@ qtext_load_file(Size *buffer_size)
 	{
 		ereport(LOG,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
+				 errmsg("out of memory"),
 				 errdetail("Could not allocate enough memory to read pg_stat_statement file \"%s\".",
 						   PGSS_TEXT_FILE)));
 		CloseTransientFile(fd);
 		return NULL;
 	}
@ -2002,13 +2022,17 @@ need_gc_qtexts(void)
 * occur in the foreseeable future.
 *
 * The caller must hold an exclusive lock on pgss->lock.
 *
 * At the first sign of trouble we unlink the query text file to get a clean
 * slate (although existing statistics are retained), rather than risk
 * thrashing by allowing the same problem case to recur indefinitely.
 */
 static void
 gc_qtexts(void)
 {
 	char	   *qbuffer;
 	Size		qbuffer_size;
-	FILE	   *qfile;
+	FILE	   *qfile = NULL;
 	HASH_SEQ_STATUS hash_seq;
 	pgssEntry  *entry;
 	Size		extent;
@ -2023,12 +2047,15 @@ gc_qtexts(void)
 		return;
 	/*
-	 * Load the old texts file.  If we fail (out of memory, for instance) just
+	 * Load the old texts file.  If we fail (out of memory, for instance),
-	 * skip the garbage collection.
+	 * invalidate query texts.  Hopefully this is rare.  It might seem better
 	 * to leave things alone on an OOM failure, but the problem is that the
 	 * file is only going to get bigger; hoping for a future non-OOM result is
 	 * risky and can easily lead to complete denial of service.
 	 */
 	qbuffer = qtext_load_file(&qbuffer_size);
 	if (qbuffer == NULL)
-		return;
+		goto gc_fail;
 	/*
 	 * We overwrite the query texts file in place, so as to reduce the risk of
@ -2063,6 +2090,7 @@ gc_qtexts(void)
 			/* Trouble ... drop the text */
 			entry->query_offset = 0;
 			entry->query_len = -1;
 			/* entry will not be counted in mean query length computation */
 			continue;
 		}
@ -2147,7 +2175,36 @@ gc_fail:
 		entry->query_len = -1;
 	}
-	/* Seems like a good idea to bump the GC count even though we failed */
+	/*
 	 * Destroy the query text file and create a new, empty one
 	 */
 	(void) unlink(PGSS_TEXT_FILE);
 	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
 	if (qfile == NULL)
 		ereport(LOG,
 				(errcode_for_file_access(),
 			  errmsg("could not write new pg_stat_statement file \"%s\": %m",
 					 PGSS_TEXT_FILE)));
 	else
 		FreeFile(qfile);
 	/* Reset the shared extent pointer */
 	pgss->extent = 0;
 	/* Reset mean_query_len to match the new state */
 	pgss->mean_query_len = ASSUMED_LENGTH_INIT;
 	/*
 	 * Bump the GC count even though we failed.
 	 *
 	 * This is needed to make concurrent readers of file without any lock on
 	 * pgss->lock notice existence of new version of file.  Once readers
 	 * subsequently observe a change in GC count with pgss->lock held, that
 	 * forces a safe reopen of file.  Writers also require that we bump here,
 	 * of course.  (As required by locking protocol, readers and writers don't
 	 * trust earlier file contents until gc_count is found unchanged after
 	 * pgss->lock acquired in shared or exclusive mode respectively.)
 	 */
 	record_gc_qtexts();
 }