Add file_extend_method=posix_fallocate,write_zeros.

Provide a way to disable the use of posix_fallocate() for relation
files.  It was introduced by commit 4d330a61bb.  The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:

 * BTRFS compression is disabled by the use of posix_fallocate()
 * XFS could produce spurious ENOSPC errors in some Linux kernel
   versions, though that problem is reported to have been fixed

The default remains file_extend_method=posix_fallocate where available, as
before.  The write_zeros option behaves like PostgreSQL releases before 16,
except that it now writes multiple blocks at a time.

Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
pull/271/head
Thomas Munro 9 months ago
parent e35add48cc
commit f94e9141a0
  1. 37
      doc/src/sgml/config.sgml
  2. 3
      src/backend/storage/file/fd.c
  3. 21
      src/backend/storage/smgr/md.c
  4. 7
      src/backend/utils/misc/guc_parameters.dat
  5. 9
      src/backend/utils/misc/guc_tables.c
  6. 4
      src/backend/utils/misc/postgresql.conf.sample
  7. 11
      src/include/storage/fd.h

@ -2412,6 +2412,43 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
<varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
<term><varname>file_extend_method</varname> (<type>enum</type>)
<indexterm>
<primary><varname>file_extend_method</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Specifies the method used to extend data files during bulk operations
such as <command>COPY</command>. The first available option is used as
the default, depending on the operating system:
<itemizedlist>
<listitem>
<para>
<literal>posix_fallocate</literal> (Unix) uses the standard POSIX
interface for allocating disk space, but is missing on some systems.
If it is present but the underlying file system doesn't support it,
this option silently falls back to <literal>write_zeros</literal>.
Current versions of BTRFS are known to disable compression when
this option is used.
This is the default on systems that have the function.
</para>
</listitem>
<listitem>
<para>
<literal>write_zeros</literal> extends files by writing out blocks
of zero bytes. This is the default on systems that don't have the
function <function>posix_fallocate</function>.
</para>
</listitem>
</itemizedlist>
The <literal>write_zeros</literal> method is always used when data
files are extended by 8 blocks or fewer.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
<indexterm>

@ -164,6 +164,9 @@ bool data_sync_retry = false;
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/*
 * How data files should be bulk-extended with zeros.
 *
 * Backing variable for the file_extend_method GUC; holds one of the
 * enum FileExtendMethod values and starts out as DEFAULT_FILE_EXTEND_METHOD.
 */
int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;

@ -602,13 +602,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* that decision should be made though? For now just use a cutoff of
* 8, anything between 4 and 8 worked OK in some local testing.
*/
if (numblocks > 8)
if (numblocks > 8 &&
file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
int ret;
int ret = 0;
ret = FileFallocate(v->mdfd_vfd,
seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
#ifdef HAVE_POSIX_FALLOCATE
if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
{
ret = FileFallocate(v->mdfd_vfd,
seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
}
else
#endif
{
elog(ERROR, "unsupported file_extend_method: %d",
file_extend_method);
}
if (ret != 0)
{
ereport(ERROR,

@ -1042,6 +1042,13 @@
options => 'file_copy_method_options',
},
{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
short_desc => 'Selects the method used for extending data files.',
variable => 'file_extend_method',
boot_val => 'DEFAULT_FILE_EXTEND_METHOD',
options => 'file_extend_method_options',
},
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',

@ -80,6 +80,7 @@
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/io_worker.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
@ -491,6 +492,14 @@ static const struct config_enum_entry file_copy_method_options[] = {
{NULL, 0, false}
};
/*
 * Values accepted by the file_extend_method GUC.
 *
 * "posix_fallocate" is offered only in builds that define
 * HAVE_POSIX_FALLOCATE; "write_zeros" is always offered.
 */
static const struct config_enum_entry file_extend_method_options[] = {
#ifdef HAVE_POSIX_FALLOCATE
{"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
#endif
{"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
{NULL, 0, false}
};
/*
* Options for enum values stored in other modules
*/

@ -179,6 +179,10 @@
# in kilobytes, or -1 for no limit
#file_copy_method = copy # copy, clone (if supported by OS)
#file_extend_method = posix_fallocate # the default is the first option supported
# by the operating system:
# posix_fallocate (most Unix-like systems)
# write_zeros
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue

@ -55,12 +55,23 @@ typedef int File;
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
/*
 * Methods for extending data files, selected by the file_extend_method
 * setting.
 *
 * FILE_EXTEND_METHOD_POSIX_FALLOCATE exists only in builds that define
 * HAVE_POSIX_FALLOCATE.  Member order matters: DEFAULT_FILE_EXTEND_METHOD
 * below is the literal 0, i.e. whichever member is declared first in this
 * build.
 */
enum FileExtendMethod
{
#ifdef HAVE_POSIX_FALLOCATE
FILE_EXTEND_METHOD_POSIX_FALLOCATE,
#endif
FILE_EXTEND_METHOD_WRITE_ZEROS,
};
/*
 * Default to the first available file_extend_method.
 *
 * With HAVE_POSIX_FALLOCATE defined, 0 is FILE_EXTEND_METHOD_POSIX_FALLOCATE;
 * otherwise 0 is FILE_EXTEND_METHOD_WRITE_ZEROS.
 */
#define DEFAULT_FILE_EXTEND_METHOD 0
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
extern PGDLLIMPORT int file_extend_method;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()

Loading…
Cancel
Save