aio: Add pg_aios view

The new view lists all IO handles that are currently in use and is mainly
useful for PG developers, but may also be useful when tuning PG.

Bumps catversion.

Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/uvrtrknj4kdytuboidbhwclo4gxhswwcpgadptsjvjqcluzmah%40brqs62irg4dt
pull/208/head
Andres Freund 3 months ago
parent 46250cdcb0
commit 60f566b4f2
  1. 294
      doc/src/sgml/system-views.sgml
  2. 7
      src/backend/catalog/system_views.sql
  3. 1
      src/backend/storage/aio/Makefile
  4. 230
      src/backend/storage/aio/aio_funcs.c
  5. 1
      src/backend/storage/aio/meson.build
  6. 2
      src/include/catalog/catversion.h
  7. 9
      src/include/catalog/pg_proc.dat
  8. 18
      src/test/regress/expected/privileges.out
  9. 16
      src/test/regress/expected/rules.out
  10. 3
      src/test/regress/sql/privileges.sql

@ -51,6 +51,11 @@
</thead>
<tbody>
<row>
<entry><link linkend="view-pg-aios"><structname>pg_aios</structname></link></entry>
<entry>In-use asynchronous IO handles</entry>
</row>
<row>
<entry><link linkend="view-pg-available-extensions"><structname>pg_available_extensions</structname></link></entry>
<entry>available extensions</entry>
@ -231,6 +236,295 @@
</table>
</sect1>
<sect1 id="view-pg-aios">
<title><structname>pg_aios</structname></title>
<indexterm zone="view-pg-aios">
<primary>pg_aios</primary>
</indexterm>
<para>
The <structname>pg_aios</structname> view lists all <xref
linkend="glossary-aio"/> handles that are currently in use. An I/O handle
is used to reference an I/O operation that is being prepared, executed or
is in the process of completing. <structname>pg_aios</structname> contains
one row for each I/O handle.
</para>
<para>
This view is mainly useful for developers of
<productname>PostgreSQL</productname>, but may also be useful when tuning
<productname>PostgreSQL</productname>.
</para>
<table>
<title><structname>pg_aios</structname> Columns</title>
<tgroup cols="1">
<thead>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
Column Type
</para>
<para>
Description
</para></entry>
</row>
</thead>
<tbody>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>pid</structfield> <type>int4</type>
</para>
<para>
Process ID of the server process that is issuing this I/O.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>io_id</structfield> <type>int4</type>
</para>
<para>
Identifier of the I/O handle. Handles are reused once the I/O has
completed (or if the handle is released before I/O is started). On reuse
<link linkend="view-pg-aios-io-generation">
<structname>pg_aios</structname>.<structfield>io_generation</structfield>
</link>
is incremented.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry" id="view-pg-aios-io-generation"><para role="column_definition">
<structfield>io_generation</structfield> <type>int8</type>
</para>
<para>
Generation of the I/O handle.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>state</structfield> <type>text</type>
</para>
<para>
State of the I/O handle:
<itemizedlist>
<listitem>
<para>
<literal>HANDED_OUT</literal>, referenced by code but not yet used
</para>
</listitem>
<listitem>
<para>
<literal>DEFINED</literal>, information necessary for execution is known
</para>
</listitem>
<listitem>
<para>
<literal>STAGED</literal>, ready for execution
</para>
</listitem>
<listitem>
<para>
<literal>SUBMITTED</literal>, submitted for execution
</para>
</listitem>
<listitem>
<para>
<literal>COMPLETED_IO</literal>, finished, but result has not yet been processed
</para>
</listitem>
<listitem>
<para>
<literal>COMPLETED_SHARED</literal>, shared completion processing completed
</para>
</listitem>
<listitem>
<para>
<literal>COMPLETED_LOCAL</literal>, backend local completion processing completed
</para>
</listitem>
</itemizedlist>
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>operation</structfield> <type>text</type>
</para>
<para>
Operation performed using the I/O handle:
<itemizedlist>
<listitem>
<para>
<literal>invalid</literal>, not yet known
</para>
</listitem>
<listitem>
<para>
<literal>readv</literal>, a vectored read
</para>
</listitem>
<listitem>
<para>
<literal>writev</literal>, a vectored write
</para>
</listitem>
</itemizedlist>
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>off</structfield> <type>int8</type>
</para>
<para>
Offset of the I/O operation.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>length</structfield> <type>int8</type>
</para>
<para>
Length of the I/O operation.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>target</structfield> <type>text</type>
</para>
<para>
The kind of object the I/O is targeting:
<itemizedlist spacing="compact">
<listitem>
<para>
<literal>smgr</literal>, I/O on relations
</para>
</listitem>
</itemizedlist>
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>handle_data_len</structfield> <type>int2</type>
</para>
<para>
Length of the data associated with the I/O operation. For I/O to/from
<xref linkend="guc-shared-buffers"/> and <xref
linkend="guc-temp-buffers"/>, this indicates the number of buffers the
I/O is operating on.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>raw_result</structfield> <type>int4</type>
</para>
<para>
Low-level result of the I/O operation, or NULL if the operation has not
yet completed.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>result</structfield> <type>text</type>
</para>
<para>
High-level result of the I/O operation:
<itemizedlist>
<listitem>
<para>
<literal>UNKNOWN</literal> means that the result of the
operation is not yet known.
</para>
</listitem>
<listitem>
<para>
<literal>OK</literal> means the I/O completed successfully.
</para>
</listitem>
<listitem>
<para>
<literal>PARTIAL</literal> means that the I/O completed without
error, but did not process all data. Commonly callers will need to
retry and perform the remainder of the work in a separate I/O.
</para>
</listitem>
<listitem>
<para>
<literal>WARNING</literal> means that the I/O completed without
error, but that execution of the I/O triggered a warning, e.g., when
encountering a corrupted buffer with <xref
linkend="guc-zero-damaged-pages"/> enabled.
</para>
</listitem>
<listitem>
<para>
<literal>ERROR</literal> means the I/O failed with an error.
</para>
</listitem>
</itemizedlist>
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>target_desc</structfield> <type>text</type>
</para>
<para>
Description of what the I/O operation is targeting.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>f_sync</structfield> <type>bool</type>
</para>
<para>
Flag indicating whether the I/O is executed synchronously.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>f_localmem</structfield> <type>bool</type>
</para>
<para>
Flag indicating whether the I/O references process local memory.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>f_buffered</structfield> <type>bool</type>
</para>
<para>
Flag indicating whether the I/O is buffered I/O.
</para></entry>
</row>
</tbody>
</tgroup>
</table>
<para>
The <structname>pg_aios</structname> view is read-only.
</para>
<para>
By default, the <structname>pg_aios</structname> view can be read only by
superusers or roles with privileges of the
<literal>pg_read_all_stats</literal> role.
</para>
</sect1>
<sect1 id="view-pg-available-extensions">
<title><structname>pg_available_extensions</structname></title>

@ -1391,3 +1391,10 @@ CREATE VIEW pg_stat_subscription_stats AS
-- Thin wrapper view over pg_get_wait_events().
CREATE VIEW pg_wait_events AS
    SELECT * FROM pg_get_wait_events();

-- Thin wrapper view over pg_get_aios(); one row per in-use AIO handle.
CREATE VIEW pg_aios AS
    SELECT * FROM pg_get_aios();

-- Neither the view nor the function behind it is readable by default ...
REVOKE ALL ON pg_aios FROM PUBLIC;
REVOKE EXECUTE ON FUNCTION pg_get_aios() FROM PUBLIC;

-- ... except by roles holding the privileges of pg_read_all_stats.
GRANT SELECT ON pg_aios TO pg_read_all_stats;
GRANT EXECUTE ON FUNCTION pg_get_aios() TO pg_read_all_stats;

@ -11,6 +11,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = \
aio.o \
aio_callback.o \
aio_funcs.o \
aio_init.o \
aio_io.o \
aio_target.o \

@ -0,0 +1,230 @@
/*-------------------------------------------------------------------------
*
* aio_funcs.c
* AIO - SQL interface for AIO
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/storage/aio/aio_funcs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"
#include "nodes/execnodes.h"
#include "port/atomics.h"
#include "storage/aio_internal.h"
#include "storage/lock.h"
#include "storage/proc.h"
#include "storage/procnumber.h"
#include "utils/builtins.h"
#include "utils/fmgrprotos.h"
#include "utils/tuplestore.h"
/*
 * Total number of bytes described by the first cnt entries of the iovec
 * array iov, i.e. the sum of their iov_len fields. Yields 0 when cnt is not
 * positive.
 */
static size_t
iov_byte_length(const struct iovec *iov, int cnt)
{
	size_t		total = 0;
	int			remaining = cnt;

	/* walk the array front to back, consuming one entry per iteration */
	while (remaining > 0)
	{
		total += iov->iov_len;
		iov++;
		remaining--;
	}

	return total;
}
Datum
pg_get_aios(PG_FUNCTION_ARGS)
{
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
InitMaterializedSRF(fcinfo, 0);
#define PG_GET_AIOS_COLS 15
for (uint64 i = 0; i < pgaio_ctl->io_handle_count; i++)
{
PgAioHandle *live_ioh = &pgaio_ctl->io_handles[i];
uint32 ioh_id = pgaio_io_get_id(live_ioh);
Datum values[PG_GET_AIOS_COLS] = {0};
bool nulls[PG_GET_AIOS_COLS] = {0};
ProcNumber owner;
PGPROC *owner_proc;
int32 owner_pid;
PgAioHandleState start_state;
uint64 start_generation;
PgAioHandle ioh_copy;
struct iovec iov_copy[PG_IOV_MAX];
/*
* There is no lock that could prevent the state of the IO to advance
* concurrently - and we don't want to introduce one, as that would
* introduce atomics into a very common path. Instead we
*
* 1) Determine the state + generation of the IO.
*
* 2) Copy the IO to local memory.
*
* 3) Check if state or generation of the IO changed. If the state
* changed, retry, if the generation changed don't display the IO.
*/
/* 1) from above */
start_generation = live_ioh->generation;
/*
* Retry at this point, so we can accept changing states, but not
* changing generations.
*/
retry:
pg_read_barrier();
start_state = live_ioh->state;
if (start_state == PGAIO_HS_IDLE)
continue;
/* 2) from above */
memcpy(&ioh_copy, live_ioh, sizeof(PgAioHandle));
/*
* Safe to copy even if no iovec is used - we always reserve the
* required space.
*/
memcpy(&iov_copy, &pgaio_ctl->iovecs[ioh_copy.iovec_off],
PG_IOV_MAX * sizeof(struct iovec));
/*
* Copy information about owner before 3) below, if the process exited
* it'd have to wait for the IO to finish first, which we would detect
* in 3).
*/
owner = ioh_copy.owner_procno;
owner_proc = GetPGProcByNumber(owner);
owner_pid = owner_proc->pid;
/* 3) from above */
pg_read_barrier();
/*
* The IO completed and a new one was started with the same ID. Don't
* display it - it really started after this function was called.
* There be a risk of a livelock if we just retried endlessly, if IOs
* complete very quickly.
*/
if (live_ioh->generation != start_generation)
continue;
/*
* The IO's state changed while we were "rendering" it. Just start
* from scratch. There's no risk of a livelock here, as an IO has a
* limited sets of states it can be in, and state changes go only in a
* single direction.
*/
if (live_ioh->state != start_state)
goto retry;
/*
* Now that we have copied the IO into local memory and checked that
* it's still in the same state, we are not allowed to access "live"
* memory anymore. To make it slightly easier to catch such cases, set
* the "live" pointers to NULL.
*/
live_ioh = NULL;
owner_proc = NULL;
/* column: owning pid */
if (owner_pid != 0)
values[0] = Int32GetDatum(owner_pid);
else
nulls[0] = false;
/* column: IO's id */
values[1] = ioh_id;
/* column: IO's generation */
values[2] = Int64GetDatum(start_generation);
/* column: IO's state */
values[3] = CStringGetTextDatum(pgaio_io_get_state_name(&ioh_copy));
/*
* If the IO is in PGAIO_HS_HANDED_OUT state, none of the following
* fields are valid yet (or are in the process of being set).
* Therefore we don't want to display any other columns.
*/
if (start_state == PGAIO_HS_HANDED_OUT)
{
memset(nulls + 4, 1, (lengthof(nulls) - 4) * sizeof(bool));
goto display;
}
/* column: IO's operation */
values[4] = CStringGetTextDatum(pgaio_io_get_op_name(&ioh_copy));
/* columns: details about the IO's operation (offset, length) */
switch (ioh_copy.op)
{
case PGAIO_OP_INVALID:
nulls[5] = true;
nulls[6] = true;
break;
case PGAIO_OP_READV:
values[5] = Int64GetDatum(ioh_copy.op_data.read.offset);
values[6] =
Int64GetDatum(iov_byte_length(iov_copy, ioh_copy.op_data.read.iov_length));
break;
case PGAIO_OP_WRITEV:
values[5] = Int64GetDatum(ioh_copy.op_data.write.offset);
values[6] =
Int64GetDatum(iov_byte_length(iov_copy, ioh_copy.op_data.write.iov_length));
break;
}
/* column: IO's target */
values[7] = CStringGetTextDatum(pgaio_io_get_target_name(&ioh_copy));
/* column: length of IO's data array */
values[8] = Int16GetDatum(ioh_copy.handle_data_len);
/* column: raw result (i.e. some form of syscall return value) */
if (start_state == PGAIO_HS_COMPLETED_IO
|| start_state == PGAIO_HS_COMPLETED_SHARED
|| start_state == PGAIO_HS_COMPLETED_LOCAL)
values[9] = Int32GetDatum(ioh_copy.result);
else
nulls[9] = true;
/*
* column: result in the higher level representation (unknown if not
* finished)
*/
values[10] =
CStringGetTextDatum(pgaio_result_status_string(ioh_copy.distilled_result.status));
/* column: target description */
values[11] = CStringGetTextDatum(pgaio_io_get_target_description(&ioh_copy));
/* columns: one for each flag */
values[12] = BoolGetDatum(ioh_copy.flags & PGAIO_HF_SYNCHRONOUS);
values[13] = BoolGetDatum(ioh_copy.flags & PGAIO_HF_REFERENCES_LOCAL);
values[14] = BoolGetDatum(ioh_copy.flags & PGAIO_HF_BUFFERED);
display:
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
}
return (Datum) 0;
}

@ -3,6 +3,7 @@
backend_sources += files(
'aio.c',
'aio_callback.c',
'aio_funcs.c',
'aio_init.c',
'aio_io.c',
'aio_target.c',

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202503262
#define CATALOG_VERSION_NO 202504011
#endif

@ -12493,4 +12493,13 @@
proargtypes => 'int4',
prosrc => 'gist_stratnum_common' },
# AIO related functions
{ oid => '9200', descr => 'information about in-progress asynchronous IOs',
proname => 'pg_get_aios', prorows => '100', proretset => 't',
provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '',
proallargtypes => '{int4,int4,int8,text,text,int8,int8,text,int2,int4,text,text,bool,bool,bool}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}',
prosrc => 'pg_get_aios' },
]

@ -3132,6 +3132,12 @@ DROP USER regress_locktable_user;
-- switch to superuser
\c -
CREATE ROLE regress_readallstats;
SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- no
has_table_privilege
---------------------
f
(1 row)
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
has_table_privilege
---------------------
@ -3145,6 +3151,12 @@ SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT
(1 row)
GRANT pg_read_all_stats TO regress_readallstats;
SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- yes
has_table_privilege
---------------------
t
(1 row)
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- yes
has_table_privilege
---------------------
@ -3159,6 +3171,12 @@ SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT
-- run query to ensure that functions within views can be executed
SET ROLE regress_readallstats;
SELECT COUNT(*) >= 0 AS ok FROM pg_aios;
ok
----
t
(1 row)
SELECT COUNT(*) >= 0 AS ok FROM pg_backend_memory_contexts;
ok
----

@ -1286,6 +1286,22 @@ drop table cchild;
SELECT viewname, definition FROM pg_views
WHERE schemaname = 'pg_catalog'
ORDER BY viewname;
pg_aios| SELECT pid,
io_id,
io_generation,
state,
operation,
off,
length,
target,
handle_data_len,
raw_result,
result,
target_desc,
f_sync,
f_localmem,
f_buffered
FROM pg_get_aios() pg_get_aios(pid, io_id, io_generation, state, operation, off, length, target, handle_data_len, raw_result, result, target_desc, f_sync, f_localmem, f_buffered);
pg_available_extension_versions| SELECT e.name,
e.version,
(x.extname IS NOT NULL) AS installed,

@ -1919,16 +1919,19 @@ DROP USER regress_locktable_user;
CREATE ROLE regress_readallstats;
SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- no
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no
GRANT pg_read_all_stats TO regress_readallstats;
SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- yes
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- yes
SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- yes
-- run query to ensure that functions within views can be executed
SET ROLE regress_readallstats;
SELECT COUNT(*) >= 0 AS ok FROM pg_aios;
SELECT COUNT(*) >= 0 AS ok FROM pg_backend_memory_contexts;
SELECT COUNT(*) >= 0 AS ok FROM pg_shmem_allocations;
RESET ROLE;

Loading…
Cancel
Save