aio: Stop using enum bitfields due to bad code generation

During an investigation into rather odd aio related errors on macos, observed
by Alexander and Konstantin, we started to wonder if bitfield access is
related to the error. At the moment it looks like it is related, we cannot
reproduce the failures when replacing the bitfields. In addition, the problem
can only be reproduced with some compiler [versions] and not everyone has been
able to reproduce the issue.

The observed problem is that, very rarely, PgAioHandle->{state,target} are in
an inconsistent state, after having been checked to be in a valid state not
long before, triggering an assertion failure. Unfortunately, this could be
caused by wrong compiler code generation or somehow of missing memory barriers
- we don't really know. In theory there should not be any concurrent write
access to the handle in the state the bug is triggered, as the handle was idle
and is just being initialized.

Separately from the bug, we observed that at least gcc and clang generate
rather terrible code for the bitfield access. Even if it's not clear if the
observed assertion failure is actually caused by the bitfield somehow, the bad
code generation alone is sufficient reason to stop using bitfields.

Therefore, replace the enum bitfields with uint8s and instead cast in each
switch statement.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Reported-by: Konstantin Knizhnik <knizhnik@garret.ru>
Discussion: https://postgr.es/m/1500090.1745443021@sss.pgh.pa.us
Backpatch-through: 18
master
Andres Freund 2 weeks ago
parent 310d04169a
commit 5865150b6d
  1. 10
      src/backend/storage/aio/aio.c
  2. 2
      src/backend/storage/aio/aio_funcs.c
  3. 8
      src/backend/storage/aio/aio_io.c
  4. 2
      src/backend/storage/aio/method_io_uring.c
  5. 14
      src/include/storage/aio_internal.h

@ -275,7 +275,7 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
ioh->resowner = NULL;
switch (ioh->state)
switch ((PgAioHandleState) ioh->state)
{
case PGAIO_HS_IDLE:
elog(ERROR, "unexpected");
@ -600,7 +600,7 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
if (pgaio_io_was_recycled(ioh, ref_generation, &state))
return;
switch (state)
switch ((PgAioHandleState) state)
{
case PGAIO_HS_IDLE:
case PGAIO_HS_HANDED_OUT:
@ -825,7 +825,7 @@ pgaio_io_wait_for_free(void)
&pgaio_my_backend->in_flight_ios);
uint64 generation = ioh->generation;
switch (ioh->state)
switch ((PgAioHandleState) ioh->state)
{
/* should not be in in-flight list */
case PGAIO_HS_IDLE:
@ -905,7 +905,7 @@ static const char *
pgaio_io_state_get_name(PgAioHandleState s)
{
#define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
switch (s)
switch ((PgAioHandleState) s)
{
PGAIO_HS_TOSTR_CASE(IDLE);
PGAIO_HS_TOSTR_CASE(HANDED_OUT);
@ -930,7 +930,7 @@ pgaio_io_get_state_name(PgAioHandle *ioh)
const char *
pgaio_result_status_string(PgAioResultStatus rs)
{
switch (rs)
switch ((PgAioResultStatus) rs)
{
case PGAIO_RS_UNKNOWN:
return "UNKNOWN";

@ -175,7 +175,7 @@ retry:
values[4] = CStringGetTextDatum(pgaio_io_get_op_name(&ioh_copy));
/* columns: details about the IO's operation (offset, length) */
switch (ioh_copy.op)
switch ((PgAioOp) ioh_copy.op)
{
case PGAIO_OP_INVALID:
nulls[5] = true;

@ -121,7 +121,7 @@ pgaio_io_perform_synchronously(PgAioHandle *ioh)
START_CRIT_SECTION();
/* Perform IO. */
switch (ioh->op)
switch ((PgAioOp) ioh->op)
{
case PGAIO_OP_READV:
pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ);
@ -176,7 +176,7 @@ pgaio_io_get_op_name(PgAioHandle *ioh)
{
Assert(ioh->op >= 0 && ioh->op < PGAIO_OP_COUNT);
switch (ioh->op)
switch ((PgAioOp) ioh->op)
{
case PGAIO_OP_INVALID:
return "invalid";
@ -198,7 +198,7 @@ pgaio_io_uses_fd(PgAioHandle *ioh, int fd)
{
Assert(ioh->state >= PGAIO_HS_DEFINED);
switch (ioh->op)
switch ((PgAioOp) ioh->op)
{
case PGAIO_OP_READV:
return ioh->op_data.read.fd == fd;
@ -222,7 +222,7 @@ pgaio_io_get_iovec_length(PgAioHandle *ioh, struct iovec **iov)
*iov = &pgaio_ctl->iovecs[ioh->iovec_off];
switch (ioh->op)
switch ((PgAioOp) ioh->op)
{
case PGAIO_OP_READV:
return ioh->op_data.read.iov_length;

@ -660,7 +660,7 @@ pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe)
{
struct iovec *iov;
switch (ioh->op)
switch ((PgAioOp) ioh->op)
{
case PGAIO_OP_READV:
iov = &pgaio_ctl->iovecs[ioh->iovec_off];

@ -92,17 +92,23 @@ typedef enum PgAioHandleState
struct ResourceOwnerData;
/* typedef is in aio_types.h */
/*
* Typedef is in aio_types.h
*
* We don't use the underlying enums for state, target and op to avoid wasting
* space. We tried using bitfields, but several compilers generate rather
* horrid code for that.
*/
struct PgAioHandle
{
/* all state updates should go through pgaio_io_update_state() */
PgAioHandleState state:8;
uint8 state;
/* what are we operating on */
PgAioTargetID target:8;
uint8 target;
/* which IO operation */
PgAioOp op:8;
uint8 op;
/* bitfield of PgAioHandleFlags */
uint8 flags;

Loading…
Cancel
Save