@ -184,6 +184,8 @@ pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
PgAioHandle *
pgaio_io_acquire_nb ( struct ResourceOwnerData * resowner , PgAioReturn * ret )
{
PgAioHandle * ioh = NULL ;
if ( pgaio_my_backend - > num_staged_ios > = PGAIO_SUBMIT_BATCH_SIZE )
{
Assert ( pgaio_my_backend - > num_staged_ios = = PGAIO_SUBMIT_BATCH_SIZE ) ;
@ -193,10 +195,17 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
if ( pgaio_my_backend - > handed_out_io )
elog ( ERROR , " API violation: Only one IO can be handed out " ) ;
/*
* Probably not needed today , as interrupts should not process this IO ,
* but . . .
*/
HOLD_INTERRUPTS ( ) ;
if ( ! dclist_is_empty ( & pgaio_my_backend - > idle_ios ) )
{
dlist_node * ion = dclist_pop_head_node ( & pgaio_my_backend - > idle_ios ) ;
PgAioHandle * ioh = dclist_container ( PgAioHandle , node , ion ) ;
ioh = dclist_container ( PgAioHandle , node , ion ) ;
Assert ( ioh - > state = = PGAIO_HS_IDLE ) ;
Assert ( ioh - > owner_procno = = MyProcNumber ) ;
@ -212,11 +221,11 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
ioh - > report_return = ret ;
ret - > result . status = PGAIO_RS_UNKNOWN ;
}
return ioh ;
}
return NULL ;
RESUME_INTERRUPTS ( ) ;
return ioh ;
}
/*
@ -233,6 +242,12 @@ pgaio_io_release(PgAioHandle *ioh)
Assert ( ioh - > resowner ) ;
pgaio_my_backend - > handed_out_io = NULL ;
/*
* Note that no interrupts are processed between the handed_out_io
* check and the call to reclaim - that ' s important as otherwise an
* interrupt could have already reclaimed the handle .
*/
pgaio_io_reclaim ( ioh ) ;
}
else
@ -251,6 +266,12 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
Assert ( ioh - > resowner ) ;
/*
* Otherwise an interrupt , in the middle of releasing the IO , could end up
* trying to wait for the IO , leading to state confusion .
*/
HOLD_INTERRUPTS ( ) ;
ResourceOwnerForgetAioHandle ( ioh - > resowner , & ioh - > resowner_node ) ;
ioh - > resowner = NULL ;
@ -291,6 +312,8 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
*/
if ( ioh - > report_return )
ioh - > report_return = NULL ;
RESUME_INTERRUPTS ( ) ;
}
/*
@ -359,6 +382,13 @@ pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
static inline void
pgaio_io_update_state ( PgAioHandle * ioh , PgAioHandleState new_state )
{
/*
* All callers need to have held interrupts in some form , otherwise
* interrupt processing could wait for the IO to complete , while in an
* intermediary state .
*/
Assert ( ! INTERRUPTS_CAN_BE_PROCESSED ( ) ) ;
pgaio_debug_io ( DEBUG5 , ioh ,
" updating state to %s " ,
pgaio_io_state_get_name ( new_state ) ) ;
@ -396,6 +426,13 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
Assert ( pgaio_my_backend - > handed_out_io = = ioh ) ;
Assert ( pgaio_io_has_target ( ioh ) ) ;
/*
* Otherwise an interrupt , in the middle of staging and possibly executing
* the IO , could end up trying to wait for the IO , leading to state
* confusion .
*/
HOLD_INTERRUPTS ( ) ;
ioh - > op = op ;
ioh - > result = 0 ;
@ -435,6 +472,8 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
pgaio_io_prepare_submit ( ioh ) ;
pgaio_io_perform_synchronously ( ioh ) ;
}
RESUME_INTERRUPTS ( ) ;
}
bool
@ -544,8 +583,8 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
& & state ! = PGAIO_HS_COMPLETED_SHARED
& & state ! = PGAIO_HS_COMPLETED_LOCAL )
{
elog ( PANIC , " waiting for own IO in wrong state: %d " ,
state ) ;
elog ( PANIC , " waiting for own IO %d in wrong state: %s " ,
pgaio_io_get_id ( ioh ) , pgaio_io_get_ state_name ( ioh ) ) ;
}
}
@ -599,7 +638,13 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
case PGAIO_HS_COMPLETED_SHARED :
case PGAIO_HS_COMPLETED_LOCAL :
/* see above */
/*
* Note that no interrupts are processed between
* pgaio_io_was_recycled ( ) and this check - that ' s important
* as otherwise an interrupt could have already reclaimed the
* handle .
*/
if ( am_owner )
pgaio_io_reclaim ( ioh ) ;
return ;
@ -610,6 +655,11 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
/*
* Make IO handle ready to be reused after IO has completed or after the
* handle has been released without being used .
*
* Note that callers need to be careful about only calling this in the right
* state and that no interrupts can be processed between the state check and
* the call to pgaio_io_reclaim ( ) . Otherwise interrupt processing could
* already have reclaimed the handle .
*/
static void
pgaio_io_reclaim ( PgAioHandle * ioh )
@ -618,6 +668,9 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert ( ioh - > owner_procno = = MyProcNumber ) ;
Assert ( ioh - > state ! = PGAIO_HS_IDLE ) ;
/* see comment in function header */
HOLD_INTERRUPTS ( ) ;
/*
* It ' s a bit ugly , but right now the easiest place to put the execution
* of local completion callbacks is this function , as we need to execute
@ -685,6 +738,8 @@ pgaio_io_reclaim(PgAioHandle *ioh)
* efficient in cases where only a few IOs are used .
*/
dclist_push_head ( & pgaio_my_backend - > idle_ios , & ioh - > node ) ;
RESUME_INTERRUPTS ( ) ;
}
/*
@ -700,7 +755,7 @@ pgaio_io_wait_for_free(void)
pgaio_debug ( DEBUG2 , " waiting for free IO with %d pending, %d in-flight, %d idle IOs " ,
pgaio_my_backend - > num_staged_ios ,
dclist_count ( & pgaio_my_backend - > in_flight_ios ) ,
dclist_is_empty ( & pgaio_my_backend - > idle_ios ) ) ;
dclist_count ( & pgaio_my_backend - > idle_ios ) ) ;
/*
* First check if any of our IOs actually have completed - when using
@ -714,6 +769,11 @@ pgaio_io_wait_for_free(void)
if ( ioh - > state = = PGAIO_HS_COMPLETED_SHARED )
{
/*
* Note that no interrupts are processed between the state check
* and the call to reclaim - that ' s important as otherwise an
* interrupt could have already reclaimed the handle .
*/
pgaio_io_reclaim ( ioh ) ;
reclaimed + + ;
}
@ -730,13 +790,17 @@ pgaio_io_wait_for_free(void)
if ( pgaio_my_backend - > num_staged_ios > 0 )
pgaio_submit_staged ( ) ;
/* possibly some IOs finished during submission */
if ( ! dclist_is_empty ( & pgaio_my_backend - > idle_ios ) )
return ;
if ( dclist_count ( & pgaio_my_backend - > in_flight_ios ) = = 0 )
ereport ( ERROR ,
errmsg_internal ( " no free IOs despite no in-flight IOs " ) ,
errdetail_internal ( " %d pending, %d in-flight, %d idle IOs " ,
pgaio_my_backend - > num_staged_ios ,
dclist_count ( & pgaio_my_backend - > in_flight_ios ) ,
dclist_is_empty ( & pgaio_my_backend - > idle_ios ) ) ) ;
dclist_count ( & pgaio_my_backend - > idle_ios ) ) ) ;
/*
* Wait for the oldest in - flight IO to complete .
@ -747,6 +811,7 @@ pgaio_io_wait_for_free(void)
{
PgAioHandle * ioh = dclist_head_element ( PgAioHandle , node ,
& pgaio_my_backend - > in_flight_ios ) ;
uint64 generation = ioh - > generation ;
switch ( ioh - > state )
{
@ -770,13 +835,24 @@ pgaio_io_wait_for_free(void)
* In a more general case this would be racy , because the
* generation could increase after we read ioh - > state above .
* But we are only looking at IOs by the current backend and
* the IO can only be recycled by this backend .
* the IO can only be recycled by this backend . Even this is
* only OK because we get the handle ' s generation before
* potentially processing interrupts , e . g . as part of
* pgaio_debug_io ( ) .
*/
pgaio_io_wait ( ioh , ioh - > generation ) ;
pgaio_io_wait ( ioh , generation ) ;
break ;
case PGAIO_HS_COMPLETED_SHARED :
/* it's possible that another backend just finished this IO */
/*
* It ' s possible that another backend just finished this IO .
*
* Note that no interrupts are processed between the state
* check and the call to reclaim - that ' s important as
* otherwise an interrupt could have already reclaimed the
* handle .
*/
pgaio_io_reclaim ( ioh ) ;
break ;
}
@ -926,6 +1002,11 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
if ( state = = PGAIO_HS_COMPLETED_SHARED | |
state = = PGAIO_HS_COMPLETED_LOCAL )
{
/*
* Note that no interrupts are processed between
* pgaio_io_was_recycled ( ) and this check - that ' s important as
* otherwise an interrupt could have already reclaimed the handle .
*/
if ( am_owner )
pgaio_io_reclaim ( ioh ) ;
return true ;
@ -1153,11 +1234,14 @@ pgaio_closing_fd(int fd)
{
dlist_iter iter ;
PgAioHandle * ioh = NULL ;
uint64 generation ;
dclist_foreach ( iter , & pgaio_my_backend - > in_flight_ios )
{
ioh = dclist_container ( PgAioHandle , node , iter . cur ) ;
generation = ioh - > generation ;
if ( pgaio_io_uses_fd ( ioh , fd ) )
break ;
else
@ -1172,7 +1256,7 @@ pgaio_closing_fd(int fd)
fd , dclist_count ( & pgaio_my_backend - > in_flight_ios ) ) ;
/* see comment in pgaio_io_wait_for_free() about raciness */
pgaio_io_wait ( ioh , ioh - > generation ) ;
pgaio_io_wait ( ioh , generation ) ;
}
}
}
@ -1201,13 +1285,14 @@ pgaio_shutdown(int code, Datum arg)
while ( ! dclist_is_empty ( & pgaio_my_backend - > in_flight_ios ) )
{
PgAioHandle * ioh = dclist_head_element ( PgAioHandle , node , & pgaio_my_backend - > in_flight_ios ) ;
uint64 generation = ioh - > generation ;
pgaio_debug_io ( DEBUG2 , ioh ,
" waiting for IO to complete during shutdown, %d in-flight IOs " ,
dclist_count ( & pgaio_my_backend - > in_flight_ios ) ) ;
/* see comment in pgaio_io_wait_for_free() about raciness */
pgaio_io_wait ( ioh , ioh - > generation ) ;
pgaio_io_wait ( ioh , generation ) ;
}
pgaio_my_backend = NULL ;