@ -3,27 +3,24 @@
* latch . c
* Routines for inter - process latches
*
* The Unix implementation uses the so - called self - pipe trick to overcome
* the race condition involved with select ( ) and setting a global flag
* in the signal handler . When a latch is set and the current process
* is waiting for it , the signal handler wakes up the select ( ) in
* WaitLatch by writing a byte to a pipe . A signal by itself doesn ' t
* interrupt select ( ) on all platforms , and even on platforms where it
* does , a signal that arrives just before the select ( ) call does not
* prevent the select ( ) from entering sleep . An incoming byte on a pipe
* however reliably interrupts the sleep , and causes select ( ) to return
* immediately even if the signal arrives before select ( ) begins .
*
* ( Actually , we prefer epoll_wait ( ) over poll ( ) over select ( ) where
* available , but the same comments apply . )
* The Unix implementation uses the so - called self - pipe trick to overcome the
* race condition involved with poll ( ) ( or epoll_wait ( ) on linux ) and setting
* a global flag in the signal handler . When a latch is set and the current
* process is waiting for it , the signal handler wakes up the poll ( ) in
* WaitLatch by writing a byte to a pipe . A signal by itself doesn ' t interrupt
* poll ( ) on all platforms , and even on platforms where it does , a signal that
* arrives just before the poll ( ) call does not prevent poll ( ) from entering
* sleep . An incoming byte on a pipe however reliably interrupts the sleep ,
* and causes poll ( ) to return immediately even if the signal arrives before
* poll ( ) begins .
*
* When SetLatch is called from the same process that owns the latch ,
* SetLatch writes the byte directly to the pipe . If it ' s owned by another
* process , SIGUSR1 is sent and the signal handler in the waiting process
* writes the byte to the pipe on behalf of the signaling process .
*
* The Windows implementation uses Windows events that are inherited by
* all postmaster child processes .
* The Windows implementation uses Windows events that are inherited by all
* postmaster child processes . There ' s no need for the self - pipe trick there .
*
* Portions Copyright ( c ) 1996 - 2017 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
@ -39,7 +36,6 @@
# include <limits.h>
# include <signal.h>
# include <unistd.h>
# include <sys/time.h>
# ifdef HAVE_SYS_EPOLL_H
# include <sys/epoll.h>
# endif
@ -49,9 +45,6 @@
# ifdef HAVE_SYS_POLL_H
# include <sys/poll.h>
# endif
# ifdef HAVE_SYS_SELECT_H
# include <sys/select.h>
# endif
# include "miscadmin.h"
# include "pgstat.h"
@ -69,14 +62,12 @@
* define somewhere before this block .
*/
# if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
defined ( WAIT_USE_SELECT ) | | defined ( WAIT_USE_ WIN32 )
defined ( WAIT_USE_WIN32 )
/* don't overwrite manual choice */
# elif defined(HAVE_SYS_EPOLL_H)
# define WAIT_USE_EPOLL
# elif defined(HAVE_POLL)
# define WAIT_USE_POLL
# elif HAVE_SYS_SELECT_H
# define WAIT_USE_SELECT
# elif WIN32
# define WAIT_USE_WIN32
# else
@ -162,8 +153,8 @@ InitializeLatchSupport(void)
/*
* Set up the self - pipe that allows a signal handler to wake up the
* selec t( ) in WaitLatch . Make the write - end non - blocking , so that
* SetLatch won ' t block if the event has already been set many times
* poll ( ) / epoll_wai t( ) in WaitLatch . Make the write - end non - blocking , so
* that SetLatch won ' t block if the event has already been set many times
* filling the kernel buffer . Make the read - end non - blocking too , so that
* we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK .
*/
@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
/*
* See if anyone ' s waiting for the latch . It can be the current process if
* we ' re in a signal handler . We use the self - pipe to wake up the select ( )
* in that case . If it ' s another process , send a signal .
* we ' re in a signal handler . We use the self - pipe to wake up the
* poll ( ) / epoll_wait ( ) in that case . If it ' s another process , send a
* signal .
*
* Fetch owner_pid only once , in case the latch is concurrently getting
* owned or disowned . XXX : This assumes that pid_t is atomic , which isn ' t
@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
WaitEventAdjustEpoll ( set , event , EPOLL_CTL_ADD ) ;
# elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll ( set , event ) ;
# elif defined(WAIT_USE_SELECT)
/* nothing to do */
# elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32 ( set , event ) ;
# endif
@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
WaitEventAdjustEpoll ( set , event , EPOLL_CTL_MOD ) ;
# elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll ( set , event ) ;
# elif defined(WAIT_USE_SELECT)
/* nothing to do */
# elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32 ( set , event ) ;
# endif
@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* because we don ' t expect the pipe to become readable or to have
* any errors either , treat those cases as postmaster death , too .
*
* As explained in the WAIT_USE_SELECT implementation , select ( 2 )
* may spuriously return . Be paranoid about that here too , a
* spurious WL_POSTMASTER_DEATH would be painful .
* Be paranoid about a spurious event signalling the postmaster as
* being dead . There have been reports about that happening with
* older primitives ( select ( 2 ) to be specific ) , and a spurious
* WL_POSTMASTER_DEATH event would be painful . Re - checking doesn ' t
* cost much .
*/
if ( ! PostmasterIsAlive ( ) )
{
@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* we don ' t expect the pipe to become readable or to have any
* errors either , treat those cases as postmaster death , too .
*
* As explained in the WAIT_USE_SELECT implementation , select ( 2 )
* may spuriously return . Be paranoid about that here too , a
* spurious WL_POSTMASTER_DEATH would be painful .
* Be paranoid about a spurious event signalling the postmaster as
* being dead . There have been reports about that happening with
* older primitives ( select ( 2 ) to be specific ) , and a spurious
* WL_POSTMASTER_DEATH event would be painful . Re - checking doesn ' t
* cost much .
*/
if ( ! PostmasterIsAlive ( ) )
{
@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
return returned_events ;
}
# elif defined(WAIT_USE_SELECT)
/*
* Wait using select ( 2 ) .
*
* XXX : On at least older linux kernels select ( ) , in violation of POSIX ,
* doesn ' t reliably return a socket as writable if closed - but we rely on
* that . So far all the known cases of this problem are on platforms that also
* provide a poll ( ) implementation without that bug . If we find one where
* that ' s not the case , we ' ll need to add a workaround .
*/
static inline int
WaitEventSetWaitBlock ( WaitEventSet * set , int cur_timeout ,
WaitEvent * occurred_events , int nevents )
{
int returned_events = 0 ;
int rc ;
WaitEvent * cur_event ;
fd_set input_mask ;
fd_set output_mask ;
int hifd ;
struct timeval tv ;
struct timeval * tvp = NULL ;
FD_ZERO ( & input_mask ) ;
FD_ZERO ( & output_mask ) ;
/*
* Prepare input / output masks . We do so every loop iteration as there ' s no
* entirely portable way to copy fd_sets .
*/
for ( cur_event = set - > events ;
cur_event < ( set - > events + set - > nevents ) ;
cur_event + + )
{
if ( cur_event - > events = = WL_LATCH_SET )
FD_SET ( cur_event - > fd , & input_mask ) ;
else if ( cur_event - > events = = WL_POSTMASTER_DEATH )
FD_SET ( cur_event - > fd , & input_mask ) ;
else
{
Assert ( cur_event - > events & ( WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE ) ) ;
if ( cur_event - > events = = WL_SOCKET_READABLE )
FD_SET ( cur_event - > fd , & input_mask ) ;
else if ( cur_event - > events = = WL_SOCKET_WRITEABLE )
FD_SET ( cur_event - > fd , & output_mask ) ;
}
if ( cur_event - > fd > hifd )
hifd = cur_event - > fd ;
}
/* Sleep */
if ( cur_timeout > = 0 )
{
tv . tv_sec = cur_timeout / 1000L ;
tv . tv_usec = ( cur_timeout % 1000L ) * 1000L ;
tvp = & tv ;
}
rc = select ( hifd + 1 , & input_mask , & output_mask , NULL , tvp ) ;
/* Check return code */
if ( rc < 0 )
{
/* EINTR is okay, otherwise complain */
if ( errno ! = EINTR )
{
waiting = false ;
ereport ( ERROR ,
( errcode_for_socket_access ( ) ,
errmsg ( " select() failed: %m " ) ) ) ;
}
return 0 ; /* retry */
}
else if ( rc = = 0 )
{
/* timeout exceeded */
return - 1 ;
}
/*
* To associate events with select ' s masks , we have to check the status of
* the file descriptors associated with an event ; by looping through all
* events .
*/
for ( cur_event = set - > events ;
cur_event < ( set - > events + set - > nevents )
& & returned_events < nevents ;
cur_event + + )
{
occurred_events - > pos = cur_event - > pos ;
occurred_events - > user_data = cur_event - > user_data ;
occurred_events - > events = 0 ;
if ( cur_event - > events = = WL_LATCH_SET & &
FD_ISSET ( cur_event - > fd , & input_mask ) )
{
/* There's data in the self-pipe, clear it. */
drainSelfPipe ( ) ;
if ( set - > latch - > is_set )
{
occurred_events - > fd = PGINVALID_SOCKET ;
occurred_events - > events = WL_LATCH_SET ;
occurred_events + + ;
returned_events + + ;
}
}
else if ( cur_event - > events = = WL_POSTMASTER_DEATH & &
FD_ISSET ( cur_event - > fd , & input_mask ) )
{
/*
* According to the select ( 2 ) man page on Linux , select ( 2 ) may
* spuriously return and report a file descriptor as readable ,
* when it ' s not ; and presumably so can poll ( 2 ) . It ' s not clear
* that the relevant cases would ever apply to the postmaster
* pipe , but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant , we take the
* trouble to positively verify EOF with PostmasterIsAlive ( ) .
*/
if ( ! PostmasterIsAlive ( ) )
{
occurred_events - > fd = PGINVALID_SOCKET ;
occurred_events - > events = WL_POSTMASTER_DEATH ;
occurred_events + + ;
returned_events + + ;
}
}
else if ( cur_event - > events & ( WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE ) )
{
Assert ( cur_event - > fd ! = PGINVALID_SOCKET ) ;
if ( ( cur_event - > events & WL_SOCKET_READABLE ) & &
FD_ISSET ( cur_event - > fd , & input_mask ) )
{
/* data available in socket, or EOF */
occurred_events - > events | = WL_SOCKET_READABLE ;
}
if ( ( cur_event - > events & WL_SOCKET_WRITEABLE ) & &
FD_ISSET ( cur_event - > fd , & output_mask ) )
{
/* socket is writeable, or EOF */
occurred_events - > events | = WL_SOCKET_WRITEABLE ;
}
if ( occurred_events - > events ! = 0 )
{
occurred_events - > fd = cur_event - > fd ;
occurred_events + + ;
returned_events + + ;
}
}
}
return returned_events ;
}
# elif defined(WAIT_USE_WIN32)
/*