@@ -72,7 +72,7 @@ from synapse.util.metrics import Measure
 logger = logging.getLogger(__name__)

-# These values are used in the `enqueus_event` and `_do_fetch` methods to
+# These values are used in the `enqueue_event` and `_fetch_loop` methods to
 # control how we batch/bulk fetch events from the database.
 # The values are plucked out of thin air to make initial sync run faster
 # on jki.re
@@ -602,7 +602,7 @@ class EventsWorkerStore(SQLBaseStore):
                 # already due to `_get_events_from_db`).
                 fetching_deferred: ObservableDeferred[
                     Dict[str, _EventCacheEntry]
-                ] = ObservableDeferred(defer.Deferred())
+                ] = ObservableDeferred(defer.Deferred(), consumeErrors=True)
                 for event_id in missing_events_ids:
                     self._current_event_fetches[event_id] = fetching_deferred
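The `consumeErrors=True` here matters because this deferred is shared: several concurrent callers can wait on the same event IDs, each attaches an observer, and the failure is delivered to every observer. Without consuming the error, the shared deferred would additionally report it as unhandled when garbage collected. Below is a minimal sketch of that fan-out rule in plain Twisted; `observe` is a hypothetical stand-in, not Synapse's actual `ObservableDeferred` implementation:

```python
from typing import List

from twisted.internet import defer


def observe(source: defer.Deferred, n: int) -> List[defer.Deferred]:
    """Fan a single result (or failure) out to `n` observer deferreds."""
    observers = [defer.Deferred() for _ in range(n)]

    def callback(result):
        for observer in observers:
            observer.callback(result)
        return result

    def errback(failure):
        for observer in observers:
            observer.errback(failure)
        # Returning None "consumes" the failure on `source`, the moral
        # equivalent of `consumeErrors=True`; returning `failure` instead
        # would leave an unhandled error lurking on the shared deferred.
        return None

    source.addCallbacks(callback, errback)
    return observers
```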
@@ -736,35 +736,118 @@ class EventsWorkerStore(SQLBaseStore):
             for e in state_to_include.values()
         ]

-    def _do_fetch(self, conn: Connection) -> None:
+    def _maybe_start_fetch_thread(self) -> None:
+        """Starts an event fetch thread if we are not yet at the maximum number."""
+        with self._event_fetch_lock:
+            if (
+                self._event_fetch_list
+                and self._event_fetch_ongoing < EVENT_QUEUE_THREADS
+            ):
+                self._event_fetch_ongoing += 1
+                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+                # `_event_fetch_ongoing` is decremented in `_fetch_thread`.
+                should_start = True
+            else:
+                should_start = False
+
+        if should_start:
+            run_as_background_process("fetch_events", self._fetch_thread)
+
+    async def _fetch_thread(self) -> None:
+        """Services requests for events from `_event_fetch_list`."""
+        exc = None
+        try:
+            await self.db_pool.runWithConnection(self._fetch_loop)
+        except BaseException as e:
+            exc = e
+            raise
+        finally:
+            should_restart = False
+            event_fetches_to_fail = []
+            with self._event_fetch_lock:
+                self._event_fetch_ongoing -= 1
+                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+
+                # There may still be work remaining in `_event_fetch_list` if we
+                # failed, or it was added in between us deciding to exit and
+                # decrementing `_event_fetch_ongoing`.
+                if self._event_fetch_list:
+                    if exc is None:
+                        # We decided to exit, but then some more work was added
+                        # before `_event_fetch_ongoing` was decremented.
+                        # If a new event fetch thread was not started, we should
+                        # restart ourselves since the remaining event fetch
+                        # threads may take a while to get around to the new work.
+                        #
+                        # Unfortunately it is not possible to tell whether a new
+                        # event fetch thread was started, so we restart
+                        # unconditionally. If we are unlucky, we will end up with
+                        # an idle fetch thread, but it will time out after
+                        # `EVENT_QUEUE_ITERATIONS * EVENT_QUEUE_TIMEOUT_S`
+                        # seconds in any case.
+                        #
+                        # Note that multiple fetch threads may run down this
+                        # path at the same time.
+                        should_restart = True
+                    elif isinstance(exc, Exception):
+                        if self._event_fetch_ongoing == 0:
+                            # We were the last remaining fetcher and failed.
+                            # Fail any outstanding fetches since no one else
+                            # will handle them.
+                            event_fetches_to_fail = self._event_fetch_list
+                            self._event_fetch_list = []
+                        else:
+                            # We weren't the last remaining fetcher, so another
+                            # fetcher will pick up the work. This will either
+                            # happen after their existing work, however long
+                            # that takes, or after at most
+                            # `EVENT_QUEUE_TIMEOUT_S` seconds if they are idle.
+                            pass
+                    else:
+                        # The exception is a `SystemExit`, `KeyboardInterrupt`
+                        # or `GeneratorExit`. Don't try to do anything clever
+                        # here.
+                        pass
+
+            if should_restart:
+                # We exited cleanly but noticed more work.
+                self._maybe_start_fetch_thread()
+
+            if event_fetches_to_fail:
+                # We were the last remaining fetcher and failed.
+                # Fail any outstanding fetches since no one else will handle
+                # them.
+                assert exc is not None
+                with PreserveLoggingContext():
+                    for _, deferred in event_fetches_to_fail:
+                        deferred.errback(exc)
+    def _fetch_loop(self, conn: Connection) -> None:
         """Takes a database connection and waits for requests for events from
         the _event_fetch_list queue.
         """
-        try:
-            i = 0
-            while True:
-                with self._event_fetch_lock:
-                    event_list = self._event_fetch_list
-                    self._event_fetch_list = []
-
-                    if not event_list:
-                        single_threaded = self.database_engine.single_threaded
-                        if (
-                            not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING
-                            or single_threaded
-                            or i > EVENT_QUEUE_ITERATIONS
-                        ):
-                            break
-                        else:
-                            self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
-                            i += 1
-                            continue
-                    i = 0
+        i = 0
+        while True:
+            with self._event_fetch_lock:
+                event_list = self._event_fetch_list
+                self._event_fetch_list = []
+
+                if not event_list:
+                    # There are no requests waiting. If we haven't yet reached
+                    # the maximum iteration limit, wait for some more requests
+                    # to turn up. Otherwise, bail out.
+                    single_threaded = self.database_engine.single_threaded
+                    if (
+                        not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING
+                        or single_threaded
+                        or i > EVENT_QUEUE_ITERATIONS
+                    ):
+                        return
+
+                    self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
+                    i += 1
+                    continue
+                i = 0
-                self._fetch_event_list(conn, event_list)
-        finally:
-            self._event_fetch_ongoing -= 1
-            event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+
+            self._fetch_event_list(conn, event_list)
     def _fetch_event_list(
         self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]]
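Taken together, the new methods implement a small thread-pool lifecycle: `_maybe_start_fetch_thread` caps the number of fetchers, `_fetch_loop` drains work or idles out, and `_fetch_thread`'s `finally` block closes the exit race by restarting a fetcher when work arrived late. Here is a self-contained sketch of that lifecycle under stated assumptions: the names (`FetchQueue`, `handle_batch`) are hypothetical, and `threading.Condition` stands in for Synapse's `_event_fetch_lock` plus its reactor machinery:

```python
import threading
from typing import Callable, List

MAX_WORKERS = 5          # cf. EVENT_QUEUE_THREADS
MAX_IDLE_ITERATIONS = 3  # cf. EVENT_QUEUE_ITERATIONS
IDLE_TIMEOUT_S = 0.1     # cf. EVENT_QUEUE_TIMEOUT_S


class FetchQueue:
    def __init__(self, handle_batch: Callable[[List[str]], None]) -> None:
        self._handle_batch = handle_batch
        self._lock = threading.Condition()
        self._queue: List[str] = []
        self._ongoing = 0

    def enqueue(self, item: str) -> None:
        with self._lock:
            self._queue.append(item)
            self._lock.notify()  # wake a worker blocked in wait()
        self._maybe_start_worker()

    def _maybe_start_worker(self) -> None:
        # cf. _maybe_start_fetch_thread: decide under the lock, but start the
        # thread outside it so the lock is never held across thread creation.
        with self._lock:
            should_start = bool(self._queue) and self._ongoing < MAX_WORKERS
            if should_start:
                self._ongoing += 1  # decremented in _worker's finally block
        if should_start:
            threading.Thread(target=self._worker, daemon=True).start()

    def _worker(self) -> None:
        try:
            self._loop()
        finally:
            # cf. _fetch_thread: work may have been enqueued between our
            # decision to exit and this decrement, so restart unconditionally
            # if the queue is non-empty; at worst the new worker idles out.
            with self._lock:
                self._ongoing -= 1
                should_restart = bool(self._queue)
            if should_restart:
                self._maybe_start_worker()

    def _loop(self) -> None:
        # cf. _fetch_loop: drain the queue under the lock, wait when idle,
        # and exit after too many empty iterations.
        idle = 0
        while True:
            with self._lock:
                batch, self._queue = self._queue, []
                if not batch:
                    if idle > MAX_IDLE_ITERATIONS:
                        return
                    self._lock.wait(IDLE_TIMEOUT_S)
                    idle += 1
                    continue
                idle = 0
            # Run the actual work outside the lock so producers aren't blocked.
            self._handle_batch(batch)
```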
@@ -806,9 +889,7 @@ class EventsWorkerStore(SQLBaseStore):
             # We only want to resolve deferreds from the main thread
             def fire(evs, exc):
                 for _, d in evs:
-                    if not d.called:
-                        with PreserveLoggingContext():
-                            d.errback(exc)
+                    d.errback(exc)

             with PreserveLoggingContext():
                 self.hs.get_reactor().callFromThread(fire, event_list, e)
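The simplified `fire` leans on the same invariant as before: deferreds must only be fired on the reactor thread, which is why the database thread hands the failure over via `callFromThread` rather than erroring the deferreds itself; dropping the `if not d.called` guard suggests each deferred is now expected to be fired at most once along this path. A minimal sketch of that hand-off, with a hypothetical helper name and plain Twisted in place of Synapse's logging-context machinery:

```python
from typing import List

from twisted.internet import defer, reactor


def fail_from_db_thread(deferreds: List[defer.Deferred], exc: Exception) -> None:
    """Called on a worker thread; Twisted deferreds must not be fired here."""

    def fire() -> None:
        # Runs on the reactor thread, where firing deferreds is safe.
        for d in deferreds:
            d.errback(exc)

    reactor.callFromThread(fire)
```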
@@ -983,20 +1064,9 @@ class EventsWorkerStore(SQLBaseStore):
         events_d = defer.Deferred()
         with self._event_fetch_lock:
             self._event_fetch_list.append((events, events_d))
             self._event_fetch_lock.notify()

-            if self._event_fetch_ongoing < EVENT_QUEUE_THREADS:
-                self._event_fetch_ongoing += 1
-                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
-                should_start = True
-            else:
-                should_start = False
-
-        if should_start:
-            run_as_background_process(
-                "fetch_events", self.db_pool.runWithConnection, self._do_fetch
-            )
+        self._maybe_start_fetch_thread()

         logger.debug("Loading %d events: %s", len(events), events)
         with PreserveLoggingContext():
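For completeness, here is a hypothetical usage of the `FetchQueue` sketch above, mirroring this hunk's sequence: enqueue under the lock, notify a waiting worker, then call the start helper outside the lock.

```python
import time

q = FetchQueue(handle_batch=lambda batch: print("fetched", batch))
for event_id in ("$event_a:example.com", "$event_b:example.com"):
    q.enqueue(event_id)

time.sleep(1)  # let the daemon worker drain the queue before exiting
```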