mirror of https://github.com/watcha-fr/synapse
Shuffle persist event data store functions. (#7440)
The aim here is to get to a stage where we have a `PersistEventStore` that holds all the write methods used during event persistence, so that we can take that class out of the `DataStore` mixin and instantiate it separately. This will allow us to instantiate it on processes other than master, while also ensuring it is only available on processes that are configured to write to events stream. This is a bit of an architectural change, where we end up with multiple classes per data store (rather than one per data store we have now). We end up having: 1. Storage classes that provide high level APIs that can talk to multiple data stores. 2. Data store modules that consist of classes that must point at the same database instance. 3. Classes in a data store that can be instantiated on processes depending on config. code_spécifique_watcha
parent
7ee24c5674
commit
782e4e64df
@ -0,0 +1 @@ |
||||
Refactor event persistence database functions in preparation for allowing them to be run on non-master processes. |
@ -0,0 +1,213 @@ |
||||
# -*- coding: utf-8 -*- |
||||
# Copyright 2020 The Matrix.org Foundation C.I.C. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import logging |
||||
from typing import TYPE_CHECKING |
||||
|
||||
from twisted.internet import defer |
||||
|
||||
from synapse.events.utils import prune_event_dict |
||||
from synapse.metrics.background_process_metrics import run_as_background_process |
||||
from synapse.storage._base import SQLBaseStore |
||||
from synapse.storage.data_stores.main.cache import CacheInvalidationWorkerStore |
||||
from synapse.storage.data_stores.main.events import encode_json |
||||
from synapse.storage.data_stores.main.events_worker import EventsWorkerStore |
||||
from synapse.storage.database import Database |
||||
|
||||
if TYPE_CHECKING: |
||||
from synapse.server import HomeServer |
||||
|
||||
|
||||
logger = logging.getLogger(__name__) |
||||
|
||||
|
||||
class CensorEventsStore(CacheInvalidationWorkerStore, EventsWorkerStore, SQLBaseStore):
    """Store for "censoring" events: replacing the stored JSON of redacted or
    expired events with its pruned form, so the original content is no longer
    retained in the database.

    On construction this schedules a background loop that censors redacted
    events once they are older than the configured retention period.
    """

    def __init__(self, database: Database, db_conn, hs: "HomeServer"):
        super().__init__(database, db_conn, hs)

        # This should only exist on master for now
        assert (
            hs.config.worker.worker_app is None
        ), "Can only instantiate CensorEventsStore on master"

        def _censor_redactions():
            # Wrap the work in a background process so the database
            # transactions have a logcontext to report against.
            return run_as_background_process(
                "_censor_redactions", self._censor_redactions
            )

        if self.hs.config.redaction_retention_period is not None:
            # Poll for censorable redactions every 5 minutes.
            hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000)

    async def _censor_redactions(self):
        """Censors all redactions older than the configured period that haven't
        been censored yet.

        By censor we mean update the event_json table with the redacted event.
        """

        if self.hs.config.redaction_retention_period is None:
            return

        if not (
            await self.db.updates.has_completed_background_update(
                "redactions_have_censored_ts_idx"
            )
        ):
            # We don't want to run this until the appropriate index has been
            # created.
            return

        # Only redactions received before this timestamp are eligible.
        before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period

        # We fetch all redactions that:
        #   1. point to an event we have,
        #   2. has a received_ts from before the cut off, and
        #   3. we haven't yet censored.
        #
        # This is limited to 100 events to ensure that we don't try and do too
        # much at once. We'll get called again so this should eventually catch
        # up.
        sql = """
            SELECT redactions.event_id, redacts FROM redactions
            LEFT JOIN events AS original_event ON (
                redacts = original_event.event_id
            )
            WHERE NOT have_censored
            AND redactions.received_ts <= ?
            ORDER BY redactions.received_ts ASC
            LIMIT ?
        """

        rows = await self.db.execute(
            "_censor_redactions_fetch", None, sql, before_ts, 100
        )

        # Each entry is (redaction_id, event_id, pruned_json_or_None); the
        # writes are applied in a single transaction below.
        updates = []

        for redaction_id, event_id in rows:
            redaction_event = await self.get_event(redaction_id, allow_none=True)
            original_event = await self.get_event(
                event_id, allow_rejected=True, allow_none=True
            )

            # The SQL above ensures that we have both the redaction and
            # original event, so if the `get_event` calls return None it
            # means that the redaction wasn't allowed. Either way we know that
            # the result won't change so we mark the fact that we've checked.
            if (
                redaction_event
                and original_event
                and original_event.internal_metadata.is_redacted()
            ):
                # Redaction was allowed
                pruned_json = encode_json(
                    prune_event_dict(
                        original_event.room_version, original_event.get_dict()
                    )
                )
            else:
                # Redaction wasn't allowed
                pruned_json = None

            updates.append((redaction_id, event_id, pruned_json))

        def _update_censor_txn(txn):
            for redaction_id, event_id, pruned_json in updates:
                if pruned_json:
                    self._censor_event_txn(txn, event_id, pruned_json)

                # Mark the redaction as handled even when the censor was not
                # applied, so we don't re-check it on the next run.
                self.db.simple_update_one_txn(
                    txn,
                    table="redactions",
                    keyvalues={"event_id": redaction_id},
                    updatevalues={"have_censored": True},
                )

        await self.db.runInteraction("_update_censor_txn", _update_censor_txn)

    def _censor_event_txn(self, txn, event_id, pruned_json):
        """Censor an event by replacing its JSON in the event_json table with the
        provided pruned JSON.

        Args:
            txn (LoggingTransaction): The database transaction.
            event_id (str): The ID of the event to censor.
            pruned_json (str): The pruned JSON
        """
        self.db.simple_update_one_txn(
            txn,
            table="event_json",
            keyvalues={"event_id": event_id},
            updatevalues={"json": pruned_json},
        )

    @defer.inlineCallbacks
    def expire_event(self, event_id):
        """Retrieve and expire an event that has expired, and delete its associated
        expiry timestamp. If the event can't be retrieved, delete its associated
        timestamp so we don't try to expire it again in the future.

        Args:
            event_id (str): The ID of the event to delete.
        """
        # Try to retrieve the event's content from the database or the event cache.
        event = yield self.get_event(event_id)

        def delete_expired_event_txn(txn):
            # Delete the expiry timestamp associated with this event from the database.
            self._delete_event_expiry_txn(txn, event_id)

            if not event:
                # If we can't find the event, log a warning and delete the expiry date
                # from the database so that we don't try to expire it again in the
                # future.
                logger.warning(
                    "Can't expire event %s because we don't have it.", event_id
                )
                return

            # Prune the event's dict then convert it to JSON.
            pruned_json = encode_json(
                prune_event_dict(event.room_version, event.get_dict())
            )

            # Update the event_json table to replace the event's JSON with the pruned
            # JSON.
            self._censor_event_txn(txn, event.event_id, pruned_json)

            # We need to invalidate the event cache entry for this event because we
            # changed its content in the database. We can't call
            # self._invalidate_cache_and_stream because self.get_event_cache isn't of the
            # right type.
            txn.call_after(self._get_event_cache.invalidate, (event.event_id,))
            # Send that invalidation to replication so that other workers also invalidate
            # the event cache.
            self._send_invalidation_to_replication(
                txn, "_get_event_cache", (event.event_id,)
            )

        yield self.db.runInteraction("delete_expired_event", delete_expired_event_txn)

    def _delete_event_expiry_txn(self, txn, event_id):
        """Delete the expiry timestamp associated with an event ID without deleting the
        actual event.

        Args:
            txn (LoggingTransaction): The transaction to use to perform the deletion.
            event_id (str): The event ID to delete the associated expiry timestamp of.
        """
        return self.db.simple_delete_txn(
            txn=txn, table="event_expiry", keyvalues={"event_id": event_id}
        )
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,128 @@ |
||||
# -*- coding: utf-8 -*- |
||||
# Copyright 2020 The Matrix.org Foundation C.I.C. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
import typing |
||||
from collections import Counter |
||||
|
||||
from twisted.internet import defer |
||||
|
||||
from synapse.metrics import BucketCollector |
||||
from synapse.metrics.background_process_metrics import run_as_background_process |
||||
from synapse.storage._base import SQLBaseStore |
||||
from synapse.storage.data_stores.main.event_push_actions import ( |
||||
EventPushActionsWorkerStore, |
||||
) |
||||
from synapse.storage.database import Database |
||||
|
||||
|
||||
class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
    """Functions to pull various metrics from the DB, for e.g. phone home
    stats and prometheus metrics.
    """

    def __init__(self, database: Database, db_conn, hs):
        super().__init__(database, db_conn, hs)

        # Distribution of forward-extremity counts across rooms, refreshed
        # periodically and exported to prometheus via the BucketCollector.
        self._current_forward_extremities_amount = (
            Counter()
        )  # type: typing.Counter[int]

        BucketCollector(
            "synapse_forward_extremities",
            lambda: self._current_forward_extremities_amount,
            buckets=[1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"],
        )

        def read_forward_extremities():
            # Run as a background process so that the database transactions
            # have a logcontext to report to.
            return run_as_background_process(
                "read_forward_extremities", self._read_forward_extremities
            )

        # Refresh the extremity counts once an hour.
        hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)

    async def _read_forward_extremities(self):
        """Recount forward extremities per room and cache the distribution."""

        def _fetch(txn):
            txn.execute(
                """
                select count(*) c from event_forward_extremities
                group by room_id
                """
            )
            return txn.fetchall()

        rows = await self.db.runInteraction("read_forward_extremities", _fetch)
        self._current_forward_extremities_amount = Counter(row[0] for row in rows)

    @defer.inlineCallbacks
    def count_daily_messages(self):
        """
        Returns an estimate of the number of messages sent in the last day.

        If it has been significantly less or more than one day since the last
        call to this function, it will return None.
        """

        def _count_txn(txn):
            sql = """
                SELECT COALESCE(COUNT(*), 0) FROM events
                WHERE type = 'm.room.message'
                AND stream_ordering > ?
            """
            txn.execute(sql, (self.stream_ordering_day_ago,))
            (count,) = txn.fetchone()
            return count

        count = yield self.db.runInteraction("count_messages", _count_txn)
        return count

    @defer.inlineCallbacks
    def count_daily_sent_messages(self):
        """Estimate of messages sent by local users in the last day."""

        def _count_txn(txn):
            # Matching on the hostname suffix is good enough: if you have
            # silly characters in your own hostname then that's your own
            # fault.
            like_clause = "%:" + self.hs.hostname

            sql = """
                SELECT COALESCE(COUNT(*), 0) FROM events
                WHERE type = 'm.room.message'
                AND sender LIKE ?
                AND stream_ordering > ?
            """

            txn.execute(sql, (like_clause, self.stream_ordering_day_ago))
            (count,) = txn.fetchone()
            return count

        count = yield self.db.runInteraction("count_daily_sent_messages", _count_txn)
        return count

    @defer.inlineCallbacks
    def count_daily_active_rooms(self):
        """Estimate of distinct rooms with a message in the last day."""

        def _count_txn(txn):
            sql = """
                SELECT COALESCE(COUNT(DISTINCT room_id), 0) FROM events
                WHERE type = 'm.room.message'
                AND stream_ordering > ?
            """
            txn.execute(sql, (self.stream_ordering_day_ago,))
            (count,) = txn.fetchone()
            return count

        count = yield self.db.runInteraction("count_daily_active_rooms", _count_txn)
        return count
@ -0,0 +1,399 @@ |
||||
# -*- coding: utf-8 -*- |
||||
# Copyright 2020 The Matrix.org Foundation C.I.C. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import logging |
||||
from typing import Any, Tuple |
||||
|
||||
from synapse.api.errors import SynapseError |
||||
from synapse.storage._base import SQLBaseStore |
||||
from synapse.storage.data_stores.main.state import StateGroupWorkerStore |
||||
from synapse.types import RoomStreamToken |
||||
|
||||
logger = logging.getLogger(__name__) |
||||
|
||||
|
||||
class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
    """Write methods for purging room history and deleting whole rooms.

    Both entry points return the state groups referenced by the deleted
    events, so the caller can subsequently delete any that are no longer
    referenced.
    """

    def purge_history(self, room_id, token, delete_local_events):
        """Deletes room history before a certain point

        Args:
            room_id (str):

            token (str): A topological token to delete events before

            delete_local_events (bool):
                if True, we will delete local events as well as remote ones
                (instead of just marking them as outliers and deleting their
                state groups).

        Returns:
            Deferred[set[int]]: The set of state groups that are referenced by
                deleted events.
        """

        return self.db.runInteraction(
            "purge_history",
            self._purge_history_txn,
            room_id,
            token,
            delete_local_events,
        )

    def _purge_history_txn(self, txn, room_id, token_str, delete_local_events):
        """Transaction body for `purge_history`.

        Args:
            txn (LoggingTransaction): The database transaction.
            room_id (str): The room to purge history in.
            token_str (str): Topological token to delete events before.
            delete_local_events (bool): Whether to fully delete local events
                too, rather than just marking them as outliers.

        Returns:
            set[int]: State groups referenced by the deleted events.
        """
        token = RoomStreamToken.parse(token_str)

        # Tables that should be pruned:
        #     event_auth
        #     event_backward_extremities
        #     event_edges
        #     event_forward_extremities
        #     event_json
        #     event_push_actions
        #     event_reference_hashes
        #     event_search
        #     event_to_state_groups
        #     events
        #     rejections
        #     room_depth
        #     state_groups
        #     state_groups_state

        # we will build a temporary table listing the events so that we don't
        # have to keep shovelling the list back and forth across the
        # connection. Annoyingly the python sqlite driver commits the
        # transaction on CREATE, so let's do this first.
        #
        # furthermore, we might already have the table from a previous (failed)
        # purge attempt, so let's drop the table first.

        txn.execute("DROP TABLE IF EXISTS events_to_purge")

        txn.execute(
            "CREATE TEMPORARY TABLE events_to_purge ("
            " event_id TEXT NOT NULL,"
            " should_delete BOOLEAN NOT NULL"
            ")"
        )

        # First ensure that we're not about to delete all the forward extremeties
        txn.execute(
            "SELECT e.event_id, e.depth FROM events as e "
            "INNER JOIN event_forward_extremities as f "
            "ON e.event_id = f.event_id "
            "AND e.room_id = f.room_id "
            "WHERE f.room_id = ?",
            (room_id,),
        )
        rows = txn.fetchall()
        # NOTE(review): assumes the room has at least one forward extremity;
        # max() raises ValueError on an empty sequence — confirm callers
        # guarantee this.
        max_depth = max(row[1] for row in rows)

        if max_depth < token.topological:
            # We need to ensure we don't delete all the events from the database
            # otherwise we wouldn't be able to send any events (due to not
            # having any backwards extremeties)
            raise SynapseError(
                400, "topological_ordering is greater than forward extremeties"
            )

        logger.info("[purge] looking for events to delete")

        should_delete_expr = "state_key IS NULL"
        should_delete_params = ()  # type: Tuple[Any, ...]
        if not delete_local_events:
            should_delete_expr += " AND event_id NOT LIKE ?"

            # We include the parameter twice since we use the expression twice
            should_delete_params += ("%:" + self.hs.hostname, "%:" + self.hs.hostname)

        should_delete_params += (room_id, token.topological)

        # Note that we insert events that are outliers and aren't going to be
        # deleted, as nothing will happen to them.
        txn.execute(
            "INSERT INTO events_to_purge"
            " SELECT event_id, %s"
            " FROM events AS e LEFT JOIN state_events USING (event_id)"
            " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
            % (should_delete_expr, should_delete_expr),
            should_delete_params,
        )

        # We create the indices *after* insertion as that's a lot faster.

        # create an index on should_delete because later we'll be looking for
        # the should_delete / shouldn't_delete subsets
        txn.execute(
            "CREATE INDEX events_to_purge_should_delete"
            " ON events_to_purge(should_delete)"
        )

        # We do joins against events_to_purge for e.g. calculating state
        # groups to purge, etc., so lets make an index.
        txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")

        txn.execute("SELECT event_id, should_delete FROM events_to_purge")
        event_rows = txn.fetchall()
        logger.info(
            "[purge] found %i events before cutoff, of which %i can be deleted",
            len(event_rows),
            sum(1 for e in event_rows if e[1]),
        )

        logger.info("[purge] Finding new backward extremities")

        # We calculate the new entries for the backward extremeties by finding
        # events to be purged that are pointed to by events we're not going to
        # purge.
        txn.execute(
            "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
            " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
            " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
            " WHERE ep2.event_id IS NULL"
        )
        new_backwards_extrems = txn.fetchall()

        logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems)

        txn.execute(
            "DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,)
        )

        # Update backward extremeties
        txn.executemany(
            "INSERT INTO event_backward_extremities (room_id, event_id)"
            " VALUES (?, ?)",
            [(room_id, event_id) for event_id, in new_backwards_extrems],
        )

        logger.info("[purge] finding state groups referenced by deleted events")

        # Get all state groups that are referenced by events that are to be
        # deleted.
        txn.execute(
            """
            SELECT DISTINCT state_group FROM events_to_purge
            INNER JOIN event_to_state_groups USING (event_id)
            """
        )

        referenced_state_groups = {sg for sg, in txn}
        logger.info(
            "[purge] found %i referenced state groups", len(referenced_state_groups)
        )

        logger.info("[purge] removing events from event_to_state_groups")
        txn.execute(
            "DELETE FROM event_to_state_groups "
            "WHERE event_id IN (SELECT event_id from events_to_purge)"
        )
        for event_id, _ in event_rows:
            txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))

        # Delete all remote non-state events
        for table in (
            "events",
            "event_json",
            "event_auth",
            "event_edges",
            "event_forward_extremities",
            "event_reference_hashes",
            "event_search",
            "rejections",
        ):
            logger.info("[purge] removing events from %s", table)

            txn.execute(
                "DELETE FROM %s WHERE event_id IN ("
                "    SELECT event_id FROM events_to_purge WHERE should_delete"
                ")" % (table,)
            )

        # event_push_actions lacks an index on event_id, and has one on
        # (room_id, event_id) instead.
        for table in ("event_push_actions",):
            logger.info("[purge] removing events from %s", table)

            txn.execute(
                "DELETE FROM %s WHERE room_id = ? AND event_id IN ("
                "    SELECT event_id FROM events_to_purge WHERE should_delete"
                ")" % (table,),
                (room_id,),
            )

        # Mark all state and own events as outliers
        logger.info("[purge] marking remaining events as outliers")
        txn.execute(
            "UPDATE events SET outlier = ?"
            " WHERE event_id IN ("
            "    SELECT event_id FROM events_to_purge "
            "    WHERE NOT should_delete"
            ")",
            (True,),
        )

        # synapse tries to take out an exclusive lock on room_depth whenever it
        # persists events (because upsert), and once we run this update, we
        # will block that for the rest of our transaction.
        #
        # So, let's stick it at the end so that we don't block event
        # persistence.
        #
        # We do this by calculating the minimum depth of the backwards
        # extremities. However, the events in event_backward_extremities
        # are ones we don't have yet so we need to look at the events that
        # point to it via event_edges table.
        txn.execute(
            """
            SELECT COALESCE(MIN(depth), 0)
            FROM event_backward_extremities AS eb
            INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
            INNER JOIN events AS e ON e.event_id = eg.event_id
            WHERE eb.room_id = ?
            """,
            (room_id,),
        )
        (min_depth,) = txn.fetchone()

        logger.info("[purge] updating room_depth to %d", min_depth)

        txn.execute(
            "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
            (min_depth, room_id),
        )

        # finally, drop the temp table. this will commit the txn in sqlite,
        # so make sure to keep this actually last.
        txn.execute("DROP TABLE events_to_purge")

        logger.info("[purge] done")

        return referenced_state_groups

    def purge_room(self, room_id):
        """Deletes all record of a room

        Args:
            room_id (str)

        Returns:
            Deferred[List[int]]: The list of state groups to delete.
        """

        return self.db.runInteraction("purge_room", self._purge_room_txn, room_id)

    def _purge_room_txn(self, txn, room_id):
        """Transaction body for `purge_room`.

        Args:
            txn (LoggingTransaction): The database transaction.
            room_id (str): The room to delete.

        Returns:
            List[int]: The state groups referenced by the room's events.
        """
        # First we fetch all the state groups that should be deleted, before
        # we delete that information.
        txn.execute(
            """
            SELECT DISTINCT state_group FROM events
            INNER JOIN event_to_state_groups USING(event_id)
            WHERE events.room_id = ?
            """,
            (room_id,),
        )

        state_groups = [row[0] for row in txn]

        # Now we delete tables which lack an index on room_id but have one on event_id
        for table in (
            "event_auth",
            "event_edges",
            "event_push_actions_staging",
            "event_reference_hashes",
            "event_relations",
            "event_to_state_groups",
            "redactions",
            "rejections",
            "state_events",
        ):
            logger.info("[purge] removing %s from %s", room_id, table)

            txn.execute(
                """
                DELETE FROM %s WHERE event_id IN (
                  SELECT event_id FROM events WHERE room_id=?
                )
                """
                % (table,),
                (room_id,),
            )

        # and finally, the tables with an index on room_id (or no useful index)
        for table in (
            "current_state_events",
            "event_backward_extremities",
            "event_forward_extremities",
            "event_json",
            "event_push_actions",
            "event_search",
            "events",
            "group_rooms",
            "public_room_list_stream",
            "receipts_graph",
            "receipts_linearized",
            "room_aliases",
            "room_depth",
            "room_memberships",
            "room_stats_state",
            "room_stats_current",
            "room_stats_historical",
            "room_stats_earliest_token",
            "rooms",
            "stream_ordering_to_exterm",
            "users_in_public_rooms",
            "users_who_share_private_rooms",
            # no useful index, but let's clear them anyway
            "appservice_room_list",
            "e2e_room_keys",
            "event_push_summary",
            "pusher_throttle",
            "group_summary_rooms",
            "local_invites",
            "room_account_data",
            "room_tags",
            "local_current_membership",
        ):
            logger.info("[purge] removing %s from %s", room_id, table)
            txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,))

        # Other tables we do NOT need to clear out:
        #
        #  - blocked_rooms
        #    This is important, to make sure that we don't accidentally rejoin a blocked
        #    room after it was purged
        #
        #  - user_directory
        #    This has a room_id column, but it is unused
        #

        # Other tables that we might want to consider clearing out include:
        #
        #  - event_reports
        #       Given that these are intended for abuse management my initial
        #       inclination is to leave them in place.
        #
        #  - current_state_delta_stream
        #  - ex_outlier_stream
        #  - room_tags_revisions
        #       The problem with these is that they are largeish and there is no room_id
        #       index on them. In any case we should be clearing out 'stream' tables
        #       periodically anyway (#5888)

        # TODO: we could probably usefully do a bunch of cache invalidation here

        logger.info("[purge] done")

        return state_groups
Loading…
Reference in new issue