Add not null constraint to column `full_user_id` of tables `profiles` and `user_filters` (#15537)

1.103.0-whithout-watcha
Shay 2 years ago committed by GitHub
parent 77cda342be
commit 9f6ff6a0eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      changelog.d/15537.misc
  2. 95
      synapse/storage/databases/main/filtering.py
  3. 102
      synapse/storage/databases/main/profile.py
  4. 10
      synapse/storage/schema/__init__.py
  5. 16
      synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres
  6. 16
      synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres
  7. 16
      synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql
  8. 16
      synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql
  9. 63
      tests/storage/test_profile.py
  10. 94
      tests/storage/test_user_filters.py

@ -0,0 +1 @@
Add not null constraint to column full_user_id of tables profiles and user_filters.

@ -25,6 +25,7 @@ from synapse.storage.database import (
LoggingDatabaseConnection,
LoggingTransaction,
)
from synapse.storage.engines import PostgresEngine
from synapse.types import JsonDict, UserID
from synapse.util.caches.descriptors import cached
@ -40,6 +41,8 @@ class FilteringWorkerStore(SQLBaseStore):
hs: "HomeServer",
):
super().__init__(database, db_conn, hs)
self.server_name: str = hs.hostname
self.database_engine = database.engine
self.db_pool.updates.register_background_index_update(
"full_users_filters_unique_idx",
index_name="full_users_unique_idx",
@ -48,6 +51,98 @@ class FilteringWorkerStore(SQLBaseStore):
unique=True,
)
self.db_pool.updates.register_background_update_handler(
"populate_full_user_id_user_filters",
self.populate_full_user_id_user_filters,
)
async def populate_full_user_id_user_filters(
self, progress: JsonDict, batch_size: int
) -> int:
"""
Background update to populate the column `full_user_id` of the table
user_filters from entries in the column `user_local_part` of the same table
"""
lower_bound_id = progress.get("lower_bound_id", "")
def _get_last_id(txn: LoggingTransaction) -> Optional[str]:
sql = """
SELECT user_id FROM user_filters
WHERE user_id > ?
ORDER BY user_id
LIMIT 1 OFFSET 50
"""
txn.execute(sql, (lower_bound_id,))
res = txn.fetchone()
if res:
upper_bound_id = res[0]
return upper_bound_id
else:
return None
def _process_batch(
txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str
) -> None:
sql = """
UPDATE user_filters
SET full_user_id = '@' || user_id || ?
WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL
"""
txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id))
def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None:
sql = """
UPDATE user_filters
SET full_user_id = '@' || user_id || ?
WHERE ? < user_id AND full_user_id IS NULL
"""
txn.execute(
sql,
(
f":{self.server_name}",
lower_bound_id,
),
)
if isinstance(self.database_engine, PostgresEngine):
sql = """
ALTER TABLE user_filters VALIDATE CONSTRAINT full_user_id_not_null
"""
txn.execute(sql)
upper_bound_id = await self.db_pool.runInteraction(
"populate_full_user_id_user_filters", _get_last_id
)
if upper_bound_id is None:
await self.db_pool.runInteraction(
"populate_full_user_id_user_filters", _final_batch, lower_bound_id
)
await self.db_pool.updates._end_background_update(
"populate_full_user_id_user_filters"
)
return 1
await self.db_pool.runInteraction(
"populate_full_user_id_user_filters",
_process_batch,
lower_bound_id,
upper_bound_id,
)
progress["lower_bound_id"] = upper_bound_id
await self.db_pool.runInteraction(
"populate_full_user_id_user_filters",
self.db_pool.updates._background_update_progress_txn,
"populate_full_user_id_user_filters",
progress,
)
return 50
@cached(num_args=2)
async def get_user_filter(
self, user_localpart: str, filter_id: Union[int, str]

@ -15,9 +15,14 @@ from typing import TYPE_CHECKING, Optional
from synapse.api.errors import StoreError
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
LoggingTransaction,
)
from synapse.storage.databases.main.roommember import ProfileInfo
from synapse.types import UserID
from synapse.storage.engines import PostgresEngine
from synapse.types import JsonDict, UserID
if TYPE_CHECKING:
from synapse.server import HomeServer
@ -31,6 +36,8 @@ class ProfileWorkerStore(SQLBaseStore):
hs: "HomeServer",
):
super().__init__(database, db_conn, hs)
self.server_name: str = hs.hostname
self.database_engine = database.engine
self.db_pool.updates.register_background_index_update(
"profiles_full_user_id_key_idx",
index_name="profiles_full_user_id_key",
@ -39,6 +46,97 @@ class ProfileWorkerStore(SQLBaseStore):
unique=True,
)
self.db_pool.updates.register_background_update_handler(
"populate_full_user_id_profiles", self.populate_full_user_id_profiles
)
async def populate_full_user_id_profiles(
self, progress: JsonDict, batch_size: int
) -> int:
"""
Background update to populate the column `full_user_id` of the table
profiles from entries in the column `user_local_part` of the same table
"""
lower_bound_id = progress.get("lower_bound_id", "")
def _get_last_id(txn: LoggingTransaction) -> Optional[str]:
sql = """
SELECT user_id FROM profiles
WHERE user_id > ?
ORDER BY user_id
LIMIT 1 OFFSET 50
"""
txn.execute(sql, (lower_bound_id,))
res = txn.fetchone()
if res:
upper_bound_id = res[0]
return upper_bound_id
else:
return None
def _process_batch(
txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str
) -> None:
sql = """
UPDATE profiles
SET full_user_id = '@' || user_id || ?
WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL
"""
txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id))
def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None:
sql = """
UPDATE profiles
SET full_user_id = '@' || user_id || ?
WHERE ? < user_id AND full_user_id IS NULL
"""
txn.execute(
sql,
(
f":{self.server_name}",
lower_bound_id,
),
)
if isinstance(self.database_engine, PostgresEngine):
sql = """
ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null
"""
txn.execute(sql)
upper_bound_id = await self.db_pool.runInteraction(
"populate_full_user_id_profiles", _get_last_id
)
if upper_bound_id is None:
await self.db_pool.runInteraction(
"populate_full_user_id_profiles", _final_batch, lower_bound_id
)
await self.db_pool.updates._end_background_update(
"populate_full_user_id_profiles"
)
return 1
await self.db_pool.runInteraction(
"populate_full_user_id_profiles",
_process_batch,
lower_bound_id,
upper_bound_id,
)
progress["lower_bound_id"] = upper_bound_id
await self.db_pool.runInteraction(
"populate_full_user_id_profiles",
self.db_pool.updates._background_update_progress_txn,
"populate_full_user_id_profiles",
progress,
)
return 50
async def get_profileinfo(self, user_localpart: str) -> ProfileInfo:
try:
profile = await self.db_pool.simple_select_one(

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
SCHEMA_VERSION = 76 # remember to update the list below when updating
SCHEMA_VERSION = 77 # remember to update the list below when updating
"""Represents the expectations made by the codebase about the database schema
This should be incremented whenever the codebase changes its requirements on the
@ -100,13 +100,19 @@ Changes in SCHEMA_VERSION = 75:
Changes in SCHEMA_VERSION = 76:
- Adds a full_user_id column to tables profiles and user_filters.
Changes in SCHEMA_VERSION = 77
- (Postgres) Add NOT VALID CHECK (full_user_id IS NOT NULL) to tables profiles and user_filters
"""
SCHEMA_COMPAT_VERSION = (
# Queries against `event_stream_ordering` columns in membership tables must
# be disambiguated.
74
#
# insertions to the column `full_user_id` of tables profiles and user_filters can no
# longer be null
76
)
"""Limit on how far the synapse codebase can be rolled back without breaking db compat

@ -0,0 +1,16 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID;

@ -0,0 +1,16 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID;

@ -0,0 +1,16 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7703, 'populate_full_user_id_profiles', '{}');

@ -0,0 +1,16 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7704, 'populate_full_user_id_user_filters', '{}');

@ -14,6 +14,8 @@
from twisted.test.proto_helpers import MemoryReactor
from synapse.server import HomeServer
from synapse.storage.database import LoggingTransaction
from synapse.storage.engines import PostgresEngine
from synapse.types import UserID
from synapse.util import Clock
@ -69,3 +71,64 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
self.assertIsNone(
self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart))
)
def test_profiles_bg_migration(self) -> None:
"""
Test background job that copies entries from column user_id to full_user_id, adding
the hostname in the process.
"""
updater = self.hs.get_datastores().main.db_pool.updates
# drop the constraint so we can insert nulls in full_user_id to populate the test
if isinstance(self.store.database_engine, PostgresEngine):
def f(txn: LoggingTransaction) -> None:
txn.execute(
"ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null"
)
self.get_success(self.store.db_pool.runInteraction("", f))
for i in range(0, 70):
self.get_success(
self.store.db_pool.simple_insert(
"profiles",
{"user_id": f"hello{i:02}"},
)
)
# re-add the constraint so that when it's validated it actually exists
if isinstance(self.store.database_engine, PostgresEngine):
def f(txn: LoggingTransaction) -> None:
txn.execute(
"ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
)
self.get_success(self.store.db_pool.runInteraction("", f))
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
values={
"update_name": "populate_full_user_id_profiles",
"progress_json": "{}",
},
)
)
self.get_success(
updater.run_background_updates(False),
)
expected_values = []
for i in range(0, 70):
expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
res = self.get_success(
self.store.db_pool.execute(
"", None, "SELECT full_user_id from profiles ORDER BY full_user_id"
)
)
self.assertEqual(len(res), len(expected_values))
self.assertEqual(res, expected_values)

@ -0,0 +1,94 @@
# Copyright 2023 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.test.proto_helpers import MemoryReactor
from synapse.server import HomeServer
from synapse.storage.database import LoggingTransaction
from synapse.storage.engines import PostgresEngine
from synapse.util import Clock
from tests import unittest
class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
"""
Test background migration that copies entries from column user_id to full_user_id, adding
the hostname in the process.
"""
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.store = hs.get_datastores().main
def test_bg_migration(self) -> None:
updater = self.hs.get_datastores().main.db_pool.updates
# drop the constraint so we can insert nulls in full_user_id to populate the test
if isinstance(self.store.database_engine, PostgresEngine):
def f(txn: LoggingTransaction) -> None:
txn.execute(
"ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null"
)
self.get_success(self.store.db_pool.runInteraction("", f))
for i in range(0, 70):
self.get_success(
self.store.db_pool.simple_insert(
"user_filters",
{
"user_id": f"hello{i:02}",
"filter_id": i,
"filter_json": bytearray(i),
},
)
)
# re-add the constraint so that when it's validated it actually exists
if isinstance(self.store.database_engine, PostgresEngine):
def f(txn: LoggingTransaction) -> None:
txn.execute(
"ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
)
self.get_success(self.store.db_pool.runInteraction("", f))
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
values={
"update_name": "populate_full_user_id_user_filters",
"progress_json": "{}",
},
)
)
self.get_success(
updater.run_background_updates(False),
)
expected_values = []
for i in range(0, 70):
expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
res = self.get_success(
self.store.db_pool.execute(
"", None, "SELECT full_user_id from user_filters ORDER BY full_user_id"
)
)
self.assertEqual(len(res), len(expected_values))
self.assertEqual(res, expected_values)
Loading…
Cancel
Save