From 1b3e398bea8129fa7ae6fe28fd3a395fcd427ad9 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 18 Jun 2021 13:15:52 +0200 Subject: [PATCH] Standardise the module interface (#10062) This PR adds a common configuration section for all modules (see docs). These modules are then loaded at startup by the homeserver. Modules register their hooks and web resources using the new `register_[...]_callbacks` and `register_web_resource` methods of the module API. --- UPGRADE.rst | 17 ++ changelog.d/10062.feature | 1 + changelog.d/10062.removal | 1 + docs/SUMMARY.md | 2 +- docs/modules.md | 258 ++++++++++++++++++ docs/sample_config.yaml | 29 +- docs/spam_checker.md | 4 + synapse/app/_base.py | 9 + synapse/app/generic_worker.py | 4 + synapse/app/homeserver.py | 4 + synapse/config/_base.pyi | 2 + synapse/config/homeserver.py | 5 +- synapse/config/modules.py | 49 ++++ synapse/config/spam_checker.py | 15 -- synapse/events/spamcheck.py | 306 +++++++++++++++------- synapse/handlers/register.py | 2 +- synapse/module_api/__init__.py | 30 ++- synapse/module_api/errors.py | 1 + synapse/server.py | 39 ++- synapse/util/module_loader.py | 35 +-- tests/handlers/test_register.py | 120 ++++++--- tests/handlers/test_user_directory.py | 21 +- tests/rest/media/v1/test_media_storage.py | 3 + 23 files changed, 769 insertions(+), 188 deletions(-) create mode 100644 changelog.d/10062.feature create mode 100644 changelog.d/10062.removal create mode 100644 docs/modules.md create mode 100644 synapse/config/modules.py diff --git a/UPGRADE.rst b/UPGRADE.rst index 9f61aad41..ee8b4fa60 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -85,6 +85,23 @@ for example: wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb +Upgrading to v1.37.0 +==================== + +Deprecation of the current spam checker interface +------------------------------------------------- + +The current spam checker interface is deprecated in favour of a new generic modules system. +Authors of spam checker modules can refer to `this documentation `_ +to update their modules. Synapse administrators can refer to `this documentation `_ +to update their configuration once the modules they are using have been updated. + +We plan to remove support for the current spam checker interface in August 2021. + +More module interfaces will be ported over to this new generic system in future versions +of Synapse. + + Upgrading to v1.34.0 ==================== diff --git a/changelog.d/10062.feature b/changelog.d/10062.feature new file mode 100644 index 000000000..97474f030 --- /dev/null +++ b/changelog.d/10062.feature @@ -0,0 +1 @@ +Standardised the module interface. diff --git a/changelog.d/10062.removal b/changelog.d/10062.removal new file mode 100644 index 000000000..7f0cbdae2 --- /dev/null +++ b/changelog.d/10062.removal @@ -0,0 +1 @@ +The current spam checker interface is deprecated in favour of a new generic modules system. See the [upgrade notes](https://github.com/matrix-org/synapse/blob/master/UPGRADE.rst#deprecation-of-the-current-spam-checker-interface) for more information on how to update to the new system. \ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 01ef4ff60..98969bdd2 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -35,7 +35,7 @@ - [URL Previews](url_previews.md) - [User Directory](user_directory.md) - [Message Retention Policies](message_retention_policies.md) - - [Pluggable Modules]() + - [Pluggable Modules](modules.md) - [Third Party Rules]() - [Spam Checker](spam_checker.md) - [Presence Router](presence_router_module.md) diff --git a/docs/modules.md b/docs/modules.md new file mode 100644 index 000000000..d64ec4493 --- /dev/null +++ b/docs/modules.md @@ -0,0 +1,258 @@ +# Modules + +Synapse supports extending its functionality by configuring external modules. + +## Using modules + +To use a module on Synapse, add it to the `modules` section of the configuration file: + +```yaml +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} +``` + +Each module is defined by a path to a Python class as well as a configuration. This +information for a given module should be available in the module's own documentation. + +**Note**: When using third-party modules, you effectively allow someone else to run +custom code on your Synapse homeserver. Server admins are encouraged to verify the +provenance of the modules they use on their homeserver and make sure the modules aren't +running malicious code on their instance. + +Also note that we are currently in the process of migrating module interfaces to this +system. While some interfaces might be compatible with it, others still require +configuring modules in another part of Synapse's configuration file. Currently, only the +spam checker interface is compatible with this new system. + +## Writing a module + +A module is a Python class that uses Synapse's module API to interact with the +homeserver. It can register callbacks that Synapse will call on specific operations, as +well as web resources to attach to Synapse's web server. + +When instantiated, a module is given its parsed configuration as well as an instance of +the `synapse.module_api.ModuleApi` class. The configuration is a dictionary, and is +either the output of the module's `parse_config` static method (see below), or the +configuration associated with the module in Synapse's configuration file. + +See the documentation for the `ModuleApi` class +[here](https://github.com/matrix-org/synapse/blob/master/synapse/module_api/__init__.py). + +### Handling the module's configuration + +A module can implement the following static method: + +```python +@staticmethod +def parse_config(config: dict) -> dict +``` + +This method is given a dictionary resulting from parsing the YAML configuration for the +module. It may modify it (for example by parsing durations expressed as strings (e.g. +"5d") into milliseconds, etc.), and return the modified dictionary. It may also verify +that the configuration is correct, and raise an instance of +`synapse.module_api.errors.ConfigError` if not. + +### Registering a web resource + +Modules can register web resources onto Synapse's web server using the following module +API method: + +```python +def ModuleApi.register_web_resource(path: str, resource: IResource) +``` + +The path is the full absolute path to register the resource at. For example, if you +register a resource for the path `/_synapse/client/my_super_module/say_hello`, Synapse +will serve it at `http(s)://[HS_URL]/_synapse/client/my_super_module/say_hello`. Note +that Synapse does not allow registering resources for several sub-paths in the `/_matrix` +namespace (such as anything under `/_matrix/client` for example). It is strongly +recommended that modules register their web resources under the `/_synapse/client` +namespace. + +The provided resource is a Python class that implements Twisted's [IResource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.IResource.html) +interface (such as [Resource](https://twistedmatrix.com/documents/current/api/twisted.web.resource.Resource.html)). + +Only one resource can be registered for a given path. If several modules attempt to +register a resource for the same path, the module that appears first in Synapse's +configuration file takes priority. + +Modules **must** register their web resources in their `__init__` method. + +### Registering a callback + +Modules can use Synapse's module API to register callbacks. Callbacks are functions that +Synapse will call when performing specific actions. Callbacks must be asynchronous, and +are split in categories. A single module may implement callbacks from multiple categories, +and is under no obligation to implement all callbacks from the categories it registers +callbacks for. + +#### Spam checker callbacks + +To register one of the callbacks described in this section, a module needs to use the +module API's `register_spam_checker_callbacks` method. The callback functions are passed +to `register_spam_checker_callbacks` as keyword arguments, with the callback name as the +argument name and the function as its value. This is demonstrated in the example below. + +The available spam checker callbacks are: + +```python +def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +``` + +Called when receiving an event from a client or via federation. The module can return +either a `bool` to indicate whether the event must be rejected because of spam, or a `str` +to indicate the event must be rejected because of spam and to give a rejection reason to +forward to clients. + +```python +def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool +``` + +Called when processing an invitation. The module must return a `bool` indicating whether +the inviter can invite the invitee to the given room. Both inviter and invitee are +represented by their Matrix user ID (i.e. `@alice:example.com`). + +```python +def user_may_create_room(user: str) -> bool +``` + +Called when processing a room creation request. The module must return a `bool` indicating +whether the given user (represented by their Matrix user ID) is allowed to create a room. + +```python +def user_may_create_room_alias(user: str, room_alias: "synapse.types.RoomAlias") -> bool +``` + +Called when trying to associate an alias with an existing room. The module must return a +`bool` indicating whether the given user (represented by their Matrix user ID) is allowed +to set the given alias. + +```python +def user_may_publish_room(user: str, room_id: str) -> bool +``` + +Called when trying to publish a room to the homeserver's public rooms directory. The +module must return a `bool` indicating whether the given user (represented by their +Matrix user ID) is allowed to publish the given room. + +```python +def check_username_for_spam(user_profile: Dict[str, str]) -> bool +``` + +Called when computing search results in the user directory. The module must return a +`bool` indicating whether the given user profile can appear in search results. The profile +is represented as a dictionary with the following keys: + +* `user_id`: The Matrix ID for this user. +* `display_name`: The user's display name. +* `avatar_url`: The `mxc://` URL to the user's avatar. + +The module is given a copy of the original dictionary, so modifying it from within the +module cannot modify a user's profile when included in user directory search results. + +```python +def check_registration_for_spam( + email_threepid: Optional[dict], + username: Optional[str], + request_info: Collection[Tuple[str, str]], + auth_provider_id: Optional[str] = None, +) -> "synapse.spam_checker_api.RegistrationBehaviour" +``` + +Called when registering a new user. The module must return a `RegistrationBehaviour` +indicating whether the registration can go through or must be denied, or whether the user +may be allowed to register but will be shadow banned. + +The arguments passed to this callback are: + +* `email_threepid`: The email address used for registering, if any. +* `username`: The username the user would like to register. Can be `None`, meaning that + Synapse will generate one later. +* `request_info`: A collection of tuples, which first item is a user agent, and which + second item is an IP address. These user agents and IP addresses are the ones that were + used during the registration process. +* `auth_provider_id`: The identifier of the SSO authentication provider, if any. + +```python +def check_media_file_for_spam( + file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", + file_info: "synapse.rest.media.v1._base.FileInfo" +) -> bool +``` + +Called when storing a local or remote file. The module must return a boolean indicating +whether the given file can be stored in the homeserver's media store. + +### Porting an existing module that uses the old interface + +In order to port a module that uses Synapse's old module interface, its author needs to: + +* ensure the module's callbacks are all asynchronous. +* register their callbacks using one or more of the `register_[...]_callbacks` methods + from the `ModuleApi` class in the module's `__init__` method (see [this section](#registering-a-web-resource) + for more info). + +Additionally, if the module is packaged with an additional web resource, the module +should register this resource in its `__init__` method using the `register_web_resource` +method from the `ModuleApi` class (see [this section](#registering-a-web-resource) for +more info). + +The module's author should also update any example in the module's configuration to only +use the new `modules` section in Synapse's configuration file (see [this section](#using-modules) +for more info). + +### Example + +The example below is a module that implements the spam checker callback +`user_may_create_room` to deny room creation to user `@evilguy:example.com`, and registers +a web resource to the path `/_synapse/client/demo/hello` that returns a JSON object. + +```python +import json + +from twisted.web.resource import Resource +from twisted.web.server import Request + +from synapse.module_api import ModuleApi + + +class DemoResource(Resource): + def __init__(self, config): + super(DemoResource, self).__init__() + self.config = config + + def render_GET(self, request: Request): + name = request.args.get(b"name")[0] + request.setHeader(b"Content-Type", b"application/json") + return json.dumps({"hello": name}) + + +class DemoModule: + def __init__(self, config: dict, api: ModuleApi): + self.config = config + self.api = api + + self.api.register_web_resource( + path="/_synapse/client/demo/hello", + resource=DemoResource(self.config), + ) + + self.api.register_spam_checker_callbacks( + user_may_create_room=self.user_may_create_room, + ) + + @staticmethod + def parse_config(config): + return config + + async def user_may_create_room(self, user: str) -> bool: + if user == "@evilguy:example.com": + return False + + return True +``` diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 307f8cd3c..19505c7fd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -31,6 +31,22 @@ # # [1] https://docs.ansible.com/ansible/latest/reference_appendices/YAMLSyntax.html + +## Modules ## + +# Server admins can expand Synapse's functionality with external modules. +# +# See https://matrix-org.github.io/synapse/develop/modules.html for more +# documentation on how to configure or create custom modules for Synapse. +# +modules: + # - module: my_super_module.MySuperClass + # config: + # do_thing: true + # - module: my_other_super_module.SomeClass + # config: {} + + ## Server ## # The public-facing domain of the server @@ -2491,19 +2507,6 @@ push: #group_unread_count_by_room: false -# Spam checkers are third-party modules that can block specific actions -# of local users, such as creating rooms and registering undesirable -# usernames, as well as remote users by redacting incoming events. -# -spam_checker: - #- module: "my_custom_project.SuperSpamChecker" - # config: - # example_option: 'things' - #- module: "some_other_project.BadEventStopper" - # config: - # example_stop_events_from: ['@bad:example.com'] - - ## Rooms ## # Controls whether locally-created rooms should be end-to-end encrypted by diff --git a/docs/spam_checker.md b/docs/spam_checker.md index 52947f605..c16914e61 100644 --- a/docs/spam_checker.md +++ b/docs/spam_checker.md @@ -1,3 +1,7 @@ +**Note: this page of the Synapse documentation is now deprecated. For up to date +documentation on setting up or writing a spam checker module, please see +[this page](https://matrix-org.github.io/synapse/develop/modules.html).** + # Handling spam in Synapse Synapse has support to customize spam checking behavior. It can plug into a diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 1dde9d717..00ab67e7e 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -35,6 +35,7 @@ from synapse.app import check_bind_error from synapse.app.phone_stats_home import start_phone_stats_home from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory +from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import PreserveLoggingContext from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats @@ -330,6 +331,14 @@ async def start(hs: "synapse.server.HomeServer"): # Start the tracer synapse.logging.opentracing.init_tracer(hs) # type: ignore[attr-defined] # noqa + # Instantiate the modules so they can register their web resources to the module API + # before we start the listeners. + module_api = hs.get_module_api() + for module, config in hs.config.modules.loaded_modules: + module(config=config, api=module_api) + + load_legacy_spam_checkers(hs) + # It is now safe to start your Synapse. hs.start_listening() hs.get_datastore().db_pool.start_profiling() diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 57c2fc2e8..8e648c6ee 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -354,6 +354,10 @@ class GenericWorkerServer(HomeServer): if name == "replication": resources[REPLICATION_PREFIX] = ReplicationRestResource(self) + # Attach additional resources registered by modules. + resources.update(self._module_web_resources) + self._module_web_resources_consumed = True + root_resource = create_resource_tree(resources, OptionsResource()) _base.listen_tcp( diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index fb16bceff..f31467bde 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -124,6 +124,10 @@ class SynapseHomeServer(HomeServer): ) resources[path] = resource + # Attach additional resources registered by modules. + resources.update(self._module_web_resources) + self._module_web_resources_consumed = True + # try to find something useful to redirect '/' to if WEB_CLIENT_PREFIX in resources: root_resource = RootOptionsRedirectResource(WEB_CLIENT_PREFIX) diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 4e7bfa8b3..844ecd470 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -16,6 +16,7 @@ from synapse.config import ( key, logger, metrics, + modules, oidc, password_auth_providers, push, @@ -85,6 +86,7 @@ class RootConfig: thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig tracer: tracer.TracerConfig redis: redis.RedisConfig + modules: modules.ModulesConfig config_classes: List = ... def __init__(self) -> None: ... diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 5ae0f55bc..1f42a5185 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -1,5 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,6 +29,7 @@ from .jwt import JWTConfig from .key import KeyConfig from .logger import LoggingConfig from .metrics import MetricsConfig +from .modules import ModulesConfig from .oidc import OIDCConfig from .password_auth_providers import PasswordAuthProviderConfig from .push import PushConfig @@ -56,6 +56,7 @@ from .workers import WorkerConfig class HomeServerConfig(RootConfig): config_classes = [ + ModulesConfig, ServerConfig, TlsConfig, FederationConfig, diff --git a/synapse/config/modules.py b/synapse/config/modules.py new file mode 100644 index 000000000..3209e1c49 --- /dev/null +++ b/synapse/config/modules.py @@ -0,0 +1,49 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Dict, List, Tuple + +from synapse.config._base import Config, ConfigError +from synapse.util.module_loader import load_module + + +class ModulesConfig(Config): + section = "modules" + + def read_config(self, config: dict, **kwargs): + self.loaded_modules: List[Tuple[Any, Dict]] = [] + + configured_modules = config.get("modules") or [] + for i, module in enumerate(configured_modules): + config_path = ("modules", "" % i) + if not isinstance(module, dict): + raise ConfigError("expected a mapping", config_path) + + self.loaded_modules.append(load_module(module, config_path)) + + def generate_config_section(self, **kwargs): + return """ + ## Modules ## + + # Server admins can expand Synapse's functionality with external modules. + # + # See https://matrix-org.github.io/synapse/develop/modules.html for more + # documentation on how to configure or create custom modules for Synapse. + # + modules: + # - module: my_super_module.MySuperClass + # config: + # do_thing: true + # - module: my_other_super_module.SomeClass + # config: {} + """ diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index 447ba3303..c24165eb8 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -42,18 +42,3 @@ class SpamCheckerConfig(Config): self.spam_checkers.append(load_module(spam_checker, config_path)) else: raise ConfigError("spam_checker syntax is incorrect") - - def generate_config_section(self, **kwargs): - return """\ - # Spam checkers are third-party modules that can block specific actions - # of local users, such as creating rooms and registering undesirable - # usernames, as well as remote users by redacting incoming events. - # - spam_checker: - #- module: "my_custom_project.SuperSpamChecker" - # config: - # example_option: 'things' - #- module: "some_other_project.BadEventStopper" - # config: - # example_stop_events_from: ['@bad:example.com'] - """ diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index d5fa19509..45ec96dfc 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -15,7 +15,18 @@ import inspect import logging -from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Collection, + Dict, + List, + Optional, + Tuple, + Union, +) from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper @@ -29,20 +40,186 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ + ["synapse.events.EventBase"], + Awaitable[Union[bool, str]], +] +USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] +USER_MAY_CREATE_ROOM_CALLBACK = Callable[[str], Awaitable[bool]] +USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[[str, RoomAlias], Awaitable[bool]] +USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]] +CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[Dict[str, str]], Awaitable[bool]] +LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ + [ + Optional[dict], + Optional[str], + Collection[Tuple[str, str]], + ], + Awaitable[RegistrationBehaviour], +] +CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ + [ + Optional[dict], + Optional[str], + Collection[Tuple[str, str]], + Optional[str], + ], + Awaitable[RegistrationBehaviour], +] +CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ + [ReadableFileWrapper, FileInfo], + Awaitable[bool], +] + + +def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): + """Wrapper that loads spam checkers configured using the old configuration, and + registers the spam checker hooks they implement. + """ + spam_checkers = [] # type: List[Any] + api = hs.get_module_api() + for module, config in hs.config.spam_checkers: + # Older spam checkers don't accept the `api` argument, so we + # try and detect support. + spam_args = inspect.getfullargspec(module) + if "api" in spam_args.args: + spam_checkers.append(module(config=config, api=api)) + else: + spam_checkers.append(module(config=config)) + + # The known spam checker hooks. If a spam checker module implements a method + # which name appears in this set, we'll want to register it. + spam_checker_methods = { + "check_event_for_spam", + "user_may_invite", + "user_may_create_room", + "user_may_create_room_alias", + "user_may_publish_room", + "check_username_for_spam", + "check_registration_for_spam", + "check_media_file_for_spam", + } + + for spam_checker in spam_checkers: + # Methods on legacy spam checkers might not be async, so we wrap them around a + # wrapper that will call maybe_awaitable on the result. + def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]: + # f might be None if the callback isn't implemented by the module. In this + # case we don't want to register a callback at all so we return None. + if f is None: + return None + + if f.__name__ == "check_registration_for_spam": + checker_args = inspect.signature(f) + if len(checker_args.parameters) == 3: + # Backwards compatibility; some modules might implement a hook that + # doesn't expect a 4th argument. In this case, wrap it in a function + # that gives it only 3 arguments and drops the auth_provider_id on + # the floor. + def wrapper( + email_threepid: Optional[dict], + username: Optional[str], + request_info: Collection[Tuple[str, str]], + auth_provider_id: Optional[str], + ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]: + # We've already made sure f is not None above, but mypy doesn't + # do well across function boundaries so we need to tell it f is + # definitely not None. + assert f is not None + + return f( + email_threepid, + username, + request_info, + ) + + f = wrapper + elif len(checker_args.parameters) != 4: + raise RuntimeError( + "Bad signature for callback check_registration_for_spam", + ) + + def run(*args, **kwargs): + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + return maybe_awaitable(f(*args, **kwargs)) + + return run + + # Register the hooks through the module API. + hooks = { + hook: async_wrapper(getattr(spam_checker, hook, None)) + for hook in spam_checker_methods + } + + api.register_spam_checker_callbacks(**hooks) + class SpamChecker: - def __init__(self, hs: "synapse.server.HomeServer"): - self.spam_checkers = [] # type: List[Any] - api = hs.get_module_api() - - for module, config in hs.config.spam_checkers: - # Older spam checkers don't accept the `api` argument, so we - # try and detect support. - spam_args = inspect.getfullargspec(module) - if "api" in spam_args.args: - self.spam_checkers.append(module(config=config, api=api)) - else: - self.spam_checkers.append(module(config=config)) + def __init__(self): + self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] + self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] + self._user_may_create_room_callbacks: List[USER_MAY_CREATE_ROOM_CALLBACK] = [] + self._user_may_create_room_alias_callbacks: List[ + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK + ] = [] + self._user_may_publish_room_callbacks: List[USER_MAY_PUBLISH_ROOM_CALLBACK] = [] + self._check_username_for_spam_callbacks: List[ + CHECK_USERNAME_FOR_SPAM_CALLBACK + ] = [] + self._check_registration_for_spam_callbacks: List[ + CHECK_REGISTRATION_FOR_SPAM_CALLBACK + ] = [] + self._check_media_file_for_spam_callbacks: List[ + CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK + ] = [] + + def register_callbacks( + self, + check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, + user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None, + user_may_create_room_alias: Optional[ + USER_MAY_CREATE_ROOM_ALIAS_CALLBACK + ] = None, + user_may_publish_room: Optional[USER_MAY_PUBLISH_ROOM_CALLBACK] = None, + check_username_for_spam: Optional[CHECK_USERNAME_FOR_SPAM_CALLBACK] = None, + check_registration_for_spam: Optional[ + CHECK_REGISTRATION_FOR_SPAM_CALLBACK + ] = None, + check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None, + ): + """Register callbacks from module for each hook.""" + if check_event_for_spam is not None: + self._check_event_for_spam_callbacks.append(check_event_for_spam) + + if user_may_invite is not None: + self._user_may_invite_callbacks.append(user_may_invite) + + if user_may_create_room is not None: + self._user_may_create_room_callbacks.append(user_may_create_room) + + if user_may_create_room_alias is not None: + self._user_may_create_room_alias_callbacks.append( + user_may_create_room_alias, + ) + + if user_may_publish_room is not None: + self._user_may_publish_room_callbacks.append(user_may_publish_room) + + if check_username_for_spam is not None: + self._check_username_for_spam_callbacks.append(check_username_for_spam) + + if check_registration_for_spam is not None: + self._check_registration_for_spam_callbacks.append( + check_registration_for_spam, + ) + + if check_media_file_for_spam is not None: + self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam) async def check_event_for_spam( self, event: "synapse.events.EventBase" @@ -60,9 +237,10 @@ class SpamChecker: True or a string if the event is spammy. If a string is returned it will be used as the error message returned to the user. """ - for spam_checker in self.spam_checkers: - if await maybe_awaitable(spam_checker.check_event_for_spam(event)): - return True + for callback in self._check_event_for_spam_callbacks: + res = await callback(event) # type: Union[bool, str] + if res: + return res return False @@ -81,15 +259,8 @@ class SpamChecker: Returns: True if the user may send an invite, otherwise False """ - for spam_checker in self.spam_checkers: - if ( - await maybe_awaitable( - spam_checker.user_may_invite( - inviter_userid, invitee_userid, room_id - ) - ) - is False - ): + for callback in self._user_may_invite_callbacks: + if await callback(inviter_userid, invitee_userid, room_id) is False: return False return True @@ -105,11 +276,8 @@ class SpamChecker: Returns: True if the user may create a room, otherwise False """ - for spam_checker in self.spam_checkers: - if ( - await maybe_awaitable(spam_checker.user_may_create_room(userid)) - is False - ): + for callback in self._user_may_create_room_callbacks: + if await callback(userid) is False: return False return True @@ -128,13 +296,8 @@ class SpamChecker: Returns: True if the user may create a room alias, otherwise False """ - for spam_checker in self.spam_checkers: - if ( - await maybe_awaitable( - spam_checker.user_may_create_room_alias(userid, room_alias) - ) - is False - ): + for callback in self._user_may_create_room_alias_callbacks: + if await callback(userid, room_alias) is False: return False return True @@ -151,13 +314,8 @@ class SpamChecker: Returns: True if the user may publish the room, otherwise False """ - for spam_checker in self.spam_checkers: - if ( - await maybe_awaitable( - spam_checker.user_may_publish_room(userid, room_id) - ) - is False - ): + for callback in self._user_may_publish_room_callbacks: + if await callback(userid, room_id) is False: return False return True @@ -177,15 +335,11 @@ class SpamChecker: Returns: True if the user is spammy. """ - for spam_checker in self.spam_checkers: - # For backwards compatibility, only run if the method exists on the - # spam checker - checker = getattr(spam_checker, "check_username_for_spam", None) - if checker: - # Make a copy of the user profile object to ensure the spam checker - # cannot modify it. - if await maybe_awaitable(checker(user_profile.copy())): - return True + for callback in self._check_username_for_spam_callbacks: + # Make a copy of the user profile object to ensure the spam checker cannot + # modify it. + if await callback(user_profile.copy()): + return True return False @@ -211,33 +365,13 @@ class SpamChecker: Enum for how the request should be handled """ - for spam_checker in self.spam_checkers: - # For backwards compatibility, only run if the method exists on the - # spam checker - checker = getattr(spam_checker, "check_registration_for_spam", None) - if checker: - # Provide auth_provider_id if the function supports it - checker_args = inspect.signature(checker) - if len(checker_args.parameters) == 4: - d = checker( - email_threepid, - username, - request_info, - auth_provider_id, - ) - elif len(checker_args.parameters) == 3: - d = checker(email_threepid, username, request_info) - else: - logger.error( - "Invalid signature for %s.check_registration_for_spam. Denying registration", - spam_checker.__module__, - ) - return RegistrationBehaviour.DENY - - behaviour = await maybe_awaitable(d) - assert isinstance(behaviour, RegistrationBehaviour) - if behaviour != RegistrationBehaviour.ALLOW: - return behaviour + for callback in self._check_registration_for_spam_callbacks: + behaviour = await ( + callback(email_threepid, username, request_info, auth_provider_id) + ) + assert isinstance(behaviour, RegistrationBehaviour) + if behaviour != RegistrationBehaviour.ALLOW: + return behaviour return RegistrationBehaviour.ALLOW @@ -275,13 +409,9 @@ class SpamChecker: allowed. """ - for spam_checker in self.spam_checkers: - # For backwards compatibility, only run if the method exists on the - # spam checker - checker = getattr(spam_checker, "check_media_file_for_spam", None) - if checker: - spam = await maybe_awaitable(checker(file_wrapper, file_info)) - if spam: - return True + for callback in self._check_media_file_for_spam_callbacks: + spam = await callback(file_wrapper, file_info) + if spam: + return True return False diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 4ceef3fab..ca1ed6a5c 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -195,7 +195,7 @@ class RegistrationHandler(BaseHandler): bind_emails: list of emails to bind to this account. by_admin: True if this registration is being made via the admin api, otherwise False. - user_agent_ips: Tuples of IP addresses and user-agents used + user_agent_ips: Tuples of user-agents and IP addresses used during the registration process. auth_provider_id: The SSO IdP the user used, if any. Returns: diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index cecdc96bf..58b255eb1 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -16,6 +16,7 @@ import logging from typing import TYPE_CHECKING, Any, Generator, Iterable, List, Optional, Tuple from twisted.internet import defer +from twisted.web.resource import IResource from synapse.events import EventBase from synapse.http.client import SimpleHttpClient @@ -42,7 +43,7 @@ class ModuleApi: can register new users etc if necessary. """ - def __init__(self, hs, auth_handler): + def __init__(self, hs: "HomeServer", auth_handler): self._hs = hs self._store = hs.get_datastore() @@ -56,6 +57,33 @@ class ModuleApi: self._http_client = hs.get_simple_http_client() # type: SimpleHttpClient self._public_room_list_manager = PublicRoomListManager(hs) + self._spam_checker = hs.get_spam_checker() + + ################################################################################# + # The following methods should only be called during the module's initialisation. + + @property + def register_spam_checker_callbacks(self): + """Registers callbacks for spam checking capabilities.""" + return self._spam_checker.register_callbacks + + def register_web_resource(self, path: str, resource: IResource): + """Registers a web resource to be served at the given path. + + This function should be called during initialisation of the module. + + If multiple modules register a resource for the same path, the module that + appears the highest in the configuration file takes priority. + + Args: + path: The path to register the resource for. + resource: The resource to attach to this path. + """ + self._hs.register_module_web_resource(path, resource) + + ######################################################################### + # The following methods can be called by the module at any point in time. + @property def http_client(self): """Allows making outbound HTTP requests to remote resources. diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index d24864c54..02bbb0be3 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -15,3 +15,4 @@ """Exception types which are exposed as part of the stable module API""" from synapse.api.errors import RedirectException, SynapseError # noqa: F401 +from synapse.config._base import ConfigError # noqa: F401 diff --git a/synapse/server.py b/synapse/server.py index e8dd2fa9f..2c27d2a7e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -1,6 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017-2018 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,6 +37,7 @@ import twisted.internet.tcp from twisted.internet import defer from twisted.mail.smtp import sendmail from twisted.web.iweb import IPolicyForHTTPS +from twisted.web.resource import IResource from synapse.api.auth import Auth from synapse.api.filtering import Filtering @@ -258,6 +257,38 @@ class HomeServer(metaclass=abc.ABCMeta): self.datastores = None # type: Optional[Databases] + self._module_web_resources: Dict[str, IResource] = {} + self._module_web_resources_consumed = False + + def register_module_web_resource(self, path: str, resource: IResource): + """Allows a module to register a web resource to be served at the given path. + + If multiple modules register a resource for the same path, the module that + appears the highest in the configuration file takes priority. + + Args: + path: The path to register the resource for. + resource: The resource to attach to this path. + + Raises: + SynapseError(500): A module tried to register a web resource after the HTTP + listeners have been started. + """ + if self._module_web_resources_consumed: + raise RuntimeError( + "Tried to register a web resource from a module after startup", + ) + + # Don't register a resource that's already been registered. + if path not in self._module_web_resources.keys(): + self._module_web_resources[path] = resource + else: + logger.warning( + "Module tried to register a web resource for path %s but another module" + " has already registered a resource for this path.", + path, + ) + def get_instance_id(self) -> str: """A unique ID for this synapse process instance. @@ -646,7 +677,7 @@ class HomeServer(metaclass=abc.ABCMeta): @cache_in_self def get_spam_checker(self) -> SpamChecker: - return SpamChecker(self) + return SpamChecker() @cache_in_self def get_third_party_event_rules(self) -> ThirdPartyEventRules: diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index cbfbd097f..5a638c6e9 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -51,21 +51,26 @@ def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]: # Load the module config. If None, pass an empty dictionary instead module_config = provider.get("config") or {} - try: - provider_config = provider_class.parse_config(module_config) - except jsonschema.ValidationError as e: - raise json_error_to_config_error(e, itertools.chain(config_path, ("config",))) - except ConfigError as e: - raise _wrap_config_error( - "Failed to parse config for module %r" % (modulename,), - prefix=itertools.chain(config_path, ("config",)), - e=e, - ) - except Exception as e: - raise ConfigError( - "Failed to parse config for module %r" % (modulename,), - path=itertools.chain(config_path, ("config",)), - ) from e + if hasattr(provider_class, "parse_config"): + try: + provider_config = provider_class.parse_config(module_config) + except jsonschema.ValidationError as e: + raise json_error_to_config_error( + e, itertools.chain(config_path, ("config",)) + ) + except ConfigError as e: + raise _wrap_config_error( + "Failed to parse config for module %r" % (modulename,), + prefix=itertools.chain(config_path, ("config",)), + e=e, + ) + except Exception as e: + raise ConfigError( + "Failed to parse config for module %r" % (modulename,), + path=itertools.chain(config_path, ("config",)), + ) from e + else: + provider_config = module_config return provider_class, provider_config diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index c51763f41..a9fd3036d 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -27,6 +27,58 @@ from tests.utils import mock_getRawHeaders from .. import unittest +class TestSpamChecker: + def __init__(self, config, api): + api.register_spam_checker_callbacks( + check_registration_for_spam=self.check_registration_for_spam, + ) + + @staticmethod + def parse_config(config): + return config + + async def check_registration_for_spam( + self, + email_threepid, + username, + request_info, + auth_provider_id, + ): + pass + + +class DenyAll(TestSpamChecker): + async def check_registration_for_spam( + self, + email_threepid, + username, + request_info, + auth_provider_id, + ): + return RegistrationBehaviour.DENY + + +class BanAll(TestSpamChecker): + async def check_registration_for_spam( + self, + email_threepid, + username, + request_info, + auth_provider_id, + ): + return RegistrationBehaviour.SHADOW_BAN + + +class BanBadIdPUser(TestSpamChecker): + async def check_registration_for_spam( + self, email_threepid, username, request_info, auth_provider_id=None + ): + # Reject any user coming from CAS and whose username contains profanity + if auth_provider_id == "cas" and "flimflob" in username: + return RegistrationBehaviour.DENY + return RegistrationBehaviour.ALLOW + + class RegistrationTestCase(unittest.HomeserverTestCase): """Tests the RegistrationHandler.""" @@ -42,6 +94,11 @@ class RegistrationTestCase(unittest.HomeserverTestCase): hs_config["limit_usage_by_mau"] = True hs = self.setup_test_homeserver(config=hs_config) + + module_api = hs.get_module_api() + for module, config in hs.config.modules.loaded_modules: + module(config=config, api=module_api) + return hs def prepare(self, reactor, clock, hs): @@ -465,34 +522,30 @@ class RegistrationTestCase(unittest.HomeserverTestCase): self.handler.register_user(localpart=invalid_user_id), SynapseError ) + @override_config( + { + "modules": [ + { + "module": TestSpamChecker.__module__ + ".DenyAll", + } + ] + } + ) def test_spam_checker_deny(self): """A spam checker can deny registration, which results in an error.""" - - class DenyAll: - def check_registration_for_spam( - self, email_threepid, username, request_info - ): - return RegistrationBehaviour.DENY - - # Configure a spam checker that denies all users. - spam_checker = self.hs.get_spam_checker() - spam_checker.spam_checkers = [DenyAll()] - self.get_failure(self.handler.register_user(localpart="user"), SynapseError) + @override_config( + { + "modules": [ + { + "module": TestSpamChecker.__module__ + ".BanAll", + } + ] + } + ) def test_spam_checker_shadow_ban(self): """A spam checker can choose to shadow-ban a user, which allows registration to succeed.""" - - class BanAll: - def check_registration_for_spam( - self, email_threepid, username, request_info - ): - return RegistrationBehaviour.SHADOW_BAN - - # Configure a spam checker that denies all users. - spam_checker = self.hs.get_spam_checker() - spam_checker.spam_checkers = [BanAll()] - user_id = self.get_success(self.handler.register_user(localpart="user")) # Get an access token. @@ -512,22 +565,17 @@ class RegistrationTestCase(unittest.HomeserverTestCase): self.assertTrue(requester.shadow_banned) + @override_config( + { + "modules": [ + { + "module": TestSpamChecker.__module__ + ".BanBadIdPUser", + } + ] + } + ) def test_spam_checker_receives_sso_type(self): """Test rejecting registration based on SSO type""" - - class BanBadIdPUser: - def check_registration_for_spam( - self, email_threepid, username, request_info, auth_provider_id=None - ): - # Reject any user coming from CAS and whose username contains profanity - if auth_provider_id == "cas" and "flimflob" in username: - return RegistrationBehaviour.DENY - return RegistrationBehaviour.ALLOW - - # Configure a spam checker that denies a certain user on a specific IdP - spam_checker = self.hs.get_spam_checker() - spam_checker.spam_checkers = [BanBadIdPUser()] - f = self.get_failure( self.handler.register_user(localpart="bobflimflob", auth_provider_id="cas"), SynapseError, diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index daac37abd..549876dc8 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -312,15 +312,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 1) + async def allow_all(user_profile): + # Allow all users. + return False + # Configure a spam checker that does not filter any users. spam_checker = self.hs.get_spam_checker() - - class AllowAll: - async def check_username_for_spam(self, user_profile): - # Allow all users. - return False - - spam_checker.spam_checkers = [AllowAll()] + spam_checker._check_username_for_spam_callbacks = [allow_all] # The results do not change: # We get one search result when searching for user2 by user1. @@ -328,12 +326,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(len(s["results"]), 1) # Configure a spam checker that filters all users. - class BlockAll: - async def check_username_for_spam(self, user_profile): - # All users are spammy. - return True + async def block_all(user_profile): + # All users are spammy. + return True - spam_checker.spam_checkers = [BlockAll()] + spam_checker._check_username_for_spam_callbacks = [block_all] # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 4a213d13d..95e707584 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -27,6 +27,7 @@ from PIL import Image as Image from twisted.internet import defer from twisted.internet.defer import Deferred +from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import make_deferred_yieldable from synapse.rest import admin from synapse.rest.client.v1 import login @@ -535,6 +536,8 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): self.download_resource = self.media_repo.children[b"download"] self.upload_resource = self.media_repo.children[b"upload"] + load_legacy_spam_checkers(hs) + def default_config(self): config = default_config("test")