diff --git a/README.rst b/README.rst index e1fa4d75f..5eebe5b72 100644 --- a/README.rst +++ b/README.rst @@ -96,6 +96,11 @@ Installing prerequisites on Ubuntu or Debian:: $ sudo apt-get install build-essential python2.7-dev libffi-dev \ python-pip python-setuptools sqlite3 \ libssl-dev python-virtualenv libjpeg-dev + +Installing prerequisites on ArchLinux:: + + $ sudo pacman -S base-devel python2 python-pip \ + python-setuptools python-virtualenv sqlite3 Installing prerequisites on Mac OS X:: @@ -148,6 +153,39 @@ failing, e.g.:: On OSX, if you encounter clang: error: unknown argument: '-mno-fused-madd' you will need to export CFLAGS=-Qunused-arguments. +ArchLinux +--------- + +Installation on ArchLinux may encounter a few hiccups as Arch defaults to +python 3, but synapse currently assumes python 2.7 by default. + +pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ):: + + $ sudo pip2.7 install --upgrade pip + +You also may need to explicitly specify python 2.7 again during the install +request:: + + $ pip2.7 install --process-dependency-links \ + https://github.com/matrix-org/synapse/tarball/master + +If you encounter an error with lib bcrypt causing an Wrong ELF Class: +ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly +compile it under the right architecture. (This should not be needed if +installing under virtualenv):: + + $ sudo pip2.7 uninstall py-bcrypt + $ sudo pip2.7 install py-bcrypt + +During setup of homeserver you need to call python2.7 directly again:: + + $ python2.7 -m synapse.app.homeserver \ + --server-name machine.my.domain.name \ + --config-path homeserver.yaml \ + --generate-config + +...substituting your host and domain name as appropriate. + Windows Install --------------- Synapse can be installed on Cygwin. It requires the following Cygwin packages: @@ -207,6 +245,14 @@ fix try re-installing from PyPI or directly from $ # Install from github $ pip install --user https://github.com/pyca/pynacl/tarball/master +ArchLinux +--------- + +If running `$ synctl start` fails wit 'returned non-zero exit status 1', you will need to explicitly call Python2.7 - either running as:: + + $ python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml --pid-file homeserver.pid + +...or by editing synctl with the correct python executable. Homeserver Development ====================== diff --git a/scripts/make_identicons.pl b/scripts/make_identicons.pl new file mode 100755 index 000000000..cbff63e29 --- /dev/null +++ b/scripts/make_identicons.pl @@ -0,0 +1,39 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use DBI; +use DBD::SQLite; +use JSON; +use Getopt::Long; + +my $db; # = "homeserver.db"; +my $server = "http://localhost:8008"; +my $size = 320; + +GetOptions("db|d=s", \$db, + "server|s=s", \$server, + "width|w=i", \$size) or usage(); + +usage() unless $db; + +my $dbh = DBI->connect("dbi:SQLite:dbname=$db","","") || die $DBI::errstr; + +my $res = $dbh->selectall_arrayref("select token, name from access_tokens, users where access_tokens.user_id = users.id group by user_id") || die $DBI::errstr; + +foreach (@$res) { + my ($token, $mxid) = ($_->[0], $_->[1]); + my ($user_id) = ($mxid =~ m/@(.*):/); + my ($url) = $dbh->selectrow_array("select avatar_url from profiles where user_id=?", undef, $user_id); + if (!$url || $url =~ /#auto$/) { + `curl -s -o tmp.png "$server/_matrix/media/v1/identicon?name=${mxid}&width=$size&height=$size"`; + my $json = `curl -s -X POST -H "Content-Type: image/png" -T "tmp.png" $server/_matrix/media/v1/upload?access_token=$token`; + my $content_uri = from_json($json)->{content_uri}; + `curl -X PUT -H "Content-Type: application/json" --data '{ "avatar_url": "${content_uri}#auto"}' $server/_matrix/client/api/v1/profile/${mxid}/avatar_url?access_token=$token`; + } +} + +sub usage { + die "usage: ./make-identicons.pl\n\t-d database [e.g. homeserver.db]\n\t-s homeserver (default: http://localhost:8008)\n\t-w identicon size in pixels (default 320)"; +} \ No newline at end of file diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 7105ee21d..005474536 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -89,12 +89,19 @@ class Auth(object): raise @defer.inlineCallbacks - def check_joined_room(self, room_id, user_id): - member = yield self.state.get_current_state( - room_id=room_id, - event_type=EventTypes.Member, - state_key=user_id - ) + def check_joined_room(self, room_id, user_id, current_state=None): + if current_state: + member = current_state.get( + (EventTypes.Member, user_id), + None + ) + else: + member = yield self.state.get_current_state( + room_id=room_id, + event_type=EventTypes.Member, + state_key=user_id + ) + self._check_joined_room(member, user_id, room_id) defer.returnValue(member) @@ -102,7 +109,7 @@ class Auth(object): def check_host_in_room(self, room_id, host): curr_state = yield self.state.get_current_state(room_id) - for event in curr_state: + for event in curr_state.values(): if event.type == EventTypes.Member: try: if UserID.from_string(event.state_key).domain != host: diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3a128af5f..2b17cae54 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -67,7 +67,7 @@ class SynapseHomeServer(HomeServer): return ClientV2AlphaRestResource(self) def build_resource_for_federation(self): - return JsonResource() + return JsonResource(self) def build_resource_for_web_client(self): syweb_path = os.path.dirname(syweb.__file__) @@ -275,6 +275,7 @@ def setup(): hs.get_pusherpool().start() hs.get_state_handler().start_caching() + hs.get_datastore().start_profiling() if config.daemonize: print config.pid_file diff --git a/synapse/config/_base.py b/synapse/config/_base.py index dfc115d8e..9b0f8c3c3 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -50,8 +50,9 @@ class Config(object): ) return cls.abspath(file_path) - @staticmethod - def ensure_directory(dir_path): + @classmethod + def ensure_directory(cls, dir_path): + dir_path = cls.abspath(dir_path) if not os.path.exists(dir_path): os.makedirs(dir_path) if not os.path.isdir(dir_path): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3f51f38f1..3355adefc 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -35,6 +35,7 @@ class MessageHandler(BaseHandler): def __init__(self, hs): super(MessageHandler, self).__init__(hs) self.hs = hs + self.state = hs.get_state_handler() self.clock = hs.get_clock() self.validator = EventValidator() @@ -225,7 +226,9 @@ class MessageHandler(BaseHandler): # TODO: This is duplicating logic from snapshot_all_rooms current_state = yield self.state_handler.get_current_state(room_id) now = self.clock.time_msec() - defer.returnValue([serialize_event(c, now) for c in current_state]) + defer.returnValue( + [serialize_event(c, now) for c in current_state.values()] + ) @defer.inlineCallbacks def snapshot_all_rooms(self, user_id=None, pagin_config=None, @@ -313,7 +316,7 @@ class MessageHandler(BaseHandler): ) d["state"] = [ serialize_event(c, time_now, as_client_event) - for c in current_state + for c in current_state.values() ] except: logger.exception("Failed to get snapshot") @@ -329,7 +332,14 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def room_initial_sync(self, user_id, room_id, pagin_config=None, feedback=False): - yield self.auth.check_joined_room(room_id, user_id) + current_state = yield self.state.get_current_state( + room_id=room_id, + ) + + yield self.auth.check_joined_room( + room_id, user_id, + current_state=current_state + ) # TODO(paul): I wish I was called with user objects not user_id # strings... @@ -337,13 +347,12 @@ class MessageHandler(BaseHandler): # TODO: These concurrently time_now = self.clock.time_msec() - state_tuples = yield self.state_handler.get_current_state(room_id) - state = [serialize_event(x, time_now) for x in state_tuples] + state = [ + serialize_event(x, time_now) + for x in current_state.values() + ] - member_event = (yield self.store.get_room_member( - user_id=user_id, - room_id=room_id - )) + member_event = current_state.get((EventTypes.Member, user_id,)) now_token = yield self.hs.get_event_sources().get_current_token() @@ -360,7 +369,10 @@ class MessageHandler(BaseHandler): start_token = now_token.copy_and_replace("room_key", token[0]) end_token = now_token.copy_and_replace("room_key", token[1]) - room_members = yield self.store.get_room_members(room_id) + room_members = [ + m for m in current_state.values() + if m.type == EventTypes.Member + ] presence_handler = self.hs.get_handlers().presence_handler presence = [] diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 66a89c10b..4f06c487b 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -99,6 +99,23 @@ class RegistrationHandler(BaseHandler): raise RegistrationError( 500, "Cannot generate user ID.") + # create a default avatar for the user + # XXX: ideally clients would explicitly specify one, but given they don't + # and we want consistent and pretty identicons for random users, we'll + # do it here. + try: + auth_user = UserID.from_string(user_id) + identicon_resource = self.hs.get_resource_for_media_repository().getChildWithDefault("identicon", None) + upload_resource = self.hs.get_resource_for_media_repository().getChildWithDefault("upload", None) + identicon_bytes = identicon_resource.generate_identicon(user_id, 320, 320) + content_uri = yield upload_resource.create_content( + "image/png", None, identicon_bytes, len(identicon_bytes), auth_user + ) + profile_handler = self.hs.get_handlers().profile_handler + profile_handler.set_avatar_url(auth_user, auth_user, ("%s#auto" % content_uri)) + except NotImplementedError: + pass # make tests pass without messing around creating default avatars + defer.returnValue((user_id, token)) @defer.inlineCallbacks diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 439164ae3..5af90cc5d 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -175,9 +175,10 @@ class SyncHandler(BaseHandler): room_id, sync_config, now_token, ) - current_state_events = yield self.state_handler.get_current_state( + current_state = yield self.state_handler.get_current_state( room_id ) + current_state_events = current_state.values() defer.returnValue(RoomSyncResult( room_id=room_id, @@ -347,9 +348,10 @@ class SyncHandler(BaseHandler): # TODO(mjark): This seems racy since this isn't being passed a # token to indicate what point in the stream this is - current_state_events = yield self.state_handler.get_current_state( + current_state = yield self.state_handler.get_current_state( room_id ) + current_state_events = current_state.values() state_at_previous_sync = yield self.get_state_at_previous_sync( room_id, since_token=since_token @@ -431,6 +433,7 @@ class SyncHandler(BaseHandler): joined = True if joined: - state_delta = yield self.state_handler.get_current_state(room_id) + res = yield self.state_handler.get_current_state(room_id) + state_delta = res.values() defer.returnValue(state_delta) diff --git a/synapse/http/server.py b/synapse/http/server.py index 0f6539e1b..6d084fa33 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -69,9 +69,10 @@ class JsonResource(HttpServer, resource.Resource): _PathEntry = collections.namedtuple("_PathEntry", ["pattern", "callback"]) - def __init__(self): + def __init__(self, hs): resource.Resource.__init__(self) + self.clock = hs.get_clock() self.path_regexs = {} def register_path(self, method, path_pattern, callback): @@ -111,6 +112,7 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ + code = None try: # Just say yes to OPTIONS. if request.method == "OPTIONS": @@ -130,6 +132,13 @@ class JsonResource(HttpServer, resource.Resource): urllib.unquote(u).decode("UTF-8") for u in m.groups() ] + logger.info( + "Received request: %s %s", + request.method, request.path + ) + + start = self.clock.time_msec() + code, response = yield path_entry.callback( request, *args @@ -145,9 +154,11 @@ class JsonResource(HttpServer, resource.Resource): logger.info("%s SynapseError: %s - %s", request, e.code, e.msg) else: logger.exception(e) + + code = e.code self._send_response( request, - e.code, + code, cs_exception(e), response_code_message=e.response_code_message ) @@ -158,6 +169,14 @@ class JsonResource(HttpServer, resource.Resource): 500, {"error": "Internal server error"} ) + finally: + code = str(code) if code else "-" + + end = self.clock.time_msec() + logger.info( + "Processed request: %dms %s %s %s", + end-start, code, request.method, request.path + ) def _send_response(self, request, code, response_json_object, response_code_message=None): diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 729371529..6f143a5df 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -22,7 +22,6 @@ import synapse.util.async import baserules import logging -import fnmatch import json import re @@ -130,26 +129,38 @@ class Pusher(object): defer.returnValue(Pusher.DEFAULT_ACTIONS) + @staticmethod + def _glob_to_regexp(glob): + r = re.escape(glob) + r = re.sub(r'\\\*', r'.*?', r) + r = re.sub(r'\\\?', r'.', r) + + # handle [abc], [a-z] and [!a-z] style ranges. + r = re.sub(r'\\\[(\\\!|)(.*)\\\]', + lambda x: ('[%s%s]' % (x.group(1) and '^' or '', + re.sub(r'\\\-', '-', x.group(2)))), r) + return r + def _event_fulfills_condition(self, ev, condition, display_name, room_member_count): if condition['kind'] == 'event_match': if 'pattern' not in condition: logger.warn("event_match condition with no pattern") return False - pat = condition['pattern'] - - if pat.strip("*?[]") == pat: - # no special glob characters so we assume the user means - # 'contains this string' rather than 'is this string' - pat = "*%s*" % (pat,) - + # XXX: optimisation: cache our pattern regexps + if condition['key'] == 'content.body': + r = r'\b%s\b' % self._glob_to_regexp(condition['pattern']) + else: + r = r'^%s$' % self._glob_to_regexp(condition['pattern']) val = _value_for_dotted_key(condition['key'], ev) if val is None: return False - return fnmatch.fnmatch(val.upper(), pat.upper()) + return re.search(r, val, flags=re.IGNORECASE) is not None + elif condition['kind'] == 'device': if 'profile_tag' not in condition: return True return condition['profile_tag'] == self.profile_tag + elif condition['kind'] == 'contains_display_name': # This is special because display names can be different # between rooms and so you can't really hard code it in a rule. @@ -159,9 +170,9 @@ class Pusher(object): return False if not display_name: return False - return fnmatch.fnmatch( - ev['content']['body'].upper(), "*%s*" % (display_name.upper(),) - ) + return re.search("\b%s\b" % re.escape(display_name), + ev['content']['body'], flags=re.IGNORECASE) is not None + elif condition['kind'] == 'room_member_count': if 'is' not in condition: return False diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 8d4b806da..37878f1e0 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -4,24 +4,24 @@ def list_with_base_rules(rawrules, user_name): ruleslist = [] # shove the server default rules for each kind onto the end of each - current_prio_class = 1 + current_prio_class = PRIORITY_CLASS_INVERSE_MAP.keys()[-1] for r in rawrules: - if r['priority_class'] > current_prio_class: - while current_prio_class < r['priority_class']: + if r['priority_class'] < current_prio_class: + while r['priority_class'] < current_prio_class: ruleslist.extend(make_base_rules( user_name, PRIORITY_CLASS_INVERSE_MAP[current_prio_class]) ) - current_prio_class += 1 + current_prio_class -= 1 ruleslist.append(r) - while current_prio_class <= PRIORITY_CLASS_INVERSE_MAP.keys()[-1]: + while current_prio_class > 0: ruleslist.extend(make_base_rules( user_name, PRIORITY_CLASS_INVERSE_MAP[current_prio_class]) ) - current_prio_class += 1 + current_prio_class -= 1 return ruleslist diff --git a/synapse/rest/client/v1/__init__.py b/synapse/rest/client/v1/__init__.py index d8d01cdd1..21876b348 100644 --- a/synapse/rest/client/v1/__init__.py +++ b/synapse/rest/client/v1/__init__.py @@ -25,7 +25,7 @@ class ClientV1RestResource(JsonResource): """A resource for version 1 of the matrix client API.""" def __init__(self, hs): - JsonResource.__init__(self) + JsonResource.__init__(self, hs) self.register_servlets(self, hs) @staticmethod diff --git a/synapse/rest/client/v2_alpha/__init__.py b/synapse/rest/client/v2_alpha/__init__.py index 8f611de3a..bca65f2a6 100644 --- a/synapse/rest/client/v2_alpha/__init__.py +++ b/synapse/rest/client/v2_alpha/__init__.py @@ -25,7 +25,7 @@ class ClientV2AlphaRestResource(JsonResource): """A resource for version 2 alpha of the matrix client API.""" def __init__(self, hs): - JsonResource.__init__(self) + JsonResource.__init__(self, hs) self.register_servlets(self, hs) @staticmethod diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 688e7376a..d44d5f129 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -82,7 +82,7 @@ class BaseMediaResource(Resource): raise SynapseError( 404, "Invalid media id token %r" % (request.postpath,), - Codes.UNKKOWN, + Codes.UNKNOWN, ) @staticmethod diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 61ed90f39..9ca4d884d 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -34,6 +34,7 @@ class MediaRepositoryResource(Resource): => POST /_matrix/media/v1/upload HTTP/1.1 Content-Type: + Content-Length: diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index b939a30e1..5b4278233 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -38,6 +38,35 @@ class UploadResource(BaseMediaResource): def render_OPTIONS(self, request): respond_with_json(request, 200, {}, send_cors=True) return NOT_DONE_YET + + @defer.inlineCallbacks + def create_content(self, media_type, upload_name, content, content_length, auth_user): + media_id = random_string(24) + + fname = self.filepaths.local_media_filepath(media_id) + self._makedirs(fname) + + # This shouldn't block for very long because the content will have + # already been uploaded at this point. + with open(fname, "wb") as f: + f.write(content) + + yield self.store.store_local_media( + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=content_length, + user_id=auth_user, + ) + media_info = { + "media_type": media_type, + "media_length": content_length, + } + + yield self._generate_local_thumbnails(media_id, media_info) + + defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @defer.inlineCallbacks def _async_render_POST(self, request): @@ -70,32 +99,10 @@ class UploadResource(BaseMediaResource): # disposition = headers.getRawHeaders("Content-Disposition")[0] # TODO(markjh): parse content-dispostion - media_id = random_string(24) - - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(request.content.read()) - - yield self.store.store_local_media( - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=None, - media_length=content_length, - user_id=auth_user, + content_uri = yield self.create_content( + media_type, None, request.content.read(), + content_length, auth_user ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } - - yield self._generate_local_thumbnails(media_id, media_info) - - content_uri = "mxc://%s/%s" % (self.server_name, media_id) respond_with_json( request, 200, {"content_uri": content_uri}, send_cors=True diff --git a/synapse/state.py b/synapse/state.py index 31f503a1e..64c58a393 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -114,7 +114,7 @@ class StateHandler(object): defer.returnValue(state.get((event_type, state_key))) return - defer.returnValue(state.values()) + defer.returnValue(state) @defer.inlineCallbacks def compute_event_context(self, event, old_state=None): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b350fd61f..5ddd41060 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -85,6 +85,52 @@ class SQLBaseStore(object): self._db_pool = hs.get_db_pool() self._clock = hs.get_clock() + self._previous_txn_total_time = 0 + self._current_txn_total_time = 0 + self._previous_loop_ts = 0 + self._txn_perf_counters = {} + self._previous_txn_perf_counters = {} + + def start_profiling(self): + self._previous_loop_ts = self._clock.time_msec() + + def loop(): + curr = self._current_txn_total_time + prev = self._previous_txn_total_time + self._previous_txn_total_time = curr + + time_now = self._clock.time_msec() + time_then = self._previous_loop_ts + self._previous_loop_ts = time_now + + ratio = (curr - prev)/(time_now - time_then) + + txn_counters = [] + for name, (count, cum_time) in self._txn_perf_counters.items(): + prev_count, prev_time = self._previous_txn_perf_counters.get( + name, (0,0) + ) + txn_counters.append(( + (cum_time - prev_time) / (time_now - time_then), + count - prev_count, + name + )) + + self._previous_txn_perf_counters = dict(self._txn_perf_counters) + + txn_counters.sort(reverse=True) + top_three_counters = ", ".join( + "%s(%d): %.3f%%" % (name, count, 100 * ratio) + for ratio, count, name in txn_counters[:3] + ) + + logger.info( + "Total database time: %.3f%% {%s}", + ratio * 100, top_three_counters + ) + + self._clock.looping_call(loop, 10000) + @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): """Wraps the .runInteraction() method on the underlying db_pool.""" @@ -94,7 +140,7 @@ class SQLBaseStore(object): with LoggingContext("runInteraction") as context: current_context.copy_to(context) start = time.time() * 1000 - txn_id = SQLBaseStore._TXN_ID + txn_id = self._TXN_ID # We don't really need these to be unique, so lets stop it from # growing really large. @@ -114,6 +160,14 @@ class SQLBaseStore(object): "[TXN END] {%s} %f", name, end - start ) + + self._current_txn_total_time += end - start + + count, cum_time = self._txn_perf_counters.get(desc, (0,0)) + count += 1 + cum_time += end - start + self._txn_perf_counters[desc] = (count, cum_time) + with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( inner_func, *args, **kwargs