From f570916a3eb4088500e966182dea82647a5acac2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 27 Mar 2019 07:40:32 +0000 Subject: [PATCH 1/3] Add parse_row method to replication stream class This will allow individual stream classes to override how a row is parsed. --- synapse/replication/tcp/client.py | 5 +++-- synapse/replication/tcp/protocol.py | 2 +- synapse/replication/tcp/streams/_base.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 150975608..206dc3b39 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -105,13 +105,14 @@ class ReplicationClientHandler(object): def on_rdata(self, stream_name, token, rows): """Called to handle a batch of replication data with a given stream token. - By default this just pokes the slave store. Can be overriden in subclasses to + By default this just pokes the slave store. Can be overridden in subclasses to handle more. Args: stream_name (str): name of the replication stream for this batch of rows token (int): stream token for this batch of rows - rows (list): a list of Stream.ROW_TYPE objects. + rows (list): a list of Stream.ROW_TYPE objects as returned by + Stream.parse_row. Returns: Deferred|None diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 02e5bf6cc..9daec2c99 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -605,7 +605,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): inbound_rdata_count.labels(stream_name).inc() try: - row = STREAMS_MAP[stream_name].ROW_TYPE(*cmd.row) + row = STREAMS_MAP[stream_name].parse_row(cmd.row) except Exception: logger.exception( "[%s] Failed to parse RDATA: %r %r", diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 18df89dee..25c3a2366 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -115,6 +115,21 @@ class Stream(object): ROW_TYPE = None # The type of the row _LIMITED = True # Whether the update function takes a limit + @classmethod + def parse_row(cls, row): + """Parse a row received over replication + + By default, assumes that the row data is an array object and passes its contents + to the constructor of the ROW_TYPE for this stream. + + Args: + row: row data from the incoming RDATA command, after json decoding + + Returns: + ROW_TYPE object for this stream + """ + return cls.ROW_TYPE(*row) + def __init__(self, hs): # The token from which we last asked for updates self.last_token = self.current_token() From 015b3622ebbea118baebc457227e355913a5702f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 27 Mar 2019 21:58:03 +0000 Subject: [PATCH 2/3] Skip building a ROW_TYPE when building updates We're about to turn it straight into a JSON object anyway so building a ROW_TYPE is a bit pointless, and reduces flexibility in the update_function. --- synapse/replication/tcp/streams/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 25c3a2366..13ab1bee0 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -112,7 +112,7 @@ class Stream(object): time it was called up until the point `advance_current_token` was called. """ NAME = None # The name of the stream - ROW_TYPE = None # The type of the row + ROW_TYPE = None # The type of the row. Used by the default impl of parse_row. _LIMITED = True # Whether the update function takes a limit @classmethod @@ -201,7 +201,7 @@ class Stream(object): from_token, current_token, ) - updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows] + updates = [(row[0], row[1:]) for row in rows] # check we didn't get more rows than the limit. # doing it like this allows the update_function to be a generator. From a65763a5d6058dcb953970a566bbe91fc8d43daf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 27 Mar 2019 22:04:01 +0000 Subject: [PATCH 3/3] changelog --- changelog.d/4954.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4954.misc diff --git a/changelog.d/4954.misc b/changelog.d/4954.misc new file mode 100644 index 000000000..91f145950 --- /dev/null +++ b/changelog.d/4954.misc @@ -0,0 +1 @@ +Refactor replication row generation/parsing.