State events filtering database api (#438)

This PR adds a gomatrixserverlib.Filter parameter to functions handling the syncapi_current_room_state table. It does not implement any filtering logic inside the syncapi IncrementalSync/CompleteSync functions, just the APIs for future use.

Default filters are provided as placeholders in IncrementalSync/CompleteSync, so behaviour should be unchanged (except the default 20 event limit)

SQL table will be changed. You can upgrade an existing database using:

```
ALTER TABLE syncapi_current_room_state  ADD COLUMN IF NOT EXISTS sender text;
UPDATE syncapi_current_room_state SET sender=(event_json::json->>'sender');
ALTER TABLE syncapi_current_room_state ALTER COLUMN sender SET NOT NULL;
ALTER TABLE syncapi_current_room_state  ADD COLUMN IF NOT EXISTS contains_url bool;
UPDATE syncapi_current_room_state SET contains_url=(event_json::json->>'content')::json->>'url' IS NOT NULL;
ALTER TABLE syncapi_current_room_state ALTER COLUMN contains_url SET NOT NULL;
```

Note: This depends on #436 (and includes all its commits). I'm not sure if Github will remove the duplicated commits once #436 is merged.
This commit is contained in:
Thibaut CHARLES 2019-08-07 12:12:09 +02:00 committed by Andrew Morgan
parent 94ea325c93
commit 76e4ebaf78
5 changed files with 134 additions and 21 deletions

View File

@ -44,7 +44,10 @@ func OnIncomingStateRequest(req *http.Request, db *storage.SyncServerDatasource,
// TODO(#287): Auth request and handle the case where the user has left (where // TODO(#287): Auth request and handle the case where the user has left (where
// we should return the state at the poin they left) // we should return the state at the poin they left)
stateEvents, err := db.GetStateEventsForRoom(req.Context(), roomID) stateFilterPart := gomatrixserverlib.DefaultFilterPart()
// TODO: stateFilterPart should not limit the number of state events (or only limits abusive number of events)
stateEvents, err := db.GetStateEventsForRoom(req.Context(), roomID, &stateFilterPart)
if err != nil { if err != nil {
return httputil.LogThenError(req, err) return httputil.LogThenError(req, err)
} }

View File

@ -17,6 +17,7 @@ package storage
import ( import (
"context" "context"
"database/sql" "database/sql"
"encoding/json"
"github.com/lib/pq" "github.com/lib/pq"
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
@ -32,6 +33,10 @@ CREATE TABLE IF NOT EXISTS syncapi_current_room_state (
event_id TEXT NOT NULL, event_id TEXT NOT NULL,
-- The state event type e.g 'm.room.member' -- The state event type e.g 'm.room.member'
type TEXT NOT NULL, type TEXT NOT NULL,
-- The 'sender' property of the event.
sender TEXT NOT NULL,
-- true if the event content contains a url key
contains_url BOOL NOT NULL,
-- The state_key value for this state event e.g '' -- The state_key value for this state event e.g ''
state_key TEXT NOT NULL, state_key TEXT NOT NULL,
-- The JSON for the event. Stored as TEXT because this should be valid UTF-8. -- The JSON for the event. Stored as TEXT because this should be valid UTF-8.
@ -46,16 +51,16 @@ CREATE TABLE IF NOT EXISTS syncapi_current_room_state (
CONSTRAINT syncapi_room_state_unique UNIQUE (room_id, type, state_key) CONSTRAINT syncapi_room_state_unique UNIQUE (room_id, type, state_key)
); );
-- for event deletion -- for event deletion
CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_state(event_id); CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_state(event_id, room_id, type, sender, contains_url);
-- for querying membership states of users -- for querying membership states of users
CREATE INDEX IF NOT EXISTS syncapi_membership_idx ON syncapi_current_room_state(type, state_key, membership) WHERE membership IS NOT NULL AND membership != 'leave'; CREATE INDEX IF NOT EXISTS syncapi_membership_idx ON syncapi_current_room_state(type, state_key, membership) WHERE membership IS NOT NULL AND membership != 'leave';
` `
const upsertRoomStateSQL = "" + const upsertRoomStateSQL = "" +
"INSERT INTO syncapi_current_room_state (room_id, event_id, type, state_key, event_json, membership, added_at)" + "INSERT INTO syncapi_current_room_state (room_id, event_id, type, sender, contains_url, state_key, event_json, membership, added_at)" +
" VALUES ($1, $2, $3, $4, $5, $6, $7)" + " VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)" +
" ON CONFLICT ON CONSTRAINT syncapi_room_state_unique" + " ON CONFLICT ON CONSTRAINT syncapi_room_state_unique" +
" DO UPDATE SET event_id = $2, event_json = $5, membership = $6, added_at = $7" " DO UPDATE SET event_id = $2, sender=$4, contains_url=$5, event_json = $7, membership = $8, added_at = $9"
const deleteRoomStateByEventIDSQL = "" + const deleteRoomStateByEventIDSQL = "" +
"DELETE FROM syncapi_current_room_state WHERE event_id = $1" "DELETE FROM syncapi_current_room_state WHERE event_id = $1"
@ -64,7 +69,13 @@ const selectRoomIDsWithMembershipSQL = "" +
"SELECT room_id FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1 AND membership = $2" "SELECT room_id FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1 AND membership = $2"
const selectCurrentStateSQL = "" + const selectCurrentStateSQL = "" +
"SELECT event_json FROM syncapi_current_room_state WHERE room_id = $1" "SELECT event_json FROM syncapi_current_room_state WHERE room_id = $1" +
" AND ( $2::text[] IS NULL OR sender = ANY($2) )" +
" AND ( $3::text[] IS NULL OR NOT(sender = ANY($3)) )" +
" AND ( $4::text[] IS NULL OR type LIKE ANY($4) )" +
" AND ( $5::text[] IS NULL OR NOT(type LIKE ANY($5)) )" +
" AND ( $6::bool IS NULL OR contains_url = $6 )" +
" LIMIT $7"
const selectJoinedUsersSQL = "" + const selectJoinedUsersSQL = "" +
"SELECT room_id, state_key FROM syncapi_current_room_state WHERE type = 'm.room.member' AND membership = 'join'" "SELECT room_id, state_key FROM syncapi_current_room_state WHERE type = 'm.room.member' AND membership = 'join'"
@ -166,9 +177,17 @@ func (s *currentRoomStateStatements) selectRoomIDsWithMembership(
// CurrentState returns all the current state events for the given room. // CurrentState returns all the current state events for the given room.
func (s *currentRoomStateStatements) selectCurrentState( func (s *currentRoomStateStatements) selectCurrentState(
ctx context.Context, txn *sql.Tx, roomID string, ctx context.Context, txn *sql.Tx, roomID string,
stateFilterPart *gomatrixserverlib.FilterPart,
) ([]gomatrixserverlib.Event, error) { ) ([]gomatrixserverlib.Event, error) {
stmt := common.TxStmt(txn, s.selectCurrentStateStmt) stmt := common.TxStmt(txn, s.selectCurrentStateStmt)
rows, err := stmt.QueryContext(ctx, roomID) rows, err := stmt.QueryContext(ctx, roomID,
pq.StringArray(stateFilterPart.Senders),
pq.StringArray(stateFilterPart.NotSenders),
pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.Types)),
pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.NotTypes)),
stateFilterPart.ContainsURL,
stateFilterPart.Limit,
)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -189,12 +208,23 @@ func (s *currentRoomStateStatements) upsertRoomState(
ctx context.Context, txn *sql.Tx, ctx context.Context, txn *sql.Tx,
event gomatrixserverlib.Event, membership *string, addedAt int64, event gomatrixserverlib.Event, membership *string, addedAt int64,
) error { ) error {
// Parse content as JSON and search for an "url" key
containsURL := false
var content map[string]interface{}
if json.Unmarshal(event.Content(), &content) != nil {
// Set containsURL to true if url is present
_, containsURL = content["url"]
}
// upsert state event
stmt := common.TxStmt(txn, s.upsertRoomStateStmt) stmt := common.TxStmt(txn, s.upsertRoomStateStmt)
_, err := stmt.ExecContext( _, err := stmt.ExecContext(
ctx, ctx,
event.RoomID(), event.RoomID(),
event.EventID(), event.EventID(),
event.Type(), event.Type(),
event.Sender(),
containsURL,
*event.StateKey(), *event.StateKey(),
event.JSON(), event.JSON(),
membership, membership,

View File

@ -0,0 +1,36 @@
// Copyright 2017 Thibaut CHARLES
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"strings"
)
// filterConvertWildcardToSQL converts wildcards as defined in
// https://matrix.org/docs/spec/client_server/r0.3.0.html#post-matrix-client-r0-user-userid-filter
// to SQL wildcards that can be used with LIKE()
func filterConvertTypeWildcardToSQL(values []string) []string {
if values == nil {
// Return nil instead of []string{} so IS NULL can work correctly when
// the return value is passed into SQL queries
return nil
}
ret := make([]string, len(values))
for i := range values {
ret[i] = strings.Replace(values[i], "*", "%", -1)
}
return ret
}

View File

@ -17,6 +17,7 @@ package storage
import ( import (
"context" "context"
"database/sql" "database/sql"
"encoding/json"
"sort" "sort"
"github.com/matrix-org/dendrite/roomserver/api" "github.com/matrix-org/dendrite/roomserver/api"
@ -43,6 +44,12 @@ CREATE TABLE IF NOT EXISTS syncapi_output_room_events (
room_id TEXT NOT NULL, room_id TEXT NOT NULL,
-- The JSON for the event. Stored as TEXT because this should be valid UTF-8. -- The JSON for the event. Stored as TEXT because this should be valid UTF-8.
event_json TEXT NOT NULL, event_json TEXT NOT NULL,
-- The event type e.g 'm.room.member'.
type TEXT NOT NULL,
-- The 'sender' property of the event.
sender TEXT NOT NULL,
-- true if the event content contains a url key.
contains_url BOOL NOT NULL,
-- A list of event IDs which represent a delta of added/removed room state. This can be NULL -- A list of event IDs which represent a delta of added/removed room state. This can be NULL
-- if there is no delta. -- if there is no delta.
add_state_ids TEXT[], add_state_ids TEXT[],
@ -56,8 +63,8 @@ CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_output_room_ev
const insertEventSQL = "" + const insertEventSQL = "" +
"INSERT INTO syncapi_output_room_events (" + "INSERT INTO syncapi_output_room_events (" +
" room_id, event_id, event_json, add_state_ids, remove_state_ids, device_id, transaction_id" + "room_id, event_id, event_json, type, sender, contains_url, add_state_ids, remove_state_ids, device_id, transaction_id" +
") VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING id" ") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING id"
const selectEventsSQL = "" + const selectEventsSQL = "" +
"SELECT id, event_json FROM syncapi_output_room_events WHERE event_id = ANY($1)" "SELECT id, event_json FROM syncapi_output_room_events WHERE event_id = ANY($1)"
@ -75,7 +82,13 @@ const selectStateInRangeSQL = "" +
"SELECT id, event_json, add_state_ids, remove_state_ids" + "SELECT id, event_json, add_state_ids, remove_state_ids" +
" FROM syncapi_output_room_events" + " FROM syncapi_output_room_events" +
" WHERE (id > $1 AND id <= $2) AND (add_state_ids IS NOT NULL OR remove_state_ids IS NOT NULL)" + " WHERE (id > $1 AND id <= $2) AND (add_state_ids IS NOT NULL OR remove_state_ids IS NOT NULL)" +
" ORDER BY id ASC" " AND ( $3::text[] IS NULL OR sender = ANY($3) )" +
" AND ( $4::text[] IS NULL OR NOT(sender = ANY($4)) )" +
" AND ( $5::text[] IS NULL OR type LIKE ANY($5) )" +
" AND ( $6::text[] IS NULL OR NOT(type LIKE ANY($6)) )" +
" AND ( $7::bool IS NULL OR contains_url = $7 )" +
" ORDER BY id ASC" +
" LIMIT $8"
type outputRoomEventsStatements struct { type outputRoomEventsStatements struct {
insertEventStmt *sql.Stmt insertEventStmt *sql.Stmt
@ -113,10 +126,19 @@ func (s *outputRoomEventsStatements) prepare(db *sql.DB) (err error) {
// two positions, only the most recent state is returned. // two positions, only the most recent state is returned.
func (s *outputRoomEventsStatements) selectStateInRange( func (s *outputRoomEventsStatements) selectStateInRange(
ctx context.Context, txn *sql.Tx, oldPos, newPos int64, ctx context.Context, txn *sql.Tx, oldPos, newPos int64,
stateFilterPart *gomatrixserverlib.FilterPart,
) (map[string]map[string]bool, map[string]streamEvent, error) { ) (map[string]map[string]bool, map[string]streamEvent, error) {
stmt := common.TxStmt(txn, s.selectStateInRangeStmt) stmt := common.TxStmt(txn, s.selectStateInRangeStmt)
rows, err := stmt.QueryContext(ctx, oldPos, newPos) rows, err := stmt.QueryContext(
ctx, oldPos, newPos,
pq.StringArray(stateFilterPart.Senders),
pq.StringArray(stateFilterPart.NotSenders),
pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.Types)),
pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.NotTypes)),
stateFilterPart.ContainsURL,
stateFilterPart.Limit,
)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -205,12 +227,23 @@ func (s *outputRoomEventsStatements) insertEvent(
txnID = &transactionID.TransactionID txnID = &transactionID.TransactionID
} }
// Parse content as JSON and search for an "url" key
containsURL := false
var content map[string]interface{}
if json.Unmarshal(event.Content(), &content) != nil {
// Set containsURL to true if url is present
_, containsURL = content["url"]
}
stmt := common.TxStmt(txn, s.insertEventStmt) stmt := common.TxStmt(txn, s.insertEventStmt)
err = stmt.QueryRowContext( err = stmt.QueryRowContext(
ctx, ctx,
event.RoomID(), event.RoomID(),
event.EventID(), event.EventID(),
event.JSON(), event.JSON(),
event.Type(),
event.Sender(),
containsURL,
pq.StringArray(addState), pq.StringArray(addState),
pq.StringArray(removeState), pq.StringArray(removeState),
deviceID, deviceID,

View File

@ -185,10 +185,10 @@ func (d *SyncServerDatasource) GetStateEvent(
// Returns an empty slice if no state events could be found for this room. // Returns an empty slice if no state events could be found for this room.
// Returns an error if there was an issue with the retrieval. // Returns an error if there was an issue with the retrieval.
func (d *SyncServerDatasource) GetStateEventsForRoom( func (d *SyncServerDatasource) GetStateEventsForRoom(
ctx context.Context, roomID string, ctx context.Context, roomID string, stateFilterPart *gomatrixserverlib.FilterPart,
) (stateEvents []gomatrixserverlib.Event, err error) { ) (stateEvents []gomatrixserverlib.Event, err error) {
err = common.WithTransaction(d.db, func(txn *sql.Tx) error { err = common.WithTransaction(d.db, func(txn *sql.Tx) error {
stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID) stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID, stateFilterPart)
return err return err
}) })
return return
@ -245,6 +245,8 @@ func (d *SyncServerDatasource) addPDUDeltaToResponse(
var succeeded bool var succeeded bool
defer common.EndTransaction(txn, &succeeded) defer common.EndTransaction(txn, &succeeded)
stateFilterPart := gomatrixserverlib.DefaultFilterPart() // TODO: use filter provided in request
// Work out which rooms to return in the response. This is done by getting not only the currently // Work out which rooms to return in the response. This is done by getting not only the currently
// joined rooms, but also which rooms have membership transitions for this user between the 2 PDU stream positions. // joined rooms, but also which rooms have membership transitions for this user between the 2 PDU stream positions.
// This works out what the 'state' key should be for each room as well as which membership block // This works out what the 'state' key should be for each room as well as which membership block
@ -252,9 +254,13 @@ func (d *SyncServerDatasource) addPDUDeltaToResponse(
var deltas []stateDelta var deltas []stateDelta
var joinedRoomIDs []string var joinedRoomIDs []string
if !wantFullState { if !wantFullState {
deltas, joinedRoomIDs, err = d.getStateDeltas(ctx, &device, txn, fromPos, toPos, device.UserID) deltas, joinedRoomIDs, err = d.getStateDeltas(
ctx, &device, txn, fromPos, toPos, device.UserID, &stateFilterPart,
)
} else { } else {
deltas, joinedRoomIDs, err = d.getStateDeltasForFullStateSync(ctx, &device, txn, fromPos, toPos, device.UserID) deltas, joinedRoomIDs, err = d.getStateDeltasForFullStateSync(
ctx, &device, txn, fromPos, toPos, device.UserID, &stateFilterPart,
)
} }
if err != nil { if err != nil {
return nil, err return nil, err
@ -404,10 +410,12 @@ func (d *SyncServerDatasource) getResponseWithPDUsForCompleteSync(
return return
} }
stateFilterPart := gomatrixserverlib.DefaultFilterPart() // TODO: use filter provided in request
// Build up a /sync response. Add joined rooms. // Build up a /sync response. Add joined rooms.
for _, roomID := range joinedRoomIDs { for _, roomID := range joinedRoomIDs {
var stateEvents []gomatrixserverlib.Event var stateEvents []gomatrixserverlib.Event
stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID) stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID, &stateFilterPart)
if err != nil { if err != nil {
return return
} }
@ -733,6 +741,7 @@ func (d *SyncServerDatasource) fetchMissingStateEvents(
func (d *SyncServerDatasource) getStateDeltas( func (d *SyncServerDatasource) getStateDeltas(
ctx context.Context, device *authtypes.Device, txn *sql.Tx, ctx context.Context, device *authtypes.Device, txn *sql.Tx,
fromPos, toPos int64, userID string, fromPos, toPos int64, userID string,
stateFilterPart *gomatrixserverlib.FilterPart,
) ([]stateDelta, []string, error) { ) ([]stateDelta, []string, error) {
// Implement membership change algorithm: https://github.com/matrix-org/synapse/blob/v0.19.3/synapse/handlers/sync.py#L821 // Implement membership change algorithm: https://github.com/matrix-org/synapse/blob/v0.19.3/synapse/handlers/sync.py#L821
// - Get membership list changes for this user in this sync response // - Get membership list changes for this user in this sync response
@ -745,7 +754,7 @@ func (d *SyncServerDatasource) getStateDeltas(
var deltas []stateDelta var deltas []stateDelta
// get all the state events ever between these two positions // get all the state events ever between these two positions
stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos) stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos, stateFilterPart)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -765,7 +774,7 @@ func (d *SyncServerDatasource) getStateDeltas(
if membership == gomatrixserverlib.Join { if membership == gomatrixserverlib.Join {
// send full room state down instead of a delta // send full room state down instead of a delta
var s []streamEvent var s []streamEvent
s, err = d.currentStateStreamEventsForRoom(ctx, txn, roomID) s, err = d.currentStateStreamEventsForRoom(ctx, txn, roomID, stateFilterPart)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -807,6 +816,7 @@ func (d *SyncServerDatasource) getStateDeltas(
func (d *SyncServerDatasource) getStateDeltasForFullStateSync( func (d *SyncServerDatasource) getStateDeltasForFullStateSync(
ctx context.Context, device *authtypes.Device, txn *sql.Tx, ctx context.Context, device *authtypes.Device, txn *sql.Tx,
fromPos, toPos int64, userID string, fromPos, toPos int64, userID string,
stateFilterPart *gomatrixserverlib.FilterPart,
) ([]stateDelta, []string, error) { ) ([]stateDelta, []string, error) {
joinedRoomIDs, err := d.roomstate.selectRoomIDsWithMembership(ctx, txn, userID, gomatrixserverlib.Join) joinedRoomIDs, err := d.roomstate.selectRoomIDsWithMembership(ctx, txn, userID, gomatrixserverlib.Join)
if err != nil { if err != nil {
@ -818,7 +828,7 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync(
// Add full states for all joined rooms // Add full states for all joined rooms
for _, joinedRoomID := range joinedRoomIDs { for _, joinedRoomID := range joinedRoomIDs {
s, stateErr := d.currentStateStreamEventsForRoom(ctx, txn, joinedRoomID) s, stateErr := d.currentStateStreamEventsForRoom(ctx, txn, joinedRoomID, stateFilterPart)
if stateErr != nil { if stateErr != nil {
return nil, nil, stateErr return nil, nil, stateErr
} }
@ -830,7 +840,7 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync(
} }
// Get all the state events ever between these two positions // Get all the state events ever between these two positions
stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos) stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos, stateFilterPart)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -861,8 +871,9 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync(
func (d *SyncServerDatasource) currentStateStreamEventsForRoom( func (d *SyncServerDatasource) currentStateStreamEventsForRoom(
ctx context.Context, txn *sql.Tx, roomID string, ctx context.Context, txn *sql.Tx, roomID string,
stateFilterPart *gomatrixserverlib.FilterPart,
) ([]streamEvent, error) { ) ([]streamEvent, error) {
allState, err := d.roomstate.selectCurrentState(ctx, txn, roomID) allState, err := d.roomstate.selectCurrentState(ctx, txn, roomID, stateFilterPart)
if err != nil { if err != nil {
return nil, err return nil, err
} }