Fix sqlite locking bugs present on sytest (#1543)

* Fix sqite locking bugs present on sytest

Comments do the explaining.

* Fix deadlock in sqlite mode

Caused by starting a writer whilst within a writer

* Only complain about invalid state deltas for non-overwrite events

* Do not re-process outlier unnecessarily
This commit is contained in:
Kegsay 2020-10-20 11:42:54 +01:00 committed by GitHub
parent 92982a402f
commit eb86e2b336
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 52 additions and 13 deletions

View File

@ -17,6 +17,7 @@
package input
import (
"bytes"
"context"
"fmt"
@ -26,6 +27,7 @@ import (
"github.com/matrix-org/dendrite/roomserver/state"
"github.com/matrix-org/dendrite/roomserver/types"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
"github.com/sirupsen/logrus"
)
@ -44,6 +46,28 @@ func (r *Inputer) processRoomEvent(
headered := input.Event
event := headered.Unwrap()
// if we have already got this event then do not process it again, if the input kind is an outlier.
// Outliers contain no extra information which may warrant a re-processing.
if input.Kind == api.KindOutlier {
evs, err := r.DB.EventsFromIDs(ctx, []string{event.EventID()})
if err == nil && len(evs) == 1 {
// check hash matches if we're on early room versions where the event ID was a random string
idFormat, err := headered.RoomVersion.EventIDFormat()
if err == nil {
switch idFormat {
case gomatrixserverlib.EventIDFormatV1:
if bytes.Equal(event.EventReference().EventSHA256, evs[0].EventReference().EventSHA256) {
util.GetLogger(ctx).WithField("event_id", event.EventID()).Infof("Already processed event; ignoring")
return event.EventID(), nil
}
default:
util.GetLogger(ctx).WithField("event_id", event.EventID()).Infof("Already processed event; ignoring")
return event.EventID(), nil
}
}
}
}
// Check that the event passes authentication checks and work out
// the numeric IDs for the auth events.
isRejected := false

View File

@ -233,7 +233,7 @@ func (u *latestEventsUpdater) latestState() error {
if err != nil {
return fmt.Errorf("roomState.DifferenceBetweenStateSnapshots: %w", err)
}
if len(u.removed) > len(u.added) {
if !u.stateAtEvent.Overwrite && len(u.removed) > len(u.added) {
// This really shouldn't happen.
// TODO: What is ultimately the best way to handle this situation?
logrus.Errorf(

View File

@ -70,16 +70,14 @@ func (u *LatestEventsUpdater) CurrentStateSnapshotNID() types.StateSnapshotNID {
return u.currentStateSnapshotNID
}
// StorePreviousEvents implements types.RoomRecentEventsUpdater
// StorePreviousEvents implements types.RoomRecentEventsUpdater - This must be called from a Writer
func (u *LatestEventsUpdater) StorePreviousEvents(eventNID types.EventNID, previousEventReferences []gomatrixserverlib.EventReference) error {
return u.d.Writer.Do(u.d.DB, u.txn, func(txn *sql.Tx) error {
for _, ref := range previousEventReferences {
if err := u.d.PrevEventsTable.InsertPreviousEvent(u.ctx, txn, ref.EventID, ref.EventSHA256, eventNID); err != nil {
if err := u.d.PrevEventsTable.InsertPreviousEvent(u.ctx, u.txn, ref.EventID, ref.EventSHA256, eventNID); err != nil {
return fmt.Errorf("u.d.PrevEventsTable.InsertPreviousEvent: %w", err)
}
}
return nil
})
}
// IsReferenced implements types.RoomRecentEventsUpdater

View File

@ -492,15 +492,32 @@ func (d *Database) StoreEvent(
if roomInfo == nil && len(prevEvents) > 0 {
return 0, types.StateAtEvent{}, nil, "", fmt.Errorf("expected room %q to exist", event.RoomID())
}
// Create an updater - NB: on sqlite this WILL create a txn as we are directly calling the shared DB form of
// GetLatestEventsForUpdate - not via the SQLiteDatabase form which has `nil` txns. This
// function only does SELECTs though so the created txn (at this point) is just a read txn like
// any other so this is fine. If we ever update GetLatestEventsForUpdate or NewLatestEventsUpdater
// to do writes however then this will need to go inside `Writer.Do`.
updater, err = d.GetLatestEventsForUpdate(ctx, *roomInfo)
if err != nil {
return 0, types.StateAtEvent{}, nil, "", fmt.Errorf("NewLatestEventsUpdater: %w", err)
}
// Ensure that we atomically store prev events AND commit them. If we don't wrap StorePreviousEvents
// and EndTransaction in a writer then it's possible for a new write txn to be made between the two
// function calls which will then fail with 'database is locked'. This new write txn would HAVE to be
// something like SetRoomAlias/RemoveRoomAlias as normal input events are already done sequentially due to
// SupportsConcurrentRoomInputs() == false on sqlite, though this does not apply to setting room aliases
// as they don't go via InputRoomEvents
err = d.Writer.Do(d.DB, updater.txn, func(txn *sql.Tx) error {
if err = updater.StorePreviousEvents(eventNID, prevEvents); err != nil {
return 0, types.StateAtEvent{}, nil, "", fmt.Errorf("updater.StorePreviousEvents: %w", err)
return fmt.Errorf("updater.StorePreviousEvents: %w", err)
}
succeeded := true
err = sqlutil.EndTransaction(updater, &succeeded)
return err
})
if err != nil {
return 0, types.StateAtEvent{}, nil, "", err
}
}
return roomNID, types.StateAtEvent{