dendrite/federationapi/internal/api.go
Neil Alexander a763cbb0e1
Roomserver/federation input refactor (#2104)
* Put federation client functions into their own file

* Look for missing auth events in RS input

* Remove retrieveMissingAuthEvents from federation API

* Logging

* Sorta transplanted the code over

* Use event origin failing all else

* Don't get stuck on mutexes:

* Add verifier

* Don't mark state events with zero snapshot NID as not existing

* Check missing state if not an outlier before storing the event

* Reject instead of soft-fail, don't copy roominfo so much

* Use synchronous contexts, limit time to fetch missing events

* Clean up some commented out bits

* Simplify `/send` endpoint significantly

* Submit async

* Report errors on sending to RS input

* Set max payload in NATS to 16MB

* Tweak metrics

* Add `workerForRoom` for tidiness

* Try skipping unmarshalling errors for RespMissingEvents

* Track missing prev events separately to avoid calculating state when not possible

* Tweak logic around checking missing state

* Care about state when checking missing prev events

* Don't check missing state for create events

* Try that again

* Handle create events better

* Send create room events as new

* Use given event kind when sending auth/state events

* Revert "Use given event kind when sending auth/state events"

This reverts commit 089d64d271b5fca8c104e1554711187420dbebca.

* Only search for missing prev events or state for new events

* Tweaks

* We only have missing prev if we don't supply state

* Room version tweaks

* Allow async inputs again

* Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed

* Set timeouts on roomserver input tasks (need to decide what timeout makes sense)

* Use work queue policy, deliver all on restart

* Reduce chance of duplicates being sent by NATS

* Limit the number of servers we attempt to reduce backpressure

* Some review comment fixes

* Tidy up a couple things

* Don't limit servers, randomise order using map

* Some context refactoring

* Update gmsl

* Don't resend create events

* Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't

* Exclude our own servername

* Try backing off servers

* Make excluding self behaviour optional

* Exclude self from g_m_e

* Update sytest-whitelist

* Update consumers for the roomserver output stream

* Remember to send outliers for state returned from /gme

* Make full HTTP tests less upsetti

* Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist

* Remove debugging test

* Fix blacklist again, remove unnecessary duplicate context

* Clearer contexts, don't use background in case there's something happening there

* Don't queue up events more than once in memory

* Correctly identify create events when checking for state

* Fill in gaps again in /gme code

* Remove `AuthEventIDs` from `InputRoomEvent`

* Remove stray field

Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00

182 lines
4.9 KiB
Go

package internal
import (
"crypto/ed25519"
"encoding/base64"
"fmt"
"sync"
"time"
"github.com/matrix-org/dendrite/federationapi/api"
"github.com/matrix-org/dendrite/federationapi/queue"
"github.com/matrix-org/dendrite/federationapi/statistics"
"github.com/matrix-org/dendrite/federationapi/storage"
"github.com/matrix-org/dendrite/federationapi/storage/cache"
"github.com/matrix-org/dendrite/internal/caching"
roomserverAPI "github.com/matrix-org/dendrite/roomserver/api"
"github.com/matrix-org/dendrite/setup/config"
"github.com/matrix-org/gomatrix"
"github.com/matrix-org/gomatrixserverlib"
"github.com/sirupsen/logrus"
)
// FederationInternalAPI is an implementation of api.FederationInternalAPI
type FederationInternalAPI struct {
db storage.Database
cfg *config.FederationAPI
statistics *statistics.Statistics
rsAPI roomserverAPI.RoomserverInternalAPI
federation *gomatrixserverlib.FederationClient
keyRing *gomatrixserverlib.KeyRing
queues *queue.OutgoingQueues
joins sync.Map // joins currently in progress
}
func NewFederationInternalAPI(
db storage.Database, cfg *config.FederationAPI,
rsAPI roomserverAPI.RoomserverInternalAPI,
federation *gomatrixserverlib.FederationClient,
statistics *statistics.Statistics,
caches *caching.Caches,
queues *queue.OutgoingQueues,
keyRing *gomatrixserverlib.KeyRing,
) *FederationInternalAPI {
serverKeyDB, err := cache.NewKeyDatabase(db, caches)
if err != nil {
logrus.WithError(err).Panicf("failed to set up caching wrapper for server key database")
}
if keyRing == nil {
keyRing = &gomatrixserverlib.KeyRing{
KeyFetchers: []gomatrixserverlib.KeyFetcher{},
KeyDatabase: serverKeyDB,
}
addDirectFetcher := func() {
keyRing.KeyFetchers = append(
keyRing.KeyFetchers,
&gomatrixserverlib.DirectKeyFetcher{
Client: federation,
},
)
}
if cfg.PreferDirectFetch {
addDirectFetcher()
} else {
defer addDirectFetcher()
}
var b64e = base64.StdEncoding.WithPadding(base64.NoPadding)
for _, ps := range cfg.KeyPerspectives {
perspective := &gomatrixserverlib.PerspectiveKeyFetcher{
PerspectiveServerName: ps.ServerName,
PerspectiveServerKeys: map[gomatrixserverlib.KeyID]ed25519.PublicKey{},
Client: federation,
}
for _, key := range ps.Keys {
rawkey, err := b64e.DecodeString(key.PublicKey)
if err != nil {
logrus.WithError(err).WithFields(logrus.Fields{
"server_name": ps.ServerName,
"public_key": key.PublicKey,
}).Warn("Couldn't parse perspective key")
continue
}
perspective.PerspectiveServerKeys[key.KeyID] = rawkey
}
keyRing.KeyFetchers = append(keyRing.KeyFetchers, perspective)
logrus.WithFields(logrus.Fields{
"server_name": ps.ServerName,
"num_public_keys": len(ps.Keys),
}).Info("Enabled perspective key fetcher")
}
}
return &FederationInternalAPI{
db: db,
cfg: cfg,
rsAPI: rsAPI,
keyRing: keyRing,
federation: federation,
statistics: statistics,
queues: queues,
}
}
func (a *FederationInternalAPI) isBlacklistedOrBackingOff(s gomatrixserverlib.ServerName) (*statistics.ServerStatistics, error) {
stats := a.statistics.ForServer(s)
until, blacklisted := stats.BackoffInfo()
if blacklisted {
return stats, &api.FederationClientError{
Blacklisted: true,
}
}
now := time.Now()
if until != nil && now.Before(*until) {
return stats, &api.FederationClientError{
RetryAfter: time.Until(*until),
}
}
return stats, nil
}
func failBlacklistableError(err error, stats *statistics.ServerStatistics) (until time.Time, blacklisted bool) {
if err == nil {
return
}
mxerr, ok := err.(gomatrix.HTTPError)
if !ok {
return stats.Failure()
}
if mxerr.Code == 401 { // invalid signature in X-Matrix header
return stats.Failure()
}
if mxerr.Code >= 500 && mxerr.Code < 600 { // internal server errors
return stats.Failure()
}
return
}
func (a *FederationInternalAPI) doRequestIfNotBackingOffOrBlacklisted(
s gomatrixserverlib.ServerName, request func() (interface{}, error),
) (interface{}, error) {
stats, err := a.isBlacklistedOrBackingOff(s)
if err != nil {
return nil, err
}
res, err := request()
if err != nil {
until, blacklisted := failBlacklistableError(err, stats)
now := time.Now()
var retryAfter time.Duration
if until.After(now) {
retryAfter = time.Until(until)
}
return res, &api.FederationClientError{
Err: err.Error(),
Blacklisted: blacklisted,
RetryAfter: retryAfter,
}
}
stats.Success()
return res, nil
}
func (a *FederationInternalAPI) doRequestIfNotBlacklisted(
s gomatrixserverlib.ServerName, request func() (interface{}, error),
) (interface{}, error) {
stats := a.statistics.ForServer(s)
if _, blacklisted := stats.BackoffInfo(); blacklisted {
return stats, &api.FederationClientError{
Err: fmt.Sprintf("server %q is blacklisted", s),
Blacklisted: true,
}
}
return request()
}