Add a way to force federationsender to retry sending transactions (#1077)

* Add a way to force federationsender to retry sending transactions

And use it in P2P mode when we pick up new nodes.

* Linting

* Use atomic bool to stop us blocking on the channel
This commit is contained in:
Kegsay 2020-06-01 18:34:08 +01:00 committed by GitHub
parent a5d822004d
commit cfc137652e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 6 deletions

View File

@ -39,6 +39,7 @@ import (
"github.com/matrix-org/dendrite/roomserver"
"github.com/matrix-org/dendrite/syncapi"
go_http_js_libp2p "github.com/matrix-org/go-http-js-libp2p"
"github.com/matrix-org/gomatrixserverlib"
"github.com/sirupsen/logrus"
@ -204,7 +205,6 @@ func main() {
},
KeyDatabase: fetcher,
}
p2pPublicRoomProvider := NewLibP2PPublicRoomsProvider(node)
rsAPI := roomserver.SetupRoomServerComponent(base, keyRing, federation)
eduInputAPI := eduserver.SetupEDUServerComponent(base, cache.New(), deviceDB)
@ -213,6 +213,7 @@ func main() {
)
fedSenderAPI := federationsender.SetupFederationSenderComponent(base, federation, rsAPI, &keyRing)
rsAPI.SetFederationSenderAPI(fedSenderAPI)
p2pPublicRoomProvider := NewLibP2PPublicRoomsProvider(node, fedSenderAPI)
clientapi.SetupClientAPIComponent(
base, deviceDB, accountDB,

View File

@ -17,23 +17,48 @@
package main
import (
"context"
"github.com/matrix-org/dendrite/federationsender/api"
go_http_js_libp2p "github.com/matrix-org/go-http-js-libp2p"
"github.com/matrix-org/gomatrixserverlib"
)
type libp2pPublicRoomsProvider struct {
node *go_http_js_libp2p.P2pLocalNode
providers []go_http_js_libp2p.PeerInfo
fedSender api.FederationSenderInternalAPI
}
func NewLibP2PPublicRoomsProvider(node *go_http_js_libp2p.P2pLocalNode) *libp2pPublicRoomsProvider {
func NewLibP2PPublicRoomsProvider(node *go_http_js_libp2p.P2pLocalNode, fedSender api.FederationSenderInternalAPI) *libp2pPublicRoomsProvider {
p := &libp2pPublicRoomsProvider{
node: node,
fedSender: fedSender,
}
node.RegisterFoundProviders(p.foundProviders)
return p
}
func (p *libp2pPublicRoomsProvider) foundProviders(peerInfos []go_http_js_libp2p.PeerInfo) {
// work out the diff then poke for new ones
seen := make(map[string]bool, len(p.providers))
for _, pr := range p.providers {
seen[pr.Id] = true
}
var newPeers []gomatrixserverlib.ServerName
for _, pi := range peerInfos {
if !seen[pi.Id] {
newPeers = append(newPeers, gomatrixserverlib.ServerName(pi.Id))
}
}
if len(newPeers) > 0 {
var res api.PerformServersAliveResponse
// ignore errors, we don't care.
p.fedSender.PerformServersAlive(context.Background(), &api.PerformServersAliveRequest{
Servers: newPeers,
}, &res)
}
p.providers = peerInfos
}

View File

@ -42,6 +42,12 @@ type FederationSenderInternalAPI interface {
request *PerformLeaveRequest,
response *PerformLeaveResponse,
) error
// Notifies the federation sender that these servers may be online and to retry sending messages.
PerformServersAlive(
ctx context.Context,
request *PerformServersAliveRequest,
response *PerformServersAliveResponse,
) error
}
// NewFederationSenderInternalAPIHTTP creates a FederationSenderInternalAPI implemented by talking to a HTTP POST API.

View File

@ -18,6 +18,9 @@ const (
// FederationSenderPerformLeaveRequestPath is the HTTP path for the PerformLeaveRequest API.
FederationSenderPerformLeaveRequestPath = "/federationsender/performLeaveRequest"
// FederationSenderPerformServersAlivePath is the HTTP path for the PerformServersAlive API.
FederationSenderPerformServersAlivePath = "/federationsender/performServersAlive"
)
type PerformDirectoryLookupRequest struct {
@ -88,3 +91,22 @@ func (h *httpFederationSenderInternalAPI) PerformLeave(
apiURL := h.federationSenderURL + FederationSenderPerformLeaveRequestPath
return internalHTTP.PostJSON(ctx, span, h.httpClient, apiURL, request, response)
}
type PerformServersAliveRequest struct {
Servers []gomatrixserverlib.ServerName
}
type PerformServersAliveResponse struct {
}
func (h *httpFederationSenderInternalAPI) PerformServersAlive(
ctx context.Context,
request *PerformServersAliveRequest,
response *PerformServersAliveResponse,
) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "PerformServersAlive")
defer span.Finish()
apiURL := h.federationSenderURL + FederationSenderPerformServersAlivePath
return internalHTTP.PostJSON(ctx, span, h.httpClient, apiURL, request, response)
}

View File

@ -67,7 +67,7 @@ func SetupFederationSenderComponent(
queryAPI := internal.NewFederationSenderInternalAPI(
federationSenderDB, base.Cfg, roomserverProducer, federation, keyRing,
statistics,
statistics, queues,
)
queryAPI.SetupHTTP(base.InternalAPIMux)

View File

@ -7,6 +7,7 @@ import (
"github.com/gorilla/mux"
"github.com/matrix-org/dendrite/federationsender/api"
"github.com/matrix-org/dendrite/federationsender/producers"
"github.com/matrix-org/dendrite/federationsender/queue"
"github.com/matrix-org/dendrite/federationsender/storage"
"github.com/matrix-org/dendrite/federationsender/types"
"github.com/matrix-org/dendrite/internal"
@ -24,6 +25,7 @@ type FederationSenderInternalAPI struct {
producer *producers.RoomserverProducer
federation *gomatrixserverlib.FederationClient
keyRing *gomatrixserverlib.KeyRing
queues *queue.OutgoingQueues
}
func NewFederationSenderInternalAPI(
@ -32,6 +34,7 @@ func NewFederationSenderInternalAPI(
federation *gomatrixserverlib.FederationClient,
keyRing *gomatrixserverlib.KeyRing,
statistics *types.Statistics,
queues *queue.OutgoingQueues,
) *FederationSenderInternalAPI {
return &FederationSenderInternalAPI{
db: db,
@ -40,6 +43,7 @@ func NewFederationSenderInternalAPI(
federation: federation,
keyRing: keyRing,
statistics: statistics,
queues: queues,
}
}
@ -112,4 +116,17 @@ func (f *FederationSenderInternalAPI) SetupHTTP(internalAPIMux *mux.Router) {
return util.JSONResponse{Code: http.StatusOK, JSON: &response}
}),
)
internalAPIMux.Handle(api.FederationSenderPerformServersAlivePath,
internal.MakeInternalAPI("PerformServersAliveRequest", func(req *http.Request) util.JSONResponse {
var request api.PerformServersAliveRequest
var response api.PerformServersAliveResponse
if err := json.NewDecoder(req.Body).Decode(&request); err != nil {
return util.MessageResponse(http.StatusBadRequest, err.Error())
}
if err := f.PerformServersAlive(req.Context(), &request, &response); err != nil {
return util.ErrorResponse(err)
}
return util.JSONResponse{Code: http.StatusOK, JSON: &response}
}),
)
}

View File

@ -275,3 +275,16 @@ func (r *FederationSenderInternalAPI) PerformLeave(
request.RoomID, len(request.ServerNames),
)
}
// PerformServersAlive implements api.FederationSenderInternalAPI
func (r *FederationSenderInternalAPI) PerformServersAlive(
ctx context.Context,
request *api.PerformServersAliveRequest,
response *api.PerformServersAliveResponse,
) (err error) {
for _, srv := range request.Servers {
r.queues.RetryServer(srv)
}
return nil
}

View File

@ -39,6 +39,7 @@ type destinationQueue struct {
origin gomatrixserverlib.ServerName // origin of requests
destination gomatrixserverlib.ServerName // destination of requests
running atomic.Bool // is the queue worker running?
backingOff atomic.Bool // true if we're backing off
statistics *types.ServerStatistics // statistics about this remote server
incomingPDUs chan *gomatrixserverlib.HeaderedEvent // PDUs to send
incomingEDUs chan *gomatrixserverlib.EDU // EDUs to send
@ -47,6 +48,28 @@ type destinationQueue struct {
pendingPDUs []*gomatrixserverlib.HeaderedEvent // owned by backgroundSend
pendingEDUs []*gomatrixserverlib.EDU // owned by backgroundSend
pendingInvites []*gomatrixserverlib.InviteV2Request // owned by backgroundSend
retryServerCh chan bool // interrupts backoff
}
// retry will clear the blacklist state and attempt to send built up events to the server,
// resetting and interrupting any backoff timers.
func (oq *destinationQueue) retry() {
// TODO: We don't send all events in the case where the server has been blacklisted as we
// drop events instead then. This means we will send the oldest N events (chan size, currently 128)
// and then skip ahead a lot which feels non-ideal but equally we can't persist thousands of events
// in-memory to maybe-send it one day. Ideally we would just shove these pending events in a database
// so we can send a lot of events.
oq.statistics.Success()
// if we were backing off, swap to not backing off and interrupt the select.
// We need to use an atomic bool here to prevent multiple calls to retry() blocking on the channel
// as it is unbuffered.
if oq.backingOff.CAS(true, false) {
oq.retryServerCh <- true
}
if !oq.running.Load() {
log.Infof("Restarting queue for %s", oq.destination)
go oq.backgroundSend()
}
}
// Send event adds the event to the pending queue for the destination.
@ -155,9 +178,15 @@ func (oq *destinationQueue) backgroundSend() {
}
// If we are backing off this server then wait for the
// backoff duration to complete first.
// backoff duration to complete first, or until explicitly
// told to retry.
if backoff, duration := oq.statistics.BackoffDuration(); backoff {
<-time.After(duration)
oq.backingOff.Store(true)
select {
case <-time.After(duration):
case <-oq.retryServerCh:
}
oq.backingOff.Store(false)
}
// How many things do we have waiting?

View File

@ -52,6 +52,12 @@ func NewOutgoingQueues(
}
}
func (oqs *OutgoingQueues) getQueueIfExists(destination gomatrixserverlib.ServerName) *destinationQueue {
oqs.queuesMutex.Lock()
defer oqs.queuesMutex.Unlock()
return oqs.queues[destination]
}
func (oqs *OutgoingQueues) getQueue(destination gomatrixserverlib.ServerName) *destinationQueue {
oqs.queuesMutex.Lock()
defer oqs.queuesMutex.Unlock()
@ -66,6 +72,7 @@ func (oqs *OutgoingQueues) getQueue(destination gomatrixserverlib.ServerName) *d
incomingPDUs: make(chan *gomatrixserverlib.HeaderedEvent, 128),
incomingEDUs: make(chan *gomatrixserverlib.EDU, 128),
incomingInvites: make(chan *gomatrixserverlib.InviteV2Request, 128),
retryServerCh: make(chan bool),
}
oqs.queues[destination] = oq
}
@ -160,6 +167,15 @@ func (oqs *OutgoingQueues) SendEDU(
return nil
}
// RetryServer attempts to resend events to the given server if we had given up.
func (oqs *OutgoingQueues) RetryServer(srv gomatrixserverlib.ServerName) {
q := oqs.getQueueIfExists(srv)
if q == nil {
return
}
q.retry()
}
// filterAndDedupeDests removes our own server from the list of destinations
// and deduplicates any servers in the list that may appear more than once.
func filterAndDedupeDests(origin gomatrixserverlib.ServerName, destinations []gomatrixserverlib.ServerName) (