From 5513f182ccd978866354eaa45effd293b0745207 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Fri, 19 Aug 2022 10:23:09 +0100 Subject: [PATCH] Enforce device list backoffs (#2653) This ensures that if the device list updater is already backing off a node, we don't try to call processServer again anyway for server just because the server name arrived in the channel. Otherwise we can keep trying to hit a remote server that is offline or not behaving every second and that spams the logs too. --- keyserver/internal/device_list_update.go | 13 ++++++++++--- sytest-whitelist | 2 -- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/keyserver/internal/device_list_update.go b/keyserver/internal/device_list_update.go index 80efbec5..304b67b2 100644 --- a/keyserver/internal/device_list_update.go +++ b/keyserver/internal/device_list_update.go @@ -335,8 +335,9 @@ func (u *DeviceListUpdater) worker(ch chan gomatrixserverlib.ServerName) { retriesMu := &sync.Mutex{} // restarter goroutine which will inject failed servers into ch when it is time go func() { + var serversToRetry []gomatrixserverlib.ServerName for { - var serversToRetry []gomatrixserverlib.ServerName + serversToRetry = serversToRetry[:0] // reuse memory time.Sleep(time.Second) retriesMu.Lock() now := time.Now() @@ -355,11 +356,17 @@ func (u *DeviceListUpdater) worker(ch chan gomatrixserverlib.ServerName) { } }() for serverName := range ch { + retriesMu.Lock() + _, exists := retries[serverName] + retriesMu.Unlock() + if exists { + // Don't retry a server that we're already waiting for. + continue + } waitTime, shouldRetry := u.processServer(serverName) if shouldRetry { retriesMu.Lock() - _, exists := retries[serverName] - if !exists { + if _, exists = retries[serverName]; !exists { retries[serverName] = time.Now().Add(waitTime) } retriesMu.Unlock() diff --git a/sytest-whitelist b/sytest-whitelist index dcffeaff..5c8896b9 100644 --- a/sytest-whitelist +++ b/sytest-whitelist @@ -144,7 +144,6 @@ Server correctly handles incoming m.device_list_update If remote user leaves room, changes device and rejoins we see update in sync If remote user leaves room, changes device and rejoins we see update in /keys/changes If remote user leaves room we no longer receive device updates -If a device list update goes missing, the server resyncs on the next one Server correctly resyncs when client query keys and there is no remote cache Server correctly resyncs when server leaves and rejoins a room Device list doesn't change if remote server is down @@ -633,7 +632,6 @@ Test that rejected pushers are removed. Trying to add push rule with no scope fails with 400 Trying to add push rule with invalid scope fails with 400 Forward extremities remain so even after the next events are populated as outliers -If a device list update goes missing, the server resyncs on the next one uploading self-signing key notifies over federation uploading signed devices gets propagated over federation Device list doesn't change if remote server is down