From 8e4dc6b4ae2e6b1ac8f62da2ba72deb282fb89b0 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:26:57 +0100 Subject: [PATCH] Optimize `PrevEventIDs` when getting thousands of backwards extremeties (#3308) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes how many `PrevEventIDs` we send to other servers when backfilling, capped to 100 events. Unsure about how representative this benchmark is.. ``` goos: linux goarch: amd64 pkg: github.com/matrix-org/dendrite/roomserver/api cpu: Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ PrevEventIDs/Original1-8 264.9n ± 5% 237.4n ± 7% -10.36% (p=0.000 n=10) PrevEventIDs/Original10-8 3.101µ ± 4% 1.590µ ± 2% -48.72% (p=0.000 n=10) PrevEventIDs/Original100-8 44.32µ ± 2% 12.80µ ± 4% -71.11% (p=0.000 n=10) PrevEventIDs/Original500-8 263.835µ ± 4% 7.907µ ± 4% -97.00% (p=0.000 n=10) PrevEventIDs/Original1000-8 578.798µ ± 2% 7.620µ ± 2% -98.68% (p=0.000 n=10) PrevEventIDs/Original2000-8 1272.039µ ± 2% 8.241µ ± 9% -99.35% (p=0.000 n=10) geomean 43.81µ 3.659µ -91.65% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ PrevEventIDs/Original1-8 72.00 ± 0% 48.00 ± 0% -33.33% (p=0.000 n=10) PrevEventIDs/Original10-8 1512.0 ± 0% 500.0 ± 0% -66.93% (p=0.000 n=10) PrevEventIDs/Original100-8 11.977Ki ± 0% 7.023Ki ± 0% -41.36% (p=0.000 n=10) PrevEventIDs/Original500-8 67.227Ki ± 0% 7.023Ki ± 0% -89.55% (p=0.000 n=10) PrevEventIDs/Original1000-8 163.227Ki ± 0% 7.023Ki ± 0% -95.70% (p=0.000 n=10) PrevEventIDs/Original2000-8 347.227Ki ± 0% 7.023Ki ± 0% -97.98% (p=0.000 n=10) geomean 12.96Ki 1.954Ki -84.92% │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ PrevEventIDs/Original1-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) PrevEventIDs/Original10-8 6.000 ± 0% 2.000 ± 0% -66.67% (p=0.000 n=10) PrevEventIDs/Original100-8 9.000 ± 0% 3.000 ± 0% -66.67% (p=0.000 n=10) PrevEventIDs/Original500-8 12.000 ± 0% 3.000 ± 0% -75.00% (p=0.000 n=10) PrevEventIDs/Original1000-8 14.000 ± 0% 3.000 ± 0% -78.57% (p=0.000 n=10) PrevEventIDs/Original2000-8 16.000 ± 0% 3.000 ± 0% -81.25% (p=0.000 n=10) geomean 8.137 2.335 -71.31% ``` --- federationapi/routing/backfill.go | 6 +++ roomserver/api/perform.go | 43 +++++++++++++--- roomserver/api/perform_test.go | 81 +++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 7 deletions(-) create mode 100644 roomserver/api/perform_test.go diff --git a/federationapi/routing/backfill.go b/federationapi/routing/backfill.go index 75a00726..bc413883 100644 --- a/federationapi/routing/backfill.go +++ b/federationapi/routing/backfill.go @@ -95,6 +95,12 @@ func Backfill( } } + // Enforce a limit of 100 events, as not to hit the DB to hard. + // Synapse has a hard limit of 100 events as well. + if req.Limit > 100 { + req.Limit = 100 + } + // Query the Roomserver. if err = rsAPI.PerformBackfill(httpReq.Context(), &req, &res); err != nil { util.GetLogger(httpReq.Context()).WithError(err).Error("query.PerformBackfill failed") diff --git a/roomserver/api/perform.go b/roomserver/api/perform.go index 2818efaa..9e00da2c 100644 --- a/roomserver/api/perform.go +++ b/roomserver/api/perform.go @@ -8,7 +8,6 @@ import ( "github.com/matrix-org/dendrite/roomserver/types" "github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib/spec" - "github.com/matrix-org/util" ) type PerformCreateRoomRequest struct { @@ -91,14 +90,44 @@ type PerformBackfillRequest struct { VirtualHost spec.ServerName `json:"virtual_host"` } -// PrevEventIDs returns the prev_event IDs of all backwards extremities, de-duplicated in a lexicographically sorted order. +// limitPrevEventIDs is the maximum of eventIDs we +// return when calling PrevEventIDs. +const limitPrevEventIDs = 100 + +// PrevEventIDs returns the prev_event IDs of either 100 backwards extremities or +// len(r.BackwardsExtremities). Limited to 100, due to Synapse/Dendrite stopping after reaching +// this limit. (which sounds sane) func (r *PerformBackfillRequest) PrevEventIDs() []string { - var prevEventIDs []string - for _, pes := range r.BackwardsExtremities { - prevEventIDs = append(prevEventIDs, pes...) + var uniqueIDs map[string]struct{} + + // Create a unique eventID map of either 100 or len(r.BackwardsExtremities). + // 100 since Synapse/Dendrite stops after reaching 100 events. + if len(r.BackwardsExtremities) > limitPrevEventIDs { + uniqueIDs = make(map[string]struct{}, limitPrevEventIDs) + } else { + uniqueIDs = make(map[string]struct{}, len(r.BackwardsExtremities)) } - prevEventIDs = util.UniqueStrings(prevEventIDs) - return prevEventIDs + +outerLoop: + for _, pes := range r.BackwardsExtremities { + for _, evID := range pes { + uniqueIDs[evID] = struct{}{} + // We found enough unique eventIDs. + if len(uniqueIDs) >= limitPrevEventIDs { + break outerLoop + } + } + } + + // map -> []string + result := make([]string, len(uniqueIDs)) + i := 0 + for evID := range uniqueIDs { + result[i] = evID + i++ + } + + return result } // PerformBackfillResponse is a response to PerformBackfill. diff --git a/roomserver/api/perform_test.go b/roomserver/api/perform_test.go new file mode 100644 index 00000000..f26438d3 --- /dev/null +++ b/roomserver/api/perform_test.go @@ -0,0 +1,81 @@ +package api + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" +) + +func BenchmarkPrevEventIDs(b *testing.B) { + for _, x := range []int64{1, 10, 100, 500, 1000, 2000} { + benchPrevEventIDs(b, int(x)) + } +} + +func benchPrevEventIDs(b *testing.B, count int) { + bwExtrems := generateBackwardsExtremities(b, count) + backfiller := PerformBackfillRequest{ + BackwardsExtremities: bwExtrems, + } + + b.Run(fmt.Sprintf("Original%d", count), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + prevIDs := backfiller.PrevEventIDs() + _ = prevIDs + } + }) +} + +type testLike interface { + Helper() +} + +const randomIDCharsCount = 10 + +func generateBackwardsExtremities(t testLike, count int) map[string][]string { + t.Helper() + result := make(map[string][]string, count) + for i := 0; i < count; i++ { + eventID := randomEventId(int64(i)) + result[eventID] = []string{ + randomEventId(int64(i + 1)), + randomEventId(int64(i + 2)), + randomEventId(int64(i + 3)), + } + } + return result +} + +const alphanumerics = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + +// randomEventId generates a pseudo-random string of length n. +func randomEventId(src int64) string { + randSrc := rand.NewSource(src) + b := make([]byte, randomIDCharsCount) + for i := range b { + b[i] = alphanumerics[randSrc.Int63()%int64(len(alphanumerics))] + } + return string(b) +} + +func TestPrevEventIDs(t *testing.T) { + // generate 10 backwards extremities + bwExtrems := generateBackwardsExtremities(t, 10) + backfiller := PerformBackfillRequest{ + BackwardsExtremities: bwExtrems, + } + + prevIDs := backfiller.PrevEventIDs() + // Given how "generateBackwardsExtremities" works, this + // generates 12 unique event IDs + assert.Equal(t, 12, len(prevIDs)) + + // generate 200 backwards extremities + backfiller.BackwardsExtremities = generateBackwardsExtremities(t, 200) + prevIDs = backfiller.PrevEventIDs() + // PrevEventIDs returns at max 100 event IDs + assert.Equal(t, 100, len(prevIDs)) +}