diff --git a/internal/fulltext/bleve.go b/internal/fulltext/bleve.go index dea7c504..f7412470 100644 --- a/internal/fulltext/bleve.go +++ b/internal/fulltext/bleve.go @@ -18,6 +18,7 @@ package fulltext import ( + "regexp" "strings" "github.com/blevesearch/bleve/v2" @@ -60,6 +61,7 @@ type Indexer interface { Index(elements ...IndexElement) error Delete(eventID string) error Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) + GetHighlights(result *bleve.SearchResult) []string Close() error } @@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error { return f.FulltextIndex.Delete(eventID) } +var highlightMatcher = regexp.MustCompile("(.*?)") + +// GetHighlights extracts the highlights from a SearchResult. +func (f *Search) GetHighlights(result *bleve.SearchResult) []string { + if result == nil { + return []string{} + } + + seenMatches := make(map[string]struct{}) + + for _, hit := range result.Hits { + if hit.Fragments == nil { + continue + } + fragments, ok := hit.Fragments["Content"] + if !ok { + continue + } + for _, x := range fragments { + substringMatches := highlightMatcher.FindAllStringSubmatch(x, -1) + for _, matches := range substringMatches { + for i := range matches { + if i == 0 { // skip first match, this is the complete substring match + continue + } + if _, ok := seenMatches[matches[i]]; ok { + continue + } + seenMatches[matches[i]] = struct{}{} + } + } + } + } + + res := make([]string, 0, len(seenMatches)) + for m := range seenMatches { + res = append(res, m) + } + return res +} + // Search searches the index given a search term, roomIDs and keys. func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) { qry := bleve.NewConjunctionQuery() @@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or s.SortBy([]string{"-StreamPosition"}) } + // Highlight some words + s.Highlight = bleve.NewHighlight() + s.Highlight.Fields = []string{"Content"} + return f.FulltextIndex.Search(s) } diff --git a/internal/fulltext/bleve_test.go b/internal/fulltext/bleve_test.go index bd8289d5..a77c2393 100644 --- a/internal/fulltext/bleve_test.go +++ b/internal/fulltext/bleve_test.go @@ -160,14 +160,16 @@ func TestSearch(t *testing.T) { roomIndex []int } tests := []struct { - name string - args args - wantCount int - wantErr bool + name string + args args + wantCount int + wantErr bool + wantHighlights []string }{ { - name: "Can search for many results in one room", - wantCount: 16, + name: "Can search for many results in one room", + wantCount: 16, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{0}, @@ -175,8 +177,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for one result in one room", - wantCount: 1, + name: "Can search for one result in one room", + wantCount: 1, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{16}, @@ -184,8 +187,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for many results in multiple rooms", - wantCount: 17, + name: "Can search for many results in multiple rooms", + wantCount: 17, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{0, 16}, @@ -193,8 +197,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for many results in all rooms, reversed", - wantCount: 30, + name: "Can search for many results in all rooms, reversed", + wantCount: 30, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", limit: 30, @@ -202,8 +207,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for specific search room name", - wantCount: 1, + name: "Can search for specific search room name", + wantCount: 1, + wantHighlights: []string{"testing"}, args: args{ term: "testing", roomIndex: []int{}, @@ -212,8 +218,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for specific search room topic", - wantCount: 1, + name: "Can search for specific search room topic", + wantCount: 1, + wantHighlights: []string{"fulltext"}, args: args{ term: "fulltext", roomIndex: []int{}, @@ -222,6 +229,7 @@ func TestSearch(t *testing.T) { }, }, } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { f, ctx := mustOpenIndex(t, "") @@ -238,6 +246,12 @@ func TestSearch(t *testing.T) { t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr) return } + + highlights := f.GetHighlights(got) + if !reflect.DeepEqual(highlights, tt.wantHighlights) { + t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights) + } + if !reflect.DeepEqual(len(got.Hits), tt.wantCount) { t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount) } diff --git a/internal/fulltext/bleve_wasm.go b/internal/fulltext/bleve_wasm.go index 0053ed8c..12709900 100644 --- a/internal/fulltext/bleve_wasm.go +++ b/internal/fulltext/bleve_wasm.go @@ -33,6 +33,7 @@ type Indexer interface { Index(elements ...IndexElement) error Delete(eventID string) error Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) + GetHighlights(result SearchResult) []string Close() error } @@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) { return SearchResult{}, nil } + +func (f *Search) GetHighlights(result SearchResult) []string { + return []string{} +} diff --git a/syncapi/routing/search.go b/syncapi/routing/search.go index 13625b9c..69fa5294 100644 --- a/syncapi/routing/search.go +++ b/syncapi/routing/search.go @@ -19,7 +19,6 @@ import ( "net/http" "sort" "strconv" - "strings" "time" "github.com/blevesearch/bleve/v2/search" @@ -123,8 +122,8 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts return util.JSONResponse{ Code: http.StatusOK, JSON: SearchResponse{ - SearchCategories: SearchCategories{ - RoomEvents: RoomEvents{ + SearchCategories: SearchCategoriesResponse{ + RoomEvents: RoomEventsResponse{ Count: int(result.Total), NextBatch: nil, }, @@ -158,7 +157,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts } groups := make(map[string]RoomResult) - knownUsersProfiles := make(map[string]ProfileInfo) + knownUsersProfiles := make(map[string]ProfileInfoResponse) // Sort the events by depth, as the returned values aren't ordered if orderByTime { @@ -180,7 +179,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts return jsonerror.InternalServerError() } - profileInfos := make(map[string]ProfileInfo) + profileInfos := make(map[string]ProfileInfoResponse) for _, ev := range append(eventsBefore, eventsAfter...) { profile, ok := knownUsersProfiles[event.Sender()] if !ok { @@ -192,7 +191,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts if stateEvent == nil { continue } - profile = ProfileInfo{ + profile = ProfileInfoResponse{ AvatarURL: gjson.GetBytes(stateEvent.Content(), "avatar_url").Str, DisplayName: gjson.GetBytes(stateEvent.Content(), "displayname").Str, } @@ -237,13 +236,13 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts } res := SearchResponse{ - SearchCategories: SearchCategories{ - RoomEvents: RoomEvents{ + SearchCategories: SearchCategoriesResponse{ + RoomEvents: RoomEventsResponse{ Count: int(result.Total), Groups: Groups{RoomID: groups}, Results: results, NextBatch: nextBatchResult, - Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "), + Highlights: fts.GetHighlights(result), State: stateForRooms, }, }, @@ -286,30 +285,40 @@ func contextEvents( return eventsBefore, eventsAfter, err } +type EventContext struct { + AfterLimit int `json:"after_limit,omitempty"` + BeforeLimit int `json:"before_limit,omitempty"` + IncludeProfile bool `json:"include_profile,omitempty"` +} + +type GroupBy struct { + Key string `json:"key"` +} + +type Groupings struct { + GroupBy []GroupBy `json:"group_by"` +} + +type RoomEvents struct { + EventContext EventContext `json:"event_context"` + Filter gomatrixserverlib.RoomEventFilter `json:"filter"` + Groupings Groupings `json:"groupings"` + IncludeState bool `json:"include_state"` + Keys []string `json:"keys"` + OrderBy string `json:"order_by"` + SearchTerm string `json:"search_term"` +} + +type SearchCategories struct { + RoomEvents RoomEvents `json:"room_events"` +} + type SearchRequest struct { - SearchCategories struct { - RoomEvents struct { - EventContext struct { - AfterLimit int `json:"after_limit,omitempty"` - BeforeLimit int `json:"before_limit,omitempty"` - IncludeProfile bool `json:"include_profile,omitempty"` - } `json:"event_context"` - Filter gomatrixserverlib.RoomEventFilter `json:"filter"` - Groupings struct { - GroupBy []struct { - Key string `json:"key"` - } `json:"group_by"` - } `json:"groupings"` - IncludeState bool `json:"include_state"` - Keys []string `json:"keys"` - OrderBy string `json:"order_by"` - SearchTerm string `json:"search_term"` - } `json:"room_events"` - } `json:"search_categories"` + SearchCategories SearchCategories `json:"search_categories"` } type SearchResponse struct { - SearchCategories SearchCategories `json:"search_categories"` + SearchCategories SearchCategoriesResponse `json:"search_categories"` } type RoomResult struct { NextBatch *string `json:"next_batch,omitempty"` @@ -332,15 +341,15 @@ type SearchContextResponse struct { EventsAfter []gomatrixserverlib.ClientEvent `json:"events_after"` EventsBefore []gomatrixserverlib.ClientEvent `json:"events_before"` Start string `json:"start"` - ProfileInfo map[string]ProfileInfo `json:"profile_info"` + ProfileInfo map[string]ProfileInfoResponse `json:"profile_info"` } -type ProfileInfo struct { +type ProfileInfoResponse struct { AvatarURL string `json:"avatar_url"` DisplayName string `json:"display_name"` } -type RoomEvents struct { +type RoomEventsResponse struct { Count int `json:"count"` Groups Groups `json:"groups"` Highlights []string `json:"highlights"` @@ -348,6 +357,6 @@ type RoomEvents struct { Results []Result `json:"results"` State map[string][]gomatrixserverlib.ClientEvent `json:"state,omitempty"` } -type SearchCategories struct { - RoomEvents RoomEvents `json:"room_events"` +type SearchCategoriesResponse struct { + RoomEvents RoomEventsResponse `json:"room_events"` } diff --git a/syncapi/routing/search_test.go b/syncapi/routing/search_test.go new file mode 100644 index 00000000..05479300 --- /dev/null +++ b/syncapi/routing/search_test.go @@ -0,0 +1,264 @@ +package routing + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/matrix-org/dendrite/internal/fulltext" + "github.com/matrix-org/dendrite/internal/sqlutil" + "github.com/matrix-org/dendrite/syncapi/storage" + "github.com/matrix-org/dendrite/syncapi/types" + "github.com/matrix-org/dendrite/test" + "github.com/matrix-org/dendrite/test/testrig" + userapi "github.com/matrix-org/dendrite/userapi/api" + "github.com/matrix-org/gomatrixserverlib" + "github.com/stretchr/testify/assert" +) + +func TestSearch(t *testing.T) { + alice := test.NewUser(t) + aliceDevice := userapi.Device{UserID: alice.ID} + room := test.NewRoom(t, alice) + room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "context before"}) + room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "hello world3!"}) + room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "context after"}) + + roomsFilter := []string{room.ID} + roomsFilterUnknown := []string{"!unknown"} + + emptyFromString := "" + fromStringValid := "1" + fromStringInvalid := "iCantBeParsed" + + testCases := []struct { + name string + wantOK bool + searchReq SearchRequest + device *userapi.Device + wantResponseCount int + from *string + }{ + { + name: "no user ID", + searchReq: SearchRequest{}, + device: &userapi.Device{}, + }, + { + name: "with alice ID", + wantOK: true, + searchReq: SearchRequest{}, + device: &aliceDevice, + }, + { + name: "searchTerm specified, found at the beginning", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}}, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + { + name: "searchTerm specified, found at the end", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}}, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + /* the following would need matchQuery.SetFuzziness(1) in bleve.go + { + name: "searchTerm fuzzy search", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + */ + { + name: "searchTerm specified but no result", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "i don't match"}}, + }, + device: &aliceDevice, + }, + { + name: "filter on room", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilter, + }, + }, + }, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + { + name: "filter on unknown room", + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilterUnknown, + }, + }, + }, + }, + device: &aliceDevice, + }, + { + name: "include state", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilter, + }, + IncludeState: true, + }, + }, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + { + name: "empty from does not error", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilter, + }, + }, + }, + }, + wantResponseCount: 1, + device: &aliceDevice, + from: &emptyFromString, + }, + { + name: "valid from does not error", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilter, + }, + }, + }, + }, + wantResponseCount: 1, + device: &aliceDevice, + from: &fromStringValid, + }, + { + name: "invalid from does error", + searchReq: SearchRequest{ + SearchCategories: SearchCategories{ + RoomEvents: RoomEvents{ + SearchTerm: "hello", + Filter: gomatrixserverlib.RoomEventFilter{ + Rooms: &roomsFilter, + }, + }, + }, + }, + device: &aliceDevice, + from: &fromStringInvalid, + }, + { + name: "order by stream position", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello", OrderBy: "recent"}}, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + } + + test.WithAllDatabases(t, func(t *testing.T, dbType test.DBType) { + cfg, processCtx, closeDB := testrig.CreateConfig(t, dbType) + defer closeDB() + + // create requisites + fts, err := fulltext.New(processCtx, cfg.SyncAPI.Fulltext) + assert.NoError(t, err) + assert.NotNil(t, fts) + + cm := sqlutil.NewConnectionManager(processCtx, cfg.Global.DatabaseOptions) + db, err := storage.NewSyncServerDatasource(processCtx.Context(), cm, &cfg.SyncAPI.Database) + assert.NoError(t, err) + + elements := []fulltext.IndexElement{} + // store the events in the database + var sp types.StreamPosition + for _, x := range room.Events() { + var stateEvents []*gomatrixserverlib.HeaderedEvent + var stateEventIDs []string + if x.Type() == gomatrixserverlib.MRoomMember { + stateEvents = append(stateEvents, x) + stateEventIDs = append(stateEventIDs, x.EventID()) + } + sp, err = db.WriteEvent(processCtx.Context(), x, stateEvents, stateEventIDs, nil, nil, false, gomatrixserverlib.HistoryVisibilityShared) + assert.NoError(t, err) + if x.Type() != "m.room.message" { + continue + } + elements = append(elements, fulltext.IndexElement{ + EventID: x.EventID(), + RoomID: x.RoomID(), + Content: string(x.Content()), + ContentType: x.Type(), + StreamPosition: int64(sp), + }) + } + // Index the events + err = fts.Index(elements...) + assert.NoError(t, err) + + // run the tests + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + reqBody := &bytes.Buffer{} + err = json.NewEncoder(reqBody).Encode(tc.searchReq) + assert.NoError(t, err) + req := httptest.NewRequest(http.MethodPost, "/", reqBody) + + res := Search(req, tc.device, db, fts, tc.from) + if !tc.wantOK && !res.Is2xx() { + return + } + resp, ok := res.JSON.(SearchResponse) + if !ok && !tc.wantOK { + t.Fatalf("not a SearchResponse: %T: %s", res.JSON, res.JSON) + } + assert.Equal(t, tc.wantResponseCount, resp.SearchCategories.RoomEvents.Count) + + // if we requested state, it should not be empty + if tc.searchReq.SearchCategories.RoomEvents.IncludeState { + assert.NotEmpty(t, resp.SearchCategories.RoomEvents.State) + } + }) + } + }) +}