1
0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-08-01 17:38:33 +00:00

enh(search): improve issue search

- new sort by relevance option for issue search
- rework bleve fuzzy search to consider each term rather than matching the entire phrase
This commit is contained in:
Shiny Nematoda 2024-11-01 13:06:02 +00:00
parent c8ba330833
commit a265574821
10 changed files with 56 additions and 31 deletions

View file

@ -19,6 +19,15 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
return q
}
// MatchQuery builds a bleve match query for matchTerm and sets its field,
// analyzer and fuzziness from the corresponding arguments.
func MatchQuery(matchTerm, field, analyzer string, fuzziness int) *query.MatchQuery {
	matchQuery := bleve.NewMatchQuery(matchTerm)
	matchQuery.FieldVal, matchQuery.Analyzer, matchQuery.Fuzziness = field, analyzer, fuzziness
	return matchQuery
}
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)

View file

@ -35,13 +35,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
// see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
maxFuzziness = 2
)
const maxBatchSize = 16
// IndexerData is an update to the issue indexer. It is a distinct named type
// whose underlying type is internal.IndexerData.
type IndexerData internal.IndexerData
@ -162,16 +156,25 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query
if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator)
fuzziness := 1
if kl := len(options.Keyword); kl > 3 {
fuzziness = 2
} else if kl < 2 {
fuzziness = 0
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
} else {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0),
}...))
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
}
if len(options.RepoIDs) > 0 || options.AllPublic {

View file

@ -78,7 +78,9 @@ func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOp
searchOpt.Paginator = opts.Paginator
switch opts.SortType {
case "", "latest":
case "", "relevance":
searchOpt.SortBy = SortByScore
case "latest":
searchOpt.SortBy = SortByCreatedDesc
case "oldest":
searchOpt.SortBy = SortByCreatedAsc

View file

@ -236,7 +236,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
}
if options.SortBy == "" {
options.SortBy = internal.SortByCreatedAsc
options.SortBy = internal.SortByScore
}
sortBy := []elastic.Sorter{
parseSortBy(options.SortBy),

View file

@ -269,6 +269,7 @@ func IsAvailable(ctx context.Context) bool {
type SearchOptions = internal.SearchOptions
const (
SortByScore = internal.SortByScore
SortByCreatedDesc = internal.SortByCreatedDesc
SortByUpdatedDesc = internal.SortByUpdatedDesc
SortByCommentsDesc = internal.SortByCommentsDesc

View file

@ -127,6 +127,7 @@ func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOption
type SortBy string
const (
SortByScore SortBy = "-_score"
SortByCreatedDesc SortBy = "-created_unix"
SortByUpdatedDesc SortBy = "-updated_unix"
SortByCommentsDesc SortBy = "-comment_count"

View file

@ -208,12 +208,18 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value()))
}
if options.SortBy == "" {
options.SortBy = internal.SortByCreatedAsc
}
sortBy := []string{
parseSortBy(options.SortBy),
"id:desc",
var sortBy []string
switch options.SortBy {
// sort by relevancy (no explicit sorting)
case internal.SortByScore:
fallthrough
case "":
sortBy = []string{}
default:
sortBy = []string{
parseSortBy(options.SortBy),
"id:desc",
}
}
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)

View file

@ -19,6 +19,10 @@ func NewStringUtils() *StringUtils {
return &stringUtils
}
// Make returns its variadic string arguments as a slice. The receiver is not
// used; the arguments are handed back unchanged and uncopied.
func (su *StringUtils) Make(arr ...string) []string {
	items := arr
	return items
}
func (su *StringUtils) HasPrefix(s any, prefix string) bool {
switch v := s.(type) {
case string: