mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-09-15 18:56:59 +00:00
Optimization of labels handling in issue_search (#4228)
This PR optimizes the SQL query and de-duplicate the labels' ids when generating the query string, on the issue page. <hr/> ### Background Some time ago, BingBot and some other crawlers have been putting my instance on its knees with requests containing a lot of label ids, like this one : ``` [07/Aug/2023:11:28:37 +0200] "GET /Dolibarr/sendrecurringinvoicebymail/issues?q=&type=all&sort=&state=closed&labels=1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c2%2c10%2c2%2c1%2c1%2c10%2c10%2c7%2c6%2c10%2c10%2c3%2c2%2c1%2c5%2c10%2c1%2c6%2c2%2c7%2c3%2c7%2c6%2c10%2c1%2c10%2c1%2c1%2c7%2c7%2c1%2c1%2c1%2c1%2c10%2c10%2c1%2c2%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c2%2c1%2c12%2c6%2c6%2c10&milestone=0&project=-1&poster=0 HTTP/1.1" 499 0 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36" ``` Since each of the label ids implies a join, it grows exponentially expensive for the database engine (at least on PostgreSQL but SQLite suffers a little too). Thus, this PR proposes two enhancements: * rewrite the database query to use only one squashed condition, * deduplicate the label ids when generating the URL. ### Performance comparison Here are some timings on Postgresql-backed, Forgejo 7.0.4 instances : ```sh $ time curl -s -o /dev/null "http://localhost:3000/toto/tata/issues?q=&type=all&sort=&labels=19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25&state=open&milestone=0&project=0&assignee=0&poster=0" real 0m10,491s user 0m0,017s sys 0m0,008s ``` ...and with the patch: ```sh $ time curl -s -o /dev/null "http://localhost:3000/toto/tata/issues?q=&type=all&sort=&labels=19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25&state=open&milestone=0&project=0&assignee=0&poster=0" real 0m0,094s user 0m0,012s sys 0m0,013s ``` ### Annex This issue was originally proposed to [Gitea](https://github.com/go-gitea/gitea/pull/26460) but didn't get much attention, and I switched to Forgejo in the meantime :) Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/4228 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Co-authored-by: Chl <chl@xlii.si> Co-committed-by: Chl <chl@xlii.si>
This commit is contained in:
parent
2121a29f89
commit
544cbc6f01
4 changed files with 80 additions and 16 deletions
|
@ -6,6 +6,7 @@ package issues
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
|
@ -13,6 +14,7 @@ import (
|
|||
repo_model "code.gitea.io/gitea/models/repo"
|
||||
"code.gitea.io/gitea/models/unit"
|
||||
user_model "code.gitea.io/gitea/models/user"
|
||||
"code.gitea.io/gitea/modules/container"
|
||||
"code.gitea.io/gitea/modules/optional"
|
||||
|
||||
"xorm.io/builder"
|
||||
|
@ -116,14 +118,30 @@ func applyLabelsCondition(sess *xorm.Session, opts *IssuesOptions) {
|
|||
if opts.LabelIDs[0] == 0 {
|
||||
sess.Where("issue.id NOT IN (SELECT issue_id FROM issue_label)")
|
||||
} else {
|
||||
for i, labelID := range opts.LabelIDs {
|
||||
// deduplicate the label IDs for inclusion and exclusion
|
||||
includedLabelIDs := make(container.Set[int64])
|
||||
excludedLabelIDs := make(container.Set[int64])
|
||||
for _, labelID := range opts.LabelIDs {
|
||||
if labelID > 0 {
|
||||
sess.Join("INNER", fmt.Sprintf("issue_label il%d", i),
|
||||
fmt.Sprintf("issue.id = il%[1]d.issue_id AND il%[1]d.label_id = %[2]d", i, labelID))
|
||||
includedLabelIDs.Add(labelID)
|
||||
} else if labelID < 0 { // 0 is not supported here, so just ignore it
|
||||
sess.Where("issue.id not in (select issue_id from issue_label where label_id = ?)", -labelID)
|
||||
excludedLabelIDs.Add(-labelID)
|
||||
}
|
||||
}
|
||||
// ... and use them in a subquery of the form :
|
||||
// where (select count(*) from issue_label where issue_id=issue.id and label_id in (2, 4, 6)) = 3
|
||||
// This equality is guaranteed thanks to unique index (issue_id,label_id) on table issue_label.
|
||||
if len(includedLabelIDs) > 0 {
|
||||
subQuery := builder.Select("count(*)").From("issue_label").Where(builder.Expr("issue_id = issue.id")).
|
||||
And(builder.In("label_id", includedLabelIDs.Values()))
|
||||
sess.Where(builder.Eq{strconv.Itoa(len(includedLabelIDs)): subQuery})
|
||||
}
|
||||
// or (select count(*)...) = 0 for excluded labels
|
||||
if len(excludedLabelIDs) > 0 {
|
||||
subQuery := builder.Select("count(*)").From("issue_label").Where(builder.Expr("issue_id = issue.id")).
|
||||
And(builder.In("label_id", excludedLabelIDs.Values()))
|
||||
sess.Where(builder.Eq{"0": subQuery})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue