1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

refactor: use a better construct than doc.Find(…).First()

As mentioned in goquery's documentation (https://pkg.go.dev/github.com/PuerkitoBio/goquery#Single):

> By default, Selection.Find and other functions that accept a selector string
> to select nodes will use all matches corresponding to that selector. By using
> the Matcher returned by Single, at most the first match will be selected.
>
> The one using Single [i.e. doc.FindMatcher(goquery.Single(…)), as opposed to the semantically equivalent doc.Find(…).First()] is optimized to be potentially much faster on large documents.
This commit is contained in:
Julien Voisin 2024-12-12 03:40:55 +00:00 committed by GitHub
parent 68448b4abb
commit 1b0b8b9c42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 18 additions and 18 deletions

View file

@ -87,7 +87,7 @@ func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter,
} }
} }
output, err := doc.Find("body").First().Html() output, err := doc.FindMatcher(goquery.Single("body")).Html()
if err != nil { if err != nil {
return htmlDocument return htmlDocument
} }

View file

@ -48,7 +48,7 @@ func fetchNebulaWatchTime(websiteURL string) (int, error) {
return 0, docErr return 0, docErr
} }
durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content") durs, exists := doc.FindMatcher(goquery.Single(`meta[property="video:duration"]`)).Attr("content")
// durs contains video watch time in seconds // durs contains video watch time in seconds
if !exists { if !exists {
return 0, errors.New("duration has not found") return 0, errors.New("duration has not found")

View file

@ -48,7 +48,7 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
return 0, docErr return 0, docErr
} }
durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content") durs, exists := doc.FindMatcher(goquery.Single(`meta[property="og:video:duration"]`)).Attr("content")
// durs contains video watch time in seconds // durs contains video watch time in seconds
if !exists { if !exists {
return 0, errors.New("duration has not found") return 0, errors.New("duration has not found")

View file

@ -60,7 +60,7 @@ func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) {
return 0, docErr return 0, docErr
} }
durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content") durs, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
if !exists { if !exists {
return 0, errors.New("duration has not found") return 0, errors.New("duration has not found")
} }

View file

@ -77,7 +77,7 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
return "", "", err return "", "", err
} }
if hrefValue, exists := document.Find("head base").First().Attr("href"); exists { if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue) hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) { if urllib.IsAbsoluteURL(hrefValue) {
baseURL = hrefValue baseURL = hrefValue

View file

@ -44,7 +44,7 @@ func addImageTitle(entryURL, entryContent string) string {
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`) img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
}) })
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -76,7 +76,7 @@ func addMailtoSubject(entryURL, entryContent string) string {
a.AppendHtml(" [" + html.EscapeString(subject) + "]") a.AppendHtml(" [" + html.EscapeString(subject) + "]")
}) })
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -160,7 +160,7 @@ func addDynamicImage(entryURL, entryContent string) string {
} }
if changed { if changed {
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -197,7 +197,7 @@ func addDynamicIframe(entryURL, entryContent string) string {
}) })
if changed { if changed {
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -217,7 +217,7 @@ func fixMediumImages(entryURL, entryContent string) string {
} }
}) })
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -239,7 +239,7 @@ func useNoScriptImages(entryURL, entryContent string) string {
} }
}) })
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -317,7 +317,7 @@ func removeCustom(entryContent string, selector string) string {
doc.Find(selector).Remove() doc.Find(selector).Remove()
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -344,7 +344,7 @@ func applyFuncOnTextContent(entryContent string, selector string, repl func(stri
doc.Find(selector).Each(treatChildren) doc.Find(selector).Each(treatChildren)
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -401,7 +401,7 @@ func addHackerNewsLinksUsing(entryContent, app string) string {
} }
}) })
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }
@ -420,7 +420,7 @@ func removeTables(entryContent string) string {
for _, selector := range selectors { for _, selector := range selectors {
for { for {
loopElement = doc.Find(selector).First() loopElement = doc.FindMatcher(goquery.Single(selector))
if loopElement.Length() == 0 { if loopElement.Length() == 0 {
break break
@ -436,6 +436,6 @@ func removeTables(entryContent string) string {
} }
} }
output, _ := doc.Find("body").First().Html() output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output return output
} }

View file

@ -75,7 +75,7 @@ func findContentUsingCustomRules(page io.Reader, rules string) (baseURL string,
return "", "", err return "", "", err
} }
if hrefValue, exists := document.Find("head base").First().Attr("href"); exists { if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue) hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) { if urllib.IsAbsoluteURL(hrefValue) {
baseURL = hrefValue baseURL = hrefValue

View file

@ -146,7 +146,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err) return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
} }
if hrefValue, exists := doc.Find("head base").First().Attr("href"); exists { if hrefValue, exists := doc.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue) hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) { if urllib.IsAbsoluteURL(hrefValue) {
websiteURL = hrefValue websiteURL = hrefValue