1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

fix(icon): implement better handling of relative icon URLs within a subfolder

This commit is contained in:
Frédéric Guillot 2025-09-09 20:11:54 -07:00
parent 8129500296
commit 7ada5d54be
3 changed files with 60 additions and 53 deletions

View file

@ -64,13 +64,17 @@ func (f *iconFinder) findIcon() (*model.Icon, error) {
} }
} }
if icon, err := f.fetchIconsFromHTMLDocument(); err != nil { // Try the website URL first, then fall back to the root URL if no icon is found.
slog.Debug("Unable to fetch icons from HTML document", // The website URL may include a subdirectory (e.g., https://example.org/subfolder/), and icons can be referenced relative to that path.
slog.String("website_url", f.websiteURL), for _, documentURL := range []string{f.websiteURL, urllib.RootURL(f.websiteURL)} {
slog.Any("error", err), if icon, err := f.fetchIconsFromHTMLDocument(documentURL); err != nil {
) slog.Debug("Unable to fetch icons from HTML document",
} else if icon != nil { slog.String("document_url", documentURL),
return icon, nil slog.Any("error", err),
)
} else if icon != nil {
return icon, nil
}
} }
return f.fetchDefaultIcon() return f.fetchDefaultIcon()
@ -94,14 +98,12 @@ func (f *iconFinder) fetchDefaultIcon() (*model.Icon, error) {
return icon, nil return icon, nil
} }
func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) { func (f *iconFinder) fetchIconsFromHTMLDocument(documentURL string) (*model.Icon, error) {
slog.Debug("Searching icons from HTML document", slog.Debug("Searching icons from HTML document",
slog.String("website_url", f.websiteURL), slog.String("document_url", documentURL),
) )
rootURL := urllib.RootURL(f.websiteURL) responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(documentURL))
responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(rootURL))
defer responseHandler.Close() defer responseHandler.Close()
if localizedError := responseHandler.LocalizedError(); localizedError != nil { if localizedError := responseHandler.LocalizedError(); localizedError != nil {
@ -109,6 +111,7 @@ func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) {
} }
iconURLs, err := findIconURLsFromHTMLDocument( iconURLs, err := findIconURLsFromHTMLDocument(
documentURL,
responseHandler.Body(config.Opts.HTTPClientMaxBodySize()), responseHandler.Body(config.Opts.HTTPClientMaxBodySize()),
responseHandler.ContentType(), responseHandler.ContentType(),
) )
@ -117,32 +120,27 @@ func (f *iconFinder) fetchIconsFromHTMLDocument() (*model.Icon, error) {
} }
slog.Debug("Searched icon from HTML document", slog.Debug("Searched icon from HTML document",
slog.String("website_url", f.websiteURL), slog.String("document_url", documentURL),
slog.String("icon_urls", strings.Join(iconURLs, ",")), slog.String("icon_urls", strings.Join(iconURLs, ",")),
) )
for _, iconURL := range iconURLs { for _, iconURL := range iconURLs {
if strings.HasPrefix(iconURL, "data:") { if strings.HasPrefix(iconURL, "data:") {
slog.Debug("Found icon with data URL", slog.Debug("Found icon with data URL",
slog.String("website_url", f.websiteURL), slog.String("document_url", documentURL),
) )
return parseImageDataURL(iconURL) return parseImageDataURL(iconURL)
} }
iconURL, err = urllib.AbsoluteURL(f.websiteURL, iconURL)
if err != nil {
return nil, fmt.Errorf(`icon: unable to convert icon URL to absolute URL: %w`, err)
}
if icon, err := f.downloadIcon(iconURL); err != nil { if icon, err := f.downloadIcon(iconURL); err != nil {
slog.Debug("Unable to download icon from HTML document", slog.Debug("Unable to download icon from HTML document",
slog.String("website_url", f.websiteURL), slog.String("document_url", documentURL),
slog.String("icon_url", iconURL), slog.String("icon_url", iconURL),
slog.Any("error", err), slog.Any("error", err),
) )
} else if icon != nil { } else if icon != nil {
slog.Debug("Downloaded icon from HTML document", slog.Debug("Downloaded icon from HTML document",
slog.String("website_url", f.websiteURL), slog.String("document_url", documentURL),
slog.String("icon_url", iconURL), slog.String("icon_url", iconURL),
) )
return icon, nil return icon, nil
@ -195,7 +193,7 @@ func resizeIcon(icon *model.Icon) *model.Icon {
} }
if !slices.Contains([]string{"image/jpeg", "image/png", "image/gif", "image/webp"}, icon.MimeType) { if !slices.Contains([]string{"image/jpeg", "image/png", "image/gif", "image/webp"}, icon.MimeType) {
slog.Info("Icon resize skipped: unsupported MIME type", slog.String("mime_type", icon.MimeType)) slog.Debug("Icon resize skipped: unsupported MIME type", slog.String("mime_type", icon.MimeType))
return icon return icon
} }
@ -244,7 +242,7 @@ func resizeIcon(icon *model.Icon) *model.Icon {
return icon return icon
} }
func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string, error) { func findIconURLsFromHTMLDocument(documentURL string, body io.Reader, contentType string) ([]string, error) {
htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType) htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType)
if err != nil { if err != nil {
return nil, fmt.Errorf("icon: unable to create charset reader: %w", err) return nil, fmt.Errorf("icon: unable to create charset reader: %w", err)
@ -268,11 +266,20 @@ func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string,
for _, s := range doc.Find(query).EachIter() { for _, s := range doc.Find(query).EachIter() {
href, _ := s.Attr("href") href, _ := s.Attr("href")
if iconURL := strings.TrimSpace(href); iconURL != "" { href = strings.TrimSpace(href)
iconURLs = append(iconURLs, iconURL) if href == "" {
continue
}
if absoluteIconURL, err := urllib.AbsoluteURL(documentURL, href); err != nil {
slog.Warn("Unable to convert icon URL to absolute URL", slog.Any("error", err), slog.String("icon_href", href))
} else {
iconURLs = append(iconURLs, absoluteIconURL)
slog.Debug("Found icon URL in HTML document", slog.Debug("Found icon URL in HTML document",
slog.String("query", query), slog.String("query", query),
slog.String("icon_url", iconURL)) slog.String("icon_href", href),
slog.String("absolute_icon_url", absoluteIconURL),
)
} }
} }
} }

View file

@ -123,16 +123,16 @@ func TestFindIconURLsFromHTMLDocument_MultipleIcons(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
expected := []string{ expected := []string{
"/favicon.ico", "https://example.org/favicon.ico",
"/shortcut-favicon.ico", "https://example.org/shortcut-favicon.ico",
"/icon-shortcut.ico", "https://example.org/icon-shortcut.ico",
"/apple-touch-icon.png", "https://example.org/apple-touch-icon.png",
} }
if len(iconURLs) != len(expected) { if len(iconURLs) != len(expected) {
@ -155,22 +155,22 @@ func TestFindIconURLsFromHTMLDocument_CaseInsensitiveRel(t *testing.T) {
<link rel="SHORTCUT ICON" href="/favicon3.ico"> <link rel="SHORTCUT ICON" href="/favicon3.ico">
<link rel="Shortcut Icon" href="/favicon4.ico"> <link rel="Shortcut Icon" href="/favicon4.ico">
<link rel="ICON SHORTCUT" href="/favicon5.ico"> <link rel="ICON SHORTCUT" href="/favicon5.ico">
<link rel="Icon Shortcut" href="/favicon6.ico"> <link rel="Icon Shortcut" href="favicon6.ico">
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder/", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
expected := []string{ expected := []string{
"/favicon1.ico", "https://example.org/favicon1.ico",
"/favicon2.ico", "https://example.org/favicon2.ico",
"/favicon3.ico", "https://example.org/favicon3.ico",
"/favicon4.ico", "https://example.org/favicon4.ico",
"/favicon5.ico", "https://example.org/favicon5.ico",
"/favicon6.ico", "https://example.org/folder/favicon6.ico",
} }
if len(iconURLs) != len(expected) { if len(iconURLs) != len(expected) {
@ -194,7 +194,7 @@ func TestFindIconURLsFromHTMLDocument_NoIcons(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -215,12 +215,12 @@ func TestFindIconURLsFromHTMLDocument_EmptyHref(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
expected := []string{"/valid-icon.ico"} expected := []string{"https://example.org/valid-icon.ico"}
if len(iconURLs) != len(expected) { if len(iconURLs) != len(expected) {
t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs))
@ -241,7 +241,7 @@ func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -250,7 +250,7 @@ func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) {
// So both rel="icon" links are found first, then the rel="shortcut icon" link // So both rel="icon" links are found first, then the rel="shortcut icon" link
expected := []string{ expected := []string{
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAGAhGAQ+QAAAABJRU5ErkJggg==", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAGAhGAQ+QAAAABJRU5ErkJggg==",
"/regular-icon.ico", "https://example.org/regular-icon.ico",
"data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'></svg>", "data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'></svg>",
} }
@ -277,17 +277,17 @@ func TestFindIconURLsFromHTMLDocument_RelativeAndAbsoluteURLs(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org/folder/", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
expected := []string{ expected := []string{
"/absolute-path.ico", "https://example.org/absolute-path.ico",
"relative-path.ico", "https://example.org/folder/relative-path.ico",
"../parent-dir.ico", "https://example.org/parent-dir.ico",
"https://example.com/external.ico", "https://example.com/external.ico",
"//cdn.example.com/protocol-relative.ico", "https://cdn.example.com/protocol-relative.ico",
} }
if len(iconURLs) != len(expected) { if len(iconURLs) != len(expected) {
@ -311,7 +311,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
</head> </head>
</html>` </html>`
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(html), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -324,7 +324,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
// Should at least find the valid ones // Should at least find the valid ones
foundValidIcon := false foundValidIcon := false
for _, url := range iconURLs { for _, url := range iconURLs {
if url == "/valid-before-error.ico" || url == "/valid-after-error.ico" { if url == "https://example.org/valid-before-error.ico" || url == "https://example.org/valid-after-error.ico" {
foundValidIcon = true foundValidIcon = true
break break
} }
@ -336,7 +336,7 @@ func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) {
} }
func TestFindIconURLsFromHTMLDocument_EmptyDocument(t *testing.T) { func TestFindIconURLsFromHTMLDocument_EmptyDocument(t *testing.T) {
iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(""), "text/html") iconURLs, err := findIconURLsFromHTMLDocument("https://example.org", strings.NewReader(""), "text/html")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -18,7 +18,7 @@ func IsAbsoluteURL(link string) bool {
return u.IsAbs() return u.IsAbs()
} }
// GetAbsoluteURL return the absolute form of `input` is possible, as well as its parser form. // GetAbsoluteURL returns the absolute form of `input` if possible, as well as its parsed form.
func GetAbsoluteURL(input string) (string, *url.URL, error) { func GetAbsoluteURL(input string) (string, *url.URL, error) {
if strings.HasPrefix(input, "//") { if strings.HasPrefix(input, "//") {
return "https:" + input, nil, nil return "https:" + input, nil, nil