diff --git a/internal/reader/icon/finder_test.go b/internal/reader/icon/finder_test.go index d30e69c6..161c3b65 100644 --- a/internal/reader/icon/finder_test.go +++ b/internal/reader/icon/finder_test.go @@ -112,22 +112,237 @@ func TestParseInvalidImageDataURLWithWrongPrefix(t *testing.T) { } } -func TestParseDocumentWithWhitespaceIconURL(t *testing.T) { - html := `` +func TestFindIconURLsFromHTMLDocument_MultipleIcons(t *testing.T) { + html := ` + + + + + + + +` iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") if err != nil { t.Fatal(err) } - if len(iconURLs) != 1 { - t.Fatalf(`Invalid number of icon URLs, got %d`, len(iconURLs)) + expected := []string{ + "/favicon.ico", + "/shortcut-favicon.ico", + "/icon-shortcut.ico", + "/apple-touch-icon.png", } - if iconURLs[0] != "/static/img/favicon.ico" { - t.Errorf(`Invalid icon URL, got %q`, iconURLs[0]) + if len(iconURLs) != len(expected) { + t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) + } + + for i, expectedURL := range expected { + if iconURLs[i] != expectedURL { + t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i]) + } + } +} + +func TestFindIconURLsFromHTMLDocument_CaseInsensitiveRel(t *testing.T) { + html := ` + + + + + + + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + expected := []string{ + "/favicon1.ico", + "/favicon2.ico", + "/favicon3.ico", + "/favicon4.ico", + "/favicon5.ico", + "/favicon6.ico", + } + + if len(iconURLs) != len(expected) { + t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) + } + + for i, expectedURL := range expected { + if iconURLs[i] != expectedURL { + t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i]) + } + } +} + +func TestFindIconURLsFromHTMLDocument_NoIcons(t *testing.T) { + html := ` + + + No Icons Here + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + if len(iconURLs) != 0 { + t.Fatalf("Expected 0 icon URLs, got %d: %v", len(iconURLs), iconURLs) + } +} + +func TestFindIconURLsFromHTMLDocument_EmptyHref(t *testing.T) { + html := ` + + + + + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + expected := []string{"/valid-icon.ico"} + + if len(iconURLs) != len(expected) { + t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) + } + + if iconURLs[0] != expected[0] { + t.Errorf("Expected icon URL to be %q, got %q", expected[0], iconURLs[0]) + } +} + +func TestFindIconURLsFromHTMLDocument_DataURLs(t *testing.T) { + html := ` + + + + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + // The function processes queries in order: rel="icon", then rel="shortcut icon", etc. + // So both rel="icon" links are found first, then the rel="shortcut icon" link + expected := []string{ + "", + "/regular-icon.ico", + "data:image/svg+xml,", + } + + if len(iconURLs) != len(expected) { + t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) + } + + for i, expectedURL := range expected { + if iconURLs[i] != expectedURL { + t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i]) + } + } +} + +func TestFindIconURLsFromHTMLDocument_RelativeAndAbsoluteURLs(t *testing.T) { + html := ` + + + + + + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + expected := []string{ + "/absolute-path.ico", + "relative-path.ico", + "../parent-dir.ico", + "https://example.com/external.ico", + "//cdn.example.com/protocol-relative.ico", + } + + if len(iconURLs) != len(expected) { + t.Fatalf("Expected %d icon URLs, got %d", len(expected), len(iconURLs)) + } + + for i, expectedURL := range expected { + if iconURLs[i] != expectedURL { + t.Errorf("Expected icon URL %d to be %q, got %q", i, expectedURL, iconURLs[i]) + } + } +} + +func TestFindIconURLsFromHTMLDocument_InvalidHTML(t *testing.T) { + html := ` + + + + + +` + + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(html), "text/html") + if err != nil { + t.Fatal(err) + } + + // goquery should handle malformed HTML gracefully + if len(iconURLs) == 0 { + t.Fatal("Expected to find some icon URLs even with malformed HTML") + } + + // Should at least find the valid ones + foundValidIcon := false + for _, url := range iconURLs { + if url == "/valid-before-error.ico" || url == "/valid-after-error.ico" { + foundValidIcon = true + break + } + } + + if !foundValidIcon { + t.Errorf("Expected to find at least one valid icon URL, got: %v", iconURLs) + } +} + +func TestFindIconURLsFromHTMLDocument_EmptyDocument(t *testing.T) { + iconURLs, err := findIconURLsFromHTMLDocument(strings.NewReader(""), "text/html") + if err != nil { + t.Fatal(err) + } + + if len(iconURLs) != 0 { + t.Fatalf("Expected 0 icon URLs from empty document, got %d", len(iconURLs)) } }