diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index 25bdec37..e7797d2e 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -123,7 +123,7 @@ func (a *atom10Entry) entryDate() time.Time { if dateText != "" { result, err := date.Parse(dateText) if err != nil { - logger.Error("atom: %v", err) + logger.Error("atom: %v (entry ID = %s)", err, a.ID) return time.Now() } diff --git a/reader/date/parser.go b/reader/date/parser.go index b8dde1ed..eb78f490 100644 --- a/reader/date/parser.go +++ b/reader/date/parser.go @@ -23,6 +23,7 @@ var dateFormats = []string{ time.RFC1123Z, time.RFC1123, time.ANSIC, + "Mon, January 2, 2006, 3:04 PM MST", "Mon, January 2 2006 15:04:05 -0700", "Mon, January 02, 2006, 15:04:05 MST", "Mon, January 02, 2006 15:04:05 MST", @@ -37,6 +38,8 @@ var dateFormats = []string{ "Mon Jan 02, 2006 3:04 pm", "Mon, Jan 02,2006 15:04:05 MST", "Mon Jan 02 2006 15:04:05 -0700", + "Monday, 2. January 2006 - 15:04", + "Monday 02 January 2006", "Monday, January 2, 2006 15:04:05 MST", "Monday, January 2, 2006 03:04 PM", "Monday, January 2, 2006", @@ -111,6 +114,11 @@ var dateFormats = []string{ "Mon, 02 Jan 2006", "Mon, 02 Jan 06 15:04:05 MST", "Mon, 02 Jan 2006 3:04 PM MST", + "Mon Jan 02 2006 15:04:05 MST", + "Mon, 01 02 2006 15:04:05 -0700", + "Mon, 2th Jan 2006 15:05:05 MST", + "Jan. 2, 2006, 3:04 a.m.", + "fri, 02 jan 2006 15:04:05 -0700", "January 02 2006 03:04:05 PM", "January 2, 2006 3:04 PM", "January 2, 2006, 3:04 p.m.", @@ -145,6 +153,7 @@ var dateFormats = []string{ "2006-1-2T15:04:05Z", "2006-1-2 15:04:05", "2006-1-2", + "2006-01-02T15:04:05-07:00Z", "2006-1-02T15:04:05Z", "2006-01-02T15:04Z", "2006-01-02T15:04-07:00", @@ -196,41 +205,106 @@ var dateFormats = []string{ "01/02/2006 - 15:04", "01/02/2006", "01-02-2006", + "Jan. 2006", } +var invalidTimezoneReplacer = strings.NewReplacer( + "Europe/Brussels", "CET", + "GMT+0000 (Coordinated Universal Time)", "GMT", +) + +var invalidLocalizedDateReplacer = strings.NewReplacer( + "Mo,", "Mon,", + "Di,", "Tue,", + "Mi,", "Wed,", + "Do,", "Thu,", + "Fr,", "Fri,", + "Sa,", "Sat,", + "So,", "Sun,", + "Mär ", "Mar ", + "Mai ", "May ", + "Okt ", "Oct ", + "Dez ", "Dec ", + "lun,", "Mon,", + "mar,", "Tue,", + "mer,", "Wed,", + "jeu,", "Thu,", + "ven,", "Fri,", + "sam,", "Sat,", + "dim,", "Sun,", + "lun.", "Mon", + "mar.", "Tue", + "mer.", "Wed", + "jeu.", "Thu", + "ven.", "Fri", + "sam.", "Sat", + "dim.", "Sun", + "Lundi,", "Monday,", + "Mardi,", "Tuesday,", + "Mercredi,", "Wednesday,", + "Jeudi,", "Thursday,", + "Vendredi,", "Friday,", + "Samedi,", "Saturday,", + "Dimanche,", "Sunday,", + "avr ", "Apr ", + "mai ", "May ", + "jui ", "Jun ", + "juin ", "June ", + "jan.", "January ", + "feb.", "February ", + "mars.", "March ", + "avril.", "April ", + "mai.", "May ", + "juin.", "June ", + "juil.", "july", + "août.", "august", + "sept.", "september", + "oct.", "october", + "nov.", "november", + "dec.", "december", + "Janvier", "January", + "Février", "February", + "Mars", "March", + "Avril", "April", + "Mai", "May", + "Juin", "June", + "Juillet", "July", + "Août", "August", + "Septembre", "September", + "Octobre", "October", + "Novembre", "November", + "Décembre", "December", +) + // Parse parses a given date string using a large // list of commonly found feed date formats. -func Parse(ds string) (t time.Time, err error) { - timestamp, err := strconv.ParseInt(ds, 10, 64) +func Parse(rawInput string) (t time.Time, err error) { + timestamp, err := strconv.ParseInt(rawInput, 10, 64) if err == nil { return time.Unix(timestamp, 0), nil } - ds = replaceNonEnglishWords(ds) - d := strings.TrimSpace(ds) - if d == "" { - return t, errors.New("date parser: empty value") + processedInput := invalidLocalizedDateReplacer.Replace(rawInput) + processedInput = invalidTimezoneReplacer.Replace(processedInput) + processedInput = strings.TrimSpace(processedInput) + if processedInput == "" { + return t, errors.New(`date parser: empty value`) } for _, layout := range dateFormats { switch layout { case time.RFC822, time.RFC850, time.RFC1123: - if t, err = parseLocalTimeDates(layout, d); err == nil { + if t, err = parseLocalTimeDates(layout, processedInput); err == nil { return } } - if t, err = time.Parse(layout, d); err == nil { + if t, err = time.Parse(layout, processedInput); err == nil { return } } - lastSpace := strings.LastIndex(ds, " ") - if lastSpace > 0 { - return Parse(ds[0:lastSpace]) - } - - err = fmt.Errorf(`date parser: failed to parse date "%s"`, ds) + err = fmt.Errorf(`date parser: failed to parse date "%s"`, rawInput) return } @@ -249,32 +323,3 @@ func parseLocalTimeDates(layout, ds string) (t time.Time, err error) { return time.ParseInLocation(layout, ds, loc) } - -// Replace German and French dates to English. -func replaceNonEnglishWords(ds string) string { - r := strings.NewReplacer( - "Mo,", "Mon,", - "Di,", "Tue,", - "Mi,", "Wed,", - "Do,", "Thu,", - "Fr,", "Fri,", - "Sa,", "Sat,", - "So,", "Sun,", - "Mär ", "Mar ", - "Mai ", "May ", - "Okt ", "Oct ", - "Dez ", "Dec ", - "lun,", "Mon,", - "mar,", "Tue,", - "mer,", "Wed,", - "jeu,", "Thu,", - "ven,", "Fri,", - "sam,", "Sat,", - "dim,", "Sun,", - "avr ", "Apr ", - "mai ", "May ", - "jui ", "Jun ", - ) - - return r.Replace(ds) -} diff --git a/reader/date/parser_test.go b/reader/date/parser_test.go index 5479b940..31f9029b 100644 --- a/reader/date/parser_test.go +++ b/reader/date/parser_test.go @@ -133,11 +133,21 @@ func TestParseWeirdDateFormat(t *testing.T) { "Mon, 30 Mar 2020 19:53 +0000", "Mon, 03/30/2020 - 19:19", "2018-12-12T12:12", + "2020-11-08T16:20:00-05:00Z", + "Nov. 16, 2020, 10:57 a.m.", + "Friday 06 November 2020", + "Mon, November 16, 2020, 11:12 PM EST", + "Lundi, 16. Novembre 2020 - 15:54", + "Thu Nov 12 2020 17:00:00 GMT+0000 (Coordinated Universal Time)", + "Sat, 11 04 2020 08:51:49 +0100", + "Mon, 16th Nov 2020 13:16:28 GMT", + "Nov. 2020", + "ven., 03 juil. 2020 15:09:58 +0000", } for _, date := range dates { if _, err := Parse(date); err != nil { - t.Fatalf(`Unable to parse date: %q`, date) + t.Errorf(`Unable to parse date: %q`, date) } } } diff --git a/reader/rss/rss.go b/reader/rss/rss.go index cbb1bd19..490f9253 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -179,7 +179,7 @@ func (r *rssItem) entryDate() time.Time { if value != "" { result, err := date.Parse(value) if err != nil { - logger.Error("rss: %v", err) + logger.Error("rss: %v (entry GUID = %s)", err, r.GUID) return time.Now() }