diff options
author | elumbella <elumbella@posteo.net> | 2020-05-05 11:26:43 +0200 |
---|---|---|
committer | Drew DeVault <sir@cmpwn.com> | 2020-05-06 10:00:33 -0400 |
commit | d1600e4666e9efdb3ae45ef2cc01182950a41bb5 (patch) | |
tree | 53f6b6d413a92a669d5512845cc3c766e6255a38 /worker/lib/parse.go | |
parent | 9a520c4098204b06b09f8d31f7ba126837d45a28 (diff) | |
download | aerc-d1600e4666e9efdb3ae45ef2cc01182950a41bb5.tar.gz |
Guess date from received if not present
Diffstat (limited to 'worker/lib/parse.go')
-rw-r--r-- | worker/lib/parse.go | 42 |
1 files changed, 41 insertions, 1 deletions
```diff
diff --git a/worker/lib/parse.go b/worker/lib/parse.go
index ac20ac8..bbea49d 100644
--- a/worker/lib/parse.go
+++ b/worker/lib/parse.go
@@ -2,9 +2,12 @@ package lib
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
+	"regexp"
 	"strings"
+	"time"
 
 	"git.sr.ht/~sircmpwn/aerc/models"
 	"github.com/emersion/go-message"
@@ -12,6 +15,11 @@ import (
 	"github.com/emersion/go-message/mail"
 )
 
+// RFC 1123Z regexp
+var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
+	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
+	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([\+|\-][0-9]{4})\s?`)
+
 func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
 	if len(index) < 1 {
 		return nil, fmt.Errorf("no part to read")
@@ -97,7 +105,7 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
 }
 
 func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
-	date, err := h.Date()
+	date, err := parseDate(h)
 	if err != nil {
 		return nil, fmt.Errorf("could not parse date header: %v", err)
 	}
@@ -141,6 +149,38 @@ func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
 	}, nil
 }
 
+// parseDate extends the built-in date parser with additional layouts which are
+// non-conforming but appear in the wild.
+func parseDate(h *mail.Header) (time.Time, error) {
+	t, parseErr := h.Date()
+	if parseErr == nil {
+		return t, nil
+	}
+	text, err := h.Text("date")
+	if err != nil {
+		return time.Time{}, errors.New("no date header")
+	}
+	// sometimes, no error occurs but the date is empty. In this case, guess time from received header field
+	if text == "" {
+		guess, err := h.Text("received")
+		if err != nil {
+			return time.Time{}, errors.New("no received header")
+		}
+		t, _ := time.Parse(time.RFC1123Z, dateRe.FindString(guess))
+		return t, nil
+	}
+	layouts := []string{
+		// X-Mailer: EarthLink Zoo Mail 1.0
+		"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
+	}
+	for _, layout := range layouts {
+		if t, err := time.Parse(layout, text); err == nil {
+			return t, nil
+		}
+	}
+	return time.Time{}, fmt.Errorf("unrecognized date format: %s", t)
+}
+
 func parseAddressList(h *mail.Header, key string) ([]*models.Address, error) {
 	var converted []*models.Address
 	addrs, err := h.AddressList(key)
```