about summary refs log tree commit diff stats
path: root/worker/lib/parse.go
diff options
context:
space:
mode:
author elumbella <elumbella@posteo.net> 2020-05-05 11:26:43 +0200
committer Drew DeVault <sir@cmpwn.com> 2020-05-06 10:00:33 -0400
commit d1600e4666e9efdb3ae45ef2cc01182950a41bb5 (patch)
tree 53f6b6d413a92a669d5512845cc3c766e6255a38 /worker/lib/parse.go
parent 9a520c4098204b06b09f8d31f7ba126837d45a28 (diff)
download aerc-d1600e4666e9efdb3ae45ef2cc01182950a41bb5.tar.gz
Guess date from received if not present
Diffstat (limited to 'worker/lib/parse.go')
-rw-r--r-- worker/lib/parse.go 42
1 files changed, 41 insertions, 1 deletions
diff --git a/worker/lib/parse.go b/worker/lib/parse.go
index ac20ac8..bbea49d 100644
--- a/worker/lib/parse.go
+++ b/worker/lib/parse.go
@@ -2,9 +2,12 @@ package lib
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
+	"regexp"
 	"strings"
+	"time"
 
 	"git.sr.ht/~sircmpwn/aerc/models"
 	"github.com/emersion/go-message"
@@ -12,6 +15,11 @@ import (
 	"github.com/emersion/go-message/mail"
 )
 
+// dateRe finds an RFC 1123Z-like date; the match ends at the zone, with no trailing space.
+var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
+	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
+	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+-][0-9]{4})`)
+
 func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
 	if len(index) < 1 {
 		return nil, fmt.Errorf("no part to read")
@@ -97,7 +105,7 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
 }
 
 func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
-	date, err := h.Date()
+	date, err := parseDate(h)
 	if err != nil {
 		return nil, fmt.Errorf("could not parse date header: %v", err)
 	}
@@ -141,6 +149,38 @@ func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
 	}, nil
 }
 
+// parseDate returns the message date, falling back to the Received header
+// and to non-conforming layouts seen in the wild when h.Date() fails.
+func parseDate(h *mail.Header) (time.Time, error) {
+	if t, err := h.Date(); err == nil {
+		return t, nil
+	}
+	text, err := h.Text("date")
+	if err != nil {
+		return time.Time{}, errors.New("no date header")
+	}
+	// An empty Date is guessed from Received (best effort: zero time, nil error on failure).
+	if text == "" {
+		guess, err := h.Text("received")
+		if err != nil {
+			return time.Time{}, errors.New("no received header")
+		}
+		// dateRe can match a trailing space, which time.Parse rejects as extra text.
+		t, _ := time.Parse(time.RFC1123Z, strings.TrimSpace(dateRe.FindString(guess)))
+		return t, nil
+	}
+	layouts := []string{
+		// X-Mailer: EarthLink Zoo Mail 1.0
+		"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
+	}
+	for _, layout := range layouts {
+		if t, err := time.Parse(layout, text); err == nil {
+			return t, nil
+		}
+	}
+	return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
+}
+
 func parseAddressList(h *mail.Header, key string) ([]*models.Address, error) {
 	var converted []*models.Address
 	addrs, err := h.AddressList(key)