/*
Copyright (c) 2019 Ben Morrison (gbmor)
This file is part of Registry.
Registry is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Registry is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Registry. If not, see <https://www.gnu.org/licenses/>.
*/
package registry // import "git.sr.ht/~gbmor/getwtxt/registry"
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strings"
"sync"
"time"
)
// GetTwtxt downloads the raw twtxt data found at urlKey.
//
// urlKey must begin with "http://" or "https://"; anything else is
// rejected before a request is made. The response must carry a
// Content-Type containing "text/plain" and a 200 status code (the
// Content-Type is examined first), otherwise an error is returned.
//
// The boolean result tells the caller how to interpret the bytes:
// false means urlKey is a single user's twtxt file, to be handed to
// ParseUserTwtxt; true means urlKey ends in /api/plain/tweets or
// /api/plain/tweets/all and is therefore another registry's output,
// to be handed to ParseRegistryTwtxt.
//
// Callers should generally pass the *http.Client held by their
// Registry. When client is nil, the request is made with a client
// whose timeout is 10s and whose other fields are left at their
// defaults.
func GetTwtxt(urlKey string, client *http.Client) ([]byte, bool, error) {
	hasScheme := strings.HasPrefix(urlKey, "http://") || strings.HasPrefix(urlKey, "https://")
	if !hasScheme {
		return nil, false, fmt.Errorf("invalid URL: %v", urlKey)
	}

	resp, err := doReq(urlKey, "GET", "", client)
	if err != nil {
		return nil, false, err
	}
	defer resp.Body.Close()

	// Stop scanning as soon as one Content-Type value mentions text/plain.
	isPlainText := false
	contentTypes := resp.Header["Content-Type"]
	for i := 0; i < len(contentTypes) && !isPlainText; i++ {
		isPlainText = strings.Contains(contentTypes[i], "text/plain")
	}

	switch {
	case !isPlainText:
		return nil, false, fmt.Errorf("received non-text/plain response body from %v", urlKey)
	case resp.StatusCode != http.StatusOK:
		return nil, false, fmt.Errorf("didn't get 200 from remote server, received %v: %v", resp.StatusCode, urlKey)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, false, fmt.Errorf("error reading response body from %v: %v", urlKey, err)
	}

	// A URL pointing at a registry's tweet listing means we are adding
	// another twtxt registry as a "user".
	fromRegistry := strings.HasSuffix(urlKey, "/api/plain/tweets") ||
		strings.HasSuffix(urlKey, "/api/plain/tweets/all")
	return body, fromRegistry, nil
}
// DiffTwtxt issues a conditional HEAD request for the user's remote
// twtxt data, sending the user's stored LastModified value (when it
// is non-empty) as the If-Modified-Since header.
//
// It returns true when the remote server answers 200 OK; in that
// case the first non-empty Last-Modified response header replaces
// the stored value for the user. It returns false when the server
// answers 304 Not Modified, or any other status code.
//
// Error conditions:
//   - urlKey does not start with http:// or https://: returns false
//     and an error.
//   - urlKey is not present in registry.Users: returns true and an
//     error.
//   - the HEAD request fails: returns false and that error.
//
// registry.Mu and the user's Mu are both held until the method
// returns, including while the HTTP request is in flight.
func (registry *Registry) DiffTwtxt(urlKey string) (bool, error) {
	if !strings.HasPrefix(urlKey, "http://") && !strings.HasPrefix(urlKey, "https://") {
		return false, fmt.Errorf("invalid URL: %v", urlKey)
	}

	registry.Mu.Lock()
	// Deferred right away so that every return path below, including
	// the "user not in registry" one, releases the registry lock.
	defer registry.Mu.Unlock()

	user, ok := registry.Users[urlKey]
	if !ok {
		return true, fmt.Errorf("user not in registry")
	}

	user.Mu.Lock()
	// Runs before the registry unlock above (defers are LIFO), so the
	// user is stored back and unlocked while the registry is still held.
	defer func() {
		registry.Users[urlKey] = user
		user.Mu.Unlock()
	}()

	res, err := doReq(urlKey, "HEAD", user.LastModified, registry.HTTPClient)
	if err != nil {
		return false, err
	}
	// The body of a HEAD response is empty, but it still has to be
	// closed so the underlying connection is released.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// 304 Not Modified and every other status: report "no change".
		return false, nil
	}

	for _, e := range res.Header["Last-Modified"] {
		if e != "" {
			user.LastModified = e
			break
		}
	}
	return true, nil
}
// doReq is the shared, package-internal helper for outgoing HTTP
// requests. It builds a request for urlKey using the given method and
// an empty body, adds an If-Modified-Since header when modTime is
// non-empty, and sends it.
//
// When client is nil, a client with a 10 second timeout and all other
// fields at their zero values is used instead.
//
// A failure to construct the request is returned unchanged; a failure
// to send it is returned as an error that names the method and URL.
// doReq does not close the response body; that is left to the caller.
func doReq(urlKey, method, modTime string, client *http.Client) (*http.Response, error) {
	httpClient := client
	if httpClient == nil {
		httpClient = &http.Client{Timeout: 10 * time.Second}
	}

	request, err := http.NewRequest(method, urlKey, new(bytes.Buffer))
	if err != nil {
		return nil, err
	}

	if len(modTime) > 0 {
		request.Header.Set("If-Modified-Since", modTime)
	}

	response, sendErr := httpClient.Do(request)
	if sendErr != nil {
		return nil, fmt.Errorf("couldn't %v %v: %v", method, urlKey, sendErr)
	}
	return response, nil
}
// ParseUserTwtxt parses a fetched twtxt file, supplied as a slice of
// bytes, into a TimeMap keyed by each status's timestamp. Every
// stored value has the form "nickname\turlKey\t<original line>".
// (The original comment states the result may be passed to
// Index.AddUser(); that method is not visible from this file.)
//
// Blank lines and lines starting with "#" are skipped. Every other
// line must consist of exactly two tab-separated columns, a timestamp
// and the status text; a line of any other shape aborts the parse
// with a nil TimeMap and an error.
//
// A line whose timestamp cannot be parsed as RFC 3339 (after
// fixTimestamp has been applied) is left out of the result and noted
// in the returned error. In that case, and when the scanner itself
// reports an error, the TimeMap built from the remaining lines is
// returned together with a non-nil error.
func ParseUserTwtxt(twtxt []byte, nickname, urlKey string) (TimeMap, error) {
	if len(twtxt) == 0 {
		return nil, fmt.Errorf("no data to parse in twtxt file")
	}

	// erz accumulates one message per recoverable problem.
	var erz []byte
	timemap := NewTimeMap()

	scanner := bufio.NewScanner(bytes.NewReader(twtxt))
	for scanner.Scan() {
		nopadding := strings.TrimSpace(scanner.Text())
		if nopadding == "" || strings.HasPrefix(nopadding, "#") {
			continue
		}

		columns := strings.Split(nopadding, "\t")
		if len(columns) != 2 {
			return nil, fmt.Errorf("improperly formatted data in twtxt file")
		}

		thetime, err := time.Parse(time.RFC3339, fixTimestamp(columns[0]))
		if err != nil {
			erz = append(erz, []byte(fmt.Sprintf("unable to retrieve date: %v\n", err))...)
			// Skip the line: storing it would file the status under
			// the zero time.Time and overwrite any earlier bad line.
			continue
		}

		timemap[thetime] = nickname + "\t" + urlKey + "\t" + nopadding
	}

	// Scan stops silently on errors such as an over-long line, so the
	// scanner has to be asked explicitly whether it finished cleanly.
	if err := scanner.Err(); err != nil {
		erz = append(erz, []byte(fmt.Sprintf("unable to read twtxt data: %v\n", err))...)
	}

	if len(erz) == 0 {
		return timemap, nil
	}
	return timemap, fmt.Errorf("%v", string(erz))
}
// timestampOffsetRE matches a numeric UTC offset of the form "+HH:MM".
// It is compiled once here instead of on every fixTimestamp call.
var timestampOffsetRE = regexp.MustCompile(`[\+][\d][\d][:][\d][\d]`)

// fixTimestamp rewrites every "+HH:MM" offset found in ts to the
// literal "Z" and trims surrounding whitespace from the result.
//
// NOTE(review): the clock time is not adjusted when the offset is
// replaced, so a timestamp with a non-zero positive offset ends up
// being read as if it were UTC, and negative offsets ("-HH:MM") are
// not matched at all. Confirm this is the intended normalization.
func fixTimestamp(ts string) string {
	return strings.TrimSpace(timestampOffsetRE.ReplaceAllString(ts, "Z"))
}
// ParseRegistryTwtxt parses the output of a remote registry and
// returns the users found in it as a slice of *User.
//
// Blank lines and lines starting with "#" are skipped. Every other
// line must consist of exactly four tab-separated columns, of which
// the first three are read as nickname, URL and an RFC 3339
// timestamp; a line of any other shape aborts the parse with a nil
// slice and an error.
//
// Lines are grouped into one User per nickname or URL: a line whose
// nickname or URL matches an already-collected User is added to that
// User's Status map, keyed by its timestamp; otherwise a new User is
// created with Date set to the current time. The whole trimmed line
// is stored as the status value.
//
// A line whose timestamp cannot be parsed is skipped and noted in the
// returned error. In that case, and when the scanner itself reports
// an error, the users collected from the remaining lines are returned
// together with a non-nil error. The error is nil when no problem
// occurred.
func ParseRegistryTwtxt(twtxt []byte) ([]*User, error) {
	if len(twtxt) == 0 {
		return nil, fmt.Errorf("received no data")
	}

	// erz accumulates one message per recoverable problem.
	var erz []byte
	userdata := []*User{}

	scanner := bufio.NewScanner(bytes.NewReader(twtxt))
	for scanner.Scan() {
		nopadding := strings.TrimSpace(scanner.Text())
		if nopadding == "" || strings.HasPrefix(nopadding, "#") {
			continue
		}

		columns := strings.Split(nopadding, "\t")
		if len(columns) != 4 {
			return nil, fmt.Errorf("improperly formatted data")
		}

		thetime, err := time.Parse(time.RFC3339, columns[2])
		if err != nil {
			erz = append(erz, []byte(fmt.Sprintf("%v\n", err))...)
			continue
		}

		parsednickname, parsedurl := columns[0], columns[1]

		// Look for a user already collected under this nick or URL.
		var existing *User
		for _, e := range userdata {
			if e.Nick == parsednickname || e.URL == parsedurl {
				existing = e
				break
			}
		}
		if existing != nil {
			existing.Status[thetime] = nopadding
			continue
		}

		userdata = append(userdata, &User{
			Mu:   sync.RWMutex{},
			Nick: parsednickname,
			URL:  parsedurl,
			Date: time.Now().Format(time.RFC3339),
			Status: TimeMap{
				thetime: nopadding,
			},
		})
	}

	// Scan stops silently on errors such as an over-long line, so the
	// scanner has to be asked explicitly whether it finished cleanly.
	if err := scanner.Err(); err != nil {
		erz = append(erz, []byte(fmt.Sprintf("%v\n", err))...)
	}

	// Report an error only when something actually went wrong, and
	// render the collected messages as text rather than raw bytes.
	if len(erz) == 0 {
		return userdata, nil
	}
	return userdata, fmt.Errorf("%v", string(erz))
}