diff options
Diffstat (limited to 'registry')
-rw-r--r-- | registry/README.md | 48 | ||||
-rw-r--r-- | registry/fetch.go | 277 | ||||
-rw-r--r-- | registry/fetch_test.go | 286 | ||||
-rw-r--r-- | registry/init_test.go | 93 | ||||
-rw-r--r-- | registry/integ_test.go | 98 | ||||
-rw-r--r-- | registry/query.go | 196 | ||||
-rw-r--r-- | registry/query_test.go | 459 | ||||
-rw-r--r-- | registry/revive.toml | 30 | ||||
-rw-r--r-- | registry/types.go | 148 | ||||
-rw-r--r-- | registry/user.go | 270 | ||||
-rw-r--r-- | registry/user_test.go | 349 |
11 files changed, 2254 insertions, 0 deletions
diff --git a/registry/README.md b/registry/README.md new file mode 100644 index 0000000..34cd37e --- /dev/null +++ b/registry/README.md @@ -0,0 +1,48 @@ +# `getwtxt/registry` + +### twtxt Registry Library for Go + +`getwtxt/registry` helps you implement twtxt registries in Go. +It uses no third-party dependencies whatsoever, only the standard library, +and has no global state. +Specifying your own `http.Client` for requests is encouraged, with a sensible +default available by passing `nil` to the constructor. + +## Using the Library + +Just add it to your imports list in the file(s) where it's needed. + +```go +import ( + "git.sr.ht/~gbmor/getwtxt/registry" +) +``` + +## Documentation + +The code is commented, so feel free to browse the files themselves. +Alternatively, the generated documentation can be found at: + +[pkg.go.dev/git.sr.ht/~gbmor/getwtxt/registry](https://pkg.go.dev/git.sr.ht/~gbmor/getwtxt/registry) + +## Contributions + +All contributions are very welcome! Please specify that you are referring to `getwtxt/registry` +when using the following: + +* Mailing list (patches, discussion) + * [https://lists.sr.ht/~gbmor/getwtxt](https://lists.sr.ht/~gbmor/getwtxt) +* Ticket tracker + * [https://todo.sr.ht/~gbmor/getwtxt](https://todo.sr.ht/~gbmor/getwtxt) + +## Notes + +* getwtxt - parent project: + * [sr.ht/~gbmor/getwtxt](https://sr.ht/~gbmor/getwtxt) + +* twtxt repository: + * [github.com/buckket/twtxt](https://github.com/buckket/twtxt) +* twtxt documentation: + * [twtxt.readthedocs.io/en/latest/](https://twtxt.readthedocs.io/en/latest/) +* twtxt registry documentation: + * [twtxt.readthedocs.io/en/latest/user/registry.html](https://twtxt.readthedocs.io/en/latest/user/registry.html) diff --git a/registry/fetch.go b/registry/fetch.go new file mode 100644 index 0000000..9adf4ec --- /dev/null +++ b/registry/fetch.go @@ -0,0 +1,277 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. 
+ +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +package registry // import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "bufio" + "bytes" + "fmt" + "io/ioutil" + "net/http" + "regexp" + "strings" + "sync" + "time" +) + +// GetTwtxt fetches the raw twtxt file data from the user's +// provided URL, after validating the URL. If the returned +// boolean value is false, the fetched URL is a single user's +// twtxt file. If true, the fetched URL is the output of +// another registry's /api/plain/tweets. The output of +// GetTwtxt should be passed to either ParseUserTwtxt or +// ParseRegistryTwtxt, respectively. +// Generally, the *http.Client inside a given Registry instance should +// be passed to GetTwtxt. If the *http.Client passed is nil, +// Registry will use a preconstructed client with a +// timeout of 10s and all other values set to default. 
+func GetTwtxt(urlKey string, client *http.Client) ([]byte, bool, error) { + if !strings.HasPrefix(urlKey, "http://") && !strings.HasPrefix(urlKey, "https://") { + return nil, false, fmt.Errorf("invalid URL: %v", urlKey) + } + + res, err := doReq(urlKey, "GET", "", client) + if err != nil { + return nil, false, err + } + defer res.Body.Close() + + var textPlain bool + for _, v := range res.Header["Content-Type"] { + if strings.Contains(v, "text/plain") { + textPlain = true + break + } + } + if !textPlain { + return nil, false, fmt.Errorf("received non-text/plain response body from %v", urlKey) + } + + if res.StatusCode != http.StatusOK { + return nil, false, fmt.Errorf("didn't get 200 from remote server, received %v: %v", res.StatusCode, urlKey) + } + + twtxt, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, false, fmt.Errorf("error reading response body from %v: %v", urlKey, err) + } + + // Signal that we're adding another twtxt registry as a "user" + if strings.HasSuffix(urlKey, "/api/plain/tweets") || strings.HasSuffix(urlKey, "/api/plain/tweets/all") { + return twtxt, true, nil + } + + return twtxt, false, nil +} + +// DiffTwtxt issues a HEAD request on the user's +// remote twtxt data. It then checks the Content-Length +// header. If it's different from the stored result of +// the previous Content-Length header, update the stored +// value for a given user and return true. +// Otherwise, return false. In some error conditions, +// such as the user not being in the registry, it returns true. +// In other error conditions considered "unrecoverable," +// such as the supplied URL being invalid, it returns false. 
+func (registry *Registry) DiffTwtxt(urlKey string) (bool, error) { + if !strings.HasPrefix(urlKey, "http://") && !strings.HasPrefix(urlKey, "https://") { + return false, fmt.Errorf("invalid URL: %v", urlKey) + } + + registry.Mu.Lock() + user, ok := registry.Users[urlKey] + if !ok { + return true, fmt.Errorf("user not in registry") + } + + user.Mu.Lock() + + defer func() { + registry.Users[urlKey] = user + user.Mu.Unlock() + registry.Mu.Unlock() + }() + + res, err := doReq(urlKey, "HEAD", user.LastModified, registry.HTTPClient) + if err != nil { + return false, err + } + + switch res.StatusCode { + case http.StatusOK: + for _, e := range res.Header["Last-Modified"] { + if e != "" { + user.LastModified = e + break + } + } + return true, nil + + case http.StatusNotModified: + return false, nil + } + + return false, nil +} + +// internal function. boilerplate for http requests. +func doReq(urlKey, method, modTime string, client *http.Client) (*http.Response, error) { + if client == nil { + client = &http.Client{ + Transport: nil, + CheckRedirect: nil, + Jar: nil, + Timeout: 10 * time.Second, + } + } + + var b []byte + buf := bytes.NewBuffer(b) + req, err := http.NewRequest(method, urlKey, buf) + if err != nil { + return nil, err + } + + if modTime != "" { + req.Header.Set("If-Modified-Since", modTime) + } + + res, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("couldn't %v %v: %v", method, urlKey, err) + } + + return res, nil +} + +// ParseUserTwtxt takes a fetched twtxt file in the form of +// a slice of bytes, parses it, and returns it as a +// TimeMap. 
The output may then be passed to Index.AddUser() +func ParseUserTwtxt(twtxt []byte, nickname, urlKey string) (TimeMap, error) { + var erz []byte + if len(twtxt) == 0 { + return nil, fmt.Errorf("no data to parse in twtxt file") + } + + reader := bytes.NewReader(twtxt) + scanner := bufio.NewScanner(reader) + timemap := NewTimeMap() + + for scanner.Scan() { + nopadding := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(nopadding, "#") || nopadding == "" { + continue + } + + columns := strings.Split(nopadding, "\t") + if len(columns) != 2 { + return nil, fmt.Errorf("improperly formatted data in twtxt file") + } + + normalizedDatestamp := fixTimestamp(columns[0]) + thetime, err := time.Parse(time.RFC3339, normalizedDatestamp) + if err != nil { + erz = append(erz, []byte(fmt.Sprintf("unable to retrieve date: %v\n", err))...) + } + + timemap[thetime] = nickname + "\t" + urlKey + "\t" + nopadding + } + + if len(erz) == 0 { + return timemap, nil + } + return timemap, fmt.Errorf("%v", string(erz)) +} + +func fixTimestamp(ts string) string { + normalizeTimestamp := regexp.MustCompile(`[\+][\d][\d][:][\d][\d]`) + return strings.TrimSpace(normalizeTimestamp.ReplaceAllString(ts, "Z")) +} + +// ParseRegistryTwtxt takes output from a remote registry and outputs +// the accessible user data via a slice of Users. +func ParseRegistryTwtxt(twtxt []byte) ([]*User, error) { + var erz []byte + if len(twtxt) == 0 { + return nil, fmt.Errorf("received no data") + } + + reader := bytes.NewReader(twtxt) + scanner := bufio.NewScanner(reader) + userdata := []*User{} + + for scanner.Scan() { + + nopadding := strings.TrimSpace(scanner.Text()) + + if strings.HasPrefix(nopadding, "#") || nopadding == "" { + continue + } + + columns := strings.Split(nopadding, "\t") + if len(columns) != 4 { + return nil, fmt.Errorf("improperly formatted data") + } + + thetime, err := time.Parse(time.RFC3339, columns[2]) + if err != nil { + erz = append(erz, []byte(fmt.Sprintf("%v\n", err))...) 
+ continue + } + + parsednickname := columns[0] + dataIndex := 0 + parsedurl := columns[1] + inIndex := false + + for i, e := range userdata { + if e.Nick == parsednickname || e.URL == parsedurl { + dataIndex = i + inIndex = true + break + } + } + + if inIndex { + tmp := userdata[dataIndex] + tmp.Status[thetime] = nopadding + userdata[dataIndex] = tmp + } else { + timeNowRFC := time.Now().Format(time.RFC3339) + if err != nil { + erz = append(erz, []byte(fmt.Sprintf("%v\n", err))...) + } + + tmp := &User{ + Mu: sync.RWMutex{}, + Nick: parsednickname, + URL: parsedurl, + Date: timeNowRFC, + Status: TimeMap{ + thetime: nopadding, + }, + } + + userdata = append(userdata, tmp) + } + } + + return userdata, fmt.Errorf("%v", erz) +} diff --git a/registry/fetch_test.go b/registry/fetch_test.go new file mode 100644 index 0000000..4eab2a4 --- /dev/null +++ b/registry/fetch_test.go @@ -0,0 +1,286 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +package registry + +import ( + "bufio" + "fmt" + "net/http" + "os" + "strings" + "testing" + "time" +) + +func constructTwtxt() []byte { + registry := initTestEnv() + var resp []byte + // iterates through each mock user's mock statuses + for _, v := range registry.Users { + for _, e := range v.Status { + split := strings.Split(e, "\t") + status := []byte(split[2] + "\t" + split[3] + "\n") + resp = append(resp, status...) + } + } + return resp +} + +// this is just dumping all the mock statuses. +// it'll be served under fake paths as +// "remote" twtxt.txt files +func twtxtHandler(w http.ResponseWriter, _ *http.Request) { + // prepare the response + resp := constructTwtxt() + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + n, err := w.Write(resp) + if err != nil || n == 0 { + fmt.Printf("Got error or wrote zero bytes: %v bytes, %v\n", n, err) + } +} + +var getTwtxtCases = []struct { + name string + url string + wantErr bool + localOnly bool +}{ + { + name: "Constructed Local twtxt.txt", + url: "http://localhost:8080/twtxt.txt", + wantErr: false, + localOnly: true, + }, + { + name: "Inaccessible Site With twtxt.txt", + url: "https://example33333333333.com/twtxt.txt", + wantErr: true, + localOnly: false, + }, + { + name: "Inaccessible Site Without twtxt.txt", + url: "https://example333333333333.com", + wantErr: true, + localOnly: false, + }, + { + name: "Local File Inclusion 1", + url: "file://init_test.go", + wantErr: true, + localOnly: false, + }, + { + name: "Local File Inclusion 2", + url: "/etc/passwd", + wantErr: true, + localOnly: false, + }, + { + name: "Remote File Inclusion", + url: "https://example.com/file.cgi", + wantErr: true, + localOnly: false, + }, + { + name: "Remote Registry", + url: "https://twtxt.tilde.institute/api/plain/tweets/", + wantErr: false, + localOnly: false, + }, + { + name: "Garbage Data", + url: "this will be replaced with garbage data", + wantErr: true, + localOnly: true, + }, +} + +// Test the function that 
yoinks the /twtxt.txt file +// for a given user. +func Test_GetTwtxt(t *testing.T) { + var buf = make([]byte, 256) + // read random data into case 4 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + getTwtxtCases[7].url = string(buf) + + if !getTwtxtCases[0].localOnly { + http.Handle("/twtxt.txt", http.HandlerFunc(twtxtHandler)) + go fmt.Println(http.ListenAndServe(":8080", nil)) + } + + for _, tt := range getTwtxtCases { + t.Run(tt.name, func(t *testing.T) { + if tt.localOnly { + t.Skipf("Local-only test. Skipping ... \n") + } + out, _, err := GetTwtxt(tt.url, nil) + if tt.wantErr && err == nil { + t.Errorf("Expected error: %v\n", tt.url) + } + if !tt.wantErr && err != nil { + t.Errorf("Unexpected error: %v %v\n", tt.url, err) + } + if !tt.wantErr && out == nil { + t.Errorf("Incorrect data received: %v\n", out) + } + }) + } + +} + +// running the benchmarks separately for each case +// as they have different properties (allocs, time) +func Benchmark_GetTwtxt(b *testing.B) { + + for i := 0; i < b.N; i++ { + _, _, err := GetTwtxt("https://gbmor.dev/twtxt.txt", nil) + if err != nil { + continue + } + } +} + +var parseTwtxtCases = []struct { + name string + data []byte + wantErr bool + localOnly bool +}{ + { + name: "Constructed twtxt file", + data: constructTwtxt(), + wantErr: false, + localOnly: false, + }, + { + name: "Incorrectly formatted date", + data: []byte("2019 April 23rd\tI love twtxt!!!11"), + wantErr: true, + localOnly: false, + }, + { + name: "No data", + data: []byte{}, + wantErr: true, + localOnly: false, + }, + { + name: "Variant rfc3339 datestamp", + data: []byte("2020-02-04T21:28:21.868659+00:00\tWill this work?"), + wantErr: false, + localOnly: false, + }, + { + name: "Random/garbage data", + wantErr: true, + localOnly: true, + }, +} + +// See if we can break ParseTwtxt or get it +// to throw an unexpected error +func 
Test_ParseUserTwtxt(t *testing.T) { + var buf = make([]byte, 256) + // read random data into case 4 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + parseTwtxtCases[4].data = buf + + for _, tt := range parseTwtxtCases { + if tt.localOnly { + t.Skipf("Local-only test: Skipping ... \n") + } + t.Run(tt.name, func(t *testing.T) { + timemap, errs := ParseUserTwtxt(tt.data, "testuser", "testurl") + if errs == nil && tt.wantErr { + t.Errorf("Expected error(s), received none.\n") + } + + if !tt.wantErr { + if errs != nil { + t.Errorf("Unexpected error: %v\n", errs) + } + + for k, v := range timemap { + if k == (time.Time{}) || v == "" { + t.Errorf("Empty status or empty timestamp: %v, %v\n", k, v) + } + } + } + }) + } +} + +func Benchmark_ParseUserTwtxt(b *testing.B) { + var buf = make([]byte, 256) + // read random data into case 4 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + b.Errorf("Couldn't set up benchmark: %v\n", err) + } + parseTwtxtCases[3].data = buf + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range parseTwtxtCases { + _, _ = ParseUserTwtxt(tt.data, "testuser", "testurl") + } + } +} + +var timestampCases = []struct { + name string + orig string + expected string +}{ + { + name: "Timezone appended", + orig: "2020-01-13T16:08:25.544735+00:00", + expected: "2020-01-13T16:08:25.544735Z", + }, + { + name: "It's fine already", + orig: "2020-01-14T00:19:45.092344Z", + expected: "2020-01-14T00:19:45.092344Z", + }, +} + +func Test_fixTimestamp(t *testing.T) { + for _, tt := range timestampCases { + t.Run(tt.name, func(t *testing.T) { + tsout := fixTimestamp(tt.orig) + if tsout != tt.expected { + t.Errorf("Failed :: %s :: got %s expected %s", tt.name, tsout, tt.expected) + } + }) + } +} diff --git a/registry/init_test.go 
b/registry/init_test.go new file mode 100644 index 0000000..ab9d494 --- /dev/null +++ b/registry/init_test.go @@ -0,0 +1,93 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +package registry //import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "fmt" + "log" + "os" + "time" +) + +func quickErr(err error) { + if err != nil { + fmt.Printf("%v\n", err) + } +} + +// Sets up mock users and statuses +func initTestEnv() *Registry { + hush, err := os.Open("/dev/null") + quickErr(err) + log.SetOutput(hush) + + // this is a bit tedious, but set up fake dates + // for the mock users' join and status timestamps + timeMonthPrev := time.Now().AddDate(0, -1, 0) + timeMonthPrevRFC := timeMonthPrev.Format(time.RFC3339) + + timeTwoMonthsPrev := time.Now().AddDate(0, -2, 0) + timeTwoMonthsPrevRFC := timeTwoMonthsPrev.Format(time.RFC3339) + + timeThreeMonthsPrev := time.Now().AddDate(0, -3, 0) + timeThreeMonthsPrevRFC := timeThreeMonthsPrev.Format(time.RFC3339) + + timeFourMonthsPrev := time.Now().AddDate(0, -4, 0) + timeFourMonthsPrevRFC := timeFourMonthsPrev.Format(time.RFC3339) + + var mockusers = []struct { + url string + nick string + date string + apidate []byte + status TimeMap + }{ + { + url: "https://example3.com/twtxt.txt", + nick: "foo_barrington", + date: timeTwoMonthsPrevRFC, + status: TimeMap{ + timeTwoMonthsPrev: 
"foo_barrington\thttps://example3.com/twtxt.txt\t" + timeTwoMonthsPrevRFC + "\tJust got started with #twtxt!", + timeMonthPrev: "foo_barrington\thttps://example3.com/twtxt.txt\t" + timeMonthPrevRFC + "\tHey <@foo https://example.com/twtxt.txt>, I love programming. Just FYI.", + }, + }, + { + url: "https://example.com/twtxt.txt", + nick: "foo", + date: timeFourMonthsPrevRFC, + status: TimeMap{ + timeFourMonthsPrev: "foo\thttps://example.com/twtxt.txt\t" + timeFourMonthsPrevRFC + "\tThis is so much better than #twitter", + timeThreeMonthsPrev: "foo\thttps://example.com/twtxt.txt\t" + timeThreeMonthsPrevRFC + "\tI can't wait to start on my next programming #project with <@foo_barrington https://example3.com/twtxt.txt>", + }, + }, + } + registry := New(nil) + + // fill the test registry with the mock users + for _, e := range mockusers { + data := &User{} + data.Nick = e.nick + data.Date = e.date + data.Status = e.status + registry.Users[e.url] = data + } + + return registry +} diff --git a/registry/integ_test.go b/registry/integ_test.go new file mode 100644 index 0000000..2cfbb13 --- /dev/null +++ b/registry/integ_test.go @@ -0,0 +1,98 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +package registry + +import ( + "strings" + "testing" +) + +// This tests all the operations on an registry. 
+func Test_Integration(t *testing.T) { + var integration = func(t *testing.T) { + t.Logf("Creating registry object ...\n") + registry := New(nil) + + t.Logf("Fetching remote twtxt file ...\n") + mainregistry, _, err := GetTwtxt("https://gbmor.dev/twtxt.txt", nil) + if err != nil { + t.Errorf("%v\n", err) + } + + t.Logf("Parsing remote twtxt file ...\n") + parsed, errz := ParseUserTwtxt(mainregistry, "gbmor", "https://gbmor.dev/twtxt.txt") + if errz != nil { + t.Errorf("%v\n", errz) + } + + t.Logf("Adding new user to registry ...\n") + err = registry.AddUser("TestRegistry", "https://gbmor.dev/twtxt.txt", nil, parsed) + if err != nil { + t.Errorf("%v\n", err) + } + + t.Logf("Querying user statuses ...\n") + queryuser, err := registry.QueryUser("TestRegistry") + if err != nil { + t.Errorf("%v\n", err) + } + for _, e := range queryuser { + if !strings.Contains(e, "TestRegistry") { + t.Errorf("QueryUser() returned incorrect data\n") + } + } + + t.Logf("Querying for keyword in statuses ...\n") + querystatus, err := registry.QueryInStatus("morning") + if err != nil { + t.Errorf("%v\n", err) + } + for _, e := range querystatus { + if !strings.Contains(e, "morning") { + t.Errorf("QueryInStatus() returned incorrect data\n") + } + } + + t.Logf("Querying for all statuses ...\n") + allstatus, err := registry.QueryAllStatuses() + if err != nil { + t.Errorf("%v\n", err) + } + if len(allstatus) == 0 || allstatus == nil { + t.Errorf("Got nil/zero from QueryAllStatuses") + } + + t.Logf("Querying for all users ...\n") + allusers, err := registry.QueryUser("") + if err != nil { + t.Errorf("%v\n", err) + } + if len(allusers) == 0 || allusers == nil { + t.Errorf("Got nil/zero users on empty QueryUser() query") + } + + t.Logf("Deleting user ...\n") + err = registry.DelUser("https://gbmor.dev/twtxt.txt") + if err != nil { + t.Errorf("%v\n", err) + } + } + t.Run("Integration Test", integration) +} diff --git a/registry/query.go b/registry/query.go new file mode 100644 index 
0000000..604b974 --- /dev/null +++ b/registry/query.go @@ -0,0 +1,196 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +package registry // import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "fmt" + "sort" + "strings" + "time" +) + +// QueryUser checks the Registry for usernames +// or user URLs that contain the term provided as an argument. Entries +// are returned sorted by the date they were added to the Registry. If +// the argument provided is blank, return all users. 
+func (registry *Registry) QueryUser(term string) ([]string, error) { + if registry == nil { + return nil, fmt.Errorf("can't query empty registry for user") + } + + term = strings.ToLower(term) + timekey := NewTimeMap() + keys := make(TimeSlice, 0) + var users []string + + registry.Mu.RLock() + defer registry.Mu.RUnlock() + + for k, v := range registry.Users { + if registry.Users[k] == nil { + continue + } + v.Mu.RLock() + if strings.Contains(strings.ToLower(v.Nick), term) || strings.Contains(strings.ToLower(k), term) { + thetime, err := time.Parse(time.RFC3339, v.Date) + if err != nil { + v.Mu.RUnlock() + continue + } + timekey[thetime] = v.Nick + "\t" + k + "\t" + v.Date + "\n" + keys = append(keys, thetime) + } + v.Mu.RUnlock() + } + + sort.Sort(keys) + for _, e := range keys { + users = append(users, timekey[e]) + } + + return users, nil +} + +// QueryInStatus returns all statuses in the Registry +// that contain the provided substring (tag, mention URL, etc). +func (registry *Registry) QueryInStatus(substring string) ([]string, error) { + if substring == "" { + return nil, fmt.Errorf("cannot query for empty tag") + } else if registry == nil { + return nil, fmt.Errorf("can't query statuses of empty registry") + } + + statusmap := make([]TimeMap, 0) + + registry.Mu.RLock() + defer registry.Mu.RUnlock() + + for _, v := range registry.Users { + statusmap = append(statusmap, v.FindInStatus(substring)) + } + + sorted, err := SortByTime(statusmap...) + if err != nil { + return nil, err + } + + return sorted, nil +} + +// QueryAllStatuses returns all statuses in the Registry +// as a slice of strings sorted by timestamp. 
+func (registry *Registry) QueryAllStatuses() ([]string, error) { + if registry == nil { + return nil, fmt.Errorf("can't get latest statuses from empty registry") + } + + statusmap, err := registry.GetStatuses() + if err != nil { + return nil, err + } + + sorted, err := SortByTime(statusmap) + if err != nil { + return nil, err + } + + if sorted == nil { + sorted = make([]string, 1) + } + + return sorted, nil +} + +// ReduceToPage returns the passed 'page' worth of output. +// One page is twenty items. For example, if 2 is passed, +// it will return data[20:40]. According to the twtxt +// registry specification, queries should accept a "page" +// value. +func ReduceToPage(page int, data []string) []string { + end := 20 * page + if end > len(data) || end < 1 { + end = len(data) + } + + beg := end - 20 + if beg > len(data)-1 || beg < 0 { + beg = 0 + } + + return data[beg:end] +} + +// FindInStatus takes a user's statuses and looks for a given substring. +// Returns the statuses that include the substring as a TimeMap. +func (userdata *User) FindInStatus(substring string) TimeMap { + if userdata == nil { + return nil + } else if len(substring) > 140 { + return nil + } + + substring = strings.ToLower(substring) + statuses := NewTimeMap() + + userdata.Mu.RLock() + defer userdata.Mu.RUnlock() + + for k, e := range userdata.Status { + if _, ok := userdata.Status[k]; !ok { + continue + } + + parts := strings.Split(strings.ToLower(e), "\t") + if strings.Contains(parts[3], substring) { + statuses[k] = e + } + } + + return statuses +} + +// SortByTime returns a string slice of the query results, +// sorted by timestamp in descending order (newest first). 
+func SortByTime(tm ...TimeMap) ([]string, error) { + if tm == nil { + return nil, fmt.Errorf("can't sort nil TimeMaps") + } + + var times = make(TimeSlice, 0) + var data []string + + for _, e := range tm { + for k := range e { + times = append(times, k) + } + } + + sort.Sort(times) + + for k := range tm { + for _, e := range times { + if _, ok := tm[k][e]; ok { + data = append(data, tm[k][e]) + } + } + } + + return data, nil +} diff --git a/registry/query_test.go b/registry/query_test.go new file mode 100644 index 0000000..7eed2cd --- /dev/null +++ b/registry/query_test.go @@ -0,0 +1,459 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +package registry + +import ( + "bufio" + "os" + "strings" + "testing" + "time" +) + +var queryUserCases = []struct { + name string + term string + wantErr bool +}{ + { + name: "Valid User", + term: "foo", + wantErr: false, + }, + { + name: "Empty Query", + term: "", + wantErr: false, + }, + { + name: "Nonexistent User", + term: "doesntexist", + wantErr: true, + }, + { + name: "Garbage Data", + term: "will be replaced with garbage data", + wantErr: true, + }, +} + +// Checks if Registry.QueryUser() returns users that +// match the provided substring. 
+func Test_Registry_QueryUser(t *testing.T) { + registry := initTestEnv() + var buf = make([]byte, 256) + // read random data into case 8 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + queryUserCases[3].term = string(buf) + + for n, tt := range queryUserCases { + + t.Run(tt.name, func(t *testing.T) { + out, err := registry.QueryUser(tt.term) + + if out == nil && err != nil && !tt.wantErr { + t.Errorf("Received nil output or an error when unexpected. Case %v, %v, %v\n", n, tt.term, err) + } + + if out != nil && tt.wantErr { + t.Errorf("Received unexpected nil output when an error was expected. Case %v, %v\n", n, tt.term) + } + + for _, e := range out { + one := strings.Split(e, "\t") + + if !strings.Contains(one[0], tt.term) && !strings.Contains(one[1], tt.term) { + t.Errorf("Received incorrect output: %v != %v\n", tt.term, e) + } + } + }) + } +} +func Benchmark_Registry_QueryUser(b *testing.B) { + registry := initTestEnv() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range queryUserCases { + _, err := registry.QueryUser(tt.term) + if err != nil { + b.Errorf("%v\n", err) + } + } + } +} + +var queryInStatusCases = []struct { + name string + substr string + wantNil bool + wantErr bool +}{ + { + name: "Tag in Status", + substr: "twtxt", + wantNil: false, + wantErr: false, + }, + { + name: "Valid URL", + substr: "https://example.com/twtxt.txt", + wantNil: false, + wantErr: false, + }, + { + name: "Multiple Words in Status", + substr: "next programming", + wantNil: false, + wantErr: false, + }, + { + name: "Multiple Words, Not in Status", + substr: "explosive bananas from antarctica", + wantNil: true, + wantErr: false, + }, + { + name: "Empty Query", + substr: "", + wantNil: true, + wantErr: true, + }, + { + name: "Nonsense", + substr: "ahfiurrenkhfkajdhfao", + wantNil: true, + wantErr: false, + }, + { + name: "Invalid 
URL", + substr: "https://doesnt.exist/twtxt.txt", + wantNil: true, + wantErr: false, + }, + { + name: "Garbage Data", + substr: "will be replaced with garbage data", + wantNil: true, + wantErr: false, + }, +} + +// This tests whether we can find a substring in all of +// the known status messages, disregarding the metadata +// stored with each status. +func Test_Registry_QueryInStatus(t *testing.T) { + registry := initTestEnv() + var buf = make([]byte, 256) + // read random data into case 8 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + queryInStatusCases[7].substr = string(buf) + + for _, tt := range queryInStatusCases { + + t.Run(tt.name, func(t *testing.T) { + + out, err := registry.QueryInStatus(tt.substr) + if err != nil && !tt.wantErr { + t.Errorf("Caught unexpected error: %v\n", err) + } + + if !tt.wantErr && out == nil && !tt.wantNil { + t.Errorf("Got nil when expecting output\n") + } + + if err == nil && tt.wantErr { + t.Errorf("Expecting error, got nil.\n") + } + + for _, e := range out { + split := strings.Split(strings.ToLower(e), "\t") + + if e != "" { + if !strings.Contains(split[3], strings.ToLower(tt.substr)) { + t.Errorf("Status without substring returned\n") + } + } + } + }) + } + +} +func Benchmark_Registry_QueryInStatus(b *testing.B) { + registry := initTestEnv() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range queryInStatusCases { + _, err := registry.QueryInStatus(tt.substr) + if err != nil { + continue + } + } + } +} + +// Tests whether we can retrieve the 20 most +// recent statuses in the registry +func Test_QueryAllStatuses(t *testing.T) { + registry := initTestEnv() + t.Run("Latest Statuses", func(t *testing.T) { + out, err := registry.QueryAllStatuses() + if out == nil || err != nil { + t.Errorf("Got no statuses, or more than 20: %v, %v\n", len(out), err) + } + }) +} +func 
Benchmark_QueryAllStatuses(b *testing.B) { + registry := initTestEnv() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := registry.QueryAllStatuses() + if err != nil { + continue + } + } +} + +var get20cases = []struct { + name string + page int + wantErr bool +}{ + { + name: "First Page", + page: 1, + wantErr: false, + }, + { + name: "High Page Number", + page: 256, + wantErr: false, + }, + { + name: "Illegal Page Number", + page: -23, + wantErr: false, + }, +} + +func Test_ReduceToPage(t *testing.T) { + registry := initTestEnv() + for _, tt := range get20cases { + t.Run(tt.name, func(t *testing.T) { + out, err := registry.QueryAllStatuses() + if err != nil && !tt.wantErr { + t.Errorf("%v\n", err.Error()) + } + out = ReduceToPage(tt.page, out) + if len(out) > 20 || len(out) == 0 { + t.Errorf("Page-Reduce Malfunction: length of data %v\n", len(out)) + } + }) + } +} + +func Benchmark_ReduceToPage(b *testing.B) { + registry := initTestEnv() + out, _ := registry.QueryAllStatuses() + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, tt := range get20cases { + ReduceToPage(tt.page, out) + } + } +} + +// This tests whether we can find a substring in the +// given user's status messages, disregarding the metadata +// stored with each status. 
+func Test_User_FindInStatus(t *testing.T) { + registry := initTestEnv() + var buf = make([]byte, 256) + // read random data into case 8 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + queryInStatusCases[7].substr = string(buf) + + data := make([]*User, 0) + + for _, v := range registry.Users { + data = append(data, v) + } + + for _, tt := range queryInStatusCases { + t.Run(tt.name, func(t *testing.T) { + for _, e := range data { + + tag := e.FindInStatus(tt.substr) + if tag == nil && !tt.wantNil { + t.Errorf("Got nil tag\n") + } + } + }) + } + +} +func Benchmark_User_FindInStatus(b *testing.B) { + registry := initTestEnv() + data := make([]*User, 0) + + for _, v := range registry.Users { + data = append(data, v) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range data { + for _, v := range queryInStatusCases { + tt.FindInStatus(v.substr) + } + } + } +} + +func Test_SortByTime_Slice(t *testing.T) { + registry := initTestEnv() + + statusmap, err := registry.GetStatuses() + if err != nil { + t.Errorf("Failed to finish test initialization: %v\n", err) + } + + t.Run("Sort By Time ([]TimeMap)", func(t *testing.T) { + sorted, err := SortByTime(statusmap) + if err != nil { + t.Errorf("%v\n", err) + } + split := strings.Split(sorted[0], "\t") + firsttime, _ := time.Parse("RFC3339", split[0]) + + for i := range sorted { + if i < len(sorted)-1 { + + nextsplit := strings.Split(sorted[i+1], "\t") + nexttime, _ := time.Parse("RFC3339", nextsplit[0]) + + if firsttime.Before(nexttime) { + t.Errorf("Timestamps out of order: %v\n", sorted) + } + + firsttime = nexttime + } + } + }) +} + +// Benchmarking a sort of 1000000 statuses by timestamp. +// Right now it's at roughly 2000ns per 2 statuses. +// Set sortMultiplier to be the number of desired +// statuses divided by four. 
+func Benchmark_SortByTime_Slice(b *testing.B) { + // I set this to 250,000,000 and it hard-locked + // my laptop. Oops. + sortMultiplier := 250 + b.Logf("Benchmarking SortByTime with a constructed slice of %v statuses ...\n", sortMultiplier*4) + registry := initTestEnv() + + statusmap, err := registry.GetStatuses() + if err != nil { + b.Errorf("Failed to finish benchmark initialization: %v\n", err) + } + + // Constructed registry has four statuses. This + // makes a TimeMapSlice of 1000000 statuses. + statusmaps := make([]TimeMap, sortMultiplier*4) + for i := 0; i < sortMultiplier; i++ { + statusmaps = append(statusmaps, statusmap) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, err := SortByTime(statusmaps...) + if err != nil { + b.Errorf("%v\n", err) + } + } +} + +func Test_SortByTime_Single(t *testing.T) { + registry := initTestEnv() + + statusmap, err := registry.GetStatuses() + if err != nil { + t.Errorf("Failed to finish test initialization: %v\n", err) + } + + t.Run("Sort By Time (TimeMap)", func(t *testing.T) { + sorted, err := SortByTime(statusmap) + if err != nil { + t.Errorf("%v\n", err) + } + split := strings.Split(sorted[0], "\t") + firsttime, _ := time.Parse("RFC3339", split[0]) + + for i := range sorted { + if i < len(sorted)-1 { + + nextsplit := strings.Split(sorted[i+1], "\t") + nexttime, _ := time.Parse("RFC3339", nextsplit[0]) + + if firsttime.Before(nexttime) { + t.Errorf("Timestamps out of order: %v\n", sorted) + } + + firsttime = nexttime + } + } + }) +} + +func Benchmark_SortByTime_Single(b *testing.B) { + registry := initTestEnv() + + statusmap, err := registry.GetStatuses() + if err != nil { + b.Errorf("Failed to finish benchmark initialization: %v\n", err) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, err := SortByTime(statusmap) + if err != nil { + b.Errorf("%v\n", err) + } + } +} diff --git a/registry/revive.toml b/registry/revive.toml new file mode 100644 index 0000000..f9e2405 --- /dev/null +++ 
b/registry/revive.toml @@ -0,0 +1,30 @@ +ignoreGeneratedHeader = false +severity = "warning" +confidence = 0.8 +errorCode = 0 +warningCode = 0 + +[rule.blank-imports] +[rule.context-as-argument] +[rule.context-keys-type] +[rule.dot-imports] +[rule.error-return] +[rule.error-strings] +[rule.error-naming] +[rule.exported] +[rule.if-return] +[rule.increment-decrement] +[rule.var-naming] +[rule.var-declaration] +[rule.package-comments] +[rule.range] +[rule.receiver-naming] +[rule.time-naming] +[rule.unexported-return] +[rule.indent-error-flow] +[rule.errorf] +[rule.empty-block] +[rule.superfluous-else] +[rule.unused-parameter] +[rule.unreachable-code] +[rule.redefines-builtin-id] diff --git a/registry/types.go b/registry/types.go new file mode 100644 index 0000000..eb8eee1 --- /dev/null +++ b/registry/types.go @@ -0,0 +1,148 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. +*/ + +// Package registry implements functions and types that assist +// in the creation and management of a twtxt registry. +package registry // import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "net" + "net/http" + "sync" + "time" +) + +// Registrar implements the minimum amount of methods +// for a functioning Registry. 
// Registry enables the bulk of a registry's
// user data storage and access.
type Registry struct {
	// Mu is provided to aid in concurrency-safe
	// reads and writes to a given registry
	// Users map.
	Mu sync.RWMutex

	// Users contains the registry's user data.
	// The functions within this library expect
	// the key to be the URL of a given user's
	// twtxt file.
	Users map[string]*User

	// HTTPClient is the client to use for HTTP
	// requests. New stores whatever client it is
	// given, so this field stays nil when nil is
	// passed to New.
	// NOTE(review): a fallback client with a 10
	// second timeout is presumably substituted
	// where the requests are made - confirm.
	HTTPClient *http.Client
}
// TimeSlice holds time.Time values so that the keys of a
// TimeMap can be ordered by timestamp.
type TimeSlice []time.Time

// Len reports how many timestamps the TimeSlice holds.
// It is one of the three methods required by sort.Interface.
func (t TimeSlice) Len() int {
	count := len(t)
	return count
}

// Less reports whether the timestamp at index i is later than
// the one at index j. Sorting with it therefore yields newest
// first (descending) instead of the usual ascending order.
// It is one of the three methods required by sort.Interface.
func (t TimeSlice) Less(i, j int) bool {
	return t[j].Before(t[i])
}

// Swap exchanges the timestamps stored at indices i and j.
// It is one of the three methods required by sort.Interface.
func (t TimeSlice) Swap(i, j int) {
	held := t[i]
	t[i] = t[j]
	t[j] = held
}
+*/ + +package registry // import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "fmt" + "net" + "strings" + "sync" + "time" +) + +// AddUser inserts a new user into the Registry. +func (registry *Registry) AddUser(nickname, urlKey string, ipAddress net.IP, statuses TimeMap) error { + + if registry == nil { + return fmt.Errorf("can't add user to uninitialized registry") + + } else if nickname == "" || urlKey == "" { + return fmt.Errorf("both URL and Nick must be specified") + + } else if !strings.HasPrefix(urlKey, "http") { + return fmt.Errorf("invalid URL: %v", urlKey) + } + + registry.Mu.Lock() + defer registry.Mu.Unlock() + + if _, ok := registry.Users[urlKey]; ok { + return fmt.Errorf("user %v already exists", urlKey) + } + + registry.Users[urlKey] = &User{ + Mu: sync.RWMutex{}, + Nick: nickname, + URL: urlKey, + LastModified: "", + IP: ipAddress, + Date: time.Now().Format(time.RFC3339), + Status: statuses} + + return nil +} + +// Put inserts a given User into an Registry. The User +// being pushed need only have the URL field filled. +// All other fields may be empty. +// This can be destructive: an existing User in the +// Registry will be overwritten if its User.URL is the +// same as the User.URL being pushed. +func (registry *Registry) Put(user *User) error { + if user == nil { + return fmt.Errorf("can't push nil data to registry") + } + if registry == nil || registry.Users == nil { + return fmt.Errorf("can't push data to registry: registry uninitialized") + } + user.Mu.RLock() + if user.URL == "" { + user.Mu.RUnlock() + return fmt.Errorf("can't push data to registry: missing URL for key") + } + urlKey := user.URL + registry.Mu.Lock() + registry.Users[urlKey] = user + registry.Mu.Unlock() + user.Mu.RUnlock() + + return nil +} + +// Get returns the User associated with the +// provided URL key in the Registry. 
+func (registry *Registry) Get(urlKey string) (*User, error) { + if registry == nil { + return nil, fmt.Errorf("can't pop from nil registry") + } + if urlKey == "" { + return nil, fmt.Errorf("can't pop unless provided a key") + } + + registry.Mu.RLock() + defer registry.Mu.RUnlock() + + if _, ok := registry.Users[urlKey]; !ok { + return nil, fmt.Errorf("provided url key doesn't exist in registry") + } + + registry.Users[urlKey].Mu.RLock() + userGot := registry.Users[urlKey] + registry.Users[urlKey].Mu.RUnlock() + + return userGot, nil +} + +// DelUser removes a user and all associated data from +// the Registry. +func (registry *Registry) DelUser(urlKey string) error { + + if registry == nil { + return fmt.Errorf("can't delete user from empty registry") + + } else if urlKey == "" { + return fmt.Errorf("can't delete blank user") + + } else if !strings.HasPrefix(urlKey, "http") { + return fmt.Errorf("invalid URL: %v", urlKey) + } + + registry.Mu.Lock() + defer registry.Mu.Unlock() + + if _, ok := registry.Users[urlKey]; !ok { + return fmt.Errorf("can't delete user %v, user doesn't exist", urlKey) + } + + delete(registry.Users, urlKey) + + return nil +} + +// UpdateUser scrapes an existing user's remote twtxt.txt +// file. Any new statuses are added to the user's entry +// in the Registry. If the remote twtxt data's reported +// Content-Length does not differ from what is stored, +// an error is returned. 
+func (registry *Registry) UpdateUser(urlKey string) error { + if urlKey == "" || !strings.HasPrefix(urlKey, "http") { + return fmt.Errorf("invalid URL: %v", urlKey) + } + + diff, err := registry.DiffTwtxt(urlKey) + if err != nil { + return err + } else if !diff { + return fmt.Errorf("no new statuses available for %v", urlKey) + } + + out, isRemoteRegistry, err := GetTwtxt(urlKey, registry.HTTPClient) + if err != nil { + return err + } + + if isRemoteRegistry { + return fmt.Errorf("attempting to update registry URL - users should be updated individually") + } + + registry.Mu.Lock() + defer registry.Mu.Unlock() + user := registry.Users[urlKey] + + user.Mu.Lock() + defer user.Mu.Unlock() + nick := user.Nick + + data, err := ParseUserTwtxt(out, nick, urlKey) + if err != nil { + return err + } + + for i, e := range data { + user.Status[i] = e + } + + registry.Users[urlKey] = user + + return nil +} + +// CrawlRemoteRegistry scrapes all nicknames and user URLs +// from a provided registry. The urlKey passed to this function +// must be in the form of https://registry.example.com/api/plain/users +func (registry *Registry) CrawlRemoteRegistry(urlKey string) error { + if urlKey == "" || !strings.HasPrefix(urlKey, "http") { + return fmt.Errorf("invalid URL: %v", urlKey) + } + + out, isRemoteRegistry, err := GetTwtxt(urlKey, registry.HTTPClient) + if err != nil { + return err + } + + if !isRemoteRegistry { + return fmt.Errorf("can't add single user via call to CrawlRemoteRegistry") + } + + users, err := ParseRegistryTwtxt(out) + if err != nil { + return err + } + + // only add new users so we don't overwrite data + // we already have (and lose statuses, etc) + registry.Mu.Lock() + defer registry.Mu.Unlock() + for _, e := range users { + if _, ok := registry.Users[e.URL]; !ok { + registry.Users[e.URL] = e + } + } + + return nil +} + +// GetUserStatuses returns a TimeMap containing single user's statuses +func (registry *Registry) GetUserStatuses(urlKey string) (TimeMap, error) 
{ + if registry == nil { + return nil, fmt.Errorf("can't get statuses from an empty registry") + } else if urlKey == "" || !strings.HasPrefix(urlKey, "http") { + return nil, fmt.Errorf("invalid URL: %v", urlKey) + } + + registry.Mu.RLock() + defer registry.Mu.RUnlock() + if _, ok := registry.Users[urlKey]; !ok { + return nil, fmt.Errorf("can't retrieve statuses of nonexistent user") + } + + registry.Users[urlKey].Mu.RLock() + status := registry.Users[urlKey].Status + registry.Users[urlKey].Mu.RUnlock() + + return status, nil +} + +// GetStatuses returns a TimeMap containing all statuses +// from all users in the Registry. +func (registry *Registry) GetStatuses() (TimeMap, error) { + if registry == nil { + return nil, fmt.Errorf("can't get statuses from an empty registry") + } + + statuses := NewTimeMap() + + registry.Mu.RLock() + defer registry.Mu.RUnlock() + + for _, v := range registry.Users { + v.Mu.RLock() + if v.Status == nil || len(v.Status) == 0 { + v.Mu.RUnlock() + continue + } + for a, b := range v.Status { + if _, ok := v.Status[a]; ok { + statuses[a] = b + } + } + v.Mu.RUnlock() + } + + return statuses, nil +} diff --git a/registry/user_test.go b/registry/user_test.go new file mode 100644 index 0000000..f0c9622 --- /dev/null +++ b/registry/user_test.go @@ -0,0 +1,349 @@ +/* +Copyright (c) 2019 Ben Morrison (gbmor) + +This file is part of Registry. + +Registry is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Registry is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Registry. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +package registry // import "git.sr.ht/~gbmor/getwtxt/registry" + +import ( + "bufio" + "fmt" + "net/http" + "os" + "reflect" + "testing" +) + +var addUserCases = []struct { + name string + nick string + url string + wantErr bool + localOnly bool +}{ + { + name: "Legitimate User (Local Only)", + nick: "testuser1", + url: "http://localhost:8080/twtxt.txt", + wantErr: false, + localOnly: true, + }, + { + name: "Empty Query", + nick: "", + url: "", + wantErr: true, + localOnly: false, + }, + { + name: "Invalid URL", + nick: "foo", + url: "foobarringtons", + wantErr: true, + localOnly: false, + }, + { + name: "Garbage Data", + nick: "", + url: "", + wantErr: true, + localOnly: false, + }, +} + +// Tests if we can successfully add a user to the registry +func Test_Registry_AddUser(t *testing.T) { + registry := initTestEnv() + if !addUserCases[0].localOnly { + http.Handle("/twtxt.txt", http.HandlerFunc(twtxtHandler)) + go fmt.Println(http.ListenAndServe(":8080", nil)) + } + var buf = make([]byte, 256) + // read random data into case 5 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + addUserCases[3].nick = string(buf) + addUserCases[3].url = string(buf) + + statuses, err := registry.GetStatuses() + if err != nil { + t.Errorf("Error setting up test: %v\n", err) + } + + for n, tt := range addUserCases { + t.Run(tt.name, func(t *testing.T) { + if tt.localOnly { + t.Skipf("Local-only test. Skipping ... ") + } + + err := registry.AddUser(tt.nick, tt.url, nil, statuses) + + // only run some checks if we don't want an error + if !tt.wantErr { + if err != nil { + t.Errorf("Got error: %v\n", err) + } + + // make sure we have *something* in the registry + if reflect.ValueOf(registry.Users[tt.url]).IsNil() { + t.Errorf("Failed to add user %v registry.\n", tt.url) + } + + // see if the nick in the registry is the same + // as the test case. 
verifies the URL and the nick + // since the URL is used as the key + data := registry.Users[tt.url] + if data.Nick != tt.nick { + t.Errorf("Incorrect user data added to registry for user %v.\n", tt.url) + } + } + // check for the cases that should throw an error + if tt.wantErr && err == nil { + t.Errorf("Expected error for case %v, got nil\n", n) + } + }) + } +} +func Benchmark_Registry_AddUser(b *testing.B) { + registry := initTestEnv() + statuses, err := registry.GetStatuses() + if err != nil { + b.Errorf("Error setting up test: %v\n", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, tt := range addUserCases { + err := registry.AddUser(tt.nick, tt.url, nil, statuses) + if err != nil { + continue + } + registry.Users[tt.url] = &User{} + } + } +} + +var delUserCases = []struct { + name string + url string + wantErr bool +}{ + { + name: "Valid User", + url: "https://example.com/twtxt.txt", + wantErr: false, + }, + { + name: "Valid User", + url: "https://example3.com/twtxt.txt", + wantErr: false, + }, + { + name: "Already Deleted User", + url: "https://example3.com/twtxt.txt", + wantErr: true, + }, + { + name: "Empty Query", + url: "", + wantErr: true, + }, + { + name: "Garbage Data", + url: "", + wantErr: true, + }, +} + +// Tests if we can successfully delete a user from the registry +func Test_Registry_DelUser(t *testing.T) { + registry := initTestEnv() + var buf = make([]byte, 256) + // read random data into case 5 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + delUserCases[4].url = string(buf) + + for n, tt := range delUserCases { + t.Run(tt.name, func(t *testing.T) { + + err := registry.DelUser(tt.url) + if !reflect.ValueOf(registry.Users[tt.url]).IsNil() { + t.Errorf("Failed to delete user %v from registry.\n", tt.url) + } + if tt.wantErr && err == nil { + t.Errorf("Expected error but did not receive. 
Case %v\n", n) + } + if !tt.wantErr && err != nil { + t.Errorf("Unexpected error for case %v: %v\n", n, err) + } + }) + } +} +func Benchmark_Registry_DelUser(b *testing.B) { + registry := initTestEnv() + + data1 := &User{ + Nick: registry.Users[delUserCases[0].url].Nick, + Date: registry.Users[delUserCases[0].url].Date, + Status: registry.Users[delUserCases[0].url].Status, + } + + data2 := &User{ + Nick: registry.Users[delUserCases[1].url].Nick, + Date: registry.Users[delUserCases[1].url].Date, + Status: registry.Users[delUserCases[1].url].Status, + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range delUserCases { + err := registry.DelUser(tt.url) + if err != nil { + continue + } + } + + registry.Users[delUserCases[0].url] = data1 + registry.Users[delUserCases[1].url] = data2 + } +} + +var getUserStatusCases = []struct { + name string + url string + wantErr bool +}{ + { + name: "Valid User", + url: "https://example.com/twtxt.txt", + wantErr: false, + }, + { + name: "Valid User", + url: "https://example3.com/twtxt.txt", + wantErr: false, + }, + { + name: "Nonexistent User", + url: "https://doesn't.exist/twtxt.txt", + wantErr: true, + }, + { + name: "Empty Query", + url: "", + wantErr: true, + }, + { + name: "Garbage Data", + url: "", + wantErr: true, + }, +} + +// Checks if we can retrieve a single user's statuses +func Test_Registry_GetUserStatuses(t *testing.T) { + registry := initTestEnv() + var buf = make([]byte, 256) + // read random data into case 5 + rando, _ := os.Open("/dev/random") + reader := bufio.NewReader(rando) + n, err := reader.Read(buf) + if err != nil || n == 0 { + t.Errorf("Couldn't set up test: %v\n", err) + } + getUserStatusCases[4].url = string(buf) + + for n, tt := range getUserStatusCases { + t.Run(tt.name, func(t *testing.T) { + + statuses, err := registry.GetUserStatuses(tt.url) + + if !tt.wantErr { + if reflect.ValueOf(statuses).IsNil() { + t.Errorf("Failed to pull statuses for user %v\n", tt.url) + } + // see if the 
function returns the same data + // that we already have + data := registry.Users[tt.url] + if !reflect.DeepEqual(data.Status, statuses) { + t.Errorf("Incorrect data retrieved as statuses for user %v.\n", tt.url) + } + } + + if tt.wantErr && err == nil { + t.Errorf("Expected error, received nil for case %v: %v\n", n, tt.url) + } + }) + } +} +func Benchmark_Registry_GetUserStatuses(b *testing.B) { + registry := initTestEnv() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, tt := range getUserStatusCases { + _, err := registry.GetUserStatuses(tt.url) + if err != nil { + continue + } + } + } +} + +// Tests if we can retrieve all user statuses at once +func Test_Registry_GetStatuses(t *testing.T) { + registry := initTestEnv() + t.Run("Registry.GetStatuses()", func(t *testing.T) { + + statuses, err := registry.GetStatuses() + if reflect.ValueOf(statuses).IsNil() || err != nil { + t.Errorf("Failed to pull all statuses. %v\n", err) + } + + // Now do the same query manually to see + // if we get the same result + unionmap := NewTimeMap() + for _, v := range registry.Users { + for i, e := range v.Status { + unionmap[i] = e + } + } + if !reflect.DeepEqual(statuses, unionmap) { + t.Errorf("Incorrect data retrieved as statuses.\n") + } + }) +} +func Benchmark_Registry_GetStatuses(b *testing.B) { + registry := initTestEnv() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, err := registry.GetStatuses() + if err != nil { + continue + } + } +} |