diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | bonus/gmi2html/.gitignore | 1 | ||||
-rw-r--r-- | bonus/gmi2html/Makefile | 10 | ||||
-rw-r--r-- | bonus/gmi2html/gmi2html.c | 241 | ||||
-rw-r--r-- | bonus/gmifetch/.gitignore | 1 | ||||
-rw-r--r-- | bonus/gmifetch/Makefile | 11 | ||||
-rw-r--r-- | bonus/gmifetch/gmifetch.c | 675 | ||||
-rw-r--r-- | bonus/trans.cgi | 13 | ||||
-rw-r--r-- | doc/cha.1 | 2 | ||||
-rw-r--r-- | doc/urimethodmap.md | 139 | ||||
-rw-r--r-- | res/config.toml | 6 | ||||
-rw-r--r-- | src/config/config.nim | 50 | ||||
-rw-r--r-- | src/loader/connecterror.nim | 2 | ||||
-rw-r--r-- | src/loader/loader.nim | 109 | ||||
-rw-r--r-- | src/local/client.nim | 1 | ||||
-rw-r--r-- | src/local/container.nim | 6 | ||||
-rw-r--r-- | src/local/pager.nim | 10 | ||||
-rw-r--r-- | src/server/buffer.nim | 7 | ||||
-rw-r--r-- | src/server/forkserver.nim | 23 | ||||
-rw-r--r-- | src/types/urimethodmap.nim | 68 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 6 | ||||
-rw-r--r-- | todo | 3 |
23 files changed, 1298 insertions, 91 deletions
diff --git a/Makefile b/Makefile index 916ad5f9..26922569 100644 --- a/Makefile +++ b/Makefile @@ -59,12 +59,13 @@ $(OBJDIR)/man/cha-%.md: doc/%.md | $(OBJDIR)/man/ $(OBJDIR)/man/cha-%.5: $(OBJDIR)/man/cha-%.md | $(OBJDIR)/man/ pandoc --standalone --to man $< -o $@ -$(OBJDIR)/man/cha.1: $(OBJDIR)/man/ doc/cha.1 +$(OBJDIR)/man/cha.1: doc/cha.1 | $(OBJDIR)/man/ cp doc/cha.1 "$(OBJDIR)/man/cha.1" .PHONY: manpage manpage: $(OBJDIR)/man/cha-config.5 $(OBJDIR)/man/cha-mailcap.5 \ $(OBJDIR)/man/cha-mime.types.5 $(OBJDIR)/man/cha-localcgi.5 \ + $(OBJDIR)/man/cha-urimethodmap.5 \ $(OBJDIR)/man/cha.1 .PHONY: install @@ -76,6 +77,7 @@ install: install -m755 "$(OBJDIR)/man/cha-mailcap.5" "$(DESTDIR)$(manprefix5)"; \ install -m755 "$(OBJDIR)/man/cha-mime.types.5" "$(DESTDIR)$(manprefix5)"; \ install -m755 "$(OBJDIR)/man/cha-localcgi.5" "$(DESTDIR)$(manprefix5)"; \ + install -m755 "$(OBJDIR)/man/cha-urimethodmap.5" "$(DESTDIR)$(manprefix5)"; \ install -m755 "$(OBJDIR)/cha.1" "$(DESTDIR)$(manprefix1)"; \ fi diff --git a/README.md b/README.md index e5621076..bc455110 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ Currently implemented features are: * FTP support * Gopher support * [local CGI](doc/localcgi.md) support +* [urimethodmap](doc/urimethodmap.md) support ...with a lot more [planned](todo). 
diff --git a/bonus/gmi2html/.gitignore b/bonus/gmi2html/.gitignore new file mode 100644 index 00000000..fd3a71e5 --- /dev/null +++ b/bonus/gmi2html/.gitignore @@ -0,0 +1 @@ +gmi2html diff --git a/bonus/gmi2html/Makefile b/bonus/gmi2html/Makefile new file mode 100644 index 00000000..f655100a --- /dev/null +++ b/bonus/gmi2html/Makefile @@ -0,0 +1,10 @@ +CFLAGS = -Wall -Wextra -std=c89 -pedantic -g -O2 -fsanitize=address +prefix = /usr/local + +gmi2html: gmi2html.c + $(CC) $(CFLAGS) gmi2html.c -o gmi2html + +.PHONY: install +install: + mkdir -p "$(DESTDIR)$(prefix)/bin" + install -m755 gmi2html "$(DESTDIR)$(prefix)/bin" diff --git a/bonus/gmi2html/gmi2html.c b/bonus/gmi2html/gmi2html.c new file mode 100644 index 00000000..869b3112 --- /dev/null +++ b/bonus/gmi2html/gmi2html.c @@ -0,0 +1,241 @@ +/* This file is dedicated to the public domain. + * + * Convert gemtext to HTML. Only accepts input on stdin. + */ + +#include <stdio.h> +#include <stdlib.h> + +typedef enum { + STATE_NORMAL, + STATE_BLOCKQUOTE, + STATE_NEWLINE, + STATE_NEWLINE_EQUALS, + STATE_NEWLINE_EQUALS_ARROW, + STATE_BEFORE_URL, + STATE_IN_URL, + STATE_BEFORE_URL_NAME, + STATE_URL_NAME, + STATE_SINGLE_BACKTICK, + STATE_DOUBLE_BACKTICK, + STATE_PRE_START, + STATE_IN_PRE, + STATE_PRE_SINGLE_BACKTICK, + STATE_PRE_DOUBLE_BACKTICK, + STATE_SKIP_LINE, + STATE_HASH, + STATE_DOUBLE_HASH, + STATE_AFTER_HASH, + STATE_AFTER_DOUBLE_HASH, + STATE_AFTER_TRIPLE_HASH +} ParseState; + +static ParseState state = STATE_NEWLINE; +static ParseState prev_state = STATE_NORMAL; + +int main() { + int c; +#define BUFSIZE 4096 + char urlbuf[BUFSIZE + 1]; + char *urlp; + + urlp = urlbuf; + printf("<!DOCTYPE html>"); +#define SET_STATE(s) do { \ + prev_state = state; \ + state = s; \ + } while (0) +#define REDO_NORMAL do { \ + SET_STATE(STATE_NORMAL); \ + goto normal; \ + } while (0) + while ((c = getc(stdin)) != EOF) { + switch (state) { + case STATE_NORMAL: + case STATE_BLOCKQUOTE: + case STATE_IN_PRE: + case STATE_PRE_START: + 
case STATE_SKIP_LINE: + case STATE_URL_NAME: + case STATE_AFTER_HASH: + case STATE_AFTER_DOUBLE_HASH: + case STATE_AFTER_TRIPLE_HASH: +normal: switch (c) { + case '\r': break; + case '\n': + if (state == STATE_BLOCKQUOTE) { + fputs("</blockquote>", stdout); + } else if (state == STATE_PRE_START) { + fputs("\">", stdout); + SET_STATE(STATE_IN_PRE); + } else if (state == STATE_URL_NAME) { + fputs("</a>", stdout); + fputs("<br>", stdout); + } else if (state == STATE_AFTER_HASH) { + fputs("</h1>", stdout); + } else if (state == STATE_AFTER_DOUBLE_HASH) { + fputs("</h2>", stdout); + } else if (state == STATE_AFTER_TRIPLE_HASH) { + fputs("</h3>", stdout); + } else if (state == STATE_SKIP_LINE) { + } else { + fputs("<br>", stdout); + } + SET_STATE(STATE_NEWLINE); + break; + /* escape characters that are significant in HTML */ + case '<': + fputs("&lt;", stdout); + break; + case '>': + fputs("&gt;", stdout); + break; + case '&': + fputs("&amp;", stdout); + break; + default: + if (state != STATE_SKIP_LINE) + putchar(c); + break; + } + break; + case STATE_NEWLINE: + if (prev_state == STATE_IN_PRE) { + if (c == '`') { + SET_STATE(STATE_PRE_SINGLE_BACKTICK); + break; + } else { + SET_STATE(STATE_IN_PRE); + goto normal; + } + } + switch (c) { + case '=': + SET_STATE(STATE_NEWLINE_EQUALS); + break; + case '>': + SET_STATE(STATE_BLOCKQUOTE); + printf("<blockquote>"); + break; + case '`': + SET_STATE(STATE_SINGLE_BACKTICK); + break; + case '#': + SET_STATE(STATE_HASH); + break; + default: + REDO_NORMAL; + } + break; + case STATE_NEWLINE_EQUALS: + if (c == '>') { + SET_STATE(STATE_NEWLINE_EQUALS_ARROW); + } else { + putchar('='); + REDO_NORMAL; + } + break; + case STATE_NEWLINE_EQUALS_ARROW: + if (c == ' ') { + state = STATE_BEFORE_URL; + } else { + putchar('='); + REDO_NORMAL; + } + break; + case STATE_BEFORE_URL: + if (c == ' ') { + continue; + break; + } else { + fputs("<a href=\"", stdout); + SET_STATE(STATE_IN_URL); + urlp = urlbuf; + } + /* fall through */ + case STATE_IN_URL: + switch (c) { + case '"': + fputs("%22", stdout); + if (urlp < 
&urlbuf[BUFSIZE]) + *urlp++ = '"'; + break; + case ' ': + case '\t': + fputs("\">", stdout); + *urlp = '\0'; + SET_STATE(STATE_BEFORE_URL_NAME); + break; + case '\n': + *urlp = '\0'; + fputs("\">", stdout); + fputs(urlbuf, stdout); + fputs("</a><br>", stdout); + SET_STATE(STATE_NEWLINE); + break; + default: + if (urlp < &urlbuf[BUFSIZE] && c != '>' + && c != '<') + *urlp++ = c; + putchar(c); + } + break; + case STATE_BEFORE_URL_NAME: + if (c != ' ' && c != '\t') { + SET_STATE(STATE_URL_NAME); + goto normal; + } + break; + case STATE_SINGLE_BACKTICK: + case STATE_PRE_SINGLE_BACKTICK: + if (c == '`') { + SET_STATE(state == STATE_SINGLE_BACKTICK ? + STATE_DOUBLE_BACKTICK : + STATE_PRE_DOUBLE_BACKTICK); + } else { + putchar('`'); + REDO_NORMAL; + } + break; + case STATE_DOUBLE_BACKTICK: + case STATE_PRE_DOUBLE_BACKTICK: + if (c == '`') { + if (state == STATE_DOUBLE_BACKTICK) { + SET_STATE(STATE_PRE_START); + fputs("<pre title=\"", stdout); + } else { + fputs("</pre>", stdout); + SET_STATE(STATE_SKIP_LINE); + } + } else { + fputs("``", stdout); + if (state == STATE_DOUBLE_BACKTICK) { + REDO_NORMAL; + } else { + SET_STATE(STATE_IN_PRE); + goto normal; + } + } + break; + case STATE_HASH: + if (c == '#') { + SET_STATE(STATE_DOUBLE_HASH); + } else { + fputs("<h1>", stdout); + SET_STATE(STATE_AFTER_HASH); + goto normal; + } + break; + case STATE_DOUBLE_HASH: + if (c == '#') { + fputs("<h3>", stdout); + SET_STATE(STATE_AFTER_TRIPLE_HASH); + } else { + fputs("<h2>", stdout); + SET_STATE(STATE_AFTER_DOUBLE_HASH); + goto normal; + } + break; + } + } + exit(0); +} diff --git a/bonus/gmifetch/.gitignore b/bonus/gmifetch/.gitignore new file mode 100644 index 00000000..99270cf6 --- /dev/null +++ b/bonus/gmifetch/.gitignore @@ -0,0 +1 @@ +gmifetch diff --git a/bonus/gmifetch/Makefile b/bonus/gmifetch/Makefile new file mode 100644 index 00000000..5ee37b3a --- /dev/null +++ b/bonus/gmifetch/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -std=c89 -pedantic -lcrypto -lssl -g -O2 + 
+gmifetch: gmifetch.c + $(CC) $(CFLAGS) gmifetch.c -o gmifetch + +.PHONY: clean +clean: + rm -f gmifetch + +.PHONY: all +all: gmifetch diff --git a/bonus/gmifetch/gmifetch.c b/bonus/gmifetch/gmifetch.c new file mode 100644 index 00000000..f10554c6 --- /dev/null +++ b/bonus/gmifetch/gmifetch.c @@ -0,0 +1,675 @@ +/* This file is dedicated to the public domain. + * + * Gemini protocol adapter for Chawan. + * Intended to be used through local CGI (by redirection in scheme-map). + * + * (FWIW, it should work with normal CGI or w3m's local CGI too. However, + * it does not rewrite URLs, so you would have to figure out something for + * that, e.g. by setting the base href or rewriting URLs in another layer.) + * + * Usage: gmifetch [URL] + * + * Environment variables: + * - QUERY_STRING is used if no URL arguments are passed. + * - GMIFETCH_KNOWN_HOSTS is used for setting the known_hosts file. If not set, + * we use $XDG_CONFIG_HOME/gmifetch/known_hosts, where $XDG_CONFIG_HOME falls + * back to $HOME/.config/gmifetch if not set. 
+ */ + +#include <ctype.h> +#include <errno.h> +#include <openssl/err.h> +#include <openssl/pem.h> +#include <openssl/ssl.h> +#include <pwd.h> +#include <sys/stat.h> +#include <unistd.h> + +static SSL_CTX* ssl_ctx; +static SSL *ssl; +static BIO *conn; + +/* CGI responses */ +#define INPUT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Input required</title>" \ + "<base href='%s'>" \ + "<h1>Input required</h1>" \ + "<p>" \ + "%s" \ + "<p>" \ + "<form method=POST><input type='%s' name='input'></form>" + +#define SUCCESS_RESPONSE "Content-Type: %s\r\n" \ + "\r\n" + +#define REDIRECT_RESPONSE "Status: 30%c\r\n" \ + "Location: %s\r\n" \ + "\r\n" + +#define TEMPFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Temporary failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define PERMFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Permanent failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define CERTFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Certificate failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define INVALID_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>\n" \ + "<title>Invalid certificate</title>\n" \ + "<h1>Invalid certificate</h1>\n" \ + "<p>\n" \ + "The certificate received from the server does not match the\n" \ + "stored certificate (expected %s, but got %s). 
Somebody may be\n" \ + "tampering with your connection.\n" \ + "<p>\n" \ + "If you are sure that this is not a man-in-the-middle attack,\n" \ + "please remove this host from %s.\n" + +#define UNKNOWN_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Unknown certificate</title>" \ + "<h1>Unknown certificate</h1>" \ + "<p>\n" \ + "The hostname of the server you are visiting could not be found\n" \ + "in your list of known hosts (%s).\n" \ + "<p>\n" \ + "The server has sent us a certificate with the following\n" \ + "fingerprint:\n" \ + "<pre>%s</pre>\n" \ + "<p>Trust it?\n" \ + "<form method=POST>" \ + "<input type=submit name=trust_cert value=always>\n" \ + "<input type=submit name=trust_cert value=once>" \ + "<input type=hidden name=entry value='%s sha256 %s %lu'>" \ + "</form>" + +#define UPDATED_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>\n" \ + "<title>Certificate date changed</title>\n" \ + "<h1>Certificate date changed</h1>\n" \ + "<p>\n" \ + "The received certificate's date did not match the date in your\n" \ + "list of known hosts (%s).\n" \ + "<p>\n" \ + "The new expiration date is: %s.\n" \ + "<p>\n" \ + "Update it?\n" \ + "<form method=POST>" \ + "<input type=submit name=trust_cert value=always>" \ + "<input type=submit name=trust_cert value=once>\n" \ + "<input type=hidden name=entry value='%s sha256 %s %lu'>" \ + "</form>\n" + +#define PDIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n"); \ + puts(x); \ + puts(strerror(errno)); \ + exit(1); \ + } while (0) + +#define SDIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n"); \ + puts(x); \ + ERR_print_errors_fp(stdout); \ + exit(1); \ + } while (0) + +#define DIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n\r\n" x); \ + exit(1); \ + } while (0) + +#define FLAGS (SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_COMPRESSION | \ + SSL_OP_NO_TLSv1_1) +#define PREFERRED_CIPHERS "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4" +#define 
BUFSIZE 1024 + +/* A larger buffer that we can use for storing the full public key. */ +#define BUFSIZE2 8192 + +static char buffer[BUFSIZE + 1]; +static char buffer2[BUFSIZE + 1]; +static char urlbuf[BUFSIZE + 1]; +static char khsbuf[BUFSIZE + 2]; +static unsigned char hashbuf[EVP_MAX_MD_SIZE]; +static char hashbuf2[EVP_MAX_MD_SIZE * 3 + 1]; +static FILE *known_hosts = NULL; + +static void setup_ssl(void) +{ + SSL_library_init(); + SSL_load_error_strings(); + ssl_ctx = SSL_CTX_new(TLS_client_method()); + SSL_CTX_set_options(ssl_ctx, FLAGS); + if (!(conn = BIO_new_ssl_connect(ssl_ctx))) + SDIE("Error creating BIO"); +} + +static void extract_hostname(const char *s, char **hostp, char **portp, + char **pathp, char **endp) +{ + const char *p; + size_t i, schlen; + + if (!(p = strstr(s, "gemini://"))) + DIE("Invalid URL: scheme delimiter not found"); + p += strlen("gemini://"); + schlen = p - s; + if (schlen >= BUFSIZE) + DIE("Scheme too long"); +#define SCHEME "gemini://" + schlen = strlen(SCHEME); + strcpy(urlbuf, SCHEME); + *hostp = &urlbuf[schlen]; + for (i = schlen; *p && *p != ':' && *p != '/' && i < BUFSIZE; ++p, ++i) + urlbuf[i] = *p; + if (i + 2 >= BUFSIZE) /* +2 for CRLF */ + DIE("Host too long"); + *portp = &urlbuf[i]; + if (*p != ':') { + if (i + 5 >= BUFSIZE) + DIE("Host too long"); + strcpy(&urlbuf[i], ":1965"); + i += 5; + } else { + for (; *p && *p != '/' && i < BUFSIZE; ++i, ++p) + urlbuf[i] = *p; + } + *pathp = &urlbuf[i]; + if (i < BUFSIZE) + urlbuf[i++] = '/'; + if (*p == '/') + ++p; + for (; *p && i < BUFSIZE; ++i, ++p) + urlbuf[i] = *p; + if (i + 2 >= BUFSIZE) /* +2 for CRLF */ + DIE("Host too long"); + *endp = &urlbuf[i]; + urlbuf[i] = '\0'; +} + +int check_cert(const char *theirs, char *linebuf, char *hostp, + char **stored_digestp, time_t their_time) +{ + char *p, *q, *hashp, *timep; + int found; + time_t our_time; + + rewind(known_hosts); + found = 0; + while (!found && fgets(linebuf, BUFSIZE, known_hosts)) { + p = strstr(linebuf, " "); + if 
(!p) + DIE("Incorrectly formatted known_hosts file"); + *p = '\0'; + found = !strcmp(linebuf, hostp); + } + if (!found) + return -1; + hashp = p + 1; + if (!(q = strstr(hashp, " "))) + DIE("Incorrectly formatted known_hosts file"); + *q = '\0'; + if (strcmp(hashp, "sha256") && strcmp(hashp, "SHA256")) + DIE("Unsupported digest format"); + *stored_digestp = q + 1; + if (!(q = strstr(*stored_digestp, " "))) { + timep = NULL; + if ((q = strstr(*stored_digestp, "\n"))) + *q = '\0'; + } else { + timep = q + 1; + *q = '\0'; + } + if (strcmp(theirs, *stored_digestp)) + return 0; + if (!timep) + return -2; + our_time = (time_t)atol(timep); + if (their_time != our_time) + return -2; + return 1; +} + +static char HexTable[] = "0123456789ABCDEF"; + +void hex_encode(const unsigned char *inp, char *outbuf, int len) +{ + const unsigned char *p; + char *q; + + for (p = inp, q = outbuf; p < &inp[len]; ++p) { + if (p != inp) + *q++ = ':'; + *q++ = HexTable[(*p >> 4) & 0xF]; + *q++ = HexTable[*p & 0xF]; + } + *q++ = '\0'; +} + +static void hash_buf(const unsigned char *ibuf, int len, unsigned char *obuf, + char *obuf2) +{ + unsigned int len2; + EVP_MD_CTX* mdctx; + + if (!(mdctx = EVP_MD_CTX_new())) + SDIE("Failed to initialize MD_CTX"); + if (!EVP_DigestInit_ex(mdctx, EVP_sha256(), NULL)) + SDIE("Failed to initialize sha256"); + if (!EVP_DigestUpdate(mdctx, ibuf, len)) + SDIE("Failed to update digest"); + len2 = 0; + if (!EVP_DigestFinal_ex(mdctx, obuf, &len2)) + SDIE("Failed to finalize digest"); + EVP_MD_CTX_free(mdctx); + hex_encode(obuf, obuf2, len2); +} + +/* 1: cert found & valid + * 0: cert found & invalid + * -1: cert not found + * -2: cert found, but notAfter updated + */ +static int connect(char *hostp, char *portp, char *pathp, char *endp, + char **stored_digestp, time_t *their_time) +{ + X509 *cert; + const EVP_PKEY *pkey; + unsigned char *pubkey_buf, *r; + int len, res; + const ASN1_TIME *notAfter; + struct tm their_tm; + + *pathp = '\0'; + if 
(!BIO_set_conn_hostname(conn, hostp)) + SDIE("Error setting BIO hostname"); + *pathp = '/'; + BIO_get_ssl(conn, &ssl); + if (!SSL_set_cipher_list(ssl, PREFERRED_CIPHERS)) + SDIE("Error failed to set cipher list"); + *portp = '\0'; + if (!SSL_set_tlsext_host_name(ssl, hostp)) + SDIE("Error failed to set tlsext host name"); + if (BIO_do_connect(conn) <= 0) + SDIE("Failed to connect"); + if (!BIO_do_handshake(conn)) + SDIE("Failed handshake"); + if (!(cert = SSL_get_peer_certificate(ssl))) + DIE("Failed to get certificate"); + if (!(pkey = X509_get0_pubkey(cert))) + SDIE("Failed to decode public key"); + len = i2d_PUBKEY(pkey, NULL); + if (len * 3 > BUFSIZE2) + DIE("Public key too long"); + pubkey_buf = (unsigned char *)buffer2; + r = pubkey_buf; + if (i2d_PUBKEY(pkey, &r) != len) + DIE("wat"); + hash_buf(pubkey_buf, len, hashbuf, hashbuf2); + notAfter = X509_get0_notAfter(cert); + if (!ASN1_TIME_to_tm(notAfter, &their_tm)) + DIE("Failed to parse time"); + if (X509_cmp_current_time(X509_get0_notBefore(cert)) >= 0) + DIE("Wrong time"); + if (X509_cmp_current_time(notAfter) <= 0) + DIE("Wrong time"); + *their_time = mktime(&their_tm); + res = check_cert(hashbuf2, buffer, hostp, stored_digestp, *their_time); + *portp = ':'; + X509_free(cert); + strcpy(endp, "\r\n"); + return res; +} + +static void read_response(void) +{ + int bytes, total; + const char *tmp; + char *q, status0, status1; + + /* Read response */ + total = 0; + /* Status code */ + while (((bytes = BIO_read(conn, buffer, 3 - total)) > 0 || + BIO_should_retry(conn)) && total < 3) + total += bytes; + if (total < 3 || !isdigit(status0 = buffer[0]) || + !isdigit(status1 = buffer[1]) || buffer[2] != ' ') + DIE("Invalid status code"); + /* Meta */ + #define METALEN (total - 3) + while (((bytes = BIO_read(conn, &buffer[METALEN], 1024 - METALEN)) > 0 || + BIO_should_retry(conn)) && METALEN < BUFSIZE) + total += bytes; + q = strstr(buffer, "\r\n"); + if (!q) + DIE("Invalid status line"); + *q = '\0'; + /* buffer is 
now META. */ + switch (status0) { + case '1': /* input */ + /* META is the prompt. */ + printf(INPUT_RESPONSE, urlbuf, buffer, status1 == '1' ? + "password" /* sensitive input */ : + "search" /* input */); + break; + case '2': /* success */ + /* META is the content type. */ + printf(SUCCESS_RESPONSE, *buffer ? + buffer : + "text/gemini; charset=utf-8" /* fallback */); + /* Body */ + /* flush any data remaining in buffer */ + total -= 5 + (q - buffer); /* code + space + meta + \r\n len */ + if (total > 0) + fwrite(&q[2], 1, total, stdout); + while ((bytes = BIO_read(conn, buffer, BUFSIZE)) > 0 || + BIO_should_retry(conn)) + fwrite(buffer, 1, bytes, stdout); + break; + case '3': /* redirect */ + /* META is the redirection URL. */ + printf(REDIRECT_RESPONSE, status1 == '0' ? + '7' /* temporary */ : + '1' /* permanent */, buffer); + break; + case '4': /* temporary failure */ + /* META is additional information. */ + /* TODO maybe set status code too? */ + switch (status1) { + case '1': + tmp = "Server unavailable"; + break; + case '2': + tmp = "CGI error"; + break; + case '3': + tmp = "Proxy error"; + break; + case '4': + tmp = "Slow down!"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Temporary failure"; + break; + } + printf(TEMPFAIL_RESPONSE, tmp, buffer); + break; + case '5': /* permanent failure */ + /* TODO maybe set status code too? */ + switch (status1) { + case '1': + tmp = "Not found"; + break; + case '2': + tmp = "Gone"; + break; + case '3': + tmp = "Proxy request refused"; + break; + case '9': + tmp = "Bad request"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Permanent failure"; + break; + } + printf(PERMFAIL_RESPONSE, tmp, buffer); + break; + case '6': /* permanent failure */ + /* TODO maybe set status code too? 
*/ + switch (status1) { + case '1': + tmp = "Certificate not authorized"; + break; + case '2': + tmp = "Certificate not valid"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Certificate failure"; + break; + } + printf(CERTFAIL_RESPONSE, tmp, buffer); + } +} + +void decode_query(const char *input_url, char *output_buffer) +{ + const char *p; + char *q, *endp, c; + + endp = &output_buffer[BUFSIZE]; + for (p = input_url, q = output_buffer; *p && q < endp; ++p, ++q) { + if (*p != '%') { + *q = *p; + } else { + if (!isxdigit(p[1] & 0xFF) || !isxdigit(p[2] & 0xFF)) + DIE("Invalid percent encoding"); + c = tolower(p[1] & 0xFF); + *q = ('a' <= c && c <= 'z') ? + c - 'a' + 10 : + c - '0'; + c = tolower(p[2] & 0xFF); + /* low nibble: letters a-f map to 10..15, digits to 0..9 */ + *q = (*q << 4) | (('a' <= c && c <= 'z') ? + c - 'a' + 10 : + c - '0'); + p += 2; + } + } + if (q >= endp) + DIE("Query too long"); + *q = '\0'; +} + +void read_post(const char *hostp, char *portp, char *pathp) +{ + /* TODO move query strings here */ + size_t n; + char *p, *q; + FILE *known_hosts_tmp; + long last_pos, len, total; + size_t khslen; + + n = fread(buffer2, 1, BUFSIZE2, stdin); + buffer2[n] = '\0'; + if ((p = strstr(buffer2, "input="))) { + decode_query(p + 6, buffer); + if (!(q = strstr(pathp, "?"))) /* no query string */ + q = &pathp[strlen(pathp)]; + for (; *p && q < &urlbuf[BUFSIZE]; ++p, ++q) + *q = *p; + if (q >= &urlbuf[BUFSIZE]) + DIE("Query too long"); + } else if (!(p = strstr(buffer2, "trust_cert="))) { + DIE("Invalid POST request: trust_cert missing"); + } + p += sizeof("trust_cert=") - 1; + if (!strncmp(p, "always", 6)) { + /* move to file end */ + fseek(known_hosts, 0L, SEEK_END); + last_pos = ftell(known_hosts); + if (!(p = strstr(p, "entry="))) + DIE("Invalid POST request: missing entry"); + p += sizeof("entry=") - 1; + decode_query(p, buffer); + /* replace plus signs */ + p = buffer; + while ((p = strstr(p, "+"))) + *p = ' '; + fwrite(buffer, 1, strlen(buffer), known_hosts); + 
fwrite("\n", 1, 1, known_hosts); + khslen = strlen(khsbuf); + khsbuf[khslen] = '~'; + khsbuf[khslen + 1] = '\0'; + if (!(known_hosts_tmp = fopen(khsbuf, "w+"))) + PDIE("Error opening temporary hosts file"); + rewind(known_hosts); + *portp = '\0'; + total = 0; + while (fgets(buffer, BUFSIZE, known_hosts)) { + len = strlen(buffer); + if (!len) + continue; + if ((total += len) > last_pos) { + /* finished */ + fwrite(buffer, 1, len, known_hosts_tmp); + break; + } + if (buffer[len - 1] != '\n') { + /* clean up */ + fclose(known_hosts_tmp); + unlink(khsbuf); + DIE("Line too long"); + } + if (!(p = strstr(buffer, " "))) + DIE("Invalid entry in known_hosts file"); + *p = '\0'; + if (strcmp(buffer, hostp)) { + *p = ' '; + fwrite(buffer, 1, len, known_hosts_tmp); + } + } + *portp = ':'; + memcpy(buffer, khsbuf, BUFSIZE + 1); + buffer[khslen] = '\0'; + fclose(known_hosts); + fclose(known_hosts_tmp); + if (rename(khsbuf, buffer)) + PDIE("Failed to rename temporary file"); + khsbuf[khslen] = '\0'; + if (!(known_hosts = fopen(khsbuf, "a+"))) + PDIE("Failed to re-open known hosts file"); + } else if (strncmp(p, "once", 4)) { + DIE("Invalid POST request"); + } +} + +void open_known_hosts(void) +{ + const char *known_hosts_path, *xdg_dir, *home_dir; + char *p; + size_t len; + struct stat s; + + known_hosts_path = getenv("GMIFETCH_KNOWN_HOSTS"); + if (!known_hosts_path) { + xdg_dir = getenv("XDG_CONFIG_HOME"); + if ((xdg_dir = getenv("XDG_CONFIG_HOME"))) { + len = strlen(xdg_dir); +#define CONFIG_REL "/gmifetch/known_hosts" + if (len + sizeof(CONFIG_REL) > BUFSIZE) + DIE("Error: config directory path too long"); + memcpy(khsbuf, xdg_dir, len); + memcpy(&khsbuf[len], CONFIG_REL, sizeof(CONFIG_REL)); + } else { + if (!(home_dir = getenv("HOME"))) + home_dir = getpwuid(getuid())->pw_dir; + if (!home_dir) + DIE("Error: failed to get HOME directory"); +#undef CONFIG_REL +#define CONFIG_REL "/.config/gmifetch/known_hosts" + len = strlen(home_dir); + if (len + sizeof(CONFIG_REL) > BUFSIZE) 
+ DIE("Error: home directory path too long"); + memcpy(khsbuf, home_dir, len); + memcpy(&khsbuf[len], CONFIG_REL, sizeof(CONFIG_REL)); + } + } else { + len = strlen(known_hosts_path); + if (len > BUFSIZE) + DIE("Error: known hosts path too long"); + memcpy(khsbuf, known_hosts_path, len); + } + p = khsbuf; + if (*p == '/') + ++p; + for (; *p; ++p) { + if (*p == '/') { + *p = '\0'; + if (stat(khsbuf, &s) == -1) { + if (errno != ENOENT) + PDIE("Error calling stat"); + if (mkdir(khsbuf, 0755) == -1) + PDIE("Error calling mkdir"); + } else if (!S_ISDIR(s.st_mode)) { + if (mkdir(khsbuf, 0755) == -1) + PDIE("Error calling mkdir"); + } + *p = '/'; + } + } + if (!(known_hosts = fopen(khsbuf, "a+"))) + PDIE("Error opening known hosts file"); +} + +int main(int argc, const char *argv[]) +{ + const char *input_url, *method; + char *hostp, *portp, *pathp, *endp, *stored_digestp; + int connect_res; + time_t their_time; + + if (argc != 2) { + input_url = getenv("QUERY_STRING"); + if (!input_url) + DIE("Usage: gmifetch [url] (or set QUERY_STRING)"); + decode_query(input_url, buffer); + input_url = buffer; + } else { + input_url = argv[1]; + } + open_known_hosts(); + setup_ssl(); + extract_hostname(input_url, &hostp, &portp, &pathp, &endp); + method = getenv("REQUEST_METHOD"); + if (method && !strcmp(method, "POST")) + read_post(hostp, portp, pathp); + connect_res = connect(hostp, portp, pathp, endp, &stored_digestp, + &their_time); + if (connect_res == 1) { /* valid certificate */ + BIO_puts(conn, urlbuf); + read_response(); + } else if (connect_res == 0) { /* invalid certificate */ + printf(INVALID_CERT_RESPONSE, stored_digestp, buffer, khsbuf); + } else if (connect_res == -1) { /* no certificate */ + *portp = '\0'; + printf(UNKNOWN_CERT_RESPONSE, khsbuf, hashbuf2, hostp, + hashbuf2, (unsigned long)their_time); + } else { /* -2: updated expiration date */ + *portp = '\0'; + printf(UPDATED_CERT_RESPONSE, khsbuf, + ctime(&their_time), hostp, hashbuf2, + (unsigned long)their_time); 
+ } + BIO_free_all(conn); + exit(0); +} diff --git a/bonus/trans.cgi b/bonus/trans.cgi new file mode 100644 index 00000000..f40ed893 --- /dev/null +++ b/bonus/trans.cgi @@ -0,0 +1,13 @@ +#!/bin/sh +# Needs https://github.com/soimort/translate-shell to work. +# Usage: cgi-bin:trans.cgi?word + +TEXT="$(echo "$QUERY_STRING" | sed 's/+/ /g;s/%/\\x/g' | xargs -0 printf "%b")" +printf 'Content-Type: text/plain\n' + +type trans || { + printf "\n\nERROR: translator not found" + exit +} + +printf '\n%s\n' "$(trans "$TEXT")" diff --git a/doc/cha.1 b/doc/cha.1 index aad18ed4..d8de5fc2 100644 --- a/doc/cha.1 +++ b/doc/cha.1 @@ -124,4 +124,4 @@ Configuration options are described in \fBcha-config\fR(5). .SH SEE ALSO \fBcha-mailcap\fR(5), \fBcha-mime.types\fR(5), \fBcha-config\fR(5), -\fBcha-localcgi\fR(5) +\fBcha-localcgi\fR(5) \fBcha-urimethodmap\fR(5) diff --git a/doc/urimethodmap.md b/doc/urimethodmap.md new file mode 100644 index 00000000..fe48ba0a --- /dev/null +++ b/doc/urimethodmap.md @@ -0,0 +1,139 @@ +<!-- MANON +% cha-urimethodmap(5) | URI method map support in Chawan +MANOFF --> + +# URI method map support in Chawan + +Chawan can be used to map unrecognized protocols to known protocols using the +`urimethodmap` format. + +The main use case for this is implementing handlers to protocols unknown to +Chawan through a protocol that the browser *does* understand. + +## Search path + +The search path for urimethodmap files can be overridden using the +configuration variable `external.urimethodmap`. + +The default search path for urimethodmap files is: + +``` +$HOME/.urimethodmap:$HOME/.w3m/urimethodmap:/etc/urimethodmap:/usr/local/etc/urimethodmap +``` +## Format + +The urimethodmap format is taken 1:1 from w3m, with only some modifications +to the interpretation of templates. 
+ +A rough attempt at the formal description of this: + +``` +URIMethodMap-File = *URIMethodMap-Line + +URIMethodMap-Line = Comment / URIMethodMap-Entry + +URIMethodMap-Entry = Protocol *WHITESPACE Template *WHITESPACE + +Protocol = 1*CHAR COLON + +Template = [see below] + +Comment = *WHITESPACE CR / "#" *CHAR CR +``` + +Note that an ASCII colon sign (:) must be present after the protocol +name. However, the whitespace after the colon may be omitted. + +Examples: + +``` +# This is ok: +protocol: /cgi-bin/interpret-protocol?%s +# This is ok too: +protocol:/cgi-bin/interpret-protocol?%s +# This is incorrect: +protocol /cgi-bin/interpret-protocol?%s +``` + +The redirection template is the target URL. If the string `%s` is contained +in the template, it will be replaced by the target URL encoded with the +component percent encode set (i.e. ECMAScript's encodeURIComponent). + +For compatibility with w3m, templates starting with `/cgi-bin/` and +`file:/cgi-bin/` are special-cased and the starting string is replaced with +`cgi-bin:`. So for example, the template `/cgi-bin/w3mdict.cgi` is the same as +`cgi-bin:w3mdict.cgi` (and so is `file:/cgi-bin/w3mdict.cgi`). + +Example: + +``` +# The following are the same in Chawan +protocol: /cgi-bin/interpret-protocol?%s +protocol: file:/cgi-bin/interpret-protocol?%s +# Note: this last entry does not work in w3m. +protocol: cgi-bin:interpret-protocol?%s +``` + +Note however that absolute paths to cgi scripts are NOT special cased, so +e.g. `file:///usr/local/libexec/w3m/cgi-bin/w3mdict.cgi` will simply open +w3mdict.cgi in the file viewer. (Unlike in w3m, where it could run +`w3mdict.cgi` depending on the user's configuration.) + +## Examples + +### In config.toml + +``` +# The following sets the urimethodmap search path to the path relative to the +# configuration file. So if your configuration file is in +# ~/.config/chawan/config.toml, Chawan will use ~/.config/chawan/urimethodmap +# in the same directory. 
+[external] +urimethodmap = "urimethodmap" +``` + +### In urimethodmap + +#### gmifetch + +For the following example, it is recommended to set a text/gemini handler +in your mailcap file. See +<!-- MANOFF --> +[mailcap](mailcap.md) +<!-- MANON --> +<!-- MANON +**cha-mailcap**(5) +MANOFF --> +for details. + +``` +# Use the `gmifetch` CGI program to retrieve files through the gemini protocol. +gemini: /cgi-bin/gmifetch?%s +``` + +`gmifetch` can be found in the `bonus/` directory. But you could write a +local CGI wrapper for any other gemini document downloader, and it would +work just fine. + +#### dict + +In w3m, urimethodmap is commonly (ab)used to define shorthands for CGI scripts. + +This works in Chawan too; for example, you could define a `tl:` shorthand +like this: + +``` +# (trans.cgi is a script you can find and study in the bonus/ directory.) +tl: /cgi-bin/trans.cgi?%s +``` + +Then, you could open the translation of any word using `tl:word`. + +Note however that Chawan has a more powerful facility for substitution +shorthands like this in the form of omni-rules. So if you want to redirect +to an on-line dictionary site with tl:word instead of providing a local +CGI interface, it is probably easier to just use omni-rules instead of +urimethodmap + local CGI redirection. + +Rule of thumb: if you find yourself writing local CGI scripts that just +send a `Location:` header, maybe consider just using an omni-rule. 
diff --git a/res/config.toml b/res/config.toml index a3976eaa..c69d8b66 100644 --- a/res/config.toml +++ b/res/config.toml @@ -24,6 +24,12 @@ mime-types = [ "/usr/etc/mime.types", "/usr/local/etc/mime.types" ] +urimethodmap = [ + "~/.urimethodmap", + "~/.w3m/urimethodmap", + "/etc/urimethodmap", + "/usr/local/etc/w3m/urimethodmap" +] tmpdir = "/tmp/cha" editor = "vi %s +%d" diff --git a/src/config/config.nim b/src/config/config.nim index da897133..fb0218ce 100644 --- a/src/config/config.nim +++ b/src/config/config.nim @@ -11,13 +11,15 @@ import js/error import js/javascript import js/regex import loader/headers +import loader/loader import types/cell import types/color import types/cookie +import types/opt import types/referer +import types/urimethodmap import types/url import utils/mimeguess -import types/opt import utils/twtstr import chakasu/charset @@ -88,6 +90,7 @@ type mailcap* {.jsgetset.}: seq[string] mime_types* {.jsgetset.}: seq[string] cgi_dir* {.jsgetset.}: seq[string] + urimethodmap* {.jsgetset.}: seq[string] InputConfig = object vi_numeric_prefix* {.jsgetset.}: bool @@ -132,15 +135,12 @@ type BufferConfig* = object userstyle*: string - filter*: URLFilter - cookiejar*: CookieJar - headers*: Headers referer_from*: bool referrerpolicy*: ReferrerPolicy scripting*: bool charsets*: seq[Charset] images*: bool - proxy*: URL + loaderConfig*: LoaderConfig mimeTypes*: MimeTypes cgiDir*: seq[string] @@ -197,12 +197,6 @@ proc bindLineKey(config: Config, key, action: string) {.jsfunc.} = proc hasprop(a: ptr ActionMap, s: string): bool {.jshasprop.} = return s in a[] -func getForkServerConfig*(config: Config): ForkServerConfig = - return ForkServerConfig( - tmpdir: config.external.tmpdir, - ambiguous_double: config.display.double_width_ambiguous - ) - func getProxy*(config: Config): URL = if config.network.proxy.isSome: let s = config.network.proxy.get @@ -218,25 +212,28 @@ func getDefaultHeaders*(config: Config): Headers = proc getBufferConfig*(config: Config, 
location: URL, cookiejar: CookieJar, headers: Headers, referer_from, scripting: bool, charsets: seq[Charset], - images: bool, userstyle: string, proxy: URL, mimeTypes: MimeTypes): - BufferConfig = + images: bool, userstyle: string, proxy: URL, mimeTypes: MimeTypes, + urimethodmap: URIMethodMap): BufferConfig = let filter = newURLFilter( scheme = some(location.scheme), allowschemes = @["data"], default = true ) - result = BufferConfig( + return BufferConfig( userstyle: userstyle, - filter: filter, - cookiejar: cookiejar, - headers: headers, referer_from: referer_from, scripting: scripting, charsets: charsets, images: images, - proxy: proxy, mimeTypes: mimeTypes, - cgiDir: config.external.cgi_dir + loaderConfig: LoaderConfig( + defaultHeaders: headers, + filter: filter, + cookiejar: cookiejar, + proxy: proxy, + cgiDir: config.external.cgi_dir, + urimethodmap: urimethodmap + ) ) proc getSiteConfig*(config: Config, jsctx: JSContext): seq[SiteConfig] = @@ -369,6 +366,21 @@ proc getMimeTypes*(config: Config): MimeTypes = return DefaultGuess return mimeTypes +proc getURIMethodMap*(config: Config): URIMethodMap = + let configDir = getConfigDir() / "chawan" #TODO store this in config? 
+ var urimethodmap: URIMethodMap + for p in config.external.urimethodmap: + let f = openFileExpand(configDir, p) + if f != nil: + urimethodmap.parseURIMethodMap(f.readAll()) + return urimethodmap + +proc getForkServerConfig*(config: Config): ForkServerConfig = + return ForkServerConfig( + tmpdir: config.external.tmpdir, + ambiguous_double: config.display.double_width_ambiguous + ) + proc parseConfig(config: Config, dir: string, stream: Stream, name = "<input>", laxnames = false) proc parseConfig*(config: Config, dir: string, s: string, name = "<input>", diff --git a/src/loader/connecterror.nim b/src/loader/connecterror.nim index f10285d2..8f2f95d2 100644 --- a/src/loader/connecterror.nim +++ b/src/loader/connecterror.nim @@ -1,6 +1,8 @@ import bindings/curl type ConnectErrorCode* = enum + ERROR_TOO_MANY_REWRITES = (-14, "too many URI method map rewrites") + ERROR_INVALID_URI_METHOD_ENTRY = (-13, "invalid URI method entry") ERROR_CGI_FILE_NOT_FOUND = (-12, "CGI file not found") ERROR_INVALID_CGI_PATH = (-11, "invalid CGI path") ERROR_FAIL_SETUP_CGI = (-10, "failed to set up CGI script") diff --git a/src/loader/loader.nim b/src/loader/loader.nim index 809219fe..a148b77b 100644 --- a/src/loader/loader.nim +++ b/src/loader/loader.nim @@ -43,6 +43,7 @@ import loader/request import loader/response import types/cookie import types/referer +import types/urimethodmap import types/url import utils/mimeguess import utils/twtstr @@ -79,6 +80,7 @@ type RESUME ADDREF UNREF + SET_REFERRER_POLICY LoaderContext = ref object refcount: int @@ -89,17 +91,18 @@ type extra_fds: seq[curl_waitfd] handleList: seq[CurlHandle] handleMap: Table[int, LoaderHandle] + referrerpolicy: ReferrerPolicy LoaderConfig* = object defaultheaders*: Headers filter*: URLFilter cookiejar*: CookieJar - referrerpolicy*: ReferrerPolicy proxy*: URL # When set to false, requests with a proxy URL are overridden by the # loader proxy. 
acceptProxy*: bool cgiDir*: seq[string] + uriMethodMap*: URIMethodMap FetchPromise* = Promise[JSResult[Response]] @@ -109,34 +112,51 @@ proc addFd(ctx: LoaderContext, fd: int, flags: int) = events: cast[cshort](flags) )) +const MaxRewrites = 2 # should be enough? TODO find out what w3m thinks + proc loadResource(ctx: LoaderContext, request: Request, handle: LoaderHandle) = - case request.url.scheme - of "file": - handle.loadFilePath(request.url) - handle.close() - of "http", "https": - let handleData = handle.loadHttp(ctx.curlm, request) - if handleData != nil: - ctx.handleList.add(handleData) - of "about": - handle.loadAbout(request) - handle.close() - of "data": - handle.loadData(request) - handle.close() - of "ftp", "ftps", "sftp": - let handleData = handle.loadFtp(ctx.curlm, request) - if handleData != nil: - ctx.handleList.add(handleData) - of "gopher", "gophers": - let handleData = handle.loadGopher(ctx.curlm, request) - if handleData != nil: - ctx.handleList.add(handleData) - of "cgi-bin": - handle.loadCGI(request, ctx.config.cgiDir) - handle.close() - else: - discard handle.sendResult(ERROR_UNKNOWN_SCHEME) + var redo = true + var tries = 0 + while redo and tries < MaxRewrites: + redo = false + case request.url.scheme + of "file": + handle.loadFilePath(request.url) + handle.close() + of "http", "https": + let handleData = handle.loadHttp(ctx.curlm, request) + if handleData != nil: + ctx.handleList.add(handleData) + of "about": + handle.loadAbout(request) + handle.close() + of "data": + handle.loadData(request) + handle.close() + of "ftp", "ftps", "sftp": + let handleData = handle.loadFtp(ctx.curlm, request) + if handleData != nil: + ctx.handleList.add(handleData) + of "gopher", "gophers": + let handleData = handle.loadGopher(ctx.curlm, request) + if handleData != nil: + ctx.handleList.add(handleData) + of "cgi-bin": + handle.loadCGI(request, ctx.config.cgiDir) + handle.close() + else: + case ctx.config.urimethodmap.findAndRewrite(request.url) + of 
URI_RESULT_SUCCESS: + inc tries + redo = true + of URI_RESULT_WRONG_URL: + discard handle.sendResult(ERROR_INVALID_URI_METHOD_ENTRY) + handle.close() + of URI_RESULT_NOT_FOUND: + discard handle.sendResult(ERROR_UNKNOWN_SCHEME) + handle.close() + if tries >= MaxRewrites: + discard handle.sendResult(ERROR_TOO_MANY_REWRITES) handle.close() proc onLoad(ctx: LoaderContext, stream: SocketStream) = @@ -155,8 +175,8 @@ proc onLoad(ctx: LoaderContext, stream: SocketStream) = let cookie = ctx.config.cookiejar.serialize(request.url) if cookie != "": request.headers["Cookie"] = cookie - if request.referer != nil and "Referer" notin request.headers.table: - let r = getReferer(request.referer, request.url, ctx.config.referrerpolicy) + if request.referer != nil and "Referer" notin request.headers: + let r = getReferer(request.referer, request.url, ctx.referrerpolicy) if r != "": request.headers["Referer"] = r if request.proxy == nil or not ctx.config.acceptProxy: @@ -185,15 +205,6 @@ proc acceptConnection(ctx: LoaderContext) = let handle = ctx.handleMap[fd] handle.addOutputStream(stream) stream.swrite(true) - of ADDREF: - inc ctx.refcount - of UNREF: - dec ctx.refcount - if ctx.refcount == 0: - ctx.alive = false - stream.close() - else: - assert ctx.refcount > 0 of SUSPEND: var fds: seq[int] stream.sread(fds) @@ -206,6 +217,18 @@ proc acceptConnection(ctx: LoaderContext) = for fd in fds: ctx.handleMap.withValue(fd, handlep): handlep[].resume() + of ADDREF: + inc ctx.refcount + of UNREF: + dec ctx.refcount + if ctx.refcount == 0: + ctx.alive = false + stream.close() + else: + assert ctx.refcount > 0 + of SET_REFERRER_POLICY: + stream.sread(ctx.referrerpolicy) + stream.close() except IOError: # End-of-file, broken pipe, or something else. For now we just # ignore it and pray nothing breaks. 
@@ -492,8 +515,16 @@ proc addref*(loader: FileLoader) = let stream = connectSocketStream(loader.process) if stream != nil: stream.swrite(ADDREF) + stream.close() proc unref*(loader: FileLoader) = let stream = connectSocketStream(loader.process) if stream != nil: stream.swrite(UNREF) + +proc setReferrerPolicy*(loader: FileLoader, referrerpolicy: ReferrerPolicy) = + let stream = connectSocketStream(loader.process) + if stream != nil: + stream.swrite(SET_REFERRER_POLICY) + stream.swrite(referrerpolicy) + stream.close() diff --git a/src/local/client.nim b/src/local/client.nim index 5a02d2a8..8351c4b1 100644 --- a/src/local/client.nim +++ b/src/local/client.nim @@ -630,6 +630,7 @@ proc newClient*(config: Config, forkserver: ForkServer, mainproc: Pid): Client = loader: forkserver.newFileLoader( defaultHeaders = config.getDefaultHeaders(), proxy = config.getProxy(), + urimethodmap = config.getURIMethodMap(), acceptProxy = true ), jsrt: jsrt, diff --git a/src/local/container.nim b/src/local/container.nim index ce049c08..d3d0b879 100644 --- a/src/local/container.nim +++ b/src/local/container.nim @@ -822,8 +822,10 @@ proc load(container: Container) = if res.code == 0: container.triggerEvent(SUCCESS) # accept cookies - if res.cookies.len > 0 and container.config.cookiejar != nil: - container.config.cookiejar.add(res.cookies) + let cookiejar = container.config.loaderConfig.cookiejar + if res.cookies.len > 0 and cookiejar != nil: + cookiejar.add(res.cookies) + # set referrer policy, if any if res.referrerpolicy.isSome and container.config.referer_from: container.config.referrerpolicy = res.referrerpolicy.get container.setLoadInfo("Connected to " & $container.source.location & ". 
Downloading...") diff --git a/src/local/pager.nim b/src/local/pager.nim index b07d300e..1333e32f 100644 --- a/src/local/pager.nim +++ b/src/local/pager.nim @@ -35,8 +35,9 @@ import types/buffersource import types/cell import types/color import types/cookie -import types/url import types/opt +import types/urimethodmap +import types/url import utils/twtstr import chakasu/charset @@ -82,6 +83,7 @@ type term*: Terminal tty: File unreg*: seq[(Pid, SocketStream)] + urimethodmap: URIMethodMap username: string jsDestructor(Pager) @@ -211,7 +213,8 @@ proc newPager*(config: Config, attrs: WindowAttributes, statusgrid: newFixedGrid(attrs.width), term: newTerminal(stdout, config, attrs), mimeTypes: config.getMimeTypes(), - mailcap: mailcap + mailcap: mailcap, + urimethodmap: config.getURIMethodMap() ) for err in errs: pager.alert("Error reading mailcap: " & err) @@ -607,6 +610,7 @@ proc applySiteconf(pager: Pager, url: var URL): BufferConfig = var userstyle = pager.config.css.stylesheet var proxy = pager.proxy let mimeTypes = pager.mimeTypes + let urimethodmap = pager.urimethodmap for sc in pager.siteconf: if sc.url.isSome and not sc.url.get.match($url): continue @@ -640,7 +644,7 @@ proc applySiteconf(pager: Pager, url: var URL): BufferConfig = if sc.proxy.isSome: proxy = sc.proxy.get return pager.config.getBufferConfig(url, cookiejar, headers, referer_from, - scripting, charsets, images, userstyle, proxy, mimeTypes) + scripting, charsets, images, userstyle, proxy, mimeTypes, urimethodmap) # Load request in a new buffer. 
proc gotoURL(pager: Pager, request: Request, prevurl = none(URL), diff --git a/src/server/buffer.nim b/src/server/buffer.nim index 22dd8a8a..1e97a4bb 100644 --- a/src/server/buffer.nim +++ b/src/server/buffer.nim @@ -37,6 +37,7 @@ import js/regex import js/timeout import layout/box import loader/connecterror +import loader/headers import loader/loader import render/renderdocument import render/rendertext @@ -741,8 +742,10 @@ proc connect*(buffer: Buffer): ConnectResult {.proxy.} = let cookie = newCookie(s, response.url) if cookie.isOk: cookies.add(cookie.get) - if "Referrer-Policy" in response.headers.table: - referrerpolicy = getReferrerPolicy(response.headers.table["Referrer-Policy"][0]) + if "Referrer-Policy" in response.headers: + referrerpolicy = getReferrerPolicy(response.headers["Referrer-Policy"]) + if referrerpolicy.isSome: + buffer.loader.setReferrerPolicy(referrerpolicy.get) buffer.connected = true let contentType = buffer.source.contentType.get("") buffer.ishtml = contentType == "text/html" diff --git a/src/server/forkserver.nim b/src/server/forkserver.nim index 8ece45e6..5f89d6ca 100644 --- a/src/server/forkserver.nim +++ b/src/server/forkserver.nim @@ -16,6 +16,7 @@ import loader/loader import server/buffer import types/buffersource import types/cookie +import types/urimethodmap import types/url import utils/twtstr @@ -35,16 +36,14 @@ type children: seq[(Pid, Pid)] proc newFileLoader*(forkserver: ForkServer, defaultHeaders: Headers, - filter = newURLFilter(default = true), cookiejar: CookieJar = nil, - proxy: URL = nil, acceptProxy = false): FileLoader = + proxy: URL, urimethodmap: URIMethodMap, acceptProxy: bool): FileLoader = forkserver.ostream.swrite(FORK_LOADER) - var defaultHeaders = defaultHeaders let config = LoaderConfig( defaultHeaders: defaultHeaders, - filter: filter, - cookiejar: cookiejar, + filter: newURLFilter(default = true), proxy: proxy, - acceptProxy: acceptProxy + acceptProxy: acceptProxy, + urimethodmap: urimethodmap ) 
forkserver.ostream.swrite(config) forkserver.ostream.flush() @@ -111,17 +110,7 @@ proc forkBuffer(ctx: var ForkServerContext): Pid = ctx.istream.sread(config) ctx.istream.sread(attrs) ctx.istream.sread(mainproc) - let loaderPid = ctx.forkLoader( - LoaderConfig( - defaultHeaders: config.headers, - filter: config.filter, - cookiejar: config.cookiejar, - referrerpolicy: config.referrerpolicy, - #TODO these should be in a separate config I think - proxy: config.proxy, - cgiDir: config.cgiDir - ) - ) + let loaderPid = ctx.forkLoader(config.loaderConfig) var pipefd: array[2, cint] if pipe(pipefd) == -1: raise newException(Defect, "Failed to open pipe.") diff --git a/src/types/urimethodmap.nim b/src/types/urimethodmap.nim new file mode 100644 index 00000000..6d57230b --- /dev/null +++ b/src/types/urimethodmap.nim @@ -0,0 +1,68 @@ +# w3m's URI method map format. + +import strutils +import tables + +import types/opt +import types/url +import utils/twtstr + +type URIMethodMap* = object + map: Table[string, string] + +func rewriteURL(pattern, surl: string): string = + result = "" + var was_perc = false + for c in pattern: + if was_perc: + if c == '%': + result &= '%' + elif c == 's': + result.percentEncode(surl, ComponentPercentEncodeSet) + else: + result &= '%' + result &= c + was_perc = false + elif c != '%': + result &= c + else: + was_perc = true + if was_perc: + result &= '%' + +proc `[]=`*(this: var URIMethodMap, k, v: string) = + this.map[k] = v + +type URIMethodMapResult* = enum + URI_RESULT_NOT_FOUND, URI_RESULT_SUCCESS, URI_RESULT_WRONG_URL + +proc findAndRewrite*(this: URIMethodMap, url: var URL): URIMethodMapResult = + let protocol = url.protocol + if protocol in this.map: + let surl = this.map[protocol].rewriteURL($url) + let x = newURL(surl) + if x.isNone: + return URI_RESULT_WRONG_URL + url = x.get + return URI_RESULT_SUCCESS + return URI_RESULT_NOT_FOUND + +proc parseURIMethodMap*(this: var URIMethodMap, s: string) = + for line in s.split('\n'): + if line.len 
== 0 or line[0] == '#': + continue # comments + var k = "" + var i = 0 + while i < line.len and line[i] != ':': + k &= line[i].toLowerAscii() + inc i + if i >= line.len: + continue # invalid + while i < line.len and line[i] in AsciiWhitespace: + inc i + var v = line.until(AsciiWhitespace, i) + if v.startsWith("file:/cgi-bin/"): + v = "cgi-bin:" & v.substr("file:/cgi-bin/".len) + elif v.startsWith("/cgi-bin/"): + v = "cgi-bin:" & v.substr("/cgi-bin/".len) + this[k] = v diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index ca79fa76..d8ce9ae8 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -206,13 +206,11 @@ func skipBlanks*(buf: string, at: int): int = while result < buf.len and buf[result].isWhitespace(): inc result -func until*(s: string, c: set[char]): string = - var i = 0 - while i < s.len: +func until*(s: string, c: set[char], starti = 0): string = + for i in starti ..< s.len: if s[i] in c: break result.add(s[i]) - inc i func until*(s: string, c: char): string = s.until({c}) diff --git a/todo b/todo index 83169544..d4fdae57 100644 --- a/todo +++ b/todo @@ -45,9 +45,6 @@ buffer: - do not prompt when submitting forms (it's annoying) * this is mostly fixed, but not completely network: -- urimethodmap - * then we could add gemini as a bonus, without an SSL dependency - outside of curl. - uBO integration? (or at least implement filter lists) - websockets (curl supports ws) - integrate curl-impersonate (LD_PRELOAD works, but still...) |