diff options
Diffstat (limited to 'adapter/protocol/gmifetch.c')
-rw-r--r-- | adapter/protocol/gmifetch.c | 688 |
1 files changed, 688 insertions, 0 deletions
diff --git a/adapter/protocol/gmifetch.c b/adapter/protocol/gmifetch.c new file mode 100644 index 00000000..5b4b5f48 --- /dev/null +++ b/adapter/protocol/gmifetch.c @@ -0,0 +1,688 @@ +/* This file is dedicated to the public domain. + * + * Gemini protocol adapter for Chawan. + * Intended to be used through local CGI (by redirection in scheme-map). + * + * (FWIW, it should work with normal CGI or w3m's local CGI too. However, + * it does not rewrite URLs, so you would have to figure out something for + * that, e.g. by setting the base href or rewriting URLs in another layer.) + * + * Usage: gmifetch [URL] + * + * Environment variables: + * - QUERY_STRING is used if no URL arguments are passed. + * - GMIFETCH_KNOWN_HOSTS is used for setting the known_hosts file. If not set, + * we use $XDG_CONFIG_HOME/gmifetch/known_hosts, where $XDG_CONFIG_HOME falls + * back to $HOME/.config/gmifetch if not set. + */ + +#include <ctype.h> +#include <errno.h> +#include <openssl/err.h> +#include <openssl/pem.h> +#include <openssl/ssl.h> +#include <pwd.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +static SSL_CTX* ssl_ctx; +static SSL *ssl; +static BIO *conn; + +/* CGI responses */ +#define INPUT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Input required</title>" \ + "<base href='%s'>" \ + "<h1>Input required</h1>" \ + "<p>" \ + "%s" \ + "<p>" \ + "<form method=POST><input type='%s' name='input'></form>" + +#define SUCCESS_RESPONSE "Content-Type: %s\r\n" \ + "\r\n" + +#define REDIRECT_RESPONSE "Status: 30%c\r\n" \ + "Location: %s\r\n" \ + "\r\n" + +#define TEMPFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Temporary failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define PERMFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Permanent failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define CERTFAIL_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Certificate failure</title>" \ + "<h1>%s</h1>" \ + "<p>" \ + "%s" + +#define INVALID_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>\n" \ + "<title>Invalid certificate</title>\n" \ + "<h1>Invalid certificate</h1>\n" \ + "<p>\n" \ + "The certificate received from the server does not match the\n" \ + "stored certificate (expected %s, but got %s). Somebody may be\n" \ + "tampering with your connection.\n" \ + "<p>\n" \ + "If you are sure that this is not a man-in-the-middle attack,\n" \ + "please remove this host from %s.\n" + +#define UNKNOWN_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>" \ + "<title>Unknown certificate</title>" \ + "<h1>Unknown certificate</h1>" \ + "<p>\n" \ + "The hostname of the server you are visiting could not be found\n" \ + "in your list of known hosts (%s).\n" \ + "<p>\n" \ + "The server has sent us a certificate with the following\n" \ + "fingerprint:\n" \ + "<pre>%s</pre>\n" \ + "<p>Trust it?\n" \ + "<form method=POST>" \ + "<input type=submit name=trust_cert value=always>\n" \ + "<input type=submit name=trust_cert value=once>" \ + "<input type=hidden name=entry value='%s sha256 %s %lu'>" \ + "</form>" + +#define UPDATED_CERT_RESPONSE "Content-Type: text/html\r\n" \ + "\r\n" \ + "<!DOCTYPE html>\n" \ + "<title>Certificate date changed</title>\n" \ + "<h1>Certificate date changed</h1>\n" \ + "<p>\n" \ + "The received certificate's date did not match the date in your\n" \ + "list of known hosts (%s).\n" \ + "<p>\n" \ + "The new expiration date is: %s.\n" \ + "<p>\n" \ + "Update it?\n" \ + "<form method=POST>" \ + "<input type=submit name=trust_cert value=always>" \ + "<input type=submit name=trust_cert value=once>\n" \ + "<input type=hidden name=entry value='%s sha256 %s %lu'>" \ + "</form>\n" + +#define PDIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n"); \ + puts(x); \ + puts(strerror(errno)); \ + exit(1); \ + } while (0) + +#define SDIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n"); \ + puts(x); \ + ERR_print_errors_fp(stdout); \ + exit(1); \ + } while (0) + +#define DIE(x) \ + do { \ + puts("Content-Type: text/plain\r\n\r\n" x); \ + exit(1); \ + } while (0) + +#define BUFSIZE 1024 +#define PUBKEY_BUF_SIZE 8192 + +static void setup_ssl(void) +{ + SSL_library_init(); + SSL_load_error_strings(); + ssl_ctx = SSL_CTX_new(TLS_client_method()); + SSL_CTX_set_min_proto_version(ssl_ctx, TLS1_2_VERSION); + if (!(conn = BIO_new_ssl_connect(ssl_ctx))) + SDIE("Error creating BIO"); +#undef FLAGS +} + +static void extract_hostname(const char *s, char **hostp, char **portp, + char **pathp, char **endp, char *urlbuf) +{ + const char *p; + size_t i, schlen; + + if (!(p = strstr(s, "gemini://"))) + DIE("Invalid URL: scheme delimiter not found"); + p += strlen("gemini://"); + schlen = p - s; + if (schlen >= BUFSIZE) + DIE("Scheme too long"); +#define SCHEME "gemini://" + schlen = strlen(SCHEME); + strcpy(urlbuf, SCHEME); + *hostp = &urlbuf[schlen]; + for (i = schlen; *p && *p != ':' && *p != '/' && i < BUFSIZE; ++p, ++i) + urlbuf[i] = *p; + if (i + 2 >= BUFSIZE) /* +2 for CRLF */ + DIE("Host too long"); + *portp = &urlbuf[i]; + if (*p != ':') { + if (i + 5 >= BUFSIZE) + DIE("Host too long"); + strcpy(&urlbuf[i], ":1965"); + i += 5; + } else { + for (; *p && *p != '/' && i < BUFSIZE; ++i, ++p) + urlbuf[i] = *p; + } + *pathp = &urlbuf[i]; + if (i < BUFSIZE) + urlbuf[i++] = '/'; + if (*p == '/') + ++p; + for (; *p && i < BUFSIZE; ++i, ++p) + urlbuf[i] = *p; + if (i + 2 >= BUFSIZE) /* +2 for CRLF */ + DIE("Host too long"); + *endp = &urlbuf[i]; + urlbuf[i] = '\0'; +} + +int check_cert(const char *theirs, char *hostp, char **stored_digestp, + char *linebuf, time_t their_time, FILE *known_hosts) +{ + char *p, *q, *hashp, *timep; + int found; + time_t our_time; + + rewind(known_hosts); + found = 0; + while (!found && fgets(linebuf, BUFSIZE, known_hosts)) { + p = strstr(linebuf, " "); + if (!p) + DIE("Incorrectly formatted known_hosts file"); + *p = '\0'; + found = !strcmp(linebuf, hostp); + } + if (!found) + return -1; + hashp = p + 1; + if (!(q = strstr(hashp, " "))) + DIE("Incorrectly formatted known_hosts file"); + *q = '\0'; + if (strcmp(hashp, "sha256") && strcmp(hashp, "SHA256")) + DIE("Unsupported digest format"); + *stored_digestp = q + 1; + if (!(q = strstr(*stored_digestp, " "))) { + timep = NULL; + if ((q = strstr(*stored_digestp, "\n"))) + *q = '\0'; + } else { + timep = q + 1; + *q = '\0'; + } + if (strcmp(theirs, *stored_digestp)) + return 0; + if (!timep) + return -2; + our_time = (time_t)atol(timep); + if (their_time != our_time) + return -2; + return 1; +} + +static char HexTable[] = "0123456789ABCDEF"; + +void hex_encode(const unsigned char *inp, char *outbuf, int len) +{ + const unsigned char *p; + char *q; + + for (p = inp, q = outbuf; p < &inp[len]; ++p) { + if (p != inp) + *q++ = ':'; + *q++ = HexTable[(*p >> 4) & 0xF]; + *q++ = HexTable[*p & 0xF]; + } + *q++ = '\0'; +} + +static void hash_buf(const unsigned char *ibuf, int len, char *obuf2) +{ + unsigned int len2; + EVP_MD_CTX* mdctx; + unsigned char hashbuf[EVP_MAX_MD_SIZE]; + + if (!(mdctx = EVP_MD_CTX_new())) + SDIE("Failed to initialize MD_CTX"); + if (!EVP_DigestInit_ex(mdctx, EVP_sha256(), NULL)) + SDIE("Failed to initialize sha256"); + if (!EVP_DigestUpdate(mdctx, ibuf, len)) + SDIE("Failed to update digest"); + len2 = 0; + if (!EVP_DigestFinal_ex(mdctx, hashbuf, &len2)) + SDIE("Failed to finalize digest"); + EVP_MD_CTX_free(mdctx); + hex_encode(hashbuf, obuf2, len2); +} + +/* 1: cert found & valid + * 0: cert found & invalid + * -1: cert not found + * -2: cert found, but notAfter updated + */ +static int connect(char *hostp, char *portp, char *pathp, char *endp, + char *linebuf, char **stored_digestp, time_t *their_time, + char *hashbuf2, FILE *known_hosts) +{ + X509 *cert; + const EVP_PKEY *pkey; + unsigned char pubkey_buf[PUBKEY_BUF_SIZE + 1], *r; + int len, res; + const ASN1_TIME *notAfter; + struct tm their_tm; + + *pathp = '\0'; + if (!BIO_set_conn_hostname(conn, hostp)) + SDIE("Error setting BIO hostname"); + *pathp = '/'; + BIO_get_ssl(conn, &ssl); +#define PREFERRED_CIPHERS "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4" + if (!SSL_set_cipher_list(ssl, PREFERRED_CIPHERS)) + SDIE("Error failed to set cipher list"); + *portp = '\0'; + if (!SSL_set_tlsext_host_name(ssl, hostp)) + SDIE("Error failed to set tlsext host name"); + if (BIO_do_connect(conn) <= 0) + SDIE("Failed to connect"); + if (!BIO_do_handshake(conn)) + SDIE("Failed handshake"); + if (!(cert = SSL_get_peer_certificate(ssl))) + DIE("Failed to get certificate"); + if (!(pkey = X509_get0_pubkey(cert))) + SDIE("Failed to decode public key"); + len = i2d_PUBKEY(pkey, NULL); + if (len * 3 > PUBKEY_BUF_SIZE) + DIE("Public key too long"); + r = pubkey_buf; + if (i2d_PUBKEY(pkey, &r) != len) + DIE("wat"); + hash_buf(pubkey_buf, len, hashbuf2); + notAfter = X509_get0_notAfter(cert); + if (!ASN1_TIME_to_tm(notAfter, &their_tm)) + DIE("Failed to parse time"); + if (X509_cmp_current_time(X509_get0_notBefore(cert)) >= 0) + DIE("Wrong time"); + if (X509_cmp_current_time(notAfter) <= 0) + DIE("Wrong time"); + *their_time = mktime(&their_tm); + res = check_cert(hashbuf2, hostp, stored_digestp, linebuf, *their_time, + known_hosts); + *portp = ':'; + X509_free(cert); + strcpy(endp, "\r\n"); + return res; +} + +static void read_response(const char *urlbuf) +{ + int bytes, total; + const char *tmp; + char *q, status0, status1; + char buffer[BUFSIZE + 1]; + + /* Read response */ + total = 0; + /* Status code */ + while (((bytes = BIO_read(conn, buffer, 3 - total)) > 0 || + BIO_should_retry(conn)) && total < 3) + total += bytes; + if (total < 3 || !isdigit(status0 = buffer[0]) || + !isdigit(status1 = buffer[1]) || buffer[2] != ' ') + DIE("Invalid status code"); + /* Meta */ + #define METALEN (total - 3) + while (((bytes = BIO_read(conn, &buffer[METALEN], 1024 - METALEN)) > 0 || + BIO_should_retry(conn)) && METALEN < BUFSIZE) + total += bytes; + q = strstr(buffer, "\r\n"); + if (!q) + DIE("Invalid status line"); + *q = '\0'; + /* buffer is now META. */ + switch (status0) { + case '1': /* input */ + /* META is the prompt. */ + printf(INPUT_RESPONSE, urlbuf, buffer, status1 == '1' ? + "password" /* sensitive input */ : + "search" /* input */); + break; + case '2': /* success */ + /* META is the content type. */ + printf(SUCCESS_RESPONSE, *buffer ? + buffer : + "text/gemini; charset=utf-8" /* fallback */); + /* Body */ + /* flush any data remaining in buffer */ + total -= 5 + (q - buffer); /* code + space + meta + \r\n len */ + if (total > 0) + fwrite(&q[2], 1, total, stdout); + while ((bytes = BIO_read(conn, buffer, BUFSIZE)) > 0 || + BIO_should_retry(conn)) + fwrite(buffer, 1, bytes, stdout); + break; + case '3': /* redirect */ + /* META is the redirection URL. */ + printf(REDIRECT_RESPONSE, status1 == '0' ? + '7' /* temporary */ : + '1' /* permanent */, buffer); + break; + case '4': /* temporary failure */ + /* META is additional information. */ + /* TODO maybe set status code too? */ + switch (status1) { + case '1': + tmp = "Server unavailable"; + break; + case '2': + tmp = "CGI error"; + break; + case '3': + tmp = "Proxy error"; + break; + case '4': + tmp = "Slow down!"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Temporary failure"; + break; + } + printf(TEMPFAIL_RESPONSE, tmp, buffer); + break; + case '5': /* permanent failure */ + /* TODO maybe set status code too? */ + switch (status1) { + case '1': + tmp = "Not found"; + break; + case '2': + tmp = "Gone"; + break; + case '3': + tmp = "Proxy request refused"; + break; + case '9': + tmp = "Bad request"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Permanent failure"; + break; + } + printf(PERMFAIL_RESPONSE, tmp, buffer); + break; + case '6': /* permanent failure */ + /* TODO maybe set status code too? */ + switch (status1) { + case '1': + tmp = "Certificate not authorized"; + break; + case '2': + tmp = "Certificate not valid"; + break; + case '0': + default: /* no additional information provided in the code */ + tmp = "Certificate failure"; + break; + } + printf(CERTFAIL_RESPONSE, tmp, buffer); + } +} + +void decode_query(const char *input_url, char *output_buffer) +{ + const char *p; + char *q, *endp, c; + + endp = &output_buffer[BUFSIZE]; + for (p = input_url, q = output_buffer; *p && q < endp; ++p, ++q) { + if (*p != '%') { + *q = *p; + } else { + if (!isxdigit(p[1] & 0xFF) || !isxdigit(p[2] & 0xFF)) + DIE("Invalid percent encoding"); + c = tolower(p[1] & 0xFF); + *q = ('a' <= c && c <= 'z') ? + c - 'a' + 10 : + c - '0'; + c = tolower(p[2] & 0xFF); + *q = (*q << 4) | (('a' <= c || c <= 'z') ? + c - 'a' + 10 : + c - '0'); + p += 2; + } + } + if (q >= endp) + DIE("Query too long"); + *q = '\0'; +} + + +void read_post(const char *hostp, char *portp, char *pathp, const char *urlbuf, + char *khsbuf, FILE **known_hosts) +{ + /* TODO move query strings here */ + size_t n; + char *p, *q; + FILE *known_hosts_tmp; + long last_pos, len, total; + size_t khslen; + char inbuf[BUFSIZE + 1], buffer[BUFSIZE + 1]; + + n = fread(inbuf, 1, BUFSIZE, stdin); + inbuf[n] = '\0'; + if ((p = strstr(inbuf, "input="))) { + decode_query(p + 6, buffer); + if (!(q = strstr(pathp, "?"))) /* no query string */ + q = &pathp[strlen(pathp)]; + for (; *p && q < &urlbuf[BUFSIZE]; ++p, ++q) + *q = *p; + if (q >= &urlbuf[BUFSIZE]) + DIE("Query too long"); + } else if (!(p = strstr(inbuf, "trust_cert="))) { + DIE("Invalid POST request: trust_cert missing"); + } + p += sizeof("trust_cert=") - 1; + if (!strncmp(p, "always", 6)) { + /* move to file end */ + fseek(*known_hosts, 0L, SEEK_END); + last_pos = ftell(*known_hosts); + if (!(p = strstr(p, "entry="))) + DIE("Invalid POST request: missing entry"); + p += sizeof("entry=") - 1; + decode_query(p, buffer); + /* replace plus signs */ + p = buffer; + while ((p = strstr(p, "+"))) + *p = ' '; + fwrite(buffer, 1, strlen(buffer), *known_hosts); + fwrite("\n", 1, 1, *known_hosts); + khslen = strlen(khsbuf); + khsbuf[khslen] = '~'; + khsbuf[khslen + 1] = '\0'; + if (!(known_hosts_tmp = fopen(khsbuf, "w+"))) + PDIE("Error opening temporary hosts file"); + rewind(*known_hosts); + *portp = '\0'; + total = 0; + while (fgets(buffer, BUFSIZE, *known_hosts)) { + len = strlen(buffer); + if (!len) + continue; + if ((total += len) > last_pos) { + /* finished */ + fwrite(buffer, 1, len, known_hosts_tmp); + break; + } + if (buffer[len - 1] != '\n') { + /* clean up */ + fclose(known_hosts_tmp); + unlink(khsbuf); + DIE("Line too long"); + } + if (!(p = strstr(buffer, " "))) + DIE("Invalid entry in known_hosts file"); + *p = '\0'; + if (strcmp(buffer, hostp)) { + *p = ' '; + fwrite(buffer, 1, len, known_hosts_tmp); + } + } + *portp = ':'; + memcpy(buffer, khsbuf, BUFSIZE + 1); + buffer[khslen] = '\0'; + fclose(*known_hosts); + fclose(known_hosts_tmp); + if (rename(khsbuf, buffer)) + PDIE("Failed to rename temporary file"); + khsbuf[khslen] = '\0'; + if (!(*known_hosts = fopen(khsbuf, "a+"))) + PDIE("Failed to re-open known hosts file"); + } else if (strncmp(p, "once", 4)) { + DIE("Invalid POST request"); + } +} + +FILE *open_known_hosts(char *khsbuf) +{ + const char *known_hosts_path, *xdg_dir, *home_dir; + char *p; + size_t len; + struct stat s; + FILE *known_hosts; + + known_hosts_path = getenv("GMIFETCH_KNOWN_HOSTS"); + if (!known_hosts_path) { + xdg_dir = getenv("XDG_CONFIG_HOME"); + if ((xdg_dir = getenv("XDG_CONFIG_HOME"))) { + len = strlen(xdg_dir); +#define CONFIG_REL "/gmifetch/known_hosts" + if (len + sizeof(CONFIG_REL) > BUFSIZE) + DIE("Error: config directory path too long"); + memcpy(khsbuf, xdg_dir, len); + memcpy(&khsbuf[len], CONFIG_REL, sizeof(CONFIG_REL)); + } else { + if (!(home_dir = getenv("HOME"))) + home_dir = getpwuid(getuid())->pw_dir; + if (!home_dir) + DIE("Error: failed to get HOME directory"); +#undef CONFIG_REL +#define CONFIG_REL "/.config/gmifetch/known_hosts" + len = strlen(home_dir); + if (len + sizeof(CONFIG_REL) > BUFSIZE) + DIE("Error: home directory path too long"); + memcpy(khsbuf, home_dir, len); + memcpy(&khsbuf[len], CONFIG_REL, sizeof(CONFIG_REL)); + } + } else { + len = strlen(known_hosts_path); + if (len > BUFSIZE) + DIE("Error: known hosts path too long"); + memcpy(khsbuf, known_hosts_path, len); + } + p = khsbuf; + if (*p == '/') + ++p; + for (; *p; ++p) { + if (*p == '/') { + *p = '\0'; + if (stat(khsbuf, &s) == -1) { + if (errno != ENOENT) + PDIE("Error calling stat"); + if (mkdir(khsbuf, 0755) == -1) + PDIE("Error calling mkdir"); + } else if (!S_ISDIR(s.st_mode)) { + if (mkdir(khsbuf, 0755) == -1) + PDIE("Error calling mkdir"); + } + *p = '/'; + } + } + if (!(known_hosts = fopen(khsbuf, "a+"))) + PDIE("Error opening known hosts file"); + return known_hosts; +} + +int main(int argc, const char *argv[]) +{ + const char *input_url, *method, *all_proxy; + char *hostp, *portp, *pathp, *endp, *stored_digestp; + int connect_res; + time_t their_time; + char hashbuf2[EVP_MAX_MD_SIZE * 3 + 1]; + char urlbuf[BUFSIZE + 1], khsbuf[BUFSIZE + 2], linebuf[BUFSIZE + 1]; + FILE *known_hosts; + + if (argc != 2) { + input_url = getenv("QUERY_STRING"); + if (!input_url) + DIE("Usage: gmifetch [url] (or set QUERY_STRING)"); + } else { + input_url = argv[1]; + } + all_proxy = getenv("ALL_PROXY"); +#define PROXY_ERR "gmifetch does not support proxies yet. Please disable" \ + "your proxy for gemini URLs if you wish to proceed anyway." + if (all_proxy && *all_proxy) + DIE(PROXY_ERR); + known_hosts = open_known_hosts(khsbuf); + setup_ssl(); + extract_hostname(input_url, &hostp, &portp, &pathp, &endp, urlbuf); + method = getenv("REQUEST_METHOD"); + if (method && !strcmp(method, "POST")) + read_post(hostp, portp, pathp, urlbuf, khsbuf, &known_hosts); + connect_res = connect(hostp, portp, pathp, endp, linebuf, &stored_digestp, + &their_time, hashbuf2, known_hosts); + if (connect_res == 1) { /* valid certificate */ + /* I really wish this was explicitly mentioned in the + * standard. Something like: + * + * !!!WARNING WARNING WARNING some gemini servers will + * not accept URLs containing the default port number!!! + */ + if (!strncmp(portp, ":1965", 5)) + /* move including null terminator */ + memmove(portp, &portp[5], strlen(&portp[5]) + 1); + BIO_puts(conn, urlbuf); + read_response(urlbuf); + } else if (connect_res == 0) { /* invalid certificate */ + printf(INVALID_CERT_RESPONSE, stored_digestp, hashbuf2, + khsbuf); + } else if (connect_res == -1) { /* no certificate */ + *portp = '\0'; + printf(UNKNOWN_CERT_RESPONSE, khsbuf, hashbuf2, hostp, + hashbuf2, (unsigned long)their_time); + } else { /* -2: updated expiration date */ + *portp = '\0'; + printf(UPDATED_CERT_RESPONSE, khsbuf, + ctime(&their_time), hostp, hashbuf2, + (unsigned long)their_time); + } + BIO_free_all(conn); + exit(0); +} |