diff options
author | bptato <nincsnevem662@gmail.com> | 2024-01-08 00:37:54 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-01-08 00:37:54 +0100 |
commit | b0547ba9f48bf402665b89f84b88b80ee58d8824 (patch) | |
tree | 39bf645df6ea365f912040255eb136c5deafe2b1 | |
parent | 10a02b2ae2613b383453ba99318330560e9371ac (diff) | |
download | chawan-b0547ba9f48bf402665b89f84b88b80ee58d8824.tar.gz |
Add urlenc, urldec; fix a URL encoding bug; improve trans.cgi
* Fix incorrect internal definition of the fragment percent-encode set
* urlenc, urldec: these are simple utility programs mainly for use with shell local CGI scripts. (Sadly the printf + xargs solution is not portable.)
* Pass libexec directory as an env var to local CGI scripts
* Update trans.cgi to use urldec and add an example for combining it with selections
-rw-r--r-- | Makefile | 17 | ||||
-rw-r--r-- | adapter/tools/urldec.nim | 6 | ||||
-rw-r--r-- | adapter/tools/urlenc.nim | 41 | ||||
-rwxr-xr-x | bonus/trans.cgi | 32 | ||||
-rw-r--r-- | doc/localcgi.md | 15 | ||||
-rw-r--r-- | src/config/chapath.nim | 2 | ||||
-rw-r--r-- | src/loader/cgi.nim | 2 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 30 |
8 files changed, 119 insertions, 26 deletions
diff --git a/Makefile b/Makefile index ddeb0fa5..58557e06 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,8 @@ all: $(OUTDIR_BIN)/cha $(OUTDIR_CGI_BIN)/http \ $(OUTDIR_CGI_BIN)/gopher $(OUTDIR_LIBEXEC)/gopher2html \ $(OUTDIR_CGI_BIN)/cha-finger $(OUTDIR_CGI_BIN)/about \ $(OUTDIR_CGI_BIN)/data $(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \ - $(OUTDIR_CGI_BIN)/spartan + $(OUTDIR_CGI_BIN)/spartan \ + $(OUTDIR_LIBEXEC)/urldec $(OUTDIR_LIBEXEC)/urlenc $(OUTDIR_BIN)/cha: lib/libquickjs.a src/*.nim src/**/*.nim src/**/*.c res/* \ res/**/* res/map/idna_gen.nim @@ -130,6 +131,16 @@ $(OUTDIR_CGI_BIN)/gopher: adapter/protocol/gopher.nim adapter/protocol/curlwrap. $(NIMC) $(FLAGS) -d:curlLibName:$(CURLLIBNAME) --nimcache:"$(OBJDIR)/$(TARGET)/gopher" \ -o:"$(OUTDIR_CGI_BIN)/gopher" adapter/protocol/gopher.nim +$(OUTDIR_LIBEXEC)/urldec: adapter/tools/urldec.nim src/utils/twtstr.nim + @mkdir -p "$(OUTDIR_LIBEXEC)" + $(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/urldec" \ + -o:"$(OUTDIR_LIBEXEC)/urldec" adapter/tools/urldec.nim + +$(OUTDIR_LIBEXEC)/urlenc: adapter/tools/urlenc.nim src/utils/twtstr.nim + @mkdir -p "$(OUTDIR_LIBEXEC)" + $(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/urlenc" \ + -o:"$(OUTDIR_LIBEXEC)/urlenc" adapter/tools/urlenc.nim + CFLAGS = -fwrapv -g -Wall -O2 -DCONFIG_VERSION=\"$(shell cat lib/quickjs/VERSION)\" QJSOBJ = $(OBJDIR)/quickjs @@ -195,6 +206,8 @@ install: install -m755 "$(OUTDIR_CGI_BIN)/gmifetch" $(LIBEXECDIR_CHAWAN)/cgi-bin install -m755 "$(OUTDIR_CGI_BIN)/cha-finger" $(LIBEXECDIR_CHAWAN)/cgi-bin install -m755 "$(OUTDIR_CGI_BIN)/spartan" $(LIBEXECDIR_CHAWAN)/cgi-bin + install -m755 "$(OUTDIR_LIBEXEC)/urldec" $(LIBEXECDIR_CHAWAN)/urldec + install -m755 "$(OUTDIR_LIBEXEC)/urlenc" $(LIBEXECDIR_CHAWAN)/urlenc if test -d "$(OBJDIR)/man"; then \ mkdir -p "$(DESTDIR)$(MANPREFIX5)"; \ mkdir -p "$(DESTDIR)$(MANPREFIX1)"; \ @@ -222,6 +235,8 @@ uninstall: rmdir $(LIBEXECDIR_CHAWAN)/cgi-bin || true rm -f $(LIBEXECDIR_CHAWAN)/gopher2html rm -f 
$(LIBEXECDIR_CHAWAN)/gmi2html + rm -f $(LIBEXECDIR_CHAWAN)/urldec + rm -f $(LIBEXECDIR_CHAWAN)/urlenc rmdir $(LIBEXECDIR_CHAWAN) || true rm -f "$(DESTDIR)$(MANPREFIX5)/cha-config.5" rm -f "$(DESTDIR)$(MANPREFIX5)/cha-mailcap.5" diff --git a/adapter/tools/urldec.nim b/adapter/tools/urldec.nim new file mode 100644 index 00000000..b26fcb68 --- /dev/null +++ b/adapter/tools/urldec.nim @@ -0,0 +1,6 @@ +# Percent-decode input received on stdin. +#TODO a streaming implementation of this could be useful + +import utils/twtstr + +stdout.write(percentDecode(stdin.readAll())) diff --git a/adapter/tools/urlenc.nim b/adapter/tools/urlenc.nim new file mode 100644 index 00000000..79d3e452 --- /dev/null +++ b/adapter/tools/urlenc.nim @@ -0,0 +1,41 @@ +# Percent-encode input received on stdin with a specified percent-encoding set. +#TODO a streaming implementation of this could be useful + +import std/os + +import utils/twtstr + +proc usage() = + stderr.write(""" +Usage: urlenc [set] +The input to be decoded is read from stdin. +[set] decides which characters are encoded. 
It can be: + control + fragment + query + path + userinfo + component + application-x-www-form-urlencoded +""") + +proc main() = + if paramCount() != 1: + usage() + quit(1) + let s = stdin.readAll() + let enc = case paramStr(1) + of "control": percentEncode(s, ControlPercentEncodeSet) + of "fragment": percentEncode(s, FragmentPercentEncodeSet) + of "query": percentEncode(s, QueryPercentEncodeSet) + of "path": percentEncode(s, PathPercentEncodeSet) + of "userinfo": percentEncode(s, UserInfoPercentEncodeSet) + of "component": percentEncode(s, ComponentPercentEncodeSet) + of "application-x-www-form-urlencoded": + percentEncode(s, ApplicationXWWWFormUrlEncodedSet) + else: + usage() + quit(1) + stdout.write(enc) + +main() diff --git a/bonus/trans.cgi b/bonus/trans.cgi index bfaca713..693dec2a 100755 --- a/bonus/trans.cgi +++ b/bonus/trans.cgi @@ -1,16 +1,24 @@ #!/bin/sh # Needs https://github.com/soimort/translate-shell to work. # Usage: cgi-bin:trans.cgi?word +# You can also set it as a keybinding (in config.toml): +# +# [page] +# gT = ''' +# async () => { +# if (!pager.currentSelection) { +# pager.alert("No selection to translate."); +# return; +# } +# const text = await pager.getSelectionText(pager.currentSelection); +# pager.cursorToggleSelection(); +# pager.load(`cgi-bin:trans.cgi?${encodeURIComponent(text)}\n`); +# } +# ''' -decode() { - # URL-decode the string passed as the first parameter - printf '%s\n' "$1" | \ - sed 's/+/ /g;s/%/\\x/g' | \ - xargs -0 printf "%b" -} - -# QUERY_STRING is URL-encoded. We decode it using the decode() function. -TEXT="$(decode "$QUERY_STRING")" +# QUERY_STRING is URL-encoded. We decode it using the urldec utility provided +# by Chawan. +TEXT=$(printf '%s\n' "$QUERY_STRING" | "$CHA_LIBEXEC_DIR"/urldec) # Write a Content-Type HTTP header. The `trans' command outputs plain text, # so we use text/plain. @@ -21,10 +29,10 @@ printf 'Content-Type: text/plain\n' printf '\n' # Check if the `trans' program exists, and if not, die. 
-type trans >/dev/null || { - printf "ERROR: translator not found" +if ! type trans >/dev/null +then printf "ERROR: translator not found" exit 1 -} +fi # Call the `trans' program. It writes its output to standard out, which # Chawan's local CGI will read in as the content body. diff --git a/doc/localcgi.md b/doc/localcgi.md index 2d15e6af..206a3ef7 100644 --- a/doc/localcgi.md +++ b/doc/localcgi.md @@ -118,6 +118,9 @@ Chawan sets the following environment variables: * `REQUEST_URI="$SCRIPT_NAME/$PATH_INFO?$QUERY_STRING` * `REQUEST_METHOD=` HTTP method used for making the request, e.g. GET or POST * `REQUEST_HEADERS=` A newline-separated list of all headers for this request. +* `CHA_LIBEXEC_DIR=` The libexec directory Chawan was configured to use at + compile time. See the [tools](#tools) section below for details of + why this is useful. * `CONTENT_TYPE=` for POST requests, the Content-Type header. Not set for other request types (e.g. GET). * `CONTENT_LENGTH=` the content length, if $CONTENT_TYPE has been set. @@ -165,6 +168,18 @@ Note that this may be both an application/x-www-form-urlencoded or a multipart/form-data request; `CONTENT_TYPE` stores information about the request type, and in case of a multipart request, the boundary as well. +## Tools + +Chawan provides certain helper binaries that may be useful for CGI +scripts. These can be portably accessed by executing +`"$CHA_LIBEXEC_DIR"/[program name]`. + +Currently, the following tools are available: + +* `urldec`: percent-decode strings passed on standard input. +* `urlenc`: percent-encode strings passed on standard input, taking a + percent-encode set as the first parameter. 
+ ## Troubleshooting Note that standard error is redirected to the browser console (by default, diff --git a/src/config/chapath.nim b/src/config/chapath.nim index adf852a0..a2800c6c 100644 --- a/src/config/chapath.nim +++ b/src/config/chapath.nim @@ -9,7 +9,7 @@ import js/tojs import types/opt import utils/twtstr -const libexecPath {.strdefine.} = "${%CHA_BIN_DIR}/../libexec/chawan" +const libexecPath* {.strdefine.} = "${%CHA_BIN_DIR}/../libexec/chawan" type ChaPath* = distinct string diff --git a/src/loader/cgi.nim b/src/loader/cgi.nim index 1da8fc4d..76076b1c 100644 --- a/src/loader/cgi.nim +++ b/src/loader/cgi.nim @@ -4,6 +4,7 @@ import std/posix import std/streams import std/strutils +import config/chapath import extern/stdio import io/posixstream import loader/connecterror @@ -38,6 +39,7 @@ proc setupEnv(cmd, scriptName, pathInfo, requestURI: string, request: Request, putEnv("SCRIPT_FILENAME", cmd) putEnv("REQUEST_URI", requestURI) putEnv("REQUEST_METHOD", $request.httpMethod) + putEnv("CHA_LIBEXEC_DIR", libexecPath) var headers = "" for k, v in request.headers: headers &= k & ": " & v & "\r\n" diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index d7e5a5ba..f1af1998 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -491,19 +491,25 @@ func isNonCharacter*(r: Rune): bool = 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF] -const ControlPercentEncodeSet* = (Controls + NonAscii) -const FragmentPercentEncodeSet* = (Controls + NonAscii) -const QueryPercentEncodeSet* = (ControlPercentEncodeSet + {' ', '"', '#', '<', '>'}) -const SpecialQueryPercentEncodeSet* = (QueryPercentEncodeSet + {'\''}) -const PathPercentEncodeSet* = (QueryPercentEncodeSet + {'?', '`', '{', '}'}) -const UserInfoPercentEncodeSet* = (PathPercentEncodeSet + {'/', ':', ';', '=', '@', '['..'^', '|'}) -const ComponentPercentEncodeSet* = (UserInfoPercentEncodeSet + {'$'..'&', '+', ','}) -const ApplicationXWWWFormUrlEncodedSet* = 
(ComponentPercentEncodeSet + {'!', '\''..')', '~'}) -# used by client -when defined(windows) or defined(OS2) or defined(DOS): - const LocalPathPercentEncodeSet* = (Ascii - AsciiAlpha - AsciiDigit - {'.', '\\', '/'}) +const ControlPercentEncodeSet* = Controls + NonAscii +const FragmentPercentEncodeSet* = ControlPercentEncodeSet + + {' ', '"', '<', '>', '`'} +const QueryPercentEncodeSet* = FragmentPercentEncodeSet - {'`'} + {'#'} +const SpecialQueryPercentEncodeSet* = QueryPercentEncodeSet + {'\''} +const PathPercentEncodeSet* = QueryPercentEncodeSet + {'?', '`', '{', '}'} +const UserInfoPercentEncodeSet* = PathPercentEncodeSet + + {'/', ':', ';', '=', '@', '['..'^', '|'} +const ComponentPercentEncodeSet* = UserInfoPercentEncodeSet + + {'$'..'&', '+', ','} +const ApplicationXWWWFormUrlEncodedSet* = ComponentPercentEncodeSet + + {'!', '\''..')', '~'} +# used by pager +when DirSep == '\\': + const LocalPathPercentEncodeSet* = Ascii - AsciiAlpha - AsciiDigit - + {'.', '\\', '/'} else: - const LocalPathPercentEncodeSet* = (Ascii - AsciiAlpha - AsciiDigit - {'.', '/'}) + const LocalPathPercentEncodeSet* = Ascii - AsciiAlpha - AsciiDigit - + {'.', '/'} proc percentEncode*(append: var string, c: char, set: set[char], spaceAsPlus = false) {.inline.} = if spaceAsPlus and c == ' ': |