about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorbptato <nincsnevem662@gmail.com>2024-01-08 00:37:54 +0100
committerbptato <nincsnevem662@gmail.com>2024-01-08 00:37:54 +0100
commitb0547ba9f48bf402665b89f84b88b80ee58d8824 (patch)
tree39bf645df6ea365f912040255eb136c5deafe2b1
parent10a02b2ae2613b383453ba99318330560e9371ac (diff)
downloadchawan-b0547ba9f48bf402665b89f84b88b80ee58d8824.tar.gz
Add urlenc, urldec; fix a URL encoding bug; improve trans.cgi
* Fix incorrect internal definition of the fragment percent-encode set
* urlenc, urldec: these are simple utility programs mainly for use
  with shell local CGI scripts. (Sadly the printf + xargs solution is
  not portable.)
* Pass libexec directory as an env var to local CGI scripts
* Update trans.cgi to use urldec and add an example for combining
  it with selections
-rw-r--r--Makefile17
-rw-r--r--adapter/tools/urldec.nim6
-rw-r--r--adapter/tools/urlenc.nim41
-rwxr-xr-xbonus/trans.cgi32
-rw-r--r--doc/localcgi.md15
-rw-r--r--src/config/chapath.nim2
-rw-r--r--src/loader/cgi.nim2
-rw-r--r--src/utils/twtstr.nim30
8 files changed, 119 insertions, 26 deletions
diff --git a/Makefile b/Makefile
index ddeb0fa5..58557e06 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,8 @@ all: $(OUTDIR_BIN)/cha $(OUTDIR_CGI_BIN)/http \
 	$(OUTDIR_CGI_BIN)/gopher $(OUTDIR_LIBEXEC)/gopher2html \
 	$(OUTDIR_CGI_BIN)/cha-finger $(OUTDIR_CGI_BIN)/about \
 	$(OUTDIR_CGI_BIN)/data $(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \
-	$(OUTDIR_CGI_BIN)/spartan
+	$(OUTDIR_CGI_BIN)/spartan \
+	$(OUTDIR_LIBEXEC)/urldec $(OUTDIR_LIBEXEC)/urlenc
 
 $(OUTDIR_BIN)/cha: lib/libquickjs.a src/*.nim src/**/*.nim src/**/*.c res/* \
 		res/**/* res/map/idna_gen.nim
@@ -130,6 +131,16 @@ $(OUTDIR_CGI_BIN)/gopher: adapter/protocol/gopher.nim adapter/protocol/curlwrap.
 	$(NIMC) $(FLAGS) -d:curlLibName:$(CURLLIBNAME) --nimcache:"$(OBJDIR)/$(TARGET)/gopher" \
 		-o:"$(OUTDIR_CGI_BIN)/gopher" adapter/protocol/gopher.nim
 
+$(OUTDIR_LIBEXEC)/urldec: adapter/tools/urldec.nim src/utils/twtstr.nim
+	@mkdir -p "$(OUTDIR_LIBEXEC)"
+	$(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/urldec" \
+		-o:"$(OUTDIR_LIBEXEC)/urldec" adapter/tools/urldec.nim
+
+$(OUTDIR_LIBEXEC)/urlenc: adapter/tools/urlenc.nim src/utils/twtstr.nim
+	@mkdir -p "$(OUTDIR_LIBEXEC)"
+	$(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/urlenc" \
+		-o:"$(OUTDIR_LIBEXEC)/urlenc" adapter/tools/urlenc.nim
+
 CFLAGS = -fwrapv -g -Wall -O2 -DCONFIG_VERSION=\"$(shell cat lib/quickjs/VERSION)\"
 QJSOBJ = $(OBJDIR)/quickjs
 
@@ -195,6 +206,8 @@ install:
 	install -m755 "$(OUTDIR_CGI_BIN)/gmifetch" $(LIBEXECDIR_CHAWAN)/cgi-bin
 	install -m755 "$(OUTDIR_CGI_BIN)/cha-finger" $(LIBEXECDIR_CHAWAN)/cgi-bin
 	install -m755 "$(OUTDIR_CGI_BIN)/spartan" $(LIBEXECDIR_CHAWAN)/cgi-bin
+	install -m755 "$(OUTDIR_LIBEXEC)/urldec" $(LIBEXECDIR_CHAWAN)/urldec
+	install -m755 "$(OUTDIR_LIBEXEC)/urlenc" $(LIBEXECDIR_CHAWAN)/urlenc
 	if test -d "$(OBJDIR)/man"; then \
 	mkdir -p "$(DESTDIR)$(MANPREFIX5)"; \
 	mkdir -p "$(DESTDIR)$(MANPREFIX1)"; \
@@ -222,6 +235,8 @@ uninstall:
 	rmdir $(LIBEXECDIR_CHAWAN)/cgi-bin || true
 	rm -f $(LIBEXECDIR_CHAWAN)/gopher2html
 	rm -f $(LIBEXECDIR_CHAWAN)/gmi2html
+	rm -f $(LIBEXECDIR_CHAWAN)/urldec
+	rm -f $(LIBEXECDIR_CHAWAN)/urlenc
 	rmdir $(LIBEXECDIR_CHAWAN) || true
 	rm -f "$(DESTDIR)$(MANPREFIX5)/cha-config.5"
 	rm -f "$(DESTDIR)$(MANPREFIX5)/cha-mailcap.5"
diff --git a/adapter/tools/urldec.nim b/adapter/tools/urldec.nim
new file mode 100644
index 00000000..b26fcb68
--- /dev/null
+++ b/adapter/tools/urldec.nim
@@ -0,0 +1,6 @@
+# Read all of stdin and write its percent-decoded form to stdout.
+#TODO a streaming implementation of this could be useful
+
+import utils/twtstr
+
+stdout.write(stdin.readAll().percentDecode())
diff --git a/adapter/tools/urlenc.nim b/adapter/tools/urlenc.nim
new file mode 100644
index 00000000..79d3e452
--- /dev/null
+++ b/adapter/tools/urlenc.nim
@@ -0,0 +1,41 @@
+# Percent-encode input received on stdin with a specified percent-encoding set.
+#TODO a streaming implementation of this could be useful
+
+import std/os
+
+import utils/twtstr
+
+proc usage() = # print the command-line help text to stderr
+  stderr.write("""
+Usage: urlenc [set]
+The input to be encoded is read from stdin.
+[set] decides which characters are encoded. It can be:
+    control
+    fragment
+    query
+    path
+    userinfo
+    component
+    application-x-www-form-urlencoded
+""")
+
+proc main() =
+  ## Encode stdin with the set named by the sole argument; exit 1 on misuse.
+  if paramCount() != 1:
+    usage()
+    quit(1)
+  # Resolve the set before reading, so a bad name fails without blocking.
+  let encodeSet = case paramStr(1)
+  of "control": ControlPercentEncodeSet
+  of "fragment": FragmentPercentEncodeSet
+  of "query": QueryPercentEncodeSet
+  of "path": PathPercentEncodeSet
+  of "userinfo": UserInfoPercentEncodeSet
+  of "component": ComponentPercentEncodeSet
+  of "application-x-www-form-urlencoded": ApplicationXWWWFormUrlEncodedSet
+  else:
+    usage()
+    quit(1)
+  stdout.write(percentEncode(stdin.readAll(), encodeSet))
+
+main()
diff --git a/bonus/trans.cgi b/bonus/trans.cgi
index bfaca713..693dec2a 100755
--- a/bonus/trans.cgi
+++ b/bonus/trans.cgi
@@ -1,16 +1,24 @@
 #!/bin/sh
 # Needs https://github.com/soimort/translate-shell to work.
 # Usage: cgi-bin:trans.cgi?word
+# You can also set it as a keybinding (in config.toml):
+#
+# [page]
+# gT = '''
+# async () => {
+#   if (!pager.currentSelection) {
+#     pager.alert("No selection to translate.");
+#     return;
+#   }
+#   const text = await pager.getSelectionText(pager.currentSelection);
+#   pager.cursorToggleSelection();
+#   pager.load(`cgi-bin:trans.cgi?${encodeURIComponent(text)}\n`);
+# }
+# '''
 
-decode() {
-	# URL-decode the string passed as the first parameter
-	printf '%s\n' "$1" | \
-		sed 's/+/ /g;s/%/\\x/g' | \
-		xargs -0 printf "%b"
-}
-
-# QUERY_STRING is URL-encoded. We decode it using the decode() function.
-TEXT="$(decode "$QUERY_STRING")"
+# QUERY_STRING is URL-encoded. We decode it using the urldec utility provided
+# by Chawan.
+TEXT=$(printf '%s\n' "$QUERY_STRING" | "$CHA_LIBEXEC_DIR"/urldec)
 
 # Write a Content-Type HTTP header. The `trans' command outputs plain text,
 # so we use text/plain.
@@ -21,10 +29,10 @@ printf 'Content-Type: text/plain\n'
 printf '\n'
 
 # Check if the `trans' program exists, and if not, die.
-type trans >/dev/null || {
-	printf "ERROR: translator not found"
+if ! type trans >/dev/null
+then	printf "ERROR: translator not found"
 	exit 1
-}
+fi
 
 # Call the `trans' program. It writes its output to standard out, which
 # Chawan's local CGI will read in as the content body.
diff --git a/doc/localcgi.md b/doc/localcgi.md
index 2d15e6af..206a3ef7 100644
--- a/doc/localcgi.md
+++ b/doc/localcgi.md
@@ -118,6 +118,9 @@ Chawan sets the following environment variables:
 * `REQUEST_URI="$SCRIPT_NAME/$PATH_INFO?$QUERY_STRING`
 * `REQUEST_METHOD=` HTTP method used for making the request, e.g. GET or POST
 * `REQUEST_HEADERS=` A newline-separated list of all headers for this request.
+* `CHA_LIBEXEC_DIR=` The libexec directory Chawan was configured to use at
+  compile time. See the [tools](#tools) section below for details of
+  why this is useful.
 * `CONTENT_TYPE=` for POST requests, the Content-Type header. Not set for
   other request types (e.g. GET).
 * `CONTENT_LENGTH=` the content length, if $CONTENT_TYPE has been set.
@@ -165,6 +168,18 @@ Note that this may be both an application/x-www-form-urlencoded or a
 multipart/form-data request; `CONTENT_TYPE` stores information about the
 request type, and in case of a multipart request, the boundary as well.
 
+## Tools
+
+Chawan provides certain helper binaries that may be useful for CGI
+scripts. These can be portably accessed by executing
+`"$CHA_LIBEXEC_DIR"/[program name]`.
+
+Currently, the following tools are available:
+
+* `urldec`: percent-decode strings passed on standard input.
+* `urlenc`: percent-encode strings passed on standard input, taking a
+  percent-encode set as the first parameter.
+
 ## Troubleshooting
 
 Note that standard error is redirected to the browser console (by default,
diff --git a/src/config/chapath.nim b/src/config/chapath.nim
index adf852a0..a2800c6c 100644
--- a/src/config/chapath.nim
+++ b/src/config/chapath.nim
@@ -9,7 +9,7 @@ import js/tojs
 import types/opt
 import utils/twtstr
 
-const libexecPath {.strdefine.} = "${%CHA_BIN_DIR}/../libexec/chawan"
+const libexecPath* {.strdefine.} = "${%CHA_BIN_DIR}/../libexec/chawan"
 
 type ChaPath* = distinct string
 
diff --git a/src/loader/cgi.nim b/src/loader/cgi.nim
index 1da8fc4d..76076b1c 100644
--- a/src/loader/cgi.nim
+++ b/src/loader/cgi.nim
@@ -4,6 +4,7 @@ import std/posix
 import std/streams
 import std/strutils
 
+import config/chapath
 import extern/stdio
 import io/posixstream
 import loader/connecterror
@@ -38,6 +39,7 @@ proc setupEnv(cmd, scriptName, pathInfo, requestURI: string, request: Request,
   putEnv("SCRIPT_FILENAME", cmd)
   putEnv("REQUEST_URI", requestURI)
   putEnv("REQUEST_METHOD", $request.httpMethod)
+  putEnv("CHA_LIBEXEC_DIR", libexecPath)
   var headers = ""
   for k, v in request.headers:
     headers &= k & ": " & v & "\r\n"
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index d7e5a5ba..f1af1998 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -491,19 +491,25 @@ func isNonCharacter*(r: Rune): bool =
         0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
         0x10FFFE, 0x10FFFF]
 
-const ControlPercentEncodeSet* = (Controls + NonAscii)
-const FragmentPercentEncodeSet* = (Controls + NonAscii)
-const QueryPercentEncodeSet* = (ControlPercentEncodeSet + {' ', '"', '#', '<', '>'})
-const SpecialQueryPercentEncodeSet* = (QueryPercentEncodeSet + {'\''})
-const PathPercentEncodeSet* = (QueryPercentEncodeSet + {'?', '`', '{', '}'})
-const UserInfoPercentEncodeSet* = (PathPercentEncodeSet + {'/', ':', ';', '=', '@', '['..'^', '|'})
-const ComponentPercentEncodeSet* = (UserInfoPercentEncodeSet + {'$'..'&', '+', ','})
-const ApplicationXWWWFormUrlEncodedSet* = (ComponentPercentEncodeSet + {'!', '\''..')', '~'})
-# used by client
-when defined(windows) or defined(OS2) or defined(DOS):
-  const LocalPathPercentEncodeSet* = (Ascii - AsciiAlpha - AsciiDigit - {'.', '\\', '/'})
+const ControlPercentEncodeSet* = Controls + NonAscii
+const FragmentPercentEncodeSet* = ControlPercentEncodeSet +
+  {' ', '"', '<', '>', '`'}
+const QueryPercentEncodeSet* = FragmentPercentEncodeSet - {'`'} + {'#'}
+const SpecialQueryPercentEncodeSet* = QueryPercentEncodeSet + {'\''}
+const PathPercentEncodeSet* = QueryPercentEncodeSet + {'?', '`', '{', '}'}
+const UserInfoPercentEncodeSet* = PathPercentEncodeSet +
+  {'/', ':', ';', '=', '@', '['..'^', '|'}
+const ComponentPercentEncodeSet* = UserInfoPercentEncodeSet +
+  {'$'..'&', '+', ','}
+const ApplicationXWWWFormUrlEncodedSet* = ComponentPercentEncodeSet +
+  {'!', '\''..')', '~'}
+# used by pager
+when DirSep == '\\':
+  const LocalPathPercentEncodeSet* = Ascii - AsciiAlpha - AsciiDigit -
+    {'.', '\\', '/'}
 else:
-  const LocalPathPercentEncodeSet* = (Ascii - AsciiAlpha - AsciiDigit -  {'.', '/'})
+  const LocalPathPercentEncodeSet* = Ascii - AsciiAlpha - AsciiDigit -
+    {'.', '/'}
 
 proc percentEncode*(append: var string, c: char, set: set[char], spaceAsPlus = false) {.inline.} =
   if spaceAsPlus and c == ' ':