about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-08-03 01:14:41 +0200
committer bptato <nincsnevem662@gmail.com> 2024-08-03 01:54:35 +0200
commit 4c64687290c908cd791a058dede9bd4f2a1c7757 (patch)
tree 4e72720aa016320a02d19b4a051b9b9916b714f9
parent 270cf870eb84e80f2de1f2be64b682849ca55585 (diff)
download chawan-4c64687290c908cd791a058dede9bd4f2a1c7757.tar.gz
loader: move back data URL handling
data URIs can get megabytes long; however, you can only stuff so many
bytes into the envp. (This was thwarting my efforts to view pandoc-
generated standalone HTML in Chawan.) So put `data:' back into the
loader process.
-rw-r--r-- Makefile 15
-rw-r--r-- adapter/protocol/data.nim 32
-rw-r--r-- doc/architecture.md 3
-rw-r--r-- doc/mailcap.md 9
-rw-r--r-- doc/protocols.md 22
-rw-r--r-- res/urimethodmap 1
-rw-r--r-- src/loader/loader.nim 51
-rw-r--r-- src/loader/loaderhandle.nim 4
-rw-r--r-- src/types/urimethodmap.nim 8
9 files changed, 83 insertions, 62 deletions
diff --git a/Makefile b/Makefile
index 349d1456..86179793 100644
--- a/Makefile
+++ b/Makefile
@@ -49,7 +49,7 @@ all: $(OUTDIR_BIN)/cha $(OUTDIR_BIN)/mancha $(OUTDIR_CGI_BIN)/http \
 	$(OUTDIR_CGI_BIN)/gmifetch $(OUTDIR_LIBEXEC)/gmi2html \
 	$(OUTDIR_CGI_BIN)/gopher $(OUTDIR_LIBEXEC)/gopher2html \
 	$(OUTDIR_CGI_BIN)/cha-finger $(OUTDIR_CGI_BIN)/about \
-	$(OUTDIR_CGI_BIN)/data $(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \
+	$(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \
 	$(OUTDIR_CGI_BIN)/man $(OUTDIR_CGI_BIN)/spartan \
 	$(OUTDIR_CGI_BIN)/stbi $(OUTDIR_CGI_BIN)/jebp \
 	$(OUTDIR_LIBEXEC)/urldec $(OUTDIR_LIBEXEC)/urlenc \
@@ -96,8 +96,6 @@ $(OUTDIR_CGI_BIN)/man: lib/monoucha/monoucha/jsregex.nim \
 $(OUTDIR_CGI_BIN)/http: adapter/protocol/curlwrap.nim \
 		adapter/protocol/curlerrors.nim adapter/protocol/curl.nim \
 		src/utils/sandbox.nim $(twtstr)
-$(OUTDIR_CGI_BIN)/data: src/types/opt.nim src/utils/map.nim \
-		src/loader/connecterror.nim $(twtstr)
 $(OUTDIR_CGI_BIN)/about: res/chawan.html res/license.md
 $(OUTDIR_CGI_BIN)/file: adapter/protocol/dirlist.nim $(twtstr) \
 		src/utils/strwidth.nim src/loader/connecterror.nim
@@ -164,8 +162,7 @@ manpages = $(manpages1) $(manpages5)
 .PHONY: manpage
 manpage: $(manpages:%=doc/%)
 
-protocols = http about data file ftp gopher gmifetch cha-finger man spartan \
-	stbi jebp
+protocols = http about file ftp gopher gmifetch cha-finger man spartan stbi jebp
 converters = gopher2html md2html ansi2html gmi2html
 tools = urldec urlenc
 
@@ -174,7 +171,7 @@ install:
 	mkdir -p "$(DESTDIR)$(PREFIX)/bin"
 	install -m755 "$(OUTDIR_BIN)/cha" "$(DESTDIR)$(PREFIX)/bin"
 	install -m755 "$(OUTDIR_BIN)/mancha" "$(DESTDIR)$(PREFIX)/bin"
-	@# intentionally not quoted
+# intentionally not quoted
 	mkdir -p $(LIBEXECDIR_CHAWAN)/cgi-bin
 	for f in $(protocols); \
 	do install -m755 "$(OUTDIR_CGI_BIN)/$$f" $(LIBEXECDIR_CHAWAN)/cgi-bin; done
@@ -189,10 +186,12 @@ install:
 uninstall:
 	rm -f "$(DESTDIR)$(PREFIX)/bin/cha"
 	rm -f "$(DESTDIR)$(PREFIX)/bin/mancha"
-	@# intentionally not quoted
+# intentionally not quoted
 	for f in $(protocols); do rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/$$f; done
-	@# note: png has been removed in favor of stbi.
+# note: png has been removed in favor of stbi.
 	rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/png
+# note: data has been moved back into the main binary.
+	rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/data
 	rmdir $(LIBEXECDIR_CHAWAN)/cgi-bin || true
 	for f in $(converters) $(tools); do rm -f $(LIBEXECDIR_CHAWAN)/$$f; done
 	rmdir $(LIBEXECDIR_CHAWAN) || true
diff --git a/adapter/protocol/data.nim b/adapter/protocol/data.nim
deleted file mode 100644
index 72263780..00000000
--- a/adapter/protocol/data.nim
+++ /dev/null
@@ -1,32 +0,0 @@
-when NimMajor >= 2:
-  import std/envvars
-else:
-  import std/os
-import std/strutils
-
-import loader/connecterror
-import types/opt
-import utils/twtstr
-
-proc main() =
-  let str = getEnv("MAPPED_URI_PATH")
-  const iu = $int(ERROR_INVALID_URL)
-  var ct = str.until(',')
-  if AllChars - Ascii + Controls - {'\t', ' '} in ct:
-    stdout.write("Cha-Control: ConnectionError " & iu  & " invalid data URL")
-    return
-  let sd = ct.len + 1 # data start
-  let body = percentDecode(str, sd)
-  if ct.endsWith(";base64"):
-    let d = atob0(body) # decode from ct end + 1
-    if d.isSome:
-      ct.setLen(ct.len - ";base64".len) # remove base64 indicator
-      stdout.write("Content-Type: " & ct & "\n\n")
-      stdout.write(d.get)
-    else:
-      stdout.write("Cha-Control: ConnectionError " & iu  & " invalid data URL")
-  else:
-    stdout.write("Content-Type: " & ct & "\n\n")
-    stdout.write(body)
-
-main()
diff --git a/doc/architecture.md b/doc/architecture.md
index ae21e181..615fd304 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -111,6 +111,9 @@ following steps:
   "view source" operation, and by buffers in the rare situation where their
   initial character encoding guess proves to be incorrect and they need to
   rewind the source.
+* `data:` Decode a data URL. This is done directly in the loader process
+  because very long data URLs wouldn't fit into the environment. (Plus,
+  obviously, it's more efficient this way.)
 
 The loader process distinguishes between clients (i.e the main process or
 buffers) through client keys. In theory this should help against rogue clients,
diff --git a/doc/mailcap.md b/doc/mailcap.md
index 9786b219..fd10b7b5 100644
--- a/doc/mailcap.md
+++ b/doc/mailcap.md
@@ -123,12 +123,17 @@ audio/*; mpv -; needsterminal
 video/*; mpv -
 
 # Open docx files using LibreOffice Writer.
-application/vnd.openxmlformats-officedocument.wordprocessingml.document;lowriter %s
+application/vnd.openxmlformats-officedocument.wordprocessingml.document; lowriter %s
 # (Wow that was ugly.)
 
 # Display manpages using pandoc. (Make sure the mime type matches the one
 # set in your mime.types file for extensions .1, .2, .3, ...)
-application/x-troff-man;pandoc - -f man -t html -o -; x-htmloutput
+application/x-troff-man; pandoc - -f man -t html -o -; x-htmloutput
+
+# epub -> HTML using pandoc. (Again, don't forget to adjust mime.types.)
+# We set http_proxy to keep it from downloading whatever through http/s.
+application/epub+zip; http_proxy=localhost:0 pandoc - -f epub \
+--embed-resources --standalone; x-htmloutput
 
 # Following entry will be ignored, as text/html is supported natively by Chawan.
 text/html; cha -dT text/html -I %{charset}; copiousoutput
diff --git a/doc/protocols.md b/doc/protocols.md
index 2d591bc2..f0f01b65 100644
--- a/doc/protocols.md
+++ b/doc/protocols.md
@@ -18,8 +18,8 @@ this document.
 * [Gemini](#gemini)
 * [Finger](#finger)
 * [Spartan](#spartan)
-* [Local schemes: file:, about:, man:, data:](#local-schemes-file-about-man-data)
-* [Internal schemes: cgi-bin:, stream:, cache:](#internal-schemes-cgi-bin-stream-cache)
+* [Local schemes: file:, about:, man:](#local-schemes-file-about-man)
+* [Internal schemes: cgi-bin:, stream:, cache:, data:](#internal-schemes-cgi-bin-stream-cache-data)
 * [Custom protocols](#custom-protocols)
 
 <!-- MANON -->
@@ -110,7 +110,7 @@ protocol-specific line type. This is sort of supported through a sed filter
 for gemtext outputs in the CGI script (in other words, no modification to
 gmi2html was done to support this).
 
-## Local schemes: file:, about:, man:, data:
+## Local schemes: file:, about:, man:
 
 While these are not necessarily *protocols*, they are implemented similarly
 to the protocols listed above (and thus can also be replaced, if the user
@@ -129,14 +129,12 @@ references into links. A wrapper command `mancha` also exists; this has an
 interface similar to `man`. Note: this used to be based on w3mman2html.cgi, but
 it has been rewritten in Nim (and therefore no longer depends on Perl either).
 
-`data:` decodes a data URL as defined in RFC 2397.
+## Internal schemes: cgi-bin:, stream:, cache:, data:
 
-## Internal schemes: cgi-bin:, stream:, cache:
-
-Three internal protocols exist: `cgi-bin:`, `stream:` and `cache:`. These are
-the basic building blocks for the implementation of every protocol mentioned
-above; for this reason, these can *not* be replaced, and are implemented in
-the main browser binary.
+Four internal protocols exist: `cgi-bin:`, `stream:`, `cache:` and `data:`.
+These are the basic building blocks for the implementation of every protocol
+mentioned above; for this reason, these can *not* be replaced, and are
+implemented in the main browser binary.
 
 `cgi-bin:` executes a local CGI script. This scheme is used for the actual
 implementation of the non-internal protocols mentioned above. Local CGI scripts
@@ -160,6 +158,10 @@ real cache; files are deterministically loaded from the "cache" upon certain
 actions, and from the network upon others, but neither is used as a fallback
 to the other.
 
+`data:` decodes a data URL as defined in RFC 2397. This used to be a CGI module,
+but has been moved back into the loader process because these URLs can get
+so long that they no longer fit into the environment.
+
 ## Custom protocols
 
 Chawan is protocol-agnostic. This means that the `cha` binary itself does not
diff --git a/res/urimethodmap b/res/urimethodmap
index 40e97bd7..21e01546 100644
--- a/res/urimethodmap
+++ b/res/urimethodmap
@@ -5,7 +5,6 @@ https:			cgi-bin:http
 finger:			cgi-bin:cha-finger
 gemini:			cgi-bin:gmifetch
 about:			cgi-bin:about
-data:			cgi-bin:data
 file:			cgi-bin:file
 ftp:			cgi-bin:ftp
 sftp:			cgi-bin:ftp
diff --git a/src/loader/loader.nim b/src/loader/loader.nim
index fec07d99..91212e24 100644
--- a/src/loader/loader.nim
+++ b/src/loader/loader.nim
@@ -407,6 +407,49 @@ proc loadFromCache(ctx: LoaderContext; client: ClientData; handle: LoaderHandle;
   else:
     handle.sendResult(ERROR_URL_NOT_IN_CACHE)
 
+# Data URL handler.
+# Moved back into loader from CGI, because data URLs can get extremely long
+# and thus no longer fit into the environment.
+proc loadDataSend(ctx: LoaderContext; handle: LoaderHandle; s, ct: string) =
+  ## Answer `handle` with result code 0, status 200 and a single
+  ## `Content-Type: ct` header, then hand the whole body `s` to the handle's
+  ## output as one loader buffer sized to fit it.
+  handle.sendResult(0)
+  handle.sendStatus(200)
+  handle.sendHeaders(newHeaders({"Content-Type": ct}))
+  let buffer = newLoaderBuffer(size = s.len)
+  buffer.len = s.len
+  # An empty body (e.g. `data:,`) has no s[0]; indexing it raises an
+  # IndexDefect when bounds checks are enabled, so only copy when there is
+  # something to copy.
+  # NOTE(review): pushBuffer is still called with a zero-length buffer in
+  # that case - confirm it copes with that.
+  if s.len > 0:
+    copyMem(buffer.page, unsafeAddr s[0], s.len)
+  let output = handle.output
+  case ctx.pushBuffer(output, buffer, 0)
+  of pbrUnregister:
+    # The output is to be dropped: unregister it if needed, then close it.
+    if output.registered:
+      ctx.unregister(output)
+    output.oclose()
+  of pbrDone:
+    if output.registered or output.suspended:
+      # Keep the output around and mark its input as finished; presumably
+      # the remaining data gets flushed later - verify against pushBuffer.
+      output.istreamAtEnd = true
+      ctx.outputMap[output.ostream.fd] = output
+    else:
+      output.oclose()
+
+proc loadData(ctx: LoaderContext; handle: LoaderHandle; request: Request) =
+  ## Handle a `data:` request inside the loader process.
+  ##
+  ## The URL path is split at the first ',' into a content type part and a
+  ## data part. The data part is percent-decoded and, if the content type
+  ## ends in ";base64", additionally base64-decoded. The result is sent with
+  ## `loadDataSend`; on a malformed URL, ERROR_INVALID_URL is reported and
+  ## the handle is closed.
+  let url = request.url
+  # Content type part: the text in front of the first ','.
+  var ct = url.path.s.until(',')
+  # Reject content types containing characters from this set (by the names:
+  # non-ASCII and control characters, with tab and space exempted).
+  if AllChars - Ascii + Controls - {'\t', ' '} in ct:
+    handle.sendResult(ERROR_INVALID_URL, "invalid data URL")
+    handle.close()
+    return
+  let sd = ct.len + 1 # data start
+  let body = percentDecode(url.path.s, sd)
+  if ct.endsWith(";base64"):
+    let d = atob0(body) # decode from ct end + 1
+    if d.isNone:
+      # The payload could not be decoded by atob0.
+      handle.sendResult(ERROR_INVALID_URL, "invalid data URL")
+      handle.close()
+      return
+    ct.setLen(ct.len - ";base64".len) # remove base64 indicator
+    ctx.loadDataSend(handle, d.get, ct)
+  else:
+    ctx.loadDataSend(handle, body, ct)
+
 proc loadResource(ctx: LoaderContext; client: ClientData;
     config: LoaderClientConfig; request: Request; handle: LoaderHandle) =
   var redo = true
@@ -452,15 +495,17 @@ proc loadResource(ctx: LoaderContext; client: ClientData;
       ctx.loadFromCache(client, handle, request)
       assert handle.istream == nil
       handle.close()
+    elif request.url.scheme == "data":
+      ctx.loadData(handle, request)
     else:
       prevurl = request.url
       case ctx.config.uriMethodMap.findAndRewrite(request.url)
-      of URI_RESULT_SUCCESS:
+      of ummrSuccess:
         inc tries
         redo = true
-      of URI_RESULT_WRONG_URL:
+      of ummrWrongURL:
         handle.rejectHandle(ERROR_INVALID_URI_METHOD_ENTRY)
-      of URI_RESULT_NOT_FOUND:
+      of ummrNotFound:
         handle.rejectHandle(ERROR_UNKNOWN_SCHEME)
   if tries >= MaxRewrites:
     handle.rejectHandle(ERROR_TOO_MANY_REWRITES)
diff --git a/src/loader/loaderhandle.nim b/src/loader/loaderhandle.nim
index aa3a32d4..cb05efa1 100644
--- a/src/loader/loaderhandle.nim
+++ b/src/loader/loaderhandle.nim
@@ -93,9 +93,9 @@ func cap*(buffer: LoaderBuffer): int {.inline.} =
 template isEmpty*(output: OutputHandle): bool =
   output.currentBuffer == nil and not output.suspended
 
-proc newLoaderBuffer*(): LoaderBuffer =
+proc newLoaderBuffer*(size = LoaderBufferPageSize): LoaderBuffer =
   return LoaderBuffer(
-    page: cast[ptr UncheckedArray[uint8]](alloc(LoaderBufferPageSize)),
+    page: cast[ptr UncheckedArray[uint8]](alloc(size)),
     len: 0
   )
 
diff --git a/src/types/urimethodmap.nim b/src/types/urimethodmap.nim
index 4cb5b9ae..81876c26 100644
--- a/src/types/urimethodmap.nim
+++ b/src/types/urimethodmap.nim
@@ -32,7 +32,7 @@ func rewriteURL(pattern, surl: string): string =
     result &= '%'
 
 type URIMethodMapResult* = enum
-  URI_RESULT_NOT_FOUND, URI_RESULT_SUCCESS, URI_RESULT_WRONG_URL
+  ummrNotFound, ummrSuccess, ummrWrongURL
 
 proc findAndRewrite*(this: URIMethodMap; url: var URL): URIMethodMapResult =
   let protocol = url.protocol
@@ -40,10 +40,10 @@ proc findAndRewrite*(this: URIMethodMap; url: var URL): URIMethodMapResult =
     let surl = this.map[protocol].rewriteURL($url)
     let x = newURL(surl)
     if x.isNone:
-      return URI_RESULT_WRONG_URL
+      return ummrWrongURL
     url = x.get
-    return URI_RESULT_SUCCESS
-  return URI_RESULT_NOT_FOUND
+    return ummrSuccess
+  return ummrNotFound
 
 proc insert(this: var URIMethodMap; k, v: string) =
   if not this.map.hasKeyOrPut(k, v) and k.startsWith("img-codec+"):