diff options
author | bptato <nincsnevem662@gmail.com> | 2024-08-03 01:14:41 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-08-03 01:54:35 +0200 |
commit | 4c64687290c908cd791a058dede9bd4f2a1c7757 (patch) | |
tree | 4e72720aa016320a02d19b4a051b9b9916b714f9 | |
parent | 270cf870eb84e80f2de1f2be64b682849ca55585 (diff) | |
download | chawan-4c64687290c908cd791a058dede9bd4f2a1c7757.tar.gz |
loader: move back data URL handling
data URIs can get megabytes long; however, you can only stuff so many bytes into the envp. (This was thwarting my efforts to view pandoc-generated standalone HTML in Chawan.) So put `data:' back into the loader process.
-rw-r--r-- | Makefile | 15 | ||||
-rw-r--r-- | adapter/protocol/data.nim | 32 | ||||
-rw-r--r-- | doc/architecture.md | 3 | ||||
-rw-r--r-- | doc/mailcap.md | 9 | ||||
-rw-r--r-- | doc/protocols.md | 22 | ||||
-rw-r--r-- | res/urimethodmap | 1 | ||||
-rw-r--r-- | src/loader/loader.nim | 51 | ||||
-rw-r--r-- | src/loader/loaderhandle.nim | 4 | ||||
-rw-r--r-- | src/types/urimethodmap.nim | 8 |
9 files changed, 83 insertions, 62 deletions
diff --git a/Makefile b/Makefile index 349d1456..86179793 100644 --- a/Makefile +++ b/Makefile @@ -49,7 +49,7 @@ all: $(OUTDIR_BIN)/cha $(OUTDIR_BIN)/mancha $(OUTDIR_CGI_BIN)/http \ $(OUTDIR_CGI_BIN)/gmifetch $(OUTDIR_LIBEXEC)/gmi2html \ $(OUTDIR_CGI_BIN)/gopher $(OUTDIR_LIBEXEC)/gopher2html \ $(OUTDIR_CGI_BIN)/cha-finger $(OUTDIR_CGI_BIN)/about \ - $(OUTDIR_CGI_BIN)/data $(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \ + $(OUTDIR_CGI_BIN)/file $(OUTDIR_CGI_BIN)/ftp \ $(OUTDIR_CGI_BIN)/man $(OUTDIR_CGI_BIN)/spartan \ $(OUTDIR_CGI_BIN)/stbi $(OUTDIR_CGI_BIN)/jebp \ $(OUTDIR_LIBEXEC)/urldec $(OUTDIR_LIBEXEC)/urlenc \ @@ -96,8 +96,6 @@ $(OUTDIR_CGI_BIN)/man: lib/monoucha/monoucha/jsregex.nim \ $(OUTDIR_CGI_BIN)/http: adapter/protocol/curlwrap.nim \ adapter/protocol/curlerrors.nim adapter/protocol/curl.nim \ src/utils/sandbox.nim $(twtstr) -$(OUTDIR_CGI_BIN)/data: src/types/opt.nim src/utils/map.nim \ - src/loader/connecterror.nim $(twtstr) $(OUTDIR_CGI_BIN)/about: res/chawan.html res/license.md $(OUTDIR_CGI_BIN)/file: adapter/protocol/dirlist.nim $(twtstr) \ src/utils/strwidth.nim src/loader/connecterror.nim @@ -164,8 +162,7 @@ manpages = $(manpages1) $(manpages5) .PHONY: manpage manpage: $(manpages:%=doc/%) -protocols = http about data file ftp gopher gmifetch cha-finger man spartan \ - stbi jebp +protocols = http about file ftp gopher gmifetch cha-finger man spartan stbi jebp converters = gopher2html md2html ansi2html gmi2html tools = urldec urlenc @@ -174,7 +171,7 @@ install: mkdir -p "$(DESTDIR)$(PREFIX)/bin" install -m755 "$(OUTDIR_BIN)/cha" "$(DESTDIR)$(PREFIX)/bin" install -m755 "$(OUTDIR_BIN)/mancha" "$(DESTDIR)$(PREFIX)/bin" - @# intentionally not quoted +# intentionally not quoted mkdir -p $(LIBEXECDIR_CHAWAN)/cgi-bin for f in $(protocols); \ do install -m755 "$(OUTDIR_CGI_BIN)/$$f" $(LIBEXECDIR_CHAWAN)/cgi-bin; done @@ -189,10 +186,12 @@ install: uninstall: rm -f "$(DESTDIR)$(PREFIX)/bin/cha" rm -f "$(DESTDIR)$(PREFIX)/bin/mancha" - @# intentionally not 
quoted +# intentionally not quoted for f in $(protocols); do rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/$$f; done - @# note: png has been removed in favor of stbi. +# note: png has been removed in favor of stbi. rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/png +# note: data has been moved back into the main binary. + rm -f $(LIBEXECDIR_CHAWAN)/cgi-bin/data rmdir $(LIBEXECDIR_CHAWAN)/cgi-bin || true for f in $(converters) $(tools); do rm -f $(LIBEXECDIR_CHAWAN)/$$f; done rmdir $(LIBEXECDIR_CHAWAN) || true diff --git a/adapter/protocol/data.nim b/adapter/protocol/data.nim deleted file mode 100644 index 72263780..00000000 --- a/adapter/protocol/data.nim +++ /dev/null @@ -1,32 +0,0 @@ -when NimMajor >= 2: - import std/envvars -else: - import std/os -import std/strutils - -import loader/connecterror -import types/opt -import utils/twtstr - -proc main() = - let str = getEnv("MAPPED_URI_PATH") - const iu = $int(ERROR_INVALID_URL) - var ct = str.until(',') - if AllChars - Ascii + Controls - {'\t', ' '} in ct: - stdout.write("Cha-Control: ConnectionError " & iu & " invalid data URL") - return - let sd = ct.len + 1 # data start - let body = percentDecode(str, sd) - if ct.endsWith(";base64"): - let d = atob0(body) # decode from ct end + 1 - if d.isSome: - ct.setLen(ct.len - ";base64".len) # remove base64 indicator - stdout.write("Content-Type: " & ct & "\n\n") - stdout.write(d.get) - else: - stdout.write("Cha-Control: ConnectionError " & iu & " invalid data URL") - else: - stdout.write("Content-Type: " & ct & "\n\n") - stdout.write(body) - -main() diff --git a/doc/architecture.md b/doc/architecture.md index ae21e181..615fd304 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -111,6 +111,9 @@ following steps: "view source" operation, and by buffers in the rare situation where their initial character encoding guess proves to be incorrect and they need to rewind the source. +* `data:` Decode a data URL. 
This is done directly in the loader process + because very long data URLs wouldn't fit into the environment. (Plus, + obviously, it's more efficient this way.) The loader process distinguishes between clients (i.e the main process or buffers) through client keys. In theory this should help against rogue clients, diff --git a/doc/mailcap.md b/doc/mailcap.md index 9786b219..fd10b7b5 100644 --- a/doc/mailcap.md +++ b/doc/mailcap.md @@ -123,12 +123,17 @@ audio/*; mpv -; needsterminal video/*; mpv - # Open docx files using LibreOffice Writer. -application/vnd.openxmlformats-officedocument.wordprocessingml.document;lowriter %s +application/vnd.openxmlformats-officedocument.wordprocessingml.document; lowriter %s # (Wow that was ugly.) # Display manpages using pandoc. (Make sure the mime type matches the one # set in your mime.types file for extensions .1, .2, .3, ...) -application/x-troff-man;pandoc - -f man -t html -o -; x-htmloutput +application/x-troff-man; pandoc - -f man -t html -o -; x-htmloutput + +# epub -> HTML using pandoc. (Again, don't forget to adjust mime.types.) +# We set http_proxy to keep it from downloading whatever through http/s. +application/epub+zip; http_proxy=localhost:0 pandoc - -f epub \ +--embed-resources --standalone; x-htmloutput # Following entry will be ignored, as text/html is supported natively by Chawan. text/html; cha -dT text/html -I %{charset}; copiousoutput diff --git a/doc/protocols.md b/doc/protocols.md index 2d591bc2..f0f01b65 100644 --- a/doc/protocols.md +++ b/doc/protocols.md @@ -18,8 +18,8 @@ this document. 
* [Gemini](#gemini) * [Finger](#finger) * [Spartan](#spartan) -* [Local schemes: file:, about:, man:, data:](#local-schemes-file-about-man-data) -* [Internal schemes: cgi-bin:, stream:, cache:](#internal-schemes-cgi-bin-stream-cache) +* [Local schemes: file:, about:, man:](#local-schemes-file-about-man) +* [Internal schemes: cgi-bin:, stream:, cache:, data:](#internal-schemes-cgi-bin-stream-cache-data) * [Custom protocols](#custom-protocols) <!-- MANON --> @@ -110,7 +110,7 @@ protocol-specific line type. This is sort of supported through a sed filter for gemtext outputs in the CGI script (in other words, no modification to gmi2html was done to support this). -## Local schemes: file:, about:, man:, data: +## Local schemes: file:, about:, man: While these are not necessarily *protocols*, they are implemented similarly to the protocols listed above (and thus can also be replaced, if the user @@ -129,14 +129,12 @@ references into links. A wrapper command `mancha` also exists; this has an interface similar to `man`. Note: this used to be based on w3mman2html.cgi, but it has been rewritten in Nim (and therefore no longer depends on Perl either). -`data:` decodes a data URL as defined in RFC 2397. +## Internal schemes: cgi-bin:, stream:, cache:, data: -## Internal schemes: cgi-bin:, stream:, cache: - -Three internal protocols exist: `cgi-bin:`, `stream:` and `cache:`. These are -the basic building blocks for the implementation of every protocol mentioned -above; for this reason, these can *not* be replaced, and are implemented in -the main browser binary. +Four internal protocols exist: `cgi-bin:`, `stream:`, `cache:` and `data:`. +These are the basic building blocks for the implementation of every protocol +mentioned above; for this reason, these can *not* be replaced, and are +implemented in the main browser binary. `cgi-bin:` executes a local CGI script. This scheme is used for the actual implementation of the non-internal protocols mentioned above. 
Local CGI scripts @@ -160,6 +158,10 @@ real cache; files are deterministically loaded from the "cache" upon certain actions, and from the network upon others, but neither is used as a fallback to the other. +`data:` decodes a data URL as defined in RFC 2397. This used to be a CGI module, +but has been moved back into the loader process because these URLs can get +so long that they no longer fit into the environment. + ## Custom protocols Chawan is protocol-agnostic. This means that the `cha` binary itself does not diff --git a/res/urimethodmap b/res/urimethodmap index 40e97bd7..21e01546 100644 --- a/res/urimethodmap +++ b/res/urimethodmap @@ -5,7 +5,6 @@ https: cgi-bin:http finger: cgi-bin:cha-finger gemini: cgi-bin:gmifetch about: cgi-bin:about -data: cgi-bin:data file: cgi-bin:file ftp: cgi-bin:ftp sftp: cgi-bin:ftp diff --git a/src/loader/loader.nim b/src/loader/loader.nim index fec07d99..91212e24 100644 --- a/src/loader/loader.nim +++ b/src/loader/loader.nim @@ -407,6 +407,49 @@ proc loadFromCache(ctx: LoaderContext; client: ClientData; handle: LoaderHandle; else: handle.sendResult(ERROR_URL_NOT_IN_CACHE) +# Data URL handler. +# Moved back into loader from CGI, because data URLs can get extremely long +# and thus no longer fit into the environment. 
+proc loadDataSend(ctx: LoaderContext; handle: LoaderHandle; s, ct: string) = + handle.sendResult(0) + handle.sendStatus(200) + handle.sendHeaders(newHeaders({"Content-Type": ct})) + let buffer = newLoaderBuffer(size = s.len) + buffer.len = s.len + copyMem(buffer.page, unsafeAddr s[0], s.len) + let output = handle.output + case ctx.pushBuffer(output, buffer, 0) + of pbrUnregister: + if output.registered: + ctx.unregister(output) + output.oclose() + of pbrDone: + if output.registered or output.suspended: + output.istreamAtEnd = true + ctx.outputMap[output.ostream.fd] = output + else: + output.oclose() + +proc loadData(ctx: LoaderContext; handle: LoaderHandle; request: Request) = + let url = request.url + var ct = url.path.s.until(',') + if AllChars - Ascii + Controls - {'\t', ' '} in ct: + handle.sendResult(ERROR_INVALID_URL, "invalid data URL") + handle.close() + return + let sd = ct.len + 1 # data start + let body = percentDecode(url.path.s, sd) + if ct.endsWith(";base64"): + let d = atob0(body) # decode from ct end + 1 + if d.isNone: + handle.sendResult(ERROR_INVALID_URL, "invalid data URL") + handle.close() + return + ct.setLen(ct.len - ";base64".len) # remove base64 indicator + ctx.loadDataSend(handle, d.get, ct) + else: + ctx.loadDataSend(handle, body, ct) + proc loadResource(ctx: LoaderContext; client: ClientData; config: LoaderClientConfig; request: Request; handle: LoaderHandle) = var redo = true @@ -452,15 +495,17 @@ proc loadResource(ctx: LoaderContext; client: ClientData; ctx.loadFromCache(client, handle, request) assert handle.istream == nil handle.close() + elif request.url.scheme == "data": + ctx.loadData(handle, request) else: prevurl = request.url case ctx.config.uriMethodMap.findAndRewrite(request.url) - of URI_RESULT_SUCCESS: + of ummrSuccess: inc tries redo = true - of URI_RESULT_WRONG_URL: + of ummrWrongURL: handle.rejectHandle(ERROR_INVALID_URI_METHOD_ENTRY) - of URI_RESULT_NOT_FOUND: + of ummrNotFound: handle.rejectHandle(ERROR_UNKNOWN_SCHEME) 
if tries >= MaxRewrites: handle.rejectHandle(ERROR_TOO_MANY_REWRITES) diff --git a/src/loader/loaderhandle.nim b/src/loader/loaderhandle.nim index aa3a32d4..cb05efa1 100644 --- a/src/loader/loaderhandle.nim +++ b/src/loader/loaderhandle.nim @@ -93,9 +93,9 @@ func cap*(buffer: LoaderBuffer): int {.inline.} = template isEmpty*(output: OutputHandle): bool = output.currentBuffer == nil and not output.suspended -proc newLoaderBuffer*(): LoaderBuffer = +proc newLoaderBuffer*(size = LoaderBufferPageSize): LoaderBuffer = return LoaderBuffer( - page: cast[ptr UncheckedArray[uint8]](alloc(LoaderBufferPageSize)), + page: cast[ptr UncheckedArray[uint8]](alloc(size)), len: 0 ) diff --git a/src/types/urimethodmap.nim b/src/types/urimethodmap.nim index 4cb5b9ae..81876c26 100644 --- a/src/types/urimethodmap.nim +++ b/src/types/urimethodmap.nim @@ -32,7 +32,7 @@ func rewriteURL(pattern, surl: string): string = result &= '%' type URIMethodMapResult* = enum - URI_RESULT_NOT_FOUND, URI_RESULT_SUCCESS, URI_RESULT_WRONG_URL + ummrNotFound, ummrSuccess, ummrWrongURL proc findAndRewrite*(this: URIMethodMap; url: var URL): URIMethodMapResult = let protocol = url.protocol @@ -40,10 +40,10 @@ proc findAndRewrite*(this: URIMethodMap; url: var URL): URIMethodMapResult = let surl = this.map[protocol].rewriteURL($url) let x = newURL(surl) if x.isNone: - return URI_RESULT_WRONG_URL + return ummrWrongURL url = x.get - return URI_RESULT_SUCCESS - return URI_RESULT_NOT_FOUND + return ummrSuccess + return ummrNotFound proc insert(this: var URIMethodMap; k, v: string) = if not this.map.hasKeyOrPut(k, v) and k.startsWith("img-codec+"): |