diff options
author | bptato <nincsnevem662@gmail.com> | 2024-02-12 17:03:35 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-02-12 17:03:35 +0100 |
commit | 8e6783a45fba48dd8f63fe7486e4691f05220b52 (patch) | |
tree | 5aae9f9f95432609a497eea858c4a3401dac172b /src/html | |
parent | 69b1a7e7f6e0a675cd70805768162de5621e8279 (diff) | |
download | chawan-8e6783a45fba48dd8f63fe7486e4691f05220b52.tar.gz |
Remove CLONE BufferSource; cache document sources in tmpdir
At last all BufferSources are unified.

To achieve the same effect as the previous CLONE source type, we now use the "fromcache" flag in Request. This *forces* the document to be streamed from the disk; if the file no longer exists for some reason, an error is returned (i.e. the document is not re-downloaded).

For a document to be cached, it has to be the main document of the buffer (i.e. no additional resources requested with fetch()), and also not an x-htmloutput HTML file (for those, the original source is saved). The result is that toggleSource now always returns the actual source for e.g. markdown files, not the HTML-transformed version.

Also, it is now possible to view the source of a document that is still being downloaded.

buffer.sstream has almost been eliminated; it still exists, but only as a pseudo-buffer to interface with EncoderStream and DecoderStream. It no longer holds the entire source of a buffer at any point, and is cleared as soon as the buffer is completely loaded.
Diffstat (limited to 'src/html')
-rw-r--r-- | src/html/chadombuilder.nim | 64 |
1 file changed, 45 insertions, 19 deletions
diff --git a/src/html/chadombuilder.nim b/src/html/chadombuilder.nim index 044d8643..d604f455 100644 --- a/src/html/chadombuilder.nim +++ b/src/html/chadombuilder.nim @@ -29,12 +29,15 @@ type seekable: bool builder*: ChaDOMBuilder opts: HTML5ParserOpts[Node, CAtom] - inputStream: Stream + stream: StringStream encoder: EncoderStream decoder: DecoderStream + rewindImpl: proc() # hack so we don't have to worry about leaks or the GC deallocating parser refs: seq[Document] stoppedFromScript: bool + needsBOMSniff: bool + wasICE: bool # inhibitCheckEnd ChaDOMBuilder = ref object of DOMBuilder[Node, CAtom] charset: Charset @@ -260,17 +263,21 @@ proc parseHTMLFragment*(element: Element, s: string): seq[Node] = builder.finish() return root.childList -#TODO this should be handled by decoderstream -proc bomSniff(inputStream: Stream): Charset = - let bom = inputStream.readStr(2) +#TODO this should be handled by decoderstream or buffer +proc bomSniff(wrapper: HTML5ParserWrapper): Charset = + let stream = wrapper.stream + let op = stream.getPosition() + if op + 2 >= stream.data.len: + return CHARSET_UNKNOWN + let bom = stream.readStr(2) if bom == "\xFE\xFF": return CHARSET_UTF_16_BE if bom == "\xFF\xFE": return CHARSET_UTF_16_LE if bom == "\xEF\xBB": - if inputStream.readChar() == '\xBF': + if op + 3 < stream.data.len and stream.readChar() == '\xBF': return CHARSET_UTF_8 - inputStream.setPosition(0) + wrapper.stream.setPosition(op) return CHARSET_UNKNOWN proc switchCharset(wrapper: HTML5ParserWrapper) = @@ -284,16 +291,18 @@ proc switchCharset(wrapper: HTML5ParserWrapper) = DECODER_ERROR_MODE_REPLACEMENT else: DECODER_ERROR_MODE_FATAL + let ice = wrapper.decoder == nil or wrapper.wasICE wrapper.parser = initHTML5Parser(builder, wrapper.opts) - wrapper.decoder = newDecoderStream(wrapper.inputStream, builder.charset, + wrapper.decoder = newDecoderStream(wrapper.stream, builder.charset, errormode = em) - wrapper.decoder.setInhibitCheckEnd(true) + 
wrapper.decoder.setInhibitCheckEnd(ice) + wrapper.wasICE = ice wrapper.encoder = newEncoderStream(wrapper.decoder, CHARSET_UTF_8, errormode = ENCODER_ERROR_MODE_FATAL) -proc newHTML5ParserWrapper*(inputStream: Stream, window: Window, url: URL, - factory: CAtomFactory, charsets: seq[Charset] = @[], seekable = true): - HTML5ParserWrapper = +proc newHTML5ParserWrapper*(stream: StringStream, window: Window, url: URL, + factory: CAtomFactory, rewindImpl: proc(), charsets: seq[Charset], + seekable: bool): HTML5ParserWrapper = let opts = HTML5ParserOpts[Node, CAtom]( isIframeSrcdoc: false, #TODO? scripting: window != nil and window.settings.scripting @@ -303,14 +312,12 @@ proc newHTML5ParserWrapper*(inputStream: Stream, window: Window, url: URL, seekable: seekable, builder: builder, opts: opts, - inputStream: inputStream + stream: stream, + rewindImpl: rewindImpl, + needsBOMSniff: seekable ) builder.document.setActiveParser(wrapper) - if seekable and (let scs = inputStream.bomSniff(); scs != CHARSET_UNKNOWN): - builder.confidence = ccCertain - wrapper.charsetStack = @[scs] - wrapper.seekable = false - elif charsets.len == 0: + if charsets.len == 0: wrapper.charsetStack = @[DefaultCharset] # UTF-8 else: for i in countdown(charsets.high, 0): @@ -385,13 +392,23 @@ proc CDB_parseDocumentWriteChunk(wrapper: pointer) {.exportc.} = proc parseAll*(wrapper: HTML5ParserWrapper) = let builder = wrapper.builder + if wrapper.needsBOMSniff: + if wrapper.stream.getPosition() + 3 >= wrapper.stream.data.len: + return + let scs = wrapper.bomSniff() + if scs != CHARSET_UNKNOWN: + builder.confidence = ccCertain + wrapper.charsetStack = @[scs] + wrapper.seekable = false + wrapper.switchCharset() + wrapper.needsBOMSniff = false while true: let buffer = wrapper.encoder.readAll() if wrapper.decoder.failed: assert wrapper.seekable # Retry with another charset. 
builder.restart(wrapper) - wrapper.inputStream.setPosition(0) + wrapper.rewindImpl() wrapper.switchCharset() continue if buffer.len == 0: @@ -402,13 +419,22 @@ proc parseAll*(wrapper: HTML5ParserWrapper) = # res == PRES_STOP: A meta tag describing the charset has been found; force # use of this charset. builder.restart(wrapper) - wrapper.inputStream.setPosition(0) + wrapper.rewindImpl() wrapper.charsetStack.add(builder.charset) wrapper.seekable = false wrapper.switchCharset() proc finish*(wrapper: HTML5ParserWrapper) = + if wrapper.needsBOMSniff: + let scs = wrapper.bomSniff() + if scs != CHARSET_UNKNOWN: + wrapper.builder.confidence = ccCertain + wrapper.charsetStack = @[scs] + wrapper.seekable = false + wrapper.switchCharset() + wrapper.needsBOMSniff = false wrapper.decoder.setInhibitCheckEnd(false) + wrapper.wasICE = false wrapper.parseAll() wrapper.parser.finish() wrapper.builder.finish() |