about summary refs log tree commit diff stats
path: root/src/html
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-02-05 15:32:05 +0100
committer bptato <nincsnevem662@gmail.com> 2024-02-07 22:21:48 +0100
commit 407b525332106d84f18d74f6b51ae2f7a1ed3475 (patch)
tree c4ca2ad55df079b9c49d9361bc3c495a7a5977d0 /src/html
parent 168bd542d989c76ce3ff09a29b8d77af448c3c12 (diff)
download chawan-407b525332106d84f18d74f6b51ae2f7a1ed3475.tar.gz
Incremental rendering
Yay!

Admittedly, it is not very useful in its current form, except maybe on
very slow networks.

The problem is that renderDocument is *slow*, so we only run it when
onload fails to consume all bytes from the network in a single pass.
Even then, we are guaranteed to get a FOUC, since CSS is only downloaded
in finishLoad(). Well, I think it's cool, anyway.
Diffstat (limited to 'src/html')
-rw-r--r-- src/html/chadombuilder.nim 256
-rw-r--r-- src/html/dom.nim 9
2 files changed, 150 insertions, 115 deletions
diff --git a/src/html/chadombuilder.nim b/src/html/chadombuilder.nim
index 21e94901..a2e80f3b 100644
--- a/src/html/chadombuilder.nim
+++ b/src/html/chadombuilder.nim
@@ -20,14 +20,23 @@ import chame/tags
 # DOMBuilder implementation for Chawan.
 
 type CharsetConfidence = enum
-  CONFIDENCE_TENTATIVE, CONFIDENCE_CERTAIN, CONFIDENCE_IRRELEVANT
+  ccTentative, ccCertain, ccIrrelevant
 
 type
+  HTML5ParserWrapper* = ref object
+    parser: HTML5Parser[Node, CAtom]
+    charsetStack: seq[Charset]
+    seekable: bool
+    builder*: ChaDOMBuilder
+    opts: HTML5ParserOpts[Node, CAtom]
+    inputStream: Stream
+    encoder: EncoderStream
+    decoder: DecoderStream
+
   ChaDOMBuilder = ref object of DOMBuilder[Node, CAtom]
     charset: Charset
     confidence: CharsetConfidence
-    document: Document
-    isFragment: bool
+    document*: Document
     factory: CAtomFactory
     poppedScript: HTMLScriptElement
 
@@ -79,19 +88,21 @@ proc setQuirksModeImpl(builder: ChaDOMBuilder, quirksMode: QuirksMode) =
 
 proc setEncodingImpl(builder: ChaDOMBuilder, encoding: string):
     SetEncodingResult =
-  let charset = getCharset(encoding)
-  if charset == CHARSET_UNKNOWN:
+  if builder.confidence != ccTentative:
     return SET_ENCODING_CONTINUE
   if builder.charset in {CHARSET_UTF_16_LE, CHARSET_UTF_16_BE}:
-    builder.confidence = CONFIDENCE_CERTAIN
+    builder.confidence = ccCertain
     return SET_ENCODING_CONTINUE
-  builder.confidence = CONFIDENCE_CERTAIN
+  let charset = getCharset(encoding)
+  if charset == CHARSET_UNKNOWN:
+    return SET_ENCODING_CONTINUE
+  builder.confidence = ccCertain
   if charset == builder.charset:
     return SET_ENCODING_CONTINUE
-  if charset == CHARSET_X_USER_DEFINED:
-    builder.charset = CHARSET_WINDOWS_1252
+  builder.charset = if charset == CHARSET_X_USER_DEFINED:
+    CHARSET_WINDOWS_1252
   else:
-    builder.charset = charset
+    charset
   return SET_ENCODING_STOP
 
 proc getTemplateContentImpl(builder: ChaDOMBuilder, handle: Node): Node =
@@ -189,7 +200,7 @@ proc elementPoppedImpl(builder: ChaDOMBuilder, element: Node) =
     builder.poppedScript = HTMLScriptElement(element)
 
 proc newChaDOMBuilder(url: URL, window: Window, factory: CAtomFactory,
-    isFragment = false): ChaDOMBuilder =
+    confidence: CharsetConfidence): ChaDOMBuilder =
   let document = newDocument(factory)
   document.contentType = "text/html"
   document.url = url
@@ -198,17 +209,15 @@ proc newChaDOMBuilder(url: URL, window: Window, factory: CAtomFactory,
     window.document = document
   return ChaDOMBuilder(
     document: document,
-    isFragment: isFragment,
-    factory: factory
+    factory: factory,
+    confidence: confidence
   )
 
 # https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
 proc parseHTMLFragment*(element: Element, s: string): seq[Node] =
   let url = parseURL("about:blank").get
   let factory = element.document.factory
-  let builder = newChaDOMBuilder(url, nil, factory)
-  let inputStream = newStringStream(s)
-  builder.isFragment = true
+  let builder = newChaDOMBuilder(url, nil, factory, ccIrrelevant)
   let document = builder.document
   document.mode = element.document.mode
   let state = case element.tagType
@@ -234,12 +243,9 @@ proc parseHTMLFragment*(element: Element, s: string): seq[Node] =
     pushInTemplate: element.tagType == TAG_TEMPLATE
   )
   var parser = initHTML5Parser(builder, opts)
-  var buffer: array[4096, char]
-  while true:
-    let n = inputStream.readData(addr buffer[0], buffer.len)
-    if n == 0: break
-    let res = parser.parseChunk(buffer.toOpenArray(0, n - 1))
-    assert res == PRES_CONTINUE # scripting is false, so this must be continue
+  let res = parser.parseChunk(s.toOpenArray(0, s.high))
+  # scripting is false and confidence is certain -> this must be continue
+  assert res == PRES_CONTINUE
   parser.finish()
   builder.finish()
   return root.childList
@@ -257,107 +263,126 @@ proc bomSniff(inputStream: Stream): Charset =
   inputStream.setPosition(0)
   return CHARSET_UNKNOWN
 
-proc parseHTML*(inputStream: Stream, window: Window, url: URL,
-    factory: CAtomFactory, charsets: seq[Charset] = @[],
-    seekable = true): Document =
+proc switchCharset(wrapper: HTML5ParserWrapper) =
+  let builder = wrapper.builder
+  builder.charset = wrapper.charsetStack.pop()
+  if wrapper.seekable:
+    builder.confidence = ccTentative # used in the next iteration
+  else:
+    builder.confidence = ccCertain
+  let em = if wrapper.charsetStack.len == 0 or not wrapper.seekable:
+    DECODER_ERROR_MODE_REPLACEMENT
+  else:
+    DECODER_ERROR_MODE_FATAL
+  wrapper.parser = initHTML5Parser(builder, wrapper.opts)
+  wrapper.decoder = newDecoderStream(wrapper.inputStream, builder.charset,
+    errormode = em)
+  wrapper.decoder.setInhibitCheckEnd(true)
+  wrapper.encoder = newEncoderStream(wrapper.decoder, CHARSET_UTF_8,
+    errormode = ENCODER_ERROR_MODE_FATAL)
+
+proc newHTML5ParserWrapper*(inputStream: Stream, window: Window, url: URL,
+    factory: CAtomFactory, charsets: seq[Charset] = @[], seekable = true):
+    HTML5ParserWrapper =
   let opts = HTML5ParserOpts[Node, CAtom](
     isIframeSrcdoc: false, #TODO?
     scripting: window != nil and window.settings.scripting
   )
-  let builder = newChaDOMBuilder(url, window, factory)
-  var charsetStack: seq[Charset]
-  for i in countdown(charsets.high, 0):
-    charsetStack.add(charsets[i])
-  var seekable = seekable
-  var inputStream = inputStream
-  if seekable:
-    let scs = inputStream.bomSniff()
-    if scs != CHARSET_UNKNOWN:
-      charsetStack.add(scs)
-      builder.confidence = CONFIDENCE_CERTAIN
-      seekable = false
-  if charsetStack.len == 0:
-    charsetStack.add(DefaultCharset) # UTF-8
-  while true:
-    builder.charset = charsetStack.pop()
-    if seekable:
-      builder.confidence = CONFIDENCE_TENTATIVE # used in the next iteration
-    else:
-      builder.confidence = CONFIDENCE_CERTAIN
-    let em = if charsetStack.len == 0 or not seekable:
-      DECODER_ERROR_MODE_REPLACEMENT
+  let builder = newChaDOMBuilder(url, window, factory, ccTentative)
+  let wrapper = HTML5ParserWrapper(
+    seekable: seekable,
+    builder: builder,
+    opts: opts,
+    inputStream: inputStream
+  )
+  if seekable and (let scs = inputStream.bomSniff(); scs != CHARSET_UNKNOWN):
+    builder.confidence = ccCertain
+    wrapper.charsetStack = @[scs]
+    wrapper.seekable = false
+  elif charsets.len == 0:
+    wrapper.charsetStack = @[DefaultCharset] # UTF-8
+  else:
+    for i in countdown(charsets.high, 0):
+      wrapper.charsetStack.add(charsets[i])
+  wrapper.switchCharset()
+  return wrapper
+
+proc parseBuffer(wrapper: HTML5ParserWrapper, buffer: openArray[char]):
+    ParseResult =
+  let builder = wrapper.builder
+  let document = builder.document
+  var res = wrapper.parser.parseChunk(buffer)
+  # set insertion point for when it's needed
+  var ip = wrapper.parser.getInsertionPoint()
+  while res == PRES_SCRIPT:
+    if builder.poppedScript != nil:
+      #TODO microtask
+      document.writeBuffers.add(DocumentWriteBuffer())
+      builder.poppedScript.prepare()
+    while document.parserBlockingScript != nil:
+      let script = document.parserBlockingScript
+      document.parserBlockingScript = nil
+      #TODO style sheet
+      script.execute()
+      assert document.parserBlockingScript != script
+    builder.poppedScript = nil
+    if document.writeBuffers.len == 0:
+      if ip == buffer.len:
+        # nothing left to re-parse.
+        break
+      # parse rest of input buffer
+      res = wrapper.parser.parseChunk(buffer.toOpenArray(ip, buffer.high))
+      ip += wrapper.parser.getInsertionPoint() # move insertion point
     else:
-      DECODER_ERROR_MODE_FATAL
-    let decoder = newDecoderStream(inputStream, builder.charset, errormode = em)
-    let encoder = newEncoderStream(decoder, CHARSET_UTF_8,
-      errormode = ENCODER_ERROR_MODE_FATAL)
-    var parser = initHTML5Parser(builder, opts)
-    let document = builder.document
-    var buffer: array[4096, char]
-    while true:
-      let n = encoder.readData(addr buffer[0], buffer.len)
-      if n == 0: break
-      var res = parser.parseChunk(buffer.toOpenArray(0, n - 1))
-      # set insertion point for when it's needed
-      var ip = parser.getInsertionPoint()
-      while res == PRES_SCRIPT:
-        if builder.poppedScript != nil:
-          #TODO microtask
-          document.writeBuffers.add(DocumentWriteBuffer())
-          builder.poppedScript.prepare()
-        while document.parserBlockingScript != nil:
-          let script = document.parserBlockingScript
-          document.parserBlockingScript = nil
-          #TODO style sheet
-          script.execute()
-          assert document.parserBlockingScript != script
-        builder.poppedScript = nil
-        if document.writeBuffers.len == 0:
-          if ip == n:
-            # nothing left to re-parse.
-            break
-          # parse rest of input buffer
-          res = parser.parseChunk(buffer.toOpenArray(ip, n - 1))
-          ip += parser.getInsertionPoint() # move insertion point
+      let writeBuffer = document.writeBuffers[^1]
+      let p = writeBuffer.i
+      let H = writeBuffer.data.high
+      res = wrapper.parser.parseChunk(writeBuffer.data.toOpenArray(p, H))
+      case res
+      of PRES_CONTINUE:
+        discard document.writeBuffers.pop()
+        res = PRES_SCRIPT
+      of PRES_SCRIPT:
+        let pp = p + wrapper.parser.getInsertionPoint()
+        if pp == writeBuffer.data.len:
+          discard document.writeBuffers.pop()
         else:
-          let writeBuffer = document.writeBuffers[^1]
-          let p = writeBuffer.i
-          let n = writeBuffer.data.len
-          res = parser.parseChunk(writeBuffer.data.toOpenArray(p, n - 1))
-          case res
-          of PRES_CONTINUE:
-            discard document.writeBuffers.pop()
-            res = PRES_SCRIPT
-          of PRES_SCRIPT:
-            let pp = p + parser.getInsertionPoint()
-            if pp == writeBuffer.data.len:
-              discard document.writeBuffers.pop()
-            else:
-              writeBuffer.i = pp
-          of PRES_STOP:
-            break
-            {.linearScanEnd.}
-      # PRES_STOP is returned when we return SET_ENCODING_STOP from
-      # setEncodingImpl. We immediately stop parsing in this case.
-      if res == PRES_STOP:
+          writeBuffer.i = pp
+      of PRES_STOP:
         break
-    parser.finish()
-    if builder.confidence == CONFIDENCE_CERTAIN and seekable:
-      # A meta tag describing the charset has been found; force use of this
-      # charset.
+        {.linearScanEnd.}
+  return res
+
+proc parseAll*(wrapper: HTML5ParserWrapper) =
+  let builder = wrapper.builder
+  while true:
+    let buffer = wrapper.encoder.readAll()
+    if wrapper.decoder.failed:
+      assert wrapper.seekable
+      # Retry with another charset.
       builder.restart()
-      inputStream.setPosition(0)
-      charsetStack.add(builder.charset)
-      seekable = false
+      wrapper.inputStream.setPosition(0)
+      wrapper.switchCharset()
       continue
-    if decoder.failed and seekable:
-      # Retry with another charset.
+    if buffer.len == 0:
+      break
+    let res = wrapper.parseBuffer(buffer)
+    if res == PRES_STOP:
+      # A meta tag describing the charset has been found; force use of this
+      # charset.
       builder.restart()
-      inputStream.setPosition(0)
+      wrapper.inputStream.setPosition(0)
+      wrapper.charsetStack.add(builder.charset)
+      wrapper.seekable = false
+      wrapper.switchCharset()
       continue
     break
-  builder.finish()
-  return builder.document
+
+proc finish*(wrapper: HTML5ParserWrapper) =
+  wrapper.decoder.setInhibitCheckEnd(false)
+  wrapper.parseAll()
+  wrapper.parser.finish()
+  wrapper.builder.finish()
 
 proc newDOMParser(): DOMParser {.jsctor.} =
   return DOMParser()
@@ -378,8 +403,13 @@ proc parseFromString(ctx: JSContext, parser: DOMParser, str, t: string):
       newURL("about:blank").get
     #TODO this is probably broken in client (or at least sub-optimal)
     let factory = if window != nil: window.factory else: newCAtomFactory()
-    let res = parseHTML(newStringStream(str), Window(nil), url, factory)
-    return ok(res)
+    let builder = newChaDOMBuilder(url, window, factory, ccIrrelevant)
+    var parser = initHTML5Parser(builder, HTML5ParserOpts[Node, CAtom]())
+    let res = parser.parseChunk(str)
+    assert res == PRES_CONTINUE
+    parser.finish()
+    builder.finish()
+    return ok(builder.document)
   of "text/xml", "application/xml", "application/xhtml+xml", "image/svg+xml":
     return err(newInternalError("XML parsing is not supported yet"))
   else:
diff --git a/src/html/dom.nim b/src/html/dom.nim
index 114634e6..cea593cb 100644
--- a/src/html/dom.nim
+++ b/src/html/dom.nim
@@ -268,7 +268,7 @@ type
     value* {.jsget.}: Option[int32]
 
   HTMLStyleElement* = ref object of HTMLElement
-    sheet*: CSSStylesheet
+    sheet: CSSStylesheet
 
   HTMLLinkElement* = ref object of HTMLElement
     sheet*: CSSStylesheet
@@ -2315,6 +2315,11 @@ func form(label: HTMLLabelElement): HTMLFormElement {.jsfget.} =
 proc setRelList(link: HTMLLinkElement, s: string) {.jsfset: "relList".} =
   link.attr("rel", s)
 
+proc setSheet*(link: HTMLLinkElement, sheet: CSSStylesheet) =
+  link.sheet = sheet
+  if link.document != nil:
+    link.document.cachedSheetsInvalid = true
+
 # <form>
 proc setRelList(form: HTMLFormElement, s: string) {.jsfset: "relList".} =
   form.attr("rel", s)
@@ -3276,7 +3281,7 @@ proc fetchClassicScript(element: HTMLScriptElement, url: URL,
     return
   let loader = window.loader.get
   let request = createPotentialCORSRequest(url, RequestDestination.SCRIPT, cors)
-  let response = loader.doRequest(request)
+  let response = loader.doRequest(request, canredir = false)
   if response.res != 0:
     element.onComplete(ScriptResult(t: RESULT_NULL))
     return
uot;lynx", label v2-8-3dev_13' href='/ingrix/lynx-snapshots/commit/build-slang.com?id=2d161b7d97cebd6f38885cf69933291fec6b4381'>2d161b7d ^
57bfc74f ^


e087f6d4




















57bfc74f ^
e087f6d4



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368