diff options
author | bptato <nincsnevem662@gmail.com> | 2024-06-13 23:04:21 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-06-13 23:24:53 +0200 |
commit | d90e456b80ee11a84eb2a29ce01794fa662eb7b9 (patch) | |
tree | 30d358ef3a5c50c6a0272a25f6aff9d150480c4a | |
parent | 17bf0e843084712d9a914868ec44896f48d9d13a (diff) | |
download | chawan-d90e456b80ee11a84eb2a29ce01794fa662eb7b9.tar.gz |
Update Chame, Chagashi
m--------- | lib/chagashi | 0 | ||||
m--------- | lib/chame | 0 | ||||
-rw-r--r-- | src/html/dom.nim | 7 | ||||
-rw-r--r-- | src/js/encoding.nim | 65 | ||||
-rw-r--r-- | src/loader/response.nim | 6 | ||||
-rw-r--r-- | src/local/lineedit.nim | 31 | ||||
-rw-r--r-- | src/local/term.nim | 2 | ||||
-rw-r--r-- | src/server/buffer.nim | 80 | ||||
-rw-r--r-- | src/version.nim | 4 |
9 files changed, 46 insertions, 149 deletions
diff --git a/lib/chagashi b/lib/chagashi -Subproject b2eaf326b280e12e36bc3e9c80bc247458cf865 +Subproject bc61299cf73636214cbc3b59059b8606edac959 diff --git a/lib/chame b/lib/chame -Subproject 0abadff0147d355aa8ddc15ff8a592e9df900e1 +Subproject 5836c2f412bd4323f45c664632836722a057e0c diff --git a/src/html/dom.nim b/src/html/dom.nim index 29a521c6..0a11791f 100644 --- a/src/html/dom.nim +++ b/src/html/dom.nim @@ -46,7 +46,6 @@ import utils/twtstr import chagashi/charset import chagashi/decoder -import chagashi/validator import chame/tags @@ -3589,10 +3588,8 @@ proc fetchClassicScript(element: HTMLScriptElement; url: URL; element.onComplete(ScriptResult(t: RESULT_NULL)) return let s = response.body.recvAll() - let source = if cs in {CHARSET_UNKNOWN, CHARSET_UTF_8}: - s.toValidUTF8() - else: - newTextDecoder(cs).decodeAll(s) + let cs = if cs == CHARSET_UNKNOWN: CHARSET_UTF_8 else: cs + let source = s.decodeAll(cs) let script = window.jsctx.createClassicScript(source, url, options, false) element.onComplete(ScriptResult(t: RESULT_SCRIPT, script: script)) diff --git a/src/js/encoding.nim b/src/js/encoding.nim index ccadce16..6c32ee3c 100644 --- a/src/js/encoding.nim +++ b/src/js/encoding.nim @@ -1,8 +1,6 @@ import chagashi/charset import chagashi/decoder import chagashi/decodercore -import chagashi/validator -import chagashi/validatorcore import monoucha/javascript import monoucha/jserror import monoucha/jstypes @@ -19,8 +17,6 @@ type doNotFlush: bool bomSeen: bool td: TextDecoder - tv: ref TextValidatorUTF8 - validateBuf: seq[uint8] jsDestructor(JSTextDecoder) jsDestructor(JSTextEncoder) @@ -37,8 +33,7 @@ func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()): return ok(JSTextDecoder( ignoreBOM: options.ignoreBOM, fatal: options.fatal, - td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil, - tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil, + td: newTextDecoder(encoding), encoding: encoding )) @@ -82,64 +77,31 @@ proc decode0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView; len: 0, cap: BufferSize ) + let td = this.td var i = 0 let H = int(input.abuf.len) - 1 template handle_error = if this.fatal: return errTypeError("Failed to decode string") oq.write("\uFFFD") - i = this.td.i + i = td.i while true: - case this.td.decode(input.abuf.p.toOpenArray(i, H), + case td.decode(input.abuf.p.toOpenArray(i, H), oq.p.toOpenArray(0, oq.cap - 1), oq.len) of tdrDone: if not stream: - case this.td.finish() + case td.finish() of tdfrDone: discard of tdfrError: handle_error break + of tdrReadInput: + oq.write(input.abuf.p.toOpenArray(i + td.pi, i + td.ri)) of tdrError: handle_error of tdrReqOutput: oq.grow() return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len))) -proc validate0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView; - stream: bool): JSResult[JSValue] = - # assume input is valid; do not allocate yet - var oq = Growbuf(p: nil, len: 0, cap: 0) - var i = 0 - let H = int(input.abuf.len) - 1 - var n = 0 - template handle_error = - if this.fatal: - return errTypeError("Failed to decode string") - # write from previous error (or beginning) to the last valid char - oq.write(input.abuf.p.toOpenArray(i, n)) - oq.write("\uFFFD") - this.validateBuf.setLen(0) - i = this.tv.i - while true: - case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n) - of tvrDone: - break - of tvrError: - handle_error - if not stream: - case this.tv[].finish() - of tvrDone: discard - of tvrError: handle_error - if this.validateBuf.len > 0 and n > -1: - oq.write(this.validateBuf) - oq.write(input.abuf.p.toOpenArray(i, n)) - this.validateBuf.setLen(0) - this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high)) - if oq.len > 0: - assert oq.p != nil - return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len))) - assert oq.p == nil - return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1))) - type TextDecodeOptions = object of JSDict stream: bool @@ -148,21 +110,12 @@ proc decode(ctx: JSContext; this: JSTextDecoder; input = none(JSArrayBufferView); options = TextDecodeOptions()): JSResult[JSValue] {.jsfunc.} = if not this.doNotFlush: - if this.td != nil: - this.td = newTextDecoder(this.encoding) - else: - assert this.tv != nil - this.tv = (ref TextValidatorUTF8)() + this.td = newTextDecoder(this.encoding) this.bomSeen = false if this.doNotFlush != options.stream: this.doNotFlush = options.stream if input.isSome: - if this.td != nil: - return this.decode0(ctx, input.get, options.stream) - else: - assert this.encoding == CHARSET_UTF_8 - # just validate - return this.validate0(ctx, input.get, options.stream) + return this.decode0(ctx, input.get, options.stream) return ok(JS_NewString(ctx, "")) func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} = diff --git a/src/loader/response.nim b/src/loader/response.nim index ca300957..8ea17e64 100644 --- a/src/loader/response.nim +++ b/src/loader/response.nim @@ -3,7 +3,6 @@ import std/tables import chagashi/charset import chagashi/decoder -import chagashi/validator import io/promise import io/socketstream import loader/headers @@ -120,10 +119,7 @@ proc text*(response: Response): Promise[JSResult[string]] {.jsfunc.} = let charset = response.getCharset(CHARSET_UTF_8) #TODO this is inefficient # maybe add a JS type that turns a seq[char] into JS strings - if charset == CHARSET_UTF_8: - ok(s.toValidUTF8()) - else: - ok(newTextDecoder(charset).decodeAll(s)) + ok(s.decodeAll(charset)) ) proc blob*(response: Response): Promise[JSResult[Blob]] {.jsfunc.} = diff --git a/src/local/lineedit.nim b/src/local/lineedit.nim index 22fd3988..ba02e2ae 100644 --- a/src/local/lineedit.nim +++ b/src/local/lineedit.nim @@ -12,7 +12,6 @@ import utils/twtstr import utils/wordbreak import chagashi/charset -import chagashi/validator import chagashi/decoder type @@ -153,7 +152,7 @@ proc backspace(edit: LineEdit) {.jsfunc.} = edit.cursori -= len edit.cursorx -= r.width() edit.invalid = true - + proc write*(edit: LineEdit; s: string; cs: Charset): bool = if cs == CHARSET_UTF_8: if s.validateUTF8Surr() != -1: @@ -169,7 +168,10 @@ proc write*(edit: LineEdit; s: string; cs: Charset): bool = return true proc write(edit: LineEdit; s: string): bool {.jsfunc.} = - edit.write(s, CHARSET_UTF_8) + if s.validateUTF8Surr() != -1: + return false + edit.insertCharseq(s) + return true proc delete(edit: LineEdit) {.jsfunc.} = if edit.cursori < edit.news.len: @@ -311,22 +313,23 @@ proc nextHist(edit: LineEdit) {.jsfunc.} = proc windowChange*(edit: LineEdit; attrs: WindowAttributes) = edit.maxwidth = attrs.width - edit.promptw - 1 -proc readLine*(prompt, current: string; termwidth: int; - disallowed: set[char]; hide: bool; hist: LineHistory): LineEdit = - result = LineEdit( +proc readLine*(prompt, current: string; termwidth: int; disallowed: set[char]; + hide: bool; hist: LineHistory): LineEdit = + let promptw = prompt.width() + return LineEdit( prompt: prompt, - promptw: prompt.width(), + promptw: promptw, news: current, disallowed: disallowed, hide: hide, - invalid: true + invalid: true, + cursori: current.len, + cursorx: current.notwidth(), + # - 1, so that the cursor always has place + maxwidth: termwidth - promptw - 1, + hist: hist, + histindex: hist.lines.len ) - result.cursori = result.news.len - result.cursorx = result.news.notwidth() - # - 1, so that the cursor always has place - result.maxwidth = termwidth - result.promptw - 1 - result.hist = hist - result.histindex = result.hist.lines.len proc addLineEditModule*(ctx: JSContext) = ctx.registerType(LineEdit) diff --git a/src/local/term.nim b/src/local/term.nim index 38368515..7dd6d951 100644 --- a/src/local/term.nim +++ b/src/local/term.nim @@ -19,8 +19,8 @@ import utils/strwidth import utils/twtstr import chagashi/charset +import chagashi/decoder import chagashi/encoder -import chagashi/validator #TODO switch away from termcap... diff --git a/src/server/buffer.nim b/src/server/buffer.nim index 8b31fb64..e201b281 100644 --- a/src/server/buffer.nim +++ b/src/server/buffer.nim @@ -54,10 +54,9 @@ import types/winattrs import utils/strwidth import utils/twtstr -from chagashi/decoder import newTextDecoder import chagashi/charset +import chagashi/decoder import chagashi/decodercore -import chagashi/validatorcore import chame/tags @@ -116,9 +115,7 @@ type htmlParser: HTML5ParserWrapper bgcolor: CellColor needsBOMSniff: bool - decoder: TextDecoder - validator: ref TextValidatorUTF8 - validateBuf: seq[char] + ctx: TextDecoderContext charsetStack: seq[Charset] charset: Charset cacheId: int @@ -751,13 +748,10 @@ func canSwitch(buffer: Buffer): bool {.inline.} = return buffer.htmlParser.builder.confidence == ccTentative and buffer.charsetStack.len > 0 +const BufferSize = 16384 + proc initDecoder(buffer: Buffer) = - if buffer.charset != CHARSET_UTF_8: - buffer.validator = nil - buffer.decoder = newTextDecoder(buffer.charset) - else: - buffer.decoder = nil - buffer.validator = (ref TextValidatorUTF8)() + buffer.ctx = initTextDecoderContext(buffer.charset, demFatal, BufferSize) proc switchCharset(buffer: Buffer) = buffer.charset = buffer.charsetStack.pop() @@ -766,57 +760,14 @@ proc switchCharset(buffer: Buffer) = buffer.document = buffer.htmlParser.builder.document buffer.prevStyled = nil -const BufferSize = 16384 - proc decodeData(buffer: Buffer; iq: openArray[uint8]): bool = - var oq {.noinit.}: array[BufferSize, char] - var n = 0 - while true: - case buffer.decoder.decode(iq, oq.toOpenArrayByte(0, oq.high), n) - of tdrDone: - if not buffer.processData0(oq.toOpenArray(0, n - 1)): - buffer.switchCharset() - return false - break - of tdrReqOutput: - # flush output buffer - if not buffer.processData0(oq.toOpenArray(0, n - 1)): - buffer.switchCharset() - return false - n = 0 - of tdrError: - if buffer.canSwitch: - buffer.switchCharset() - return false - doAssert buffer.processData0("\uFFFD") - true - -proc validateData(buffer: Buffer; iq: openArray[char]): bool = - var pi = 0 - var n = 0 - while true: - case buffer.validator[].validate(iq.toOpenArrayByte(0, iq.high), n) - of tvrDone: - if n == -1: - return true - if buffer.validateBuf.len > 0: - doAssert buffer.processData0(buffer.validateBuf) - buffer.validateBuf.setLen(0) - if not buffer.processData0(iq.toOpenArray(pi, n)): - buffer.switchCharset() - return false - buffer.validateBuf.add(iq.toOpenArray(n + 1, iq.high)) - break - of tvrError: - buffer.validateBuf.setLen(0) - if buffer.canSwitch: - buffer.switchCharset() - return false - if n >= pi: - doAssert buffer.processData0(iq.toOpenArray(pi, n)) - doAssert buffer.processData0("\uFFFD") - pi = buffer.validator.i - true + if not buffer.canSwitch(): + buffer.ctx.errorMode = demReplacement + for chunk in buffer.ctx.decode(iq, finish = false): + if not buffer.processData0(chunk.toOpenArray()): + buffer.switchCharset() + return false + return not buffer.ctx.failed proc bomSniff(buffer: Buffer; iq: openArray[char]): int = if iq[0] == '\xFE' and iq[1] == '\xFF': @@ -839,9 +790,7 @@ proc processData(buffer: Buffer; iq: openArray[char]): bool = if iq.len >= 3: # ehm... TODO start += buffer.bomSniff(iq) buffer.needsBOMSniff = false - if buffer.decoder != nil: - return buffer.decodeData(iq.toOpenArrayByte(start, iq.high)) - return buffer.validateData(iq.toOpenArray(start, iq.high)) + return buffer.decodeData(iq.toOpenArrayByte(start, iq.high)) proc windowChange*(buffer: Buffer; attrs: WindowAttributes) {.proxy.} = buffer.attrs = attrs @@ -1162,8 +1111,7 @@ proc finishLoad(buffer: Buffer): EmptyPromise = p.resolve() return p buffer.state = bsLoadingResources - if buffer.decoder != nil and buffer.decoder.finish() == tdfrError or - buffer.validator != nil and buffer.validator[].finish() == tvrError: + if buffer.ctx.td != nil and buffer.ctx.td.finish() == tdfrError: doAssert buffer.processData0("\uFFFD") buffer.htmlParser.finish() buffer.document.readyState = rsInteractive diff --git a/src/version.nim b/src/version.nim index c1c90c54..ef58eb72 100644 --- a/src/version.nim +++ b/src/version.nim @@ -27,6 +27,6 @@ tryImport chame/version, "chame" tryImport monoucha/version, "monoucha" static: - checkVersion("chagashi", 0, 4, 2) - checkVersion("chame", 0, 14, 5) + checkVersion("chagashi", 0, 5, 1) + checkVersion("chame", 1, 0, 0) checkVersion("monoucha", 0, 1, 1) |