diff options
author | bptato <nincsnevem662@gmail.com> | 2022-12-19 23:36:16 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-12-19 23:36:16 +0100 |
commit | 91d18d27e4eeebb5aa685b18b28130f3f1b4f513 (patch) | |
tree | 1bc2e19608b576582bb1edc3f0b18aed0eef477c | |
parent | ea9df035a294bf1cfa715c140d0d22aa018e262e (diff) | |
download | chawan-91d18d27e4eeebb5aa685b18b28130f3f1b4f513.tar.gz |
Add unicode normalization, etc
-rw-r--r-- | src/bindings/libunicode.nim | 23 | ||||
-rw-r--r-- | src/buffer/container.nim | 1 | ||||
-rw-r--r-- | src/display/term.nim | 24 | ||||
-rw-r--r-- | src/html/dom.nim | 4 | ||||
-rw-r--r-- | src/io/lineedit.nim | 82 | ||||
-rw-r--r-- | src/layout/engine.nim | 12 | ||||
-rw-r--r-- | src/render/rendertext.nim | 3 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 91 |
8 files changed, 156 insertions, 84 deletions
diff --git a/src/bindings/libunicode.nim b/src/bindings/libunicode.nim new file mode 100644 index 00000000..d53fa060 --- /dev/null +++ b/src/bindings/libunicode.nim @@ -0,0 +1,23 @@ +type + DynBufReallocFunc = proc(opaque: pointer, p: pointer, size: csize_t): pointer {.cdecl.} + + CharRange* = object + len*: cint # in points, always even + size*: cint + points*: ptr uint32 # points sorted by increasing value + mem_opaque*: pointer + realloc_func*: DynBufReallocFunc + + UnicodeNormalizationEnum* {.size: sizeof(cint).} = enum + UNICODE_NFC, UNICODE_NFD, UNICODE_NKFC, UNICODE_NKFD + +proc cr_init*(cr: ptr CharRange, mem_opaque: pointer, + realloc_func: DynBufReallocFunc) {.importc.} + +proc cr_free*(cr: ptr CharRange) {.importc.} + +proc unicode_normalize*(pdst: ptr ptr uint32, src: ptr uint32, src_len: cint, + n_type: UnicodeNormalizationEnum, opaque: pointer, + realloc_func: DynBufReallocFunc): cint {.importc.} + +proc unicode_general_category*(cr: ptr CharRange, gc_name: cstring): cint {.importc.} diff --git a/src/buffer/container.nim b/src/buffer/container.nim index 66ef2780..f85ed39e 100644 --- a/src/buffer/container.nim +++ b/src/buffer/container.nim @@ -286,6 +286,7 @@ proc requestLines*(container: Container, w = container.lineWindow): auto {.disca container.lineshift = w.a for y in 0 ..< min(res.lines.len, w.len): container.lines[y] = res.lines[y] + container.lines[y].str.mnormalize() if res.numLines != container.numLines: container.setNumLines(res.numLines, true) let cw = container.fromy ..< container.fromy + container.height diff --git a/src/display/term.nim b/src/display/term.nim index 965bb6e1..aca20637 100644 --- a/src/display/term.nim +++ b/src/display/term.nim @@ -3,11 +3,13 @@ import options import os import tables import terminal +import unicode import bindings/termcap import buffer/cell import config/config import io/window +import utils/twtstr import types/color #TODO switch from termcap... @@ -318,7 +320,16 @@ proc windowChange*(term: Terminal, attrs: WindowAttributes) = term.canvas = newFixedGrid(attrs.width, attrs.height) term.cleared = false -func generateFullOutput(term: Terminal, grid: FixedGrid): string = +proc processOutputString(term: Terminal, str: string): string = + if str.validateUtf8() != -1: + return "?" + for r in str.runes(): + if r.isControlChar(): + result &= "^" & getControlLetter(char(r)) + elif r.width() != 0: + result &= r + +proc generateFullOutput(term: Terminal, grid: FixedGrid): string = var format = newFormat() result &= term.cursorGoto(0, 0) result &= term.resetFormat() @@ -331,18 +342,19 @@ func generateFullOutput(term: Terminal, grid: FixedGrid): string = inc w let cell = grid[y * grid.width + x] result &= term.processFormat(format, cell.format) - result &= cell.str + result &= term.processOutputString(cell.str) w += cell.width() if y != grid.height - 1: result &= "\r\n" -func generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): string = +proc generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): string = var format = newFormat() var x = 0 var w = 0 var line = "" var lr = false for i in 0 ..< grid.cells.len: + let cell = grid.cells[i] while w < x: line &= " " inc w @@ -358,9 +370,9 @@ func generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): strin w = 0 line = "" lr = lr or (grid[i] != prev[i]) - line &= term.processFormat(format, grid.cells[i].format) - line &= grid.cells[i].str - w += grid.cells[i].width() + line &= term.processFormat(format, cell.format) + line &= term.processOutputString(cell.str) + w += cell.width() inc x if lr: result &= term.cursorGoto(0, grid.height - 1) diff --git a/src/html/dom.nim b/src/html/dom.nim index ed96173d..dd273b71 100644 --- a/src/html/dom.nim +++ b/src/html/dom.nim @@ -774,14 +774,14 @@ func getElementsByClassName(node: Node, classNames: string): HTMLCollection {.js let isquirks = node.document.mode == QUIRKS if isquirks: for i in 0 .. classes.high: - classes[i].toLowerAsciiInPlace() + classes[i].mtoLowerAscii() return newCollection[HTMLCollection](node, func(node: Node): bool = if node.nodeType == ELEMENT_NODE: if isquirks: var cl = Element(node).classList for i in 0 .. cl.high: - cl[i].toLowerAsciiInPlace() + cl[i].mtoLowerAscii() for class in classes: if class notin cl: return false diff --git a/src/io/lineedit.nim b/src/io/lineedit.nim index 8d18ae25..ee25d9a3 100644 --- a/src/io/lineedit.nim +++ b/src/io/lineedit.nim @@ -39,32 +39,6 @@ type func newLineHistory*(): LineHistory = return LineHistory() -func lwidth(r: Rune): int = - if r.isControlChar(): - return 2 - return r.width() - -func lwidth(s: string): int = - for r in s.runes(): - result += lwidth(r) - -func lwidth(s: seq[Rune]): int = - for r in s: - result += lwidth(r) - -func lwidth(s: seq[Rune], min, max: int): int = - var i = min - var mi = min(max, s.len) - while i < mi: - result += lwidth(s[i]) - inc i - -func lwidth(s: seq[Rune], min: int): int = - var i = min - while i < s.len: - result += lwidth(s[i]) - inc i - const colorFormat = (func(): Format = result = newFormat() result.fgcolor = ColorsANSIFg[4] # blue @@ -97,7 +71,7 @@ template kill0(edit: LineEdit, i: int) = edit.backward0(i) template kill0(edit: LineEdit) = - let w = min(edit.news.lwidth(edit.cursor), edit.displen) + let w = min(edit.news.width(edit.cursor), edit.displen) edit.kill0(w) proc backward0(state: LineEdit, i: int) = @@ -118,41 +92,41 @@ proc generateOutput*(edit: LineEdit): FixedGrid = var x = 0 for r in edit.prompt.runes(): result[x].str &= $r - x += r.lwidth() + x += r.width() if edit.hide: for r in edit.news: - let w = r.lwidth() + let w = r.width() result[x].str = '*'.repeat(w) x += w if x >= result.width: break else: for r in edit.news: result[x].str &= $r - x += r.lwidth() + x += r.width() if x >= result.width: break var s = "" for c in result: s &= c.str proc getCursorX*(edit: LineEdit): int = - return edit.promptw + edit.news.lwidth(edit.shift, edit.cursor) + return edit.promptw + edit.news.width(edit.shift, edit.cursor) proc redraw(state: LineEdit) = if state.shift + state.displen > state.news.len: state.displen = state.news.len - state.shift - var dispw = state.news.lwidth(state.shift, state.shift + state.displen) + var dispw = state.news.width(state.shift, state.shift + state.displen) while dispw > state.maxwidth - 1: - dispw -= state.news[state.shift + state.displen - 1].lwidth() + dispw -= state.news[state.shift + state.displen - 1].width() dec state.displen state.begin0() let os = state.news.substr(state.shift, state.shift + state.displen) if state.hide: - state.printesc('*'.repeat(os.lwidth())) + state.printesc('*'.repeat(os.width())) else: state.printesc(os) - state.space(max(state.maxwidth - state.minlen - os.lwidth(), 0)) + state.space(max(state.maxwidth - state.minlen - os.width(), 0)) state.begin0() - state.forward0(state.news.lwidth(state.shift, state.cursor)) + state.forward0(state.news.width(state.shift, state.cursor)) proc zeroShiftRedraw(state: LineEdit) = state.shift = 0 @@ -162,10 +136,10 @@ proc zeroShiftRedraw(state: LineEdit) = proc fullRedraw(state: LineEdit) = state.displen = state.news.len if state.cursor > state.shift: - var shiftw = state.news.lwidth(state.shift, state.cursor) + var shiftw = state.news.width(state.shift, state.cursor) while shiftw > state.maxwidth - 1: inc state.shift - shiftw -= state.news[state.shift].lwidth() + shiftw -= state.news[state.shift].width() else: state.shift = max(state.cursor - 1, 0) state.redraw() @@ -177,11 +151,11 @@ proc insertCharseq(edit: LineEdit, cs: var seq[Rune]) = if cs.len == 0: return - if edit.cursor >= edit.news.len and edit.news.lwidth(edit.shift, edit.cursor) + cs.lwidth() < edit.maxwidth: + if edit.cursor >= edit.news.len and edit.news.width(edit.shift, edit.cursor) + cs.width() < edit.maxwidth: edit.news &= cs edit.cursor += cs.len if edit.hide: - edit.printesc('*'.repeat(cs.lwidth())) + edit.printesc('*'.repeat(cs.width())) else: edit.printesc(cs) else: @@ -200,7 +174,7 @@ proc submit(edit: LineEdit) {.jsfunc.} = proc backspace(edit: LineEdit) {.jsfunc.} = if edit.cursor > 0: - let w = edit.news[edit.cursor - 1].lwidth() + let w = edit.news[edit.cursor - 1].width() edit.news.delete(edit.cursor - 1..edit.cursor - 1) dec edit.cursor if edit.cursor == edit.news.len and edit.shift == 0: @@ -217,7 +191,7 @@ proc write*(edit: LineEdit, s: string): bool {.jsfunc.} = proc delete(edit: LineEdit) {.jsfunc.} = if edit.cursor >= 0 and edit.cursor < edit.news.len: - let w = edit.news[edit.cursor].lwidth() + let w = edit.news[edit.cursor].width() edit.news.delete(edit.cursor..edit.cursor) if edit.cursor == edit.news.len and edit.shift == 0: edit.kill0(w) @@ -242,17 +216,17 @@ proc backward(edit: LineEdit) {.jsfunc.} = if edit.cursor > 0: dec edit.cursor if edit.cursor > edit.shift or edit.shift == 0: - edit.backward0(edit.news[edit.cursor].lwidth()) + edit.backward0(edit.news[edit.cursor].width()) else: edit.fullRedraw() proc forward(edit: LineEdit) {.jsfunc.} = if edit.cursor < edit.news.len: inc edit.cursor - if edit.news.lwidth(edit.shift, edit.cursor) < edit.maxwidth: + if edit.news.width(edit.shift, edit.cursor) < edit.maxwidth: var n = 1 if edit.news.len > edit.cursor: - n = edit.news[edit.cursor].lwidth() + n = edit.news[edit.cursor].width() edit.forward0(n) else: edit.fullRedraw() @@ -265,20 +239,20 @@ proc prevWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} = break if edit.cursor != oc: if edit.cursor > edit.shift or edit.shift == 0: - edit.backward0(edit.news.lwidth(edit.cursor, oc)) + edit.backward0(edit.news.width(edit.cursor, oc)) else: edit.fullRedraw() proc nextWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} = let oc = edit.cursor - let ow = edit.news.lwidth(edit.shift, edit.cursor) + let ow = edit.news.width(edit.shift, edit.cursor) while edit.cursor < edit.news.len: inc edit.cursor if edit.cursor < edit.news.len: if edit.news[edit.cursor].breaksWord(check): break if edit.cursor != oc: - let dw = edit.news.lwidth(oc, edit.cursor) + let dw = edit.news.width(oc, edit.cursor) if ow + dw < edit.maxwidth: edit.forward0(dw) else: @@ -314,7 +288,7 @@ proc killWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} = proc begin(edit: LineEdit) {.jsfunc.} = if edit.cursor > 0: if edit.shift == 0: - edit.backward0(edit.news.lwidth(0, edit.cursor)) + edit.backward0(edit.news.width(0, edit.cursor)) edit.cursor = 0 else: edit.cursor = 0 @@ -322,8 +296,8 @@ proc begin(edit: LineEdit) {.jsfunc.} = proc `end`(edit: LineEdit) {.jsfunc.} = if edit.cursor < edit.news.len: - if edit.news.lwidth(edit.shift, edit.news.len) < edit.maxwidth: - edit.forward0(edit.news.lwidth(edit.cursor, edit.news.len)) + if edit.news.width(edit.shift, edit.news.len) < edit.maxwidth: + edit.forward0(edit.news.width(edit.cursor, edit.news.len)) edit.cursor = edit.news.len else: edit.cursor = edit.news.len @@ -359,15 +333,15 @@ proc readLine*(prompt: string, termwidth: int, current = "", term: Terminal, hist: LineHistory): LineEdit = result = LineEdit( prompt: prompt, - promptw: prompt.lwidth(), + promptw: prompt.width(), current: current, news: current.toRunes(), - minlen: prompt.lwidth(), + minlen: prompt.width(), disallowed: disallowed, hide: hide, term: term ) - result.cursor = result.news.lwidth() + result.cursor = result.news.width() result.maxwidth = termwidth - result.promptw result.displen = result.cursor result.hist = hist diff --git a/src/layout/engine.nim b/src/layout/engine.nim index fbee9f6f..4b8e4784 100644 --- a/src/layout/engine.nim +++ b/src/layout/engine.nim @@ -258,6 +258,7 @@ proc addAtom(ictx: InlineContext, atom: InlineAtom, maxwidth: int, pcomputed, co proc addWord(state: var InlineState) = if state.word.str != "": var word = state.word + word.str.mnormalize() #TODO this may break on EOL. word.height = state.ictx.cellheight word.baseline = word.height state.ictx.addAtom(word, state.maxwidth, state.computed, state.computed) @@ -273,16 +274,21 @@ proc checkWrap(state: var InlineState, r: Rune) = return let shift = state.ictx.computeShift(state.computed) case state.computed{"word-break"} + of WORD_BREAK_NORMAL: + if r.width() == 2: # break cjk + if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth: + state.addWord() + state.ictx.finishLine(state.computed, state.maxwidth) + state.ictx.whitespacenum = 0 of WORD_BREAK_BREAK_ALL: if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth: state.addWord() - state.ictx.finishLine(state.computed, state.maxwidth, false) + state.ictx.finishLine(state.computed, state.maxwidth) state.ictx.whitespacenum = 0 of WORD_BREAK_KEEP_ALL: if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth: - state.ictx.finishLine(state.computed, state.maxwidth, false) + state.ictx.finishLine(state.computed, state.maxwidth) state.ictx.whitespacenum = 0 - else: discard proc processWhitespace(state: var InlineState, c: char) = state.addWord() diff --git a/src/render/rendertext.nim b/src/render/rendertext.nim index 6d2cf9d0..8c50ea16 100644 --- a/src/render/rendertext.nim +++ b/src/render/rendertext.nim @@ -64,9 +64,6 @@ proc renderStream*(grid: var FlexibleGrid, renderer: var StreamRenderer, len: in renderer.spaces = 0 of '\e': renderer.ansiparser.reset() - elif c in Controls: - add_format - grid[^1].str &= '^' & c.getControlLetter() else: add_format grid[^1].str &= c diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index c4605093..48ea54f9 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -9,6 +9,7 @@ import sequtils import options import punycode +import bindings/libunicode import data/idna when defined(posix): @@ -105,7 +106,7 @@ func toLowerAscii2*(str: string): string = for i in i ..< str.len: result[i] = str[i].tolower() -proc toLowerAsciiInPlace*(str: var string) = +proc mtoLowerAscii*(str: var string) = for i in 0 ..< str.len: str[i] = str[i].tolower() @@ -525,8 +526,50 @@ func clearControls*(s: string): string = if c notin Controls: result &= c +proc passRealloc(opaque: pointer, p: pointer, size: csize_t): pointer {.cdecl.} = + return realloc(p, size) + +proc mnormalize*(rs: var seq[Rune], form = UNICODE_NFC) = {.cast(noSideEffect).}: + if rs.len == 0: return + var outbuf: ptr uint32 + let out_len = unicode_normalize(addr outbuf, + cast[ptr uint32](unsafeAddr rs[0]), + cint(rs.len), form, nil, passRealloc) + if out_len < 0: + raise newException(Defect, "Unicode normalization failed") + if out_len == 0: + return + rs = cast[seq[Rune]](newSeqUninitialized[uint32](out_len)) + copyMem(addr rs[0], outbuf, out_len * sizeof(uint32)) + dealloc(outbuf) + +#TODO maybe a utf8 normalization procedure? +proc mnormalize*(s: var string) = + block do_nothing: + for c in s: + if c notin Ascii: + break do_nothing + return # no need to normalize ascii + var rs = s.toRunes() + rs.mnormalize() + s = $rs + +func normalize*(rs: seq[Rune], form = UNICODE_NFC): seq[Rune] = {.cast(noSideEffect).}: + if rs.len == 0: return + var outbuf: ptr uint32 + let out_len = unicode_normalize(addr outbuf, + cast[ptr uint32](unsafeAddr rs[0]), + cint(rs.len), form, nil, passRealloc) + if out_len < 0: + raise newException(Defect, "Unicode normalization failed") + if out_len == 0: + return + result = cast[seq[Rune]](newSeqUninitialized[uint32](out_len)) + copyMem(addr result[0], outbuf, out_len * sizeof(uint32)) + dealloc(outbuf) + func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing: bool): Option[string] = - var mapped = "" + var mapped: seq[Rune] var i = 0 while i < str.len: var r: Rune @@ -535,34 +578,48 @@ func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitiona case status of IDNA_DISALLOWED: return none(string) #error of IDNA_IGNORED: discard - of IDNA_MAPPED: mapped &= getIdnaMapped(r) + of IDNA_MAPPED: mapped &= getIdnaMapped(r).toRunes() of IDNA_DEVIATION: - if transitionalprocessing: mapped &= getDeviationMapped(r) - else: mapped &= r + if transitionalprocessing: + mapped &= getDeviationMapped(r).toRunes() + else: + mapped &= r of IDNA_VALID: mapped &= r - - #TODO normalize + if mapped.len == 0: return + mapped.mnormalize() + var cr: CharRange + {.cast(noSideEffect).}: + cr_init(addr cr, nil, passRealloc) + assert unicode_general_category(addr cr, "Mark") == 0 var labels: seq[string] - for label in str.split('.'): + for label in ($mapped).split('.'): var s = label if label.startsWith("xn--"): try: s = punycode.decode(label.substr("xn--".len)) except PunyError: return none(string) #error - #TODO check normalization + let x0 = s.toRunes() + block: + let x1 = normalize(x0) + if x0 == x1: + return none(string) #error if checkhyphens: if s.len >= 4 and s[2] == '-' and s[3] == '-': return none(string) #error if s.len > 0 and s[0] == '-' and s[^1] == '-': return none(string) #error - var i = 0 - while i < s.len: - if s[i] == '.': + if x0.len > 0: + let r = x0[0] + for i in 0 ..< cr.len div 2: + #TODO bisearch instead + var a = cast[ptr uint32](cast[int](cr.points) + i * sizeof(uint32) * 2)[] + var b = cast[ptr uint32](cast[int](cr.points) + i * sizeof(uint32) * 2 + 1)[] + if cast[uint32](r) in a .. b: + return none(string) #error + for r in x0: + if r == Rune('.'): return none(string) #error - var r: Rune - fastRuneAt(str, i, r) - #TODO check general category mark let status = getIdnaTableStatus(r) case status of IDNA_DISALLOWED, IDNA_IGNORED, IDNA_MAPPED: @@ -574,6 +631,7 @@ func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitiona #TODO check joiners #TODO check bidi labels.add(s) + cr_free(addr cr) return labels.join('.').some func unicodeToAscii*(s: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing, verifydnslength: bool): Option[string] = @@ -819,10 +877,11 @@ func is_dwidth_cjk(r: Rune): bool = # compute lookup table on startup var width_table*: array[0..0x10FFFF, byte] +# Note: control chars return a width of 2, as we display them as ^{letter}. func makewidthtable*(cjk: bool): array[0..0x10FFFF, byte] {.noInit.} = for r in low(char)..high(char): if r in Controls: - result[int(r)] = 0 + result[int(r)] = 2 else: result[int(r)] = 1 |