about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/bindings/libunicode.nim 23
-rw-r--r-- src/buffer/container.nim 1
-rw-r--r-- src/display/term.nim 24
-rw-r--r-- src/html/dom.nim 4
-rw-r--r-- src/io/lineedit.nim 82
-rw-r--r-- src/layout/engine.nim 12
-rw-r--r-- src/render/rendertext.nim 3
-rw-r--r-- src/utils/twtstr.nim 91
8 files changed, 156 insertions, 84 deletions
diff --git a/src/bindings/libunicode.nim b/src/bindings/libunicode.nim
new file mode 100644
index 00000000..d53fa060
--- /dev/null
+++ b/src/bindings/libunicode.nim
@@ -0,0 +1,23 @@
+type # declarations shared by the importc procs in this module
+  DynBufReallocFunc = proc(opaque: pointer, p: pointer, size: csize_t): pointer {.cdecl.} # cdecl allocator callback handed to the C side
+
+  CharRange* = object
+    len*: cint # in points, always even
+    size*: cint # NOTE(review): presumably the allocated capacity of `points` -- confirm against the C header
+    points*: ptr uint32 # points sorted by increasing value
+    mem_opaque*: pointer # NOTE(review): presumably the `mem_opaque` given to cr_init -- confirm
+    realloc_func*: DynBufReallocFunc # NOTE(review): presumably the `realloc_func` given to cr_init -- confirm
+
+  UnicodeNormalizationEnum* {.size: sizeof(cint).} = enum # represented with the size of a cint
+    UNICODE_NFC, UNICODE_NFD, UNICODE_NKFC, UNICODE_NKFD # NOTE(review): "NKFC"/"NKFD" look like transpositions of NFKC/NFKD -- confirm
+
+proc cr_init*(cr: ptr CharRange, mem_opaque: pointer, # imported C symbol; no Nim body
+              realloc_func: DynBufReallocFunc) {.importc.}
+
+proc cr_free*(cr: ptr CharRange) {.importc.} # imported C symbol; no Nim body
+
+proc unicode_normalize*(pdst: ptr ptr uint32, src: ptr uint32, src_len: cint, # callers in utils/twtstr read the result buffer through `pdst`
+                        n_type: UnicodeNormalizationEnum, opaque: pointer,
+                        realloc_func: DynBufReallocFunc): cint {.importc.} # callers treat the return as the output length, negative as failure
+
+proc unicode_general_category*(cr: ptr CharRange, gc_name: cstring): cint {.importc.} # processIdna expects 0 from this call
diff --git a/src/buffer/container.nim b/src/buffer/container.nim
index 66ef2780..f85ed39e 100644
--- a/src/buffer/container.nim
+++ b/src/buffer/container.nim
@@ -286,6 +286,7 @@ proc requestLines*(container: Container, w = container.lineWindow): auto {.disca
     container.lineshift = w.a
     for y in 0 ..< min(res.lines.len, w.len):
       container.lines[y] = res.lines[y]
+      container.lines[y].str.mnormalize()
     if res.numLines != container.numLines:
       container.setNumLines(res.numLines, true)
     let cw = container.fromy ..< container.fromy + container.height
diff --git a/src/display/term.nim b/src/display/term.nim
index 965bb6e1..aca20637 100644
--- a/src/display/term.nim
+++ b/src/display/term.nim
@@ -3,11 +3,13 @@ import options
 import os
 import tables
 import terminal
+import unicode
 
 import bindings/termcap
 import buffer/cell
 import config/config
 import io/window
+import utils/twtstr
 import types/color
 
 #TODO switch from termcap...
@@ -318,7 +320,16 @@ proc windowChange*(term: Terminal, attrs: WindowAttributes) =
   term.canvas = newFixedGrid(attrs.width, attrs.height)
   term.cleared = false
 
-func generateFullOutput(term: Terminal, grid: FixedGrid): string =
+proc processOutputString(term: Terminal, str: string): string =
+  if str.validateUtf8() != -1: # not valid UTF-8: emit a lone "?" instead
+    return "?"
+  for rune in runes(str):
+    if not rune.isControlChar():
+      if rune.width() != 0: # runes reporting width 0 are left out
+        result &= rune
+    else: result &= "^" & getControlLetter(char(rune)) # caret notation
+
+proc generateFullOutput(term: Terminal, grid: FixedGrid): string =
   var format = newFormat()
   result &= term.cursorGoto(0, 0)
   result &= term.resetFormat()
@@ -331,18 +342,19 @@ func generateFullOutput(term: Terminal, grid: FixedGrid): string =
         inc w
       let cell = grid[y * grid.width + x]
       result &= term.processFormat(format, cell.format)
-      result &= cell.str
+      result &= term.processOutputString(cell.str)
       w += cell.width()
     if y != grid.height - 1:
       result &= "\r\n"
 
-func generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): string =
+proc generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): string =
   var format = newFormat()
   var x = 0
   var w = 0
   var line = ""
   var lr = false
   for i in 0 ..< grid.cells.len:
+    let cell = grid.cells[i]
     while w < x:
       line &= " "
       inc w
@@ -358,9 +370,9 @@ func generateSwapOutput(term: Terminal, grid: FixedGrid, prev: FixedGrid): strin
       w = 0
       line = ""
     lr = lr or (grid[i] != prev[i])
-    line &= term.processFormat(format, grid.cells[i].format)
-    line &= grid.cells[i].str
-    w += grid.cells[i].width()
+    line &= term.processFormat(format, cell.format)
+    line &= term.processOutputString(cell.str)
+    w += cell.width()
     inc x
   if lr:
     result &= term.cursorGoto(0, grid.height - 1)
diff --git a/src/html/dom.nim b/src/html/dom.nim
index ed96173d..dd273b71 100644
--- a/src/html/dom.nim
+++ b/src/html/dom.nim
@@ -774,14 +774,14 @@ func getElementsByClassName(node: Node, classNames: string): HTMLCollection {.js
   let isquirks = node.document.mode == QUIRKS
   if isquirks:
     for i in 0 .. classes.high:
-      classes[i].toLowerAsciiInPlace()
+      classes[i].mtoLowerAscii()
   return newCollection[HTMLCollection](node,
     func(node: Node): bool =
       if node.nodeType == ELEMENT_NODE:
         if isquirks:
           var cl = Element(node).classList
           for i in 0 .. cl.high:
-            cl[i].toLowerAsciiInPlace()
+            cl[i].mtoLowerAscii()
           for class in classes:
             if class notin cl:
               return false
diff --git a/src/io/lineedit.nim b/src/io/lineedit.nim
index 8d18ae25..ee25d9a3 100644
--- a/src/io/lineedit.nim
+++ b/src/io/lineedit.nim
@@ -39,32 +39,6 @@ type
 func newLineHistory*(): LineHistory =
   return LineHistory()
 
-func lwidth(r: Rune): int =
-  if r.isControlChar():
-    return 2
-  return r.width()
-
-func lwidth(s: string): int =
-  for r in s.runes():
-    result += lwidth(r)
-
-func lwidth(s: seq[Rune]): int =
-  for r in s:
-    result += lwidth(r)
-
-func lwidth(s: seq[Rune], min, max: int): int =
-  var i = min
-  var mi = min(max, s.len)
-  while i < mi:
-    result += lwidth(s[i])
-    inc i
-
-func lwidth(s: seq[Rune], min: int): int =
-  var i = min
-  while i < s.len:
-    result += lwidth(s[i])
-    inc i
-
 const colorFormat = (func(): Format =
   result = newFormat()
   result.fgcolor = ColorsANSIFg[4] # blue
@@ -97,7 +71,7 @@ template kill0(edit: LineEdit, i: int) =
   edit.backward0(i)
 
 template kill0(edit: LineEdit) =
-  let w = min(edit.news.lwidth(edit.cursor), edit.displen)
+  let w = min(edit.news.width(edit.cursor), edit.displen)
   edit.kill0(w)
 
 proc backward0(state: LineEdit, i: int) =
@@ -118,41 +92,41 @@ proc generateOutput*(edit: LineEdit): FixedGrid =
   var x = 0
   for r in edit.prompt.runes():
     result[x].str &= $r
-    x += r.lwidth()
+    x += r.width()
   if edit.hide:
     for r in edit.news:
-      let w = r.lwidth()
+      let w = r.width()
       result[x].str = '*'.repeat(w)
       x += w
       if x >= result.width: break
   else:
     for r in edit.news:
       result[x].str &= $r
-      x += r.lwidth()
+      x += r.width()
       if x >= result.width: break
   var s = ""
   for c in result:
     s &= c.str
 
 proc getCursorX*(edit: LineEdit): int =
-  return edit.promptw + edit.news.lwidth(edit.shift, edit.cursor)
+  return edit.promptw + edit.news.width(edit.shift, edit.cursor)
 
 proc redraw(state: LineEdit) =
   if state.shift + state.displen > state.news.len:
     state.displen = state.news.len - state.shift
-  var dispw = state.news.lwidth(state.shift, state.shift + state.displen)
+  var dispw = state.news.width(state.shift, state.shift + state.displen)
   while dispw > state.maxwidth - 1:
-    dispw -= state.news[state.shift + state.displen - 1].lwidth()
+    dispw -= state.news[state.shift + state.displen - 1].width()
     dec state.displen
   state.begin0()
   let os = state.news.substr(state.shift, state.shift + state.displen)
   if state.hide:
-    state.printesc('*'.repeat(os.lwidth()))
+    state.printesc('*'.repeat(os.width()))
   else:
     state.printesc(os)
-  state.space(max(state.maxwidth - state.minlen - os.lwidth(), 0))
+  state.space(max(state.maxwidth - state.minlen - os.width(), 0))
   state.begin0()
-  state.forward0(state.news.lwidth(state.shift, state.cursor))
+  state.forward0(state.news.width(state.shift, state.cursor))
 
 proc zeroShiftRedraw(state: LineEdit) =
   state.shift = 0
@@ -162,10 +136,10 @@ proc zeroShiftRedraw(state: LineEdit) =
 proc fullRedraw(state: LineEdit) =
   state.displen = state.news.len
   if state.cursor > state.shift:
-    var shiftw = state.news.lwidth(state.shift, state.cursor)
+    var shiftw = state.news.width(state.shift, state.cursor)
     while shiftw > state.maxwidth - 1:
       inc state.shift
-      shiftw -= state.news[state.shift].lwidth()
+      shiftw -= state.news[state.shift].width()
   else:
     state.shift = max(state.cursor - 1, 0)
   state.redraw()
@@ -177,11 +151,11 @@ proc insertCharseq(edit: LineEdit, cs: var seq[Rune]) =
   if cs.len == 0:
     return
 
-  if edit.cursor >= edit.news.len and edit.news.lwidth(edit.shift, edit.cursor) + cs.lwidth() < edit.maxwidth:
+  if edit.cursor >= edit.news.len and edit.news.width(edit.shift, edit.cursor) + cs.width() < edit.maxwidth:
     edit.news &= cs
     edit.cursor += cs.len
     if edit.hide:
-      edit.printesc('*'.repeat(cs.lwidth()))
+      edit.printesc('*'.repeat(cs.width()))
     else:
       edit.printesc(cs)
   else:
@@ -200,7 +174,7 @@ proc submit(edit: LineEdit) {.jsfunc.} =
 
 proc backspace(edit: LineEdit) {.jsfunc.} =
   if edit.cursor > 0:
-    let w = edit.news[edit.cursor - 1].lwidth()
+    let w = edit.news[edit.cursor - 1].width()
     edit.news.delete(edit.cursor - 1..edit.cursor - 1)
     dec edit.cursor
     if edit.cursor == edit.news.len and edit.shift == 0:
@@ -217,7 +191,7 @@ proc write*(edit: LineEdit, s: string): bool {.jsfunc.} =
 
 proc delete(edit: LineEdit) {.jsfunc.} =
   if edit.cursor >= 0 and edit.cursor < edit.news.len:
-    let w = edit.news[edit.cursor].lwidth()
+    let w = edit.news[edit.cursor].width()
     edit.news.delete(edit.cursor..edit.cursor)
     if edit.cursor == edit.news.len and edit.shift == 0:
       edit.kill0(w)
@@ -242,17 +216,17 @@ proc backward(edit: LineEdit) {.jsfunc.} =
   if edit.cursor > 0:
     dec edit.cursor
     if edit.cursor > edit.shift or edit.shift == 0:
-      edit.backward0(edit.news[edit.cursor].lwidth())
+      edit.backward0(edit.news[edit.cursor].width())
     else:
       edit.fullRedraw()
 
 proc forward(edit: LineEdit) {.jsfunc.} =
   if edit.cursor < edit.news.len:
     inc edit.cursor
-    if edit.news.lwidth(edit.shift, edit.cursor) < edit.maxwidth:
+    if edit.news.width(edit.shift, edit.cursor) < edit.maxwidth:
       var n = 1
       if edit.news.len > edit.cursor:
-        n = edit.news[edit.cursor].lwidth()
+        n = edit.news[edit.cursor].width()
       edit.forward0(n)
     else:
       edit.fullRedraw()
@@ -265,20 +239,20 @@ proc prevWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} =
       break
   if edit.cursor != oc:
     if edit.cursor > edit.shift or edit.shift == 0:
-      edit.backward0(edit.news.lwidth(edit.cursor, oc))
+      edit.backward0(edit.news.width(edit.cursor, oc))
     else:
       edit.fullRedraw()
 
 proc nextWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} =
   let oc = edit.cursor
-  let ow = edit.news.lwidth(edit.shift, edit.cursor)
+  let ow = edit.news.width(edit.shift, edit.cursor)
   while edit.cursor < edit.news.len:
     inc edit.cursor
     if edit.cursor < edit.news.len:
       if edit.news[edit.cursor].breaksWord(check):
         break
   if edit.cursor != oc:
-    let dw = edit.news.lwidth(oc, edit.cursor)
+    let dw = edit.news.width(oc, edit.cursor)
     if ow + dw < edit.maxwidth:
       edit.forward0(dw)
     else:
@@ -314,7 +288,7 @@ proc killWord(edit: LineEdit, check = none(BoundaryFunction)) {.jsfunc.} =
 proc begin(edit: LineEdit) {.jsfunc.} =
   if edit.cursor > 0:
     if edit.shift == 0:
-      edit.backward0(edit.news.lwidth(0, edit.cursor))
+      edit.backward0(edit.news.width(0, edit.cursor))
       edit.cursor = 0
     else:
       edit.cursor = 0
@@ -322,8 +296,8 @@ proc begin(edit: LineEdit) {.jsfunc.} =
 
 proc `end`(edit: LineEdit) {.jsfunc.} =
   if edit.cursor < edit.news.len:
-    if edit.news.lwidth(edit.shift, edit.news.len) < edit.maxwidth:
-      edit.forward0(edit.news.lwidth(edit.cursor, edit.news.len))
+    if edit.news.width(edit.shift, edit.news.len) < edit.maxwidth:
+      edit.forward0(edit.news.width(edit.cursor, edit.news.len))
       edit.cursor = edit.news.len
     else:
       edit.cursor = edit.news.len
@@ -359,15 +333,15 @@ proc readLine*(prompt: string, termwidth: int, current = "",
                term: Terminal, hist: LineHistory): LineEdit =
   result = LineEdit(
     prompt: prompt,
-    promptw: prompt.lwidth(),
+    promptw: prompt.width(),
     current: current,
     news: current.toRunes(),
-    minlen: prompt.lwidth(),
+    minlen: prompt.width(),
     disallowed: disallowed,
     hide: hide,
     term: term
   )
-  result.cursor = result.news.lwidth()
+  result.cursor = result.news.width()
   result.maxwidth = termwidth - result.promptw
   result.displen = result.cursor
   result.hist = hist
diff --git a/src/layout/engine.nim b/src/layout/engine.nim
index fbee9f6f..4b8e4784 100644
--- a/src/layout/engine.nim
+++ b/src/layout/engine.nim
@@ -258,6 +258,7 @@ proc addAtom(ictx: InlineContext, atom: InlineAtom, maxwidth: int, pcomputed, co
 proc addWord(state: var InlineState) =
   if state.word.str != "":
     var word = state.word
+    word.str.mnormalize() #TODO this may break on EOL.
     word.height = state.ictx.cellheight
     word.baseline = word.height
     state.ictx.addAtom(word, state.maxwidth, state.computed, state.computed)
@@ -273,16 +274,21 @@ proc checkWrap(state: var InlineState, r: Rune) =
     return
   let shift = state.ictx.computeShift(state.computed)
   case state.computed{"word-break"}
+  of WORD_BREAK_NORMAL:
+    if r.width() == 2: # break cjk
+      if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth:
+        state.addWord()
+        state.ictx.finishLine(state.computed, state.maxwidth)
+        state.ictx.whitespacenum = 0
   of WORD_BREAK_BREAK_ALL:
     if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth:
       state.addWord()
-      state.ictx.finishLine(state.computed, state.maxwidth, false)
+      state.ictx.finishLine(state.computed, state.maxwidth)
       state.ictx.whitespacenum = 0
   of WORD_BREAK_KEEP_ALL:
     if state.ictx.currentLine.width + state.word.width + shift + r.width() * state.ictx.cellwidth > state.maxwidth:
-      state.ictx.finishLine(state.computed, state.maxwidth, false)
+      state.ictx.finishLine(state.computed, state.maxwidth)
       state.ictx.whitespacenum = 0
-  else: discard
 
 proc processWhitespace(state: var InlineState, c: char) =
   state.addWord()
diff --git a/src/render/rendertext.nim b/src/render/rendertext.nim
index 6d2cf9d0..8c50ea16 100644
--- a/src/render/rendertext.nim
+++ b/src/render/rendertext.nim
@@ -64,9 +64,6 @@ proc renderStream*(grid: var FlexibleGrid, renderer: var StreamRenderer, len: in
           renderer.spaces = 0
       of '\e':
         renderer.ansiparser.reset()
-      elif c in Controls:
-        add_format
-        grid[^1].str &= '^' & c.getControlLetter()
       else:
         add_format
         grid[^1].str &= c
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index c4605093..48ea54f9 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -9,6 +9,7 @@ import sequtils
 import options
 import punycode
 
+import bindings/libunicode
 import data/idna
 
 when defined(posix):
@@ -105,7 +106,7 @@ func toLowerAscii2*(str: string): string =
   for i in i ..< str.len:
     result[i] = str[i].tolower()
 
-proc toLowerAsciiInPlace*(str: var string) =
+proc mtoLowerAscii*(str: var string) =
   for i in 0 ..< str.len:
     str[i] = str[i].tolower()
 
@@ -525,8 +526,50 @@ func clearControls*(s: string): string =
     if c notin Controls:
       result &= c
 
+proc passRealloc(opaque: pointer, p: pointer, size: csize_t): pointer {.cdecl.} =
+  realloc(p, size) # `opaque` is not used; the request goes straight to realloc
+
+proc mnormalize*(rs: var seq[Rune], form = UNICODE_NFC) = {.cast(noSideEffect).}:
+  # In-place variant: `rs` is overwritten with what unicode_normalize yields for `form`.
+  if rs.len == 0: return
+  var normalized: ptr uint32 # receives the buffer from unicode_normalize
+  let src = cast[ptr uint32](unsafeAddr rs[0])
+  let n = unicode_normalize(addr normalized, src, cint(rs.len), form, nil, passRealloc)
+  if n < 0:
+    raise newException(Defect, "Unicode normalization failed")
+  if n == 0: return # `rs` keeps its previous contents
+  # Copy the returned buffer into a fresh seq, then release the buffer.
+  rs = cast[seq[Rune]](newSeqUninitialized[uint32](n))
+  copyMem(addr rs[0], normalized, n * sizeof(uint32))
+  dealloc(normalized)
+
+#TODO maybe a utf8 normalization procedure?
+proc mnormalize*(s: var string) =
+  # Normalizes `s` in place; a string made only of `Ascii` chars is left alone.
+  block ascii_only:
+    for ch in s:
+      if ch notin Ascii: break ascii_only
+    return # every char was in Ascii, nothing to normalize
+  var runes = toRunes(s)
+  mnormalize(runes)
+  s = $runes
+
+func normalize*(rs: seq[Rune], form = UNICODE_NFC): seq[Rune] = {.cast(noSideEffect).}:
+  # Returns what unicode_normalize yields for `form` as a new seq; `rs` is only read.
+  if rs.len == 0: return
+  var normalized: ptr uint32 # receives the buffer from unicode_normalize
+  let src = cast[ptr uint32](unsafeAddr rs[0])
+  let n = unicode_normalize(addr normalized, src, cint(rs.len), form, nil, passRealloc)
+  if n < 0:
+    raise newException(Defect, "Unicode normalization failed")
+  if n == 0: return # result stays empty
+  # Copy the returned buffer into the result seq, then release the buffer.
+  result = cast[seq[Rune]](newSeqUninitialized[uint32](n))
+  copyMem(addr result[0], normalized, n * sizeof(uint32))
+  dealloc(normalized)
+
 func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing: bool): Option[string] =
-  var mapped = ""
+  var mapped: seq[Rune] # runes kept or substituted by the mapping step below
   var i = 0
   while i < str.len:
     var r: Rune
@@ -535,34 +578,48 @@ func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitiona
     case status
     of IDNA_DISALLOWED: return none(string) #error
     of IDNA_IGNORED: discard
-    of IDNA_MAPPED: mapped &= getIdnaMapped(r)
+    of IDNA_MAPPED: mapped &= getIdnaMapped(r).toRunes()
     of IDNA_DEVIATION:
-      if transitionalprocessing: mapped &= getDeviationMapped(r)
-      else: mapped &= r
+      if transitionalprocessing:
+        mapped &= getDeviationMapped(r).toRunes()
+      else:
+        mapped &= r
     of IDNA_VALID: mapped &= r
-  
-  #TODO normalize
+  if mapped.len == 0: return # nothing left after mapping: result is the default (empty) Option
+  mapped.mnormalize() # normalize the mapped runes in place (default form: UNICODE_NFC)
+  var cr: CharRange # filled below with the code point ranges of general category "Mark"
+  {.cast(noSideEffect).}:
+    cr_init(addr cr, nil, passRealloc) # NOTE(review): the early `return none(string)` paths below never reach cr_free
+    doAssert unicode_general_category(addr cr, "Mark") == 0 # doAssert: this call fills `cr`, so it must survive builds without assertions
   var labels: seq[string]
-  for label in str.split('.'):
+  for label in ($mapped).split('.'):
     var s = label
     if label.startsWith("xn--"):
       try:
         s = punycode.decode(label.substr("xn--".len))
       except PunyError:
         return none(string) #error
-    #TODO check normalization
+    let x0 = s.toRunes()
+    block: # the label must already be in normalized form
+      let x1 = normalize(x0)
+      if x0 != x1: # normalizing changed the label, so it was not normalized
+        return none(string) #error
     if checkhyphens:
       if s.len >= 4 and s[2] == '-' and s[3] == '-':
         return none(string) #error
       if s.len > 0 and s[0] == '-' and s[^1] == '-':
         return none(string) #error
-    var i = 0
-    while i < s.len:
-      if s[i] == '.':
+    if x0.len > 0: # reject a label whose first rune falls inside one of the "Mark" ranges
+      let r = x0[0]
+      for i in 0 ..< cr.len div 2: # `points` holds cr.len values, read here as (start, end) pairs
+        #TODO bisearch instead
+        let a = cast[ptr UncheckedArray[uint32]](cr.points)[i * 2] # start of pair i
+        let b = cast[ptr UncheckedArray[uint32]](cr.points)[i * 2 + 1] # end of pair i, one whole uint32 after the start
+        if a <= cast[uint32](r) and cast[uint32](r) < b: # end treated as exclusive, per libunicode's interval layout -- confirm
+          return none(string) #error
+    for r in x0:
+      if r == Rune('.'):
         return none(string) #error
-      var r: Rune
-      fastRuneAt(str, i, r)
-      #TODO check general category mark
       let status = getIdnaTableStatus(r)
       case status
       of IDNA_DISALLOWED, IDNA_IGNORED, IDNA_MAPPED:
@@ -574,6 +631,7 @@ func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitiona
       #TODO check joiners
       #TODO check bidi
     labels.add(s)
+  cr_free(addr cr)
   return labels.join('.').some
 
 func unicodeToAscii*(s: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing, verifydnslength: bool): Option[string] =
@@ -819,10 +877,11 @@ func is_dwidth_cjk(r: Rune): bool =
 # compute lookup table on startup
 var width_table*: array[0..0x10FFFF, byte]
 
+# Note: control chars return a width of 2, as we display them as ^{letter}.
 func makewidthtable*(cjk: bool): array[0..0x10FFFF, byte] {.noInit.} =
   for r in low(char)..high(char):
     if r in Controls:
-      result[int(r)] = 0
+      result[int(r)] = 2
     else:
       result[int(r)] = 1