about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-12-22 16:35:05 +0100
committer bptato <nincsnevem662@gmail.com> 2024-12-22 19:27:43 +0100
commit 1d1b7854e2397b4aef915d5a15c2796d89349a9b (patch)
tree 032d8aac7bed24a69df13094321ad9edf0a53a46 /src
parent 9c11f691bf781f21dc7ec9628238d06eba6391dc (diff)
download chawan-1d1b7854e2397b4aef915d5a15c2796d89349a9b.tar.gz
Misc character display fixes
* fix luwrap comparison function
* strip high unicode controls in term/pager
* use wcwidth in dirlist2html
* remove combining category from strwidth
* remove unused "disallowed" field from lineedit

My initial goal with switching to wcwidth in dirlist2html was just to
get rid of the outdated combining map in charwidth_gen.  Then I realized
that layout will normalize those out anyway, so we don't actually have
to deal with them anywhere.

Also, I found a few bugs in the process; high unicode control chars not
being stripped out was a particularly bad one, as it allows for pages to
mess up the terminal output. (Easiest way to replicate: just open a
random binary file without this patch.)
Diffstat (limited to 'src')
-rw-r--r-- src/local/container.nim 4
-rw-r--r-- src/local/lineedit.nim 16
-rw-r--r-- src/local/pager.nim 6
-rw-r--r-- src/local/select.nim 4
-rw-r--r-- src/local/term.nim 2
-rw-r--r-- src/server/loader.nim 2
-rw-r--r-- src/utils/luwrap.nim 2
-rw-r--r-- src/utils/proptable.nim 21
-rw-r--r-- src/utils/strwidth.nim 19
-rw-r--r-- src/utils/twtstr.nim 26
10 files changed, 35 insertions, 67 deletions
diff --git a/src/local/container.nim b/src/local/container.nim
index 7e43f296..36d6f91d 100644
--- a/src/local/container.nim
+++ b/src/local/container.nim
@@ -1789,8 +1789,8 @@ proc drawLines*(container: Container; display: var FixedGrid; hlcolor: CellColor
       if nf.pos != -1 and nf.pos <= pw:
         cf = nf
         nf = line.findNextFormat(pw)
-      if u <= 0xFF and char(u) in Controls:
-        display[dls + k].str &= '^' & char(u).getControlLetter()
+      if u.isControlChar():
+        display[dls + k].str = u.controlToVisual()
       elif u in TabPUARange:
         for i in 0 ..< uw:
           display[dls + k].str &= ' '
diff --git a/src/local/lineedit.nim b/src/local/lineedit.nim
index 30c672d9..4346ddd8 100644
--- a/src/local/lineedit.nim
+++ b/src/local/lineedit.nim
@@ -29,7 +29,6 @@ type
     shifti: int # 0 ..< news.len
     padding: int # 0 or 1
     maxwidth: int
-    disallowed: set[char]
     hide: bool
     hist: History
     currHist: HistoryEntry
@@ -95,10 +94,8 @@ proc generateOutput*(edit: LineEdit): FixedGrid =
       let w = u.width()
       if x + w > result.width: break
       if u.isControlChar():
-        result[x].str &= '^'
-        inc x
-        result[x].str &= char(u).getControlLetter()
-        inc x
+        result[x].str = u.controlToVisual()
+        x += result[x].str.len
       else:
         for j in pi ..< i:
           result[x].str &= edit.news[j]
@@ -112,10 +109,6 @@ proc getCursorX*(edit: LineEdit): int =
   return edit.promptw + edit.cursorx + edit.padding - edit.shiftx
 
 proc insertCharseq(edit: LineEdit; s: string) =
-  let s = if edit.escNext:
-    s
-  else:
-    deleteChars(s, edit.disallowed)
   edit.escNext = false
   if s.len == 0:
     return
@@ -301,14 +294,13 @@ proc nextHist(edit: LineEdit) {.jsfunc.} =
 proc windowChange*(edit: LineEdit; attrs: WindowAttributes) =
   edit.maxwidth = attrs.width - edit.promptw - 1
 
-proc readLine*(prompt, current: string; termwidth: int; disallowed: set[char];
-    hide: bool; hist: History; luctx: LUContext): LineEdit =
+proc readLine*(prompt, current: string; termwidth: int; hide: bool;
+    hist: History; luctx: LUContext): LineEdit =
   let promptw = prompt.width()
   return LineEdit(
     prompt: prompt,
     promptw: promptw,
     news: current,
-    disallowed: disallowed,
     hide: hide,
     redraw: true,
     cursori: current.len,
diff --git a/src/local/pager.nim b/src/local/pager.nim
index dfd73b05..06534bc5 100644
--- a/src/local/pager.nim
+++ b/src/local/pager.nim
@@ -356,7 +356,7 @@ proc setLineEdit(pager: Pager; mode: LineMode; current = ""; hide = false;
   if pager.term.isatty() and pager.config.input.use_mouse:
     pager.term.disableMouse()
   pager.lineedit = readLine($mode & extraPrompt, current, pager.attrs.width,
-    {}, hide, hist, pager.luctx)
+    hide, hist, pager.luctx)
   pager.linemode = mode
 
 # Reuse the line editor as an alert message viewer.
@@ -892,9 +892,7 @@ proc writeStatusMessage(pager: Pager; str: string; format = Format();
       x = lx + 1 # clip must be 1 cell wide
       break
     if u.isControlChar():
-      pager.status.grid[x].str = "^"
-      pager.status.grid[x + 1].str = $getControlLetter(char(u))
-      pager.status.grid[x + 1].format = format
+      pager.status.grid[x].str = u.controlToVisual()
     else:
       pager.status.grid[x].str = u.toUTF8()
     pager.status.grid[x].format = format
diff --git a/src/local/select.nim b/src/local/select.nim
index f98fb944..eba97c27 100644
--- a/src/local/select.nim
+++ b/src/local/select.nim
@@ -351,8 +351,8 @@ proc drawSelect*(select: Select; display: var FixedGrid) =
       if nx > ex:
         break
       display[dls + x].str = ""
-      if u <= 0xFF and char(u) in Controls:
-        display[dls + x].str &= '^' & char(u).getControlLetter()
+      if u.isControlChar():
+        display[dls + x].str &= u.controlToVisual()
       else:
         for l in pj ..< j:
           display[dls + x].str &= select.options[i].s[l]
diff --git a/src/local/term.nim b/src/local/term.nim
index 6821de97..b30c1a56 100644
--- a/src/local/term.nim
+++ b/src/local/term.nim
@@ -536,7 +536,7 @@ proc processOutputString*(res: var string; term: Terminal; s: openArray[char];
     return
   if w != -1:
     for u in s.points:
-      assert u > 0x7F or char(u) notin Controls
+      assert u > 0x9F or u != 0x7F and u > 0x1F
       w += u.width()
   let L = res.len
   res.setLen(L + s.len)
diff --git a/src/server/loader.nim b/src/server/loader.nim
index 31c4f767..f68c8f30 100644
--- a/src/server/loader.nim
+++ b/src/server/loader.nim
@@ -1005,7 +1005,7 @@ proc loadDataSend(ctx: LoaderContext; handle: InputHandle; s, ct: string) =
 proc loadData(ctx: LoaderContext; handle: InputHandle; request: Request) =
   let url = request.url
   var ct = url.pathname.until(',')
-  if AllChars - Ascii + Controls - {'\t', ' '} in ct:
+  if AllChars - Ascii + Controls - {'\t'} in ct:
     handle.sendResult(ceInvalidURL, "invalid data URL")
     handle.close()
     return
diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim
index 06cf8d6e..06057d8d 100644
--- a/src/utils/luwrap.nim
+++ b/src/utils/luwrap.nim
@@ -67,7 +67,7 @@ type u32pair* {.packed.} = object
 func cmpRange*(x: u32pair; y: uint32): int =
   if x.a > y:
     return 1
-  elif x.b < y:
+  elif x.b <= y:
     return -1
   return 0
 
diff --git a/src/utils/proptable.nim b/src/utils/proptable.nim
index 02a342f5..57e49f0e 100644
--- a/src/utils/proptable.nim
+++ b/src/utils/proptable.nim
@@ -8,27 +8,6 @@ type
   PropertyTable* = array[0x10000 div (sizeof(ptint) * 8), ptint]
   RangeMap* = openArray[(uint32, uint32)]
 
-func makePropertyTable*(ranges: RangeMap; skip: RangeMap = @[]): PropertyTable =
-  var ucs: uint32 = 0
-  var j = 0
-  var k = 0
-  while ucs <= 0xFFFF:
-    if k > ranges.len:
-      break
-    if ranges[k][0] > ucs:
-      ucs = ranges[k][0]
-      continue
-    if ranges[k][1] < ucs:
-      inc k
-      continue
-    if j != skip.len and ucs == skip[j][0]:
-      ucs = skip[j][1] + 1
-      continue
-    let i = ucs div (sizeof(ptint) * 8)
-    let m = ucs mod (sizeof(ptint) * 8)
-    result[i] = result[i] or ptint(1 shl m)
-    inc ucs
-
 {.push boundChecks:off.}
 func contains*(props: PropertyTable; u: ptint): bool {.inline.} =
   const isz = sizeof(ptint) * 8
diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim
index 7276c587..ea26c852 100644
--- a/src/utils/strwidth.nim
+++ b/src/utils/strwidth.nim
@@ -30,29 +30,24 @@ func tabPUAPoint*(n: int): uint32 =
   assert u in TabPUARange
   return u
 
-# One of the few global variables in the code. Honestly, it should not exist.
-var isCJKAmbiguous* = false
+var isCJKAmbiguous* {.global.} = false
 
 # Warning: this shouldn't be called without normalization.
 func width*(u: uint32): int =
   if u <= 0xFFFF: # fast path for BMP
-    if u in CombiningTable:
-      return 0
     if u in DoubleWidthTable:
       return 2
+    if u in 0x80u32 .. 0x9Fu32:
+      # Represent Unicode control chars as [XX] where X is a hex digit.
+      return 4
     if u in TabPUARange:
       return int(((u - TabPUARange.a) and 7) + 1)
-    {.cast(noSideEffect).}:
-      if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
-        return 2
   else:
-    if Combining.isInRange(u):
-      return 0
     if DoubleWidthRanges.isInRange(u):
       return 2
-    {.cast(noSideEffect).}:
-      if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
-        return 2
+  {.cast(noSideEffect).}:
+    if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
+      return 2
   return 1
 
 func width*(s: openArray[char]): int =
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index 18f7dd13..06e5279e 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -121,18 +121,13 @@ func onlyWhitespace*(s: string): bool =
   return AllChars - AsciiWhitespace notin s
 
 func isControlChar*(u: uint32): bool =
-  return u <= 0x1F or u == 0x7F
+  return u <= 0x1F or u >= 0x7F and u <= 0x9F
 
 func getControlChar*(c: char): char =
   if c == '?':
     return char(127)
   return char(int(c) and 0x1F)
 
-func getControlLetter*(c: char): char =
-  if c == char(127):
-    return '?'
-  return char(int(c) or 0x40)
-
 func toHeaderCase*(s: string): string =
   result = s
   var flip = true
@@ -210,6 +205,16 @@ func toHexLower*(u: uint16): string =
     x = x shr 4
   return s
 
+func controlToVisual*(u: uint32): string =
+  if u <= 0x1F:
+    return "^" & char(u or 0x40)
+  if u == 0x7F:
+    return "^?"
+  var res = "["
+  res.pushHex(uint8(u))
+  res &= ']'
+  return res
+
 proc add*(s: var string; u: uint8) =
   s.addInt(uint64(u))
 
@@ -603,12 +608,11 @@ func deleteChars*(s: openArray[char]; todel: set[char]): string =
 
 func replaceControls*(s: openArray[char]): string =
   result = newStringOfCap(s.len)
-  for c in s:
-    if c in Controls:
-      result &= '^'
-      result &= c.getControlLetter()
+  for u in s.points:
+    if u.isControlChar():
+      result &= u.controlToVisual()
     else:
-      result &= c
+      result.addUTF8(u)
 
 #https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart/form-data-encoding-algorithm
 proc makeCRLF*(s: openArray[char]): string =