layout, pager: preserve tabs on display, selection & output

Substitute tabs with one of eight PUA characters based on their width, and convert them back in the pager:
* TUI mode always prints spaces, but now handles tabs appropriately on cursor movement
* dump mode tries to preserve hard tabs, but uses soft tabs when that is not possible (e.g. tabs after a margin, a tab with a background color, etc.)
* selection mode always outputs hard tabs
author: bptato <nincsnevem662@gmail.com> 2024-10-10 17:22:15 +0200
committer: bptato <nincsnevem662@gmail.com> 2024-10-10 17:31:18 +0200
commit: 8c64d7cb4e54c289a6c18f6c144125196d888296 (patch)
tree: 4cc8de480912beebe62ccf9cf9ba826dc160fe2c /src/utils
parent: cf31d252ffe0aa195909a03efa714c6fc6e5fddf (diff)
download: chawan-8c64d7cb4e54c289a6c18f6c144125196d888296.tar.gz
1 files changed, 43 insertions, 0 deletions
diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim
index df84dbad..a651162b 100644
--- a/src/utils/strwidth.nim
+++ b/src/utils/strwidth.nim
@@ -4,6 +4,32 @@ import utils/twtuni
 
 include res/map/charwidth_gen
 
+# Tabs are a bit of a problem: depending on their position in text, they may
+# represent one to eight characters. Inferring their size after layout is wrong
+# because a rendered line is obviously not equivalent to a CSS line.
+#
+# In the past, we worked around this by always passing the string's current
+# width to width(), but this only ever worked properly for plain text documents,
+# which we no longer distinguish from HTML.
+#
+# So now, to preserve tabs, we do the following:
+#
+# * Define Unicode PUA U+E000 to U+E007 as a tab range. The final hex digit
+#   is the number of characters the tab occupies, minus one. (Tab size ranges
+#   from 1 char, U+E000, to 8 chars, U+E007.)
+# * In layout, replace characters in this range with U+FFFD. Then, translate
+#   literal tabs into the range depending on their width in the document.
+# * In width(), substitute the size of these characters accordingly.
+# * Finally, in buffer drawing code, translate the range back into the necessary
+#   number of spaces - except in dump mode, where properly aligned tabs become
+#   hard tabs, and in selection mode, where *all* tabs become hard tabs.
+const TabPUARange* = 0xE000u32 .. 0xE007u32 # one code point per tab width
+
+func tabPUAPoint*(n: int): uint32 = # code point standing in for an n-wide tab
+  let u = 0xE000 + uint32(n) - 1 # n = 1 gives U+E000, n = 8 gives U+E007
+  assert u in TabPUARange # holds only for n in 1..8
+  return u
+
 # One of the few global variables in the code. Honestly, it should not exist.
 var isCJKAmbiguous* = false
 
@@ -14,6 +40,8 @@ func width*(u: uint32): int =
       return 0
     if u in DoubleWidthTable:
       return 2
+    if u in TabPUARange:
+      return int(((u - TabPUARange.a) and 7) + 1)
     {.cast(noSideEffect).}:
       if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
         return 2
@@ -65,3 +93,18 @@ func padToWidth*(s: string; size: int; schar = '$'): string =
   while w < size:
     result &= ' '
     inc w
+
+# Replace every code point in TabPUARange with one hard tab ('\t'), whatever
+# width it encodes; all other bytes are copied through unchanged. (Mainly for
+# copy/paste, where the actual characters matter more than cell alignment.)
+func expandPUATabsHard*(s: openArray[char]): string =
+  var res = newStringOfCap(s.len) # reserve the input length up front
+  var i = 0
+  while i < s.len:
+    let pi = i # byte offset where the next code point starts
+    if s.nextUTF8(i) in TabPUARange: # nextUTF8 presumably advances i; verify
+      res &= '\t'
+    else:
+      for j in pi ..< i: # not a PUA tab: copy the bytes just consumed as-is
+        res &= s[j]
+  return res
author	bptato <nincsnevem662@gmail.com>	2024-10-10 17:22:15 +0200
committer	bptato <nincsnevem662@gmail.com>	2024-10-10 17:31:18 +0200
commit	8c64d7cb4e54c289a6c18f6c144125196d888296 (patch)
tree	4cc8de480912beebe62ccf9cf9ba826dc160fe2c /src/utils
parent	cf31d252ffe0aa195909a03efa714c6fc6e5fddf (diff)
download	chawan-8c64d7cb4e54c289a6c18f6c144125196d888296.tar.gz