about summary refs log tree commit diff stats
path: root/src/utils
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com>, 2024-10-10 17:22:15 +0200
committer: bptato <nincsnevem662@gmail.com>, 2024-10-10 17:31:18 +0200
commit: 8c64d7cb4e54c289a6c18f6c144125196d888296 (patch)
tree: 4cc8de480912beebe62ccf9cf9ba826dc160fe2c /src/utils
parent: cf31d252ffe0aa195909a03efa714c6fc6e5fddf (diff)
download: chawan-8c64d7cb4e54c289a6c18f6c144125196d888296.tar.gz
layout, pager: preserve tabs on display, selection & output
Substitute tabs with one of eight PUA characters based on their width,
and convert them back in the pager:

* TUI mode always prints spaces, but now handles tabs appropriately on
  cursor movement;
* dump mode tries to preserve hard tabs, but uses soft tabs when that is
  not possible (e.g. tabs after a margin, tabs with a background color,
  etc.);
* selection mode always outputs hard tabs.
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/strwidth.nim 43
1 file changed, 43 insertions, 0 deletions
diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim
index df84dbad..a651162b 100644
--- a/src/utils/strwidth.nim
+++ b/src/utils/strwidth.nim
@@ -4,6 +4,32 @@ import utils/twtuni
 
 include res/map/charwidth_gen
 
+# Tabs are a bit of a problem: depending on their position in text, they may
+# represent one to eight characters. Inferring their size after layout is wrong
+# because a rendered line is obviously not equivalent to a CSS line.
+#
+# In the past, we worked around this by always passing the string's current
+# width to width(), but this only ever worked properly for plain text documents,
+# which we no longer distinguish from HTML.
+#
+# So now, to preserve tabs, we do the following:
+#
+# * Define Unicode PUA U+E000 to U+E007 as a tab range. The final digit
+#   represents the number of characters the tab occupies, minus one. (Tab size
+#   ranges from 1 char to 8 chars.)
+# * In layout, replace characters in this range with U+FFFD. Then, translate
+#   literal tabs into the range depending on their width in the document.
+# * In width(), substitute the size of these characters accordingly.
+# * Finally, in buffer drawing code, translate the range back into the necessary
+#   number of spaces - except in dump mode, where properly aligned tabs become
+#   hard tabs, and in selection mode, where *all* tabs become hard tabs.
+const TabPUARange* = 0xE000u32 .. 0xE007u32
+
+func tabPUAPoint*(n: int): uint32 =
+  let u = 0xE000 + uint32(n) - 1
+  assert u in TabPUARange
+  return u
+
 # One of the few global variables in the code. Honestly, it should not exist.
 var isCJKAmbiguous* = false
 
@@ -14,6 +40,8 @@ func width*(u: uint32): int =
       return 0
     if u in DoubleWidthTable:
       return 2
+    if u in TabPUARange:
+      return int(((u - TabPUARange.a) and 7) + 1)
     {.cast(noSideEffect).}:
       if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
         return 2
@@ -65,3 +93,18 @@ func padToWidth*(s: string; size: int; schar = '$'): string =
   while w < size:
     result &= ' '
     inc w
+
+# Expand all PUA tabs into hard tabs, disregarding their position.
+# (This is mainly intended for copy/paste, where the actual characters
+# are more interesting than cell alignment.)
+func expandPUATabsHard*(s: openArray[char]): string =
+  var res = newStringOfCap(s.len)
+  var i = 0
+  while i < s.len:
+    let pi = i
+    if s.nextUTF8(i) in TabPUARange:
+      res &= '\t'
+    else:
+      for j in pi ..< i:
+        res &= s[j]
+  return res