diff options
author | bptato <nincsnevem662@gmail.com> | 2024-10-10 17:22:15 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-10-10 17:31:18 +0200 |
commit | 8c64d7cb4e54c289a6c18f6c144125196d888296 (patch) | |
tree | 4cc8de480912beebe62ccf9cf9ba826dc160fe2c /src/utils | |
parent | cf31d252ffe0aa195909a03efa714c6fc6e5fddf (diff) | |
download | chawan-8c64d7cb4e54c289a6c18f6c144125196d888296.tar.gz |
layout, pager: preserve tabs on display, selection & output
Substitute tabs with one of eight PUA characters based on their width, and convert them back in the pager:

* TUI mode always prints spaces, but now handles tabs appropriately on cursor movement
* dump mode tries to preserve hard tabs, but uses soft tabs when that is not possible (e.g. tabs after a margin, tab with background color, etc.)
* selection mode always outputs hard tabs.
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/strwidth.nim | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim
index df84dbad..a651162b 100644
--- a/src/utils/strwidth.nim
+++ b/src/utils/strwidth.nim
@@ -4,6 +4,32 @@ import utils/twtuni
 
 include res/map/charwidth_gen
 
+# Tabs are a bit of a problem: depending on their position in text, they may
+# represent one to eight characters. Inferring their size after layout is wrong
+# because a rendered line is obviously not equivalent to a CSS line.
+#
+# In the past, we worked around this by always passing the string's current
+# width to width(), but this only ever worked properly for plain text documents,
+# which we no longer distinguish from HTML.
+#
+# So now, to preserve tabs, we do the following:
+#
+# * Define Unicode PUA U+E000 to U+E007 as a tab range. The final digit
+# represents the number of characters the tab occupies, minus one. (Tab size
+# ranges from 1 char to 8 chars.)
+# * In layout, replace characters in this range with U+FFFD. Then, translate
+# literal tabs into the range depending on their width in the document.
+# * In width(), substitute the size of these characters accordingly.
+# * Finally, in buffer drawing code, translate the range back into the necessary
+# number of spaces - except in dump mode, where properly aligned tabs become
+# hard tabs, and in selection mode, where *all* tabs become hard tabs.
+const TabPUARange* = 0xE000u32 .. 0xE007u32
+
+func tabPUAPoint*(n: int): uint32 =
+  let u = 0xE000 + uint32(n) - 1
+  assert u in TabPUARange
+  return u
+
 # One of the few global variables in the code. Honestly, it should not exist.
 var isCJKAmbiguous* = false
 
@@ -14,6 +40,8 @@ func width*(u: uint32): int =
     return 0
   if u in DoubleWidthTable:
     return 2
+  if u in TabPUARange:
+    return int(((u - TabPUARange.a) and 7) + 1)
   {.cast(noSideEffect).}:
     if isCJKAmbiguous and DoubleWidthAmbiguousRanges.isInRange(u):
       return 2
@@ -65,3 +93,18 @@ func padToWidth*(s: string; size: int; schar = '$'): string =
     while w < size:
       result &= ' '
       inc w
+
+# Expand all PUA tabs into hard tabs, disregarding their position.
+# (This is mainly intended for copy/paste, where the actual characters
+# are more interesting than cell alignment.)
+func expandPUATabsHard*(s: openArray[char]): string =
+  var res = newStringOfCap(s.len)
+  var i = 0
+  while i < s.len:
+    let pi = i
+    if s.nextUTF8(i) in TabPUARange:
+      res &= '\t'
+    else:
+      for j in pi ..< i:
+        res &= s[j]
+  return res