diff options
author | bptato <nincsnevem662@gmail.com> | 2022-07-23 16:54:02 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-07-23 17:39:06 +0200 |
commit | e7b53775a3f52cb5d8da865213c5dc38b954e33c (patch) | |
tree | 200741f8ed965f407c1a6af8823705db133bf3d4 | |
parent | c7f25b2fe470849e028f9502d3da0851f149f065 (diff) | |
download | chawan-e7b53775a3f52cb5d8da865213c5dc38b954e33c.tar.gz |
Improved incremental search: support unicode
-rw-r--r-- | src/bindings/libregexp.nim | 2 | ||||
-rw-r--r-- | src/client.nim | 33 | ||||
-rw-r--r-- | src/io/buffer.nim | 79 | ||||
-rw-r--r-- | src/io/cell.nim | 6 | ||||
-rw-r--r-- | src/js/regex.nim | 50 | ||||
-rw-r--r-- | src/strings/charset.nim | 69 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 9 |
7 files changed, 189 insertions, 59 deletions
diff --git a/src/bindings/libregexp.nim b/src/bindings/libregexp.nim index 1b84400e..e0a05543 100644 --- a/src/bindings/libregexp.nim +++ b/src/bindings/libregexp.nim @@ -17,3 +17,5 @@ proc lre_exec*(capture: ptr ptr uint8, bc_buf: ptr uint8, cbuf: ptr uint8, opaque: pointer): cint {.importc: "lre_exec", header: lreheader.} proc lre_get_capture_count*(bc_buf: ptr uint8): cint {.importc: "lre_get_capture_count", header: lreheader.} + +proc lre_get_flags*(bc_buf: ptr uint8): cint {.importc: "lre_get_flags", header: lreheader.} diff --git a/src/client.nim b/src/client.nim index d8f6e2e7..f16e6883 100644 --- a/src/client.nim +++ b/src/client.nim @@ -7,6 +7,7 @@ import unicode import css/sheet import config/config import io/buffer +import io/cell import io/lineedit import io/loader import js/javascript @@ -304,25 +305,32 @@ proc isearch(client: Client) = client.statusMode() var iput: string let cpos = client.buffer.cpos + var mark: Mark + var my: int + template del_mark() = + if mark != nil: + client.buffer.removeMark(my, mark) + let status = readLine("/", iput, client.buffer.width, {}, (proc(state: var LineState): bool = - client.buffer.marks.setLen(0) + del_mark let regex = compileSearchRegex($state.news) client.buffer.cpos = cpos if regex.issome: let match = client.buffer.cursorNextMatch(regex.get) if match.success: - client.buffer.addMark(match.x, match.y, match.str) - let nos = client.buffer.nostatus + mark = client.buffer.addMark(match.x, match.y, match.str.width()) client.buffer.redraw = true client.buffer.refreshBuffer(true) print(HVP(client.buffer.height + 1, 2)) else: - client.buffer.marks.setLen(0) + del_mark return true false )) - client.buffer.marks.setLen(0) + + del_mark client.buffer.redraw = true + client.buffer.refreshBuffer(true) if status: client.regex = compileSearchRegex(iput) else: @@ -332,22 +340,29 @@ proc isearchBack(client: Client) = client.statusMode() var iput: string let cpos = client.buffer.cpos + var mark: Mark + var my: int + template del_mark() = + if mark != nil: + client.buffer.removeMark(my, mark) let status = readLine("?", iput, client.buffer.width, {}, (proc(state: var LineState): bool = - client.buffer.marks.setLen(0) + del_mark let regex = compileSearchRegex($state.news) client.buffer.cpos = cpos if regex.issome: let match = client.buffer.cursorPrevMatch(regex.get) if match.success: - client.buffer.addMark(match.x, match.y, match.str) - let nos = client.buffer.nostatus + mark = client.buffer.addMark(match.x, match.y, match.str.width()) + my = match.y client.buffer.redraw = true client.buffer.refreshBuffer(true) print(HVP(client.buffer.height + 1, 2)) + else: + del_mark return true false )) - client.buffer.marks.setLen(0) + del_mark client.buffer.redraw = true if status: client.regex = compileSearchRegex(iput) diff --git a/src/io/buffer.nim b/src/io/buffer.nim index 065a85a8..b3022615 100644 --- a/src/io/buffer.nim +++ b/src/io/buffer.nim @@ -38,12 +38,6 @@ type y*: int str*: string - Mark* = object - x: int - y: int - format: Format - grid: FixedGrid - Buffer* = ref object contenttype*: string title*: string @@ -73,7 +67,6 @@ type next*: Buffer userstyle*: CSSStylesheet loader*: FileLoader - marks*: seq[Mark] proc newBuffer*(): Buffer = new(result) @@ -195,14 +188,6 @@ func generateStatusMessage*(buffer: Buffer): string = if w < buffer.width: result &= EL() -func generateMark*(buffer: Buffer, mark: Mark): string = - var format = newFormat() - var w = 0 - for cell in mark.grid: - result &= format.processFormat(cell.format) - result &= $cell.runes - w += cell.width() - func numLines(buffer: Buffer): int = buffer.lines.len func lastVisibleLine(buffer: Buffer): int = min(buffer.fromy + buffer.height, buffer.numLines) @@ -332,27 +317,36 @@ proc refreshDisplay(buffer: Buffer) = for line in buffer.lines[buffer.fromy.. buffer.lastVisibleLine - 1]: - var w = 0 - var i = 0 + var w = 0 # width of the row so far + var i = 0 # byte in line.str + + # Skip cells till buffer.fromx. while w < buffer.fromx and i < line.str.len: fastRuneAt(line.str, i, r) w += r.width() - let dls = y * buffer.width + let dls = y * buffer.width # starting position of row in display + + # Fill in the gap in case we skipped more cells than fromx mandates (i.e. + # we encountered a double-width character.) var k = 0 - var cf = line.findFormat(w) - var nf = line.findNextFormat(w) if w > buffer.fromx: while k < w - buffer.fromx: buffer.display[dls + k].runes.add(Rune(' ')) inc k + var cf = line.findFormat(w) + var nf = line.findNextFormat(w) + + let startw = w # save this for later + + # Now fill in the visible part of the row. while i < line.str.len: let pw = w fastRuneAt(line.str, i, r) w += r.width() if w > buffer.fromx + buffer.width: - break + break # die on exceeding the width limit if nf.pos != -1 and nf.pos <= pw: cf = nf nf = line.findNextFormat(pw) @@ -364,6 +358,14 @@ proc refreshDisplay(buffer: Buffer) = while k < tk and k < buffer.width - 1: inc k + # Then, for each cell that has a mark, override its formatting with that + # specified by the mark. + let aw = buffer.width - (startw - buffer.fromx) # actual width + for mark in line.marks: + if mark.x >= startw + aw or mark.x + mark.width < startw: continue + for i in max(mark.x, startw)..<min(mark.x + mark.width, startw + aw): + buffer.display[dls + i].format = mark.format + inc y proc setCursorX(buffer: Buffer, x: int, refresh = true, save = true) = @@ -698,19 +700,17 @@ proc gotoAnchor*(buffer: Buffer) = return inc i -proc addMark*(buffer: Buffer, x, y: int, str: string) = +proc addMark*(buffer: Buffer, x, y, width: int): Mark = assert y < buffer.lines.len var format = newFormat() format.reverse = true - #TODO get rid of the string part; marks should only consist of a position and - # a length. - var grid = newFixedGrid(str.width()) - var i = 0 - for r in str.runes: - grid[i].runes.add(r) - grid[i].format = format - i += r.width() - buffer.marks.add(Mark(x: x, y: y, format: format, grid: grid)) + result = Mark(x: x, width: width, format: format) + buffer.lines[y].marks.add(result) + +proc removeMark*(buffer: Buffer, y: int, mark: Mark) = + let i = buffer.lines[y].marks.find(mark) + if i != -1: + buffer.lines[y].marks.delete(i) proc cursorNextMatch(buffer: Buffer, regex: Regex, sy, ey: int, wrap = false): BufferMatch = for y in sy..ey: @@ -721,10 +721,11 @@ proc cursorNextMatch(buffer: Buffer, regex: Regex, sy, ey: int, wrap = false): B let res = regex.exec(buffer.lines[y].str, s) if res.success and res.captures.len > 0: let cap = res.captures[0] - buffer.setCursorXY(cap.s, y) + let x = buffer.lines[y].str.width(cap.s) + buffer.setCursorXY(x, y) result.success = true result.y = y - result.x = buffer.cursorBytes(y, cap.s) + result.x = x result.str = buffer.lines[y].str.substr(cap.s, cap.e - 1) return @@ -752,10 +753,11 @@ proc cursorPrevMatch*(buffer: Buffer, regex: Regex, sy, ey: int, wrap = false): for i in countdown(res.captures.high, 0): let cap = res.captures[i] if cap.s < e: - buffer.setCursorXY(cap.s, y) + let x = buffer.lines[y].str.width(cap.s) + buffer.setCursorXY(x, y) result.success = true result.y = y - result.x = buffer.cursorBytes(y, cap.s) + result.x = x result.str = buffer.lines[y].str.substr(cap.s, cap.e - 1) return @@ -1214,13 +1216,6 @@ proc refreshBuffer*(buffer: Buffer, peek = false) = buffer.refreshDisplay() buffer.displayBufferSwapOutput() - for mark in buffer.marks: - if mark.y in buffer.fromy..(buffer.fromy + buffer.height) and mark.x in buffer.fromx..(buffer.fromx + buffer.width): - print(HVP(mark.y - buffer.fromy + 1, mark.x - buffer.fromx + 1)) - print(SGR()) - print(buffer.generateMark(mark)) - print(SGR()) - if not peek: if not buffer.nostatus: buffer.statusMsgForBuffer() diff --git a/src/io/cell.nim b/src/io/cell.nim index abcb8f92..4d808dda 100644 --- a/src/io/cell.nim +++ b/src/io/cell.nim @@ -35,6 +35,12 @@ type FlexibleLine* = object str*: string formats*: seq[FormatCell] + marks*: seq[Mark] + + Mark* = ref object + x*: int + width*: int + format*: Format FlexibleGrid* = seq[FlexibleLine] diff --git a/src/js/regex.nim b/src/js/regex.nim index a1bd7d35..75ff7535 100644 --- a/src/js/regex.nim +++ b/src/js/regex.nim @@ -1,10 +1,12 @@ # Interface for QuickJS libregexp. import options +import unicode import bindings/libregexp import bindings/quickjs import js/javascript +import strings/charset export LRE_FLAG_GLOBAL, @@ -36,8 +38,10 @@ proc compileRegex*(buf: string, flags: int): Option[Regex] = var error_msg_size = 64 var error_msg = cast[cstring](alloc0(error_msg_size)) let bytecode = lre_compile(addr len, error_msg, cint(error_msg_size), cstring(buf), csize_t(buf.len), cint(flags), dummyContext) + if error_msg != nil: #TODO error handling? + #eprint "err", error_msg dealloc(error_msg) error_msg = nil if bytecode == nil: @@ -55,10 +59,9 @@ proc compileSearchRegex*(str: string): Option[Regex] = while i >= 0: case str[i] of '/': - if i > 0 and str[i - 1] == '\\': break # escaped flagsi = i break - of 'i', 'm', 's': discard + of 'i', 'm', 's', 'u': discard else: break # invalid flag dec i @@ -73,20 +76,36 @@ proc compileSearchRegex*(str: string): Option[Regex] = of 'i': flags = flags or LRE_FLAG_IGNORECASE of 'm': flags = flags or LRE_FLAG_MULTILINE of 's': flags = flags or LRE_FLAG_DOTALL + of 'u': flags = flags or LRE_FLAG_UTF16 else: assert false return compileRegex(str.substr(0, flagsi - 1), flags) proc exec*(regex: Regex, str: string, start = 0): RegexResult = assert 0 <= start and start <= str.len - let cstr = cstring(str) - let captureCount = lre_get_capture_count(cast[ptr uint8](regex.bytecode)) + + let captureCount = lre_get_capture_count(regex.bytecode) + var capture: ptr ptr uint8 = nil if captureCount > 0: capture = cast[ptr ptr uint8](alloc0(sizeof(ptr uint8) * captureCount * 2)) + + var cstr = cstring(str) + var ascii = true + for c in str: + if c > char(0x80): + ascii = false + break + var ustr: string16 + if not ascii: + ustr = toUTF16(str) + cstr = cstring(ustr) + let ret = lre_exec(capture, regex.bytecode, cast[ptr uint8](cstr), cint(start), - cint(str.len), cint(0), dummyContext) + cint(str.len), cint(not ascii), dummyContext) + result.success = ret == 1 #TODO error handling? (-1) + if result.success: var i = 0 let cstrAddress = cast[int](cstr) @@ -99,7 +118,22 @@ proc exec*(regex: Regex, str: string, start = 0): RegexResult = let endPointer = cast[ptr ptr uint8](endPointerAddress) let startAddress = cast[int](startPointer[]) let endAddress = cast[int](endPointer[]) - let s = startAddress - cstrAddress - let e = endAddress - cstrAddress - result.captures.add((s, e)) + var s = startAddress - cstrAddress + var e = endAddress - cstrAddress + if ascii: + result.captures.add((s, e)) + else: + var s8 = 0 + var e8 = 0 + var i = 0 + var r: Rune + while i < s: + fastRuneAt(ustr, i, r) + let si = r.size() + s8 += si + e8 += si + while i < e: + fastRuneAt(ustr, i, r) + e8 += r.size() + result.captures.add((s8, e8)) dealloc(capture) diff --git a/src/strings/charset.nim b/src/strings/charset.nim new file mode 100644 index 00000000..8726268a --- /dev/null +++ b/src/strings/charset.nim @@ -0,0 +1,69 @@ +import unicode + +type string16* = distinct string + +# Convert a UTF-8 string to UTF-16. +# Note: this doesn't check for (invalid) UTF-8 containing surrogates. +proc toUTF16*(s: string): string16 = + var res = "" + var i = 0 + template put16(c: uint16) = + res.setLen(res.len + 2) + res[i] = cast[char](c) + inc i + res[i] = cast[char](c shr 8) + inc i + for r in s.runes: + var c = uint32(r) + if c < 0x10000: # ucs-2 + put16 uint16(c) + elif c <= 0x10FFFF: # surrogate + c -= 0x10000 + put16 uint16((c shr 10) + 0xD800) + put16 uint16((c and 0x3FF) + 0xDC00) + else: # invalid + put16 uint16(0xFFFD) + result = string16(res) + +proc len*(s: string16): int {.borrow.} +proc `[]`*(s: string16, i: int): char = string(s)[i] +proc `[]`*(s: string16, i: BackwardsIndex): char = string(s)[i] + +template fastRuneAt*(s: string16, i: int, r: untyped, doInc = true, be = false) = + if i + 1 == s.len: # unmatched byte + when doInc: inc i + r = Rune(0xFFFD) + else: + when be: + var c1: uint32 = (uint32(s[i]) shl 8) + uint32(s[i + 1]) + else: + var c1: uint32 = uint32(s[i]) + (uint32(s[i + 1]) shl 8) + if c1 >= 0xD800 or c1 < 0xDC00: + if i + 3 == s.len: + when doInc: i += 2 + r = Rune(c1) # unmatched surrogate + else: + when be: + var c2: uint32 = (uint32(s[i + 2]) shl 8) + uint32(s[i + 3]) + else: + var c2: uint32 = uint32(s[i + 2]) + (uint32(s[i + 3]) shl 8) + if c2 >= 0xDC00 and c2 < 0xE000: + r = Rune((((c1 and 0x3FF) shl 10) or (c2 and 0x3FF)) + 0x10000) + when doInc: i += 4 + else: + r = Rune(c1) # unmatched surrogate + when doInc: i += 2 + else: + r = Rune(c1) # ucs-2 + when doInc: i += 2 + +iterator runes*(s: string16): Rune = + var i = 0 + var r: Rune + while i < s.len: + fastRuneAt(s, i, r) + yield r + +proc fromUTF16*(s: string16): string = + for r in s.runes: + result &= r diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 4937d0be..c88c5980 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -867,6 +867,15 @@ func width*(s: string): int = for r in s.runes(): result += width(r) +func width*(s: string, len: int): int = + var i = 0 + var m = len + if m > s.len: m = s.len + while i < m: + var r: Rune + fastRuneAt(s, i, r) + result += width(r) + func width*(s: seq[Rune]): int = for r in s: result += width(r) |