diff options
author | bptato <nincsnevem662@gmail.com> | 2024-02-17 22:03:07 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-02-17 22:07:11 +0100 |
commit | 390772358cedc9ed541a27b3cac1f8d97beef0ef (patch) | |
tree | 621e56ad7b1b4957b1ba686a8902381ce270619c | |
parent | e98d0ad1dc51050eb17120f835847d55950c2a0b (diff) | |
download | chawan-390772358cedc9ed541a27b3cac1f8d97beef0ef.tar.gz |
regex: re-work compileSearchRegex
I've gotten tired of not being able to search for forward slashes. Now it works like in vim, and you can also set default ignore case in the config.
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | doc/config.md | 10 | ||||
-rw-r--r-- | src/bindings/libregexp.nim | 23 | ||||
-rw-r--r-- | src/config/config.nim | 34 | ||||
-rw-r--r-- | src/js/fromjs.nim | 23 | ||||
-rw-r--r-- | src/js/javascript.nim | 1 | ||||
-rw-r--r-- | src/js/opaque.nim | 8 | ||||
-rw-r--r-- | src/js/regex.nim | 84 | ||||
-rw-r--r-- | src/js/tojs.nim | 11 | ||||
-rw-r--r-- | src/local/pager.nim | 6 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 16 |
11 files changed, 139 insertions, 82 deletions
diff --git a/README.md b/README.md index ea8717a7..7bbf4e5d 100644 --- a/README.md +++ b/README.md @@ -141,9 +141,8 @@ than if it doesn't. Chawan does not have browser tabs. Instead, each website is opened in a new buffer, which is added to the buffer tree. This is very similar to how w3m -handles buffers, except a) source files are stored in memory, not on the disk, -and b) instead of a linked list of buffers, they are stored in a tree. (And -of course, c) there are no tabs.) +handles buffers, except instead of a linked list of buffers, they are stored in +a tree. This model has the advantage of allowing the user to instantly view the previous page in all cases, without any complicated caching mechanism. It diff --git a/doc/config.md b/doc/config.md index 361323ee..10074a84 100644 --- a/doc/config.md +++ b/doc/config.md @@ -108,6 +108,16 @@ Following is a list of search options: <td>When set to true, searchNext/searchPrev wraps around the document.</td> </tr> +<tr> +<td>default-flags</td> +<td>Array of JS regex flags</td> +<td>Theoretically, you could use any JS regex flag. Practically, the only values +that work/make sense right now are either `[]` (the default; an empty array) or +`["i"]` (an array with the string "i").<br> +Note: this can also be overridden inline in the search bar (vim-style), with the +escape sequences `\c` (ignore case) and `\C` (strict case).</td> +</tr> + </table> ## Encoding diff --git a/src/bindings/libregexp.nim b/src/bindings/libregexp.nim index 85e7c1ca..d4c10b42 100644 --- a/src/bindings/libregexp.nim +++ b/src/bindings/libregexp.nim @@ -1,10 +1,19 @@ -const - LRE_FLAG_GLOBAL* = 1 shl 0 - LRE_FLAG_IGNORECASE* = 1 shl 1 - LRE_FLAG_MULTILINE* = 1 shl 2 - LRE_FLAG_DOTALL* = 1 shl 3 - LRE_FLAG_UTF16* = 1 shl 4 - LRE_FLAG_STICKY* = 1 shl 5 +type + LREFlag* {.size: sizeof(cint).} = enum + LRE_FLAG_GLOBAL = "g" + LRE_FLAG_IGNORECASE = "i" + LRE_FLAG_MULTILINE = "m" + LRE_FLAG_DOTALL = "s" + LRE_FLAG_UTF16 = "u" + LRE_FLAG_STICKY = "y" + + LREFlags* = set[LREFlag] + +func toCInt*(flags: LREFlags): cint = + cast[cint](flags) + +func toLREFlags*(flags: cint): LREFlags = + cast[LREFlags](flags) {.passc: "-Ilib/".} diff --git a/src/config/config.nim b/src/config/config.nim index 678f7f7b..7be0c4a2 100644 --- a/src/config/config.nim +++ b/src/config/config.nim @@ -81,6 +81,7 @@ type SearchConfig = object wrap* {.jsgetset.}: bool + default_flags* {.jsgetset.}: LREFlags EncodingConfig = object display_charset* {.jsgetset.}: Opt[Charset] @@ -107,8 +108,8 @@ type DisplayConfig = object color_mode* {.jsgetset.}: Opt[ColorMode] - format_mode*: Opt[FormatMode] #TODO getset - no_format_mode*: FormatMode #TODO getset + format_mode* {.jsgetset.}: Opt[FormatMode] + no_format_mode* {.jsgetset.}: FormatMode emulate_overline* {.jsgetset.}: bool alt_screen* {.jsgetset.}: Opt[bool] highlight_color* {.jsgetset.}: RGBAColor @@ -470,6 +471,7 @@ proc parseConfigValue[T](x: var Opt[T], v: TomlValue, k: string) proc parseConfigValue(x: var ActionMap, v: TomlValue, k: string) proc parseConfigValue(x: var CSSConfig, v: TomlValue, k: string) proc parseConfigValue[U, V](x: var Table[U, V], v: TomlValue, k: string) +proc parseConfigValue[T](x: var set[T], v: TomlValue, k: string) proc typeCheck(v: TomlValue, vt: ValueType, k: string) = if v.vt != vt: @@ -566,9 +568,10 @@ proc parseConfigValue(x: var Opt[FormatMode], v: TomlValue, k: string) = proc parseConfigValue(x: var FormatMode, v: TomlValue, k: string) = typeCheck(v, VALUE_ARRAY, k) for i in 0 ..< v.a.len: - let s = v.a[i].s let kk = k & "[" & $i & "]" - case s + let vv = v.a[i] + typeCheck(vv, VALUE_STRING, kk) + case vv.s of "bold": x.incl(FLAG_BOLD) of "italic": x.incl(FLAG_ITALIC) of "underline": x.incl(FLAG_UNDERLINE) @@ -577,7 +580,7 @@ proc parseConfigValue(x: var FormatMode, v: TomlValue, k: string) = of "overline": x.incl(FLAG_OVERLINE) of "blink": x.incl(FLAG_BLINK) else: - raise newException(ValueError, "unknown format mode '" & s & + raise newException(ValueError, "unknown format mode '" & vv.s & "' for key " & kk) proc parseConfigValue(x: var RGBAColor, v: TomlValue, k: string) = @@ -615,6 +618,27 @@ proc parseConfigValue(x: var ActionMap, v: TomlValue, k: string) = discard x.hasKeyOrPut(buf, "client.feedNext()") x[rk] = vv.s +proc parseConfigValue[T: enum](x: var T, v: TomlValue, k: string) = + typeCheck(v, VALUE_STRING, k) + let e = strictParseEnum[T](v.s) + if e.isNone: + raise newException(ValueError, "invalid value '" & v.s & "' for key " & k) + x = e.get + +proc parseConfigValue[T](x: var set[T], v: TomlValue, k: string) = + typeCheck(v, {VALUE_STRING, VALUE_ARRAY}, k) + if v.vt == VALUE_STRING: + var xx: T + xx.parseConfigValue(v, k) + x = {xx} + else: + x = {} + for i in 0 ..< v.a.len: + let kk = k & "[" & $i & "]" + var xx: T + xx.parseConfigValue(v.a[i], kk) + x.incl(xx) + var gdir {.compileTime.}: string proc parseConfigValue(x: var CSSConfig, v: TomlValue, k: string) = typeCheck(v, VALUE_TABLE, k) diff --git a/src/js/fromjs.nim b/src/js/fromjs.nim index 823a045e..f0ff8a91 100644 --- a/src/js/fromjs.nim +++ b/src/js/fromjs.nim @@ -10,6 +10,7 @@ import js/jstypes import js/opaque import js/tojs import types/opt +import utils/twtstr proc fromJS*[T](ctx: JSContext, val: JSValue): JSResult[T] @@ -230,7 +231,7 @@ proc fromJSSeq[T](ctx: JSContext, val: JSValue): JSResult[seq[T]] = s.add(genericRes.get) return ok(s) -proc fromJSSet[T](ctx: JSContext, val: JSValue): Opt[set[T]] = +proc fromJSSet[T](ctx: JSContext, val: JSValue): JSResult[set[T]] = let itprop = JS_GetProperty(ctx, val, ctx.getOpaque().sym_refs[ITERATOR]) if JS_IsException(itprop): return err() @@ -249,14 +250,14 @@ proc fromJSSet[T](ctx: JSContext, val: JSValue): Opt[set[T]] = if JS_IsException(next): return err() defer: JS_FreeValue(ctx, next) - let doneVal = JS_GetProperty(ctx, next, ctx.getOpaque().done) + let doneVal = JS_GetProperty(ctx, next, ctx.getOpaque().str_refs[DONE]) if JS_IsException(doneVal): return err() defer: JS_FreeValue(ctx, doneVal) let done = ?fromJS[bool](ctx, doneVal) if done: break - let valueVal = JS_GetProperty(ctx, next, ctx.getOpaque().value) + let valueVal = JS_GetProperty(ctx, next, ctx.getOpaque().str_refs[VALUE]) if JS_IsException(valueVal): return err() defer: JS_FreeValue(ctx, valueVal) @@ -377,19 +378,9 @@ proc fromJSEnum[T: enum](ctx: JSContext, val: JSValue): JSResult[T] = if JS_IsException(val): return err() let s = ?toString(ctx, val) - # cmp when len is small enough, otherwise hashmap - when {T.low..T.high}.len <= 4: - for e in T.low .. T.high: - if $e == s: - return ok(e) - else: - const tab = (func(): Table[string, T] = - result = initTable[string, T]() - for e in T.low .. T.high: - result[$e] = e - )() - if s in tab: - return ok(tab[s]) + let r = strictParseEnum[T](s) + if r.isSome: + return ok(r.get) return errTypeError("`" & s & "' is not a valid value for enumeration " & $T) proc fromJSPObj0(ctx: JSContext, val: JSValue, t: string): diff --git a/src/js/javascript.nim b/src/js/javascript.nim index 9e814386..d208ffad 100644 --- a/src/js/javascript.nim +++ b/src/js/javascript.nim @@ -170,6 +170,7 @@ proc free*(ctx: var JSContext) = JS_FreeValue(ctx, opaque.Array_prototype_values) JS_FreeValue(ctx, opaque.Object_prototype_valueOf) JS_FreeValue(ctx, opaque.Uint8Array_ctor) + JS_FreeValue(ctx, opaque.Set_ctor) for v in opaque.err_ctors: JS_FreeValue(ctx, v) GC_unref(opaque) diff --git a/src/js/opaque.nim b/src/js/opaque.nim index 135b6994..bae2bd5a 100644 --- a/src/js/opaque.nim +++ b/src/js/opaque.nim @@ -33,6 +33,7 @@ type Array_prototype_values*: JSValue Object_prototype_valueOf*: JSValue Uint8Array_ctor*: JSValue + Set_ctor*: JSValue err_ctors*: array[JSErrorEnum, JSValue] htmldda*: JSClassID # only one of these exists: document.all. @@ -71,8 +72,11 @@ func newJSContextOpaque*(ctx: JSContext): JSContextOpaque = opaque.Object_prototype_valueOf = JS_GetPropertyStr(ctx, objproto, "valueOf") JS_FreeValue(ctx, objproto) block: - let u8actor = JS_GetPropertyStr(ctx, global, "Uint8Array") - opaque.Uint8Array_ctor = u8actor + opaque.Set_ctor = JS_GetPropertyStr(ctx, global, "Set") + assert not JS_IsException(opaque.Set_ctor) + block: + opaque.Uint8Array_ctor = JS_GetPropertyStr(ctx, global, "Uint8Array") + assert not JS_IsException(opaque.Uint8Array_ctor) for e in JSErrorEnum: let s = $e let err = JS_GetPropertyStr(ctx, global, cstring(s)) diff --git a/src/js/regex.nim b/src/js/regex.nim index fdc9e8e0..1d78f806 100644 --- a/src/js/regex.nim +++ b/src/js/regex.nim @@ -6,13 +6,7 @@ import bindings/quickjs import types/opt import utils/twtstr -export - LRE_FLAG_GLOBAL, - LRE_FLAG_IGNORECASE, - LRE_FLAG_MULTILINE, - LRE_FLAG_DOTALL, - LRE_FLAG_UTF16, - LRE_FLAG_STICKY +export LREFlags type Regex* = object @@ -34,13 +28,13 @@ var dummyContext = JS_NewContextRaw(dummyRuntime) func `$`*(regex: Regex): string = regex.buf -proc compileRegex*(buf: string, flags: int): Result[Regex, string] = +proc compileRegex*(buf: string, flags: LREFlags = {}): Result[Regex, string] = var error_msg_size = 64 var error_msg = newString(error_msg_size) prepareMutation(error_msg) var plen: cint let bytecode = lre_compile(addr plen, cstring(error_msg), - cint(error_msg_size), cstring(buf), csize_t(buf.len), cint(flags), + cint(error_msg_size), cstring(buf), csize_t(buf.len), flags.toCInt, dummyContext) if bytecode == nil: return err(error_msg.until('\0')) # Failed to compile. @@ -68,16 +62,16 @@ func countBackslashes(buf: string, i: int): int = # mnop -> ^mnop$ proc compileMatchRegex*(buf: string): Result[Regex, string] = if buf.len == 0: - return compileRegex(buf, 0) + return compileRegex(buf) if buf[0] == '^': - return compileRegex(buf, 0) + return compileRegex(buf) if buf[^1] == '$': # Check whether the final dollar sign is escaped. if buf.len == 1 or buf[^2] != '\\': - return compileRegex(buf, 0) + return compileRegex(buf) let j = buf.countBackslashes(buf.high - 2) if j mod 2 == 1: # odd, because we do not count the last backslash - return compileRegex(buf, 0) + return compileRegex(buf) # escaped. proceed as if no dollar sign was at the end if buf[^1] == '\\': # Check if the regex contains an invalid trailing backslash. @@ -87,38 +81,34 @@ proc compileMatchRegex*(buf: string): Result[Regex, string] = var buf2 = "^" buf2 &= buf buf2 &= "$" - return compileRegex(buf2, 0) - -proc compileSearchRegex*(str: string): Result[Regex, string] = - # Parse any applicable flags in regex/<flags>. The last forward slash is - # dropped when <flags> is empty, and interpreted as a character when the - # flags are is invalid. - - var i = str.high - var flagsi = -1 - while i >= 0: - case str[i] - of '/': - flagsi = i - break - of 'i', 'm', 's', 'u': discard - else: break # invalid flag - dec i - - var flags = LRE_FLAG_GLOBAL # for easy backwards matching - - if flagsi == -1: - return compileRegex(str, flags) - - for i in flagsi..str.high: - case str[i] - of '/': discard - of 'i': flags = flags or LRE_FLAG_IGNORECASE - of 'm': flags = flags or LRE_FLAG_MULTILINE - of 's': flags = flags or LRE_FLAG_DOTALL - of 'u': flags = flags or LRE_FLAG_UTF16 - else: assert false - return compileRegex(str.substr(0, flagsi - 1), flags) + return compileRegex(buf2) + +proc compileSearchRegex*(str: string, defaultFlags: LREFlags): + Result[Regex, string] = + # Emulate vim's \c/\C: override defaultFlags if one is found, then remove it + # from str. + var flags = defaultFlags + var s = newStringOfCap(str.len) + var quot = false + for c in str: + if quot: + quot = false + if c == 'c': + flags.incl(LRE_FLAG_IGNORECASE) + continue + elif c == 'C': + flags.excl(LRE_FLAG_IGNORECASE) + continue + else: + s &= '\\' + if c == '\\': + quot = true + else: + s &= c + if quot: + s &= '\\' + flags.incl(LRE_FLAG_GLOBAL) # for easy backwards matching + return compileRegex(s, flags) proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): RegexResult = let length = if length == -1: @@ -134,7 +124,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R let size = sizeof(ptr uint8) * captureCount * 2 capture = cast[ptr UncheckedArray[int]](alloc0(size)) var cstr = cstring(str) - let flags = lre_get_flags(bytecode) + let flags = lre_get_flags(bytecode).toLREFlags var start = start while true: let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode, @@ -151,7 +141,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R let s = capture[i * 2] - cstrAddress let e = capture[i * 2 + 1] - cstrAddress result.captures.add((s, e)) - if (flags and LRE_FLAG_GLOBAL) != 1: + if LRE_FLAG_GLOBAL notin flags: break if start >= str.len: break diff --git a/src/js/tojs.nim b/src/js/tojs.nim index c79d1bf2..757552eb 100644 --- a/src/js/tojs.nim +++ b/src/js/tojs.nim @@ -64,6 +64,7 @@ proc toJS*[U, V](ctx: JSContext, t: Table[U, V]): JSValue proc toJS*(ctx: JSContext, opt: Option): JSValue proc toJS*[T, E](ctx: JSContext, opt: Result[T, E]): JSValue proc toJS*(ctx: JSContext, s: seq): JSValue +proc toJS*[T](ctx: JSContext, s: set[T]): JSValue proc toJS*(ctx: JSContext, t: tuple): JSValue proc toJS*(ctx: JSContext, e: enum): JSValue proc toJS*(ctx: JSContext, j: JSValue): JSValue @@ -209,6 +210,16 @@ proc toJS(ctx: JSContext, s: seq): JSValue = return JS_EXCEPTION return a +proc toJS*[T](ctx: JSContext, s: set[T]): JSValue = + #TODO this is a bit lazy :p + var x = newSeq[T]() + for e in s: + x.add(e) + var a = toJS(ctx, x) + if JS_IsException(a): + return a + return JS_CallConstructor(ctx, ctx.getOpaque().Set_ctor, 1, addr a) + proc toJS(ctx: JSContext, t: tuple): JSValue = let a = JS_NewArray(ctx) if not JS_IsException(a): diff --git a/src/local/pager.nim b/src/local/pager.nim index aa546082..d30a0dbc 100644 --- a/src/local/pager.nim +++ b/src/local/pager.nim @@ -807,7 +807,8 @@ proc updateReadLineISearch(pager: Pager, linemode: LineMode) = pager.isearchpromise = nil of EDIT: if lineedit.news != "": - pager.iregex = compileSearchRegex(lineedit.news) + pager.iregex = compileSearchRegex(lineedit.news, + pager.config.search.default_flags) pager.container.popCursorPos(true) pager.container.pushCursorPos() if pager.iregex.isSome: @@ -857,7 +858,8 @@ proc updateReadLine*(pager: Pager) = of BUFFER: pager.container.readSuccess(lineedit.news) of SEARCH_F, SEARCH_B: if lineedit.news != "": - pager.regex = pager.checkRegex(compileSearchRegex(lineedit.news)) + pager.regex = pager.checkRegex(compileSearchRegex(lineedit.news, + pager.config.search.default_flags)) pager.reverseSearch = pager.linemode == SEARCH_B pager.searchNext() of GOTO_LINE: diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 80b6be65..1916fff5 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -713,3 +713,19 @@ proc makeCRLF*(s: string): string = result &= '\n' else: result &= s[i] + +func strictParseEnum*[T: enum](s: string): Opt[T] = + # cmp when len is small enough, otherwise hashmap + when {T.low..T.high}.len <= 4: + for e in T.low .. T.high: + if $e == s: + return ok(e) + else: + const tab = (func(): Table[string, T] = + result = initTable[string, T]() + for e in T.low .. T.high: + result[$e] = e + )() + if s in tab: + return ok(tab[s]) + return err() |