7 files changed, 266 insertions, 59 deletions
diff --git a/bonus/w3m.toml b/bonus/w3m.toml
index 63b07ecc..5a92b8c8 100644
--- a/bonus/w3m.toml
+++ b/bonus/w3m.toml
@@ -24,7 +24,7 @@ C-a = 'pager.cursorLineBegin()'
 '$' = 'pager.cursorLineEnd()'
 C-e = 'pager.cursorLineEnd()'
 w = 'pager.cursorNextWord()'
-W = 'pager.cursorPrevWord()'
+W = 'pager.cursorWordBegin()'
 '<' = 'n => pager.pageLeft(n)'
 '>' = 'n => pager.pageRight(n)'
 '.' = 'n => pager.scrollLeft(n)'
diff --git a/doc/config.md b/doc/config.md
index dcc63d6c..cf5cf744 100644
--- a/doc/config.md
+++ b/doc/config.md
@@ -43,6 +43,10 @@ examples.
 * [Appendix](#appendix)
    * [Regex handling](#regex-handling)
    * [Path handling](#path-handling)
+   * [Word types](#word-types)
+     * [w3m word](#w3m-word)
+     * [vi word](#vi-word)
+     * [Big word](#big-word)
 
 <!-- MANON -->
 
@@ -122,7 +126,7 @@ Following is a list of encoding options:
 
 <tr>
 <td>document-charset</td>
-<td>string/array</td>
+<td>array of charset label strings</td>
 <td>List of character sets for loading documents.<br>
 All listed character sets are enumerated until the document has been decoded
 without errors. In HTML, meta tags and the BOM may override this with a
@@ -315,19 +319,17 @@ Following is a list of display options:
 
 <tr>
 <td>color-mode</td>
-<td>"monochrome"/"ansi"/"eight-bit","8bit"/"true-color","24bit"/"auto"</td>
+<td>"monochrome" / "ansi" / "eight-bit" / "true-color" / "auto"</td>
 <td>Set the color mode. "auto" for automatic detection, "monochrome"
 for black on white, "ansi" for ansi colors, "eight-bit" for 256-color mode, and
 "true-color" for true colors.<br>
-"8bit" is a legacy alias of "eight-bit". "24bit" is a legacy alias of
-"true-color". (The only difference is that when overriding these values with
-the `-o` command line switch, you can use "eight-bit" and "true-color"
-without quoting.)</td>
+"8bit" is accepted as a legacy alias of "eight-bit". "24bit" is accepted as
+a legacy alias of "true-color".</td>
 </tr>
 
 <tr>
 <td>format-mode</td>
-<td>"auto"/["bold", "italic", "underline", "reverse", "strike", "overline",
+<td>"auto" / ["bold", "italic", "underline", "reverse", "strike", "overline",
 "blink"]</td>
 <td>Specifies output formatting modes. Accepts the string "auto" or an array
 of specific attributes. An empty array (`[]`) disables formatting
@@ -349,7 +351,7 @@ overlines are substituted by underlines on the previous line.</td>
 
 <tr>
 <td>alt-screen</td>
-<td>"auto"/boolean</td>
+<td>"auto" / boolean</td>
 <td>Enable/disable the alternative screen.</td>
 </tr>
 
@@ -529,7 +531,7 @@ false for all websites.</td>
 
 <tr>
 <td>third-party-cookie</td>
-<td>regex/array of regexes</td>
+<td>array of regexes</td>
 <td>Domains for which third-party cookies are allowed on this domain. Note:
 this only works for buffers which share the same cookie jar.<br>
 Note: regexes are handled according to the [regex handling](#regex-handling)
@@ -705,13 +707,29 @@ Note: this does not suspend buffer processes.</td>
 </tr>
 
 <tr>
-<td>`pager.cursorNextWord()`</td>
-<td>Move the cursor to the beginning of the next word.</td>
+<td>`pager.cursorNextWord()`, `pager.cursorNextViWord()`,
+`pager.cursorNextBigWord()`</td>
+<td>Move the cursor to the beginning of the next [word](#word-types).</td>
 </tr>
 
 <tr>
-<td>`pager.cursorPrevWord()`</td>
-<td>Move the cursor to the end of the previous word.</td>
+<td>`pager.cursorPrevWord()`, `pager.cursorPrevViWord()`,
+`pager.cursorPrevBigWord()`</td>
+<td>Move the cursor to the end of the previous [word](#word-types).</td>
+</tr>
+
+<tr>
+<td>`pager.cursorWordEnd()`, `pager.cursorViWordEnd()`,
+`pager.cursorBigWordEnd()`</td>
+<td>Move the cursor to the end of the current [word](#word-types), or if already
+there, to the end of the next word.</td>
+</tr>
+
+<tr>
+<td>`pager.cursorWordBegin()`, `pager.cursorViWordBegin()`,
+`pager.cursorBigWordBegin()`</td>
+<td>Move the cursor to the beginning of the current [word](#word-types), or if
+already there, to the beginning of the previous word.</td>
 </tr>
 
 <tr>
@@ -853,7 +871,7 @@ buffer.</td>
 
 <tr>
 <td>`pager.toggleSource()`</td>
-<td>If viewing a HTML buffer, open a new buffer with its source. Otherwise,
+<td>If viewing an HTML buffer, open a new buffer with its source. Otherwise,
 open the current buffer's contents as HTML.</td>
 </tr>
 
@@ -1131,12 +1149,12 @@ value is `false`.</td>
 </tr>
 
 <tr>
-<td>`line.clearWord(bounds)`</td>
+<td>`line.clearWord()`</td>
 <td>Delete word before cursor</td>
 </tr>
 
 <tr>
-<td>`line.killWord(bounds)`</td>
+<td>`line.killWord()`</td>
 <td>Delete word after cursor</td>
 </tr>
 
@@ -1151,12 +1169,12 @@ value is `false`.</td>
 </tr>
 
 <tr>
-<td>`line.prevWord(bounds)`</td>
+<td>`line.prevWord()`</td>
 <td>Move cursor to the previous word by one character</td>
 </tr>
 
 <tr>
-<td>`line.nextWord(bounds)`</td>
+<td>`line.nextWord()`</td>
 <td>Move cursor to the previous word by one character</td>
 </tr>
 
@@ -1187,10 +1205,11 @@ value is `false`.</td>
 
 </table>
 
-Some of these entries have an optional `bounds` parameter. If passed, this
-must be a JavaScript function that expects one parameter (the current
-unicode character), and returns true if the passed character should count
-as a word boundary.
+Note: to facilitate URL editing, the line editor has a different definition
+of what a word is than the pager. For the line editor, a word is either a
+sequence of alphanumeric characters, or any single non-alphanumeric
+character. (This means that e.g. `https://` consists of four words: `https`,
+`:`, `/` and `/`.)
 
 ```Examples:
 # Control+A moves the cursor to the beginning of the line.
@@ -1199,9 +1218,6 @@ as a word boundary.
 # Escape+D deletes everything after the cursor until it reaches a word-breaking
 # character.
 'M-d' = 'line.killWord()'
-
-# Control+W deletes everything before the cursor until it reaches a space. 
-'C-w' = 'line.clearWord(x => x == " ")'
 ```
 
 ## Appendix
@@ -1237,6 +1253,39 @@ using the syntax `${%VARIABLE}`:
 * `${%CHA_LIBEXEC_DIR}`: the directory for all executables Chawan uses
   for operation. By default, this is `${%CHA_BIN_DIR}/../libexec/chawan`.
 
+### Word types
+
+Word-based pager commands can operate with different definitions of
+words. Currently, these are:
+
+* w3m words
+* vi words
+* Big words
+
+#### w3m word
+
+A w3m word is a sequence of alphanumeric characters. Symbols are treated
+in the same way as whitespace.
+
+#### vi word
+
+A vi word is a sequence of alphanumeric characters, OR a sequence of symbols.
+
+vi words may be separated by whitespace; however, symbolic and alphanumeric
+vi words do not have to be whitespace-separated. For example, the
+following character sequence contains two words:
+
+```
+hello[]+{}@`!
+```
+
+#### Big word
+
+A big word is a sequence of non-whitespace characters.
+
+It is essentially the same as a w3m word, except that symbols count as part
+of the word instead of being treated as whitespace.
+
 <!-- MANON
 
 ## See also
diff --git a/doc/localcgi.md b/doc/localcgi.md
index 206a3ef7..1a0d9021 100644
--- a/doc/localcgi.md
+++ b/doc/localcgi.md
@@ -145,7 +145,7 @@ original URL. Then:
 * `MAPPED_URI_PASSWORD=` the password part, in this case `password`. If no
   password was specified, the variable is set to the empty string.
 * `MAPPED_URI_HOST=` the host part, in this case `host.org` If no host was
-  specified, the variable is set to the empty string. (An example of an URL
+  specified, the variable is set to the empty string. (An example of a URL
   with no host: `about:blank`, here `blank` is the path name.)
 * `MAPPED_URI_PORT=` the port, in this case `1234`. If no port was specified,
   the variable is set to the empty string. (In this case, the CGI script is
diff --git a/res/config.toml b/res/config.toml
index 85ac9aa8..13691234 100644
--- a/res/config.toml
+++ b/res/config.toml
@@ -84,8 +84,12 @@ l = 'n => pager.cursorRight(n)'
 '0' = 'pager.cursorLineBegin()'
 '^' = 'pager.cursorLineTextStart()'
 '$' = 'pager.cursorLineEnd()'
-b = 'pager.cursorPrevWord()'
-w = 'pager.cursorNextWord()'
+b = 'pager.cursorViWordBegin()'
+e = 'pager.cursorViWordEnd()'
+w = 'pager.cursorNextViWord()'
+B = 'pager.cursorBigWordBegin()'
+E = 'pager.cursorBigWordEnd()'
+W = 'pager.cursorNextBigWord()'
 '[' = 'n => pager.cursorPrevLink(n)'
 ']' = 'n => pager.cursorNextLink(n)'
 '{' = 'n => pager.cursorPrevParagraph(n)'
diff --git a/src/display/lineedit.nim b/src/display/lineedit.nim
index 7eef9bfb..7a5067eb 100644
--- a/src/display/lineedit.nim
+++ b/src/display/lineedit.nim
@@ -208,50 +208,50 @@ proc forward(edit: LineEdit) {.jsfunc.} =
     if edit.cursorx >= edit.shiftx + edit.maxwidth:
       edit.invalid = true
 
-proc prevWord(edit: LineEdit, check = opt(BoundaryFunction)) {.jsfunc.} =
+proc prevWord(edit: LineEdit) {.jsfunc.} =
   if edit.cursori == 0:
     return
   let (r, len) = edit.news.lastRune(edit.cursori - 1)
-  if r.breaksWord(check):
+  if r.breaksWord():
     edit.cursori -= len
     edit.cursorx -= r.width()
   while edit.cursori > 0:
     let (r, len) = edit.news.lastRune(edit.cursori - 1)
-    if r.breaksWord(check):
+    if r.breaksWord():
       break
     edit.cursori -= len
     edit.cursorx -= r.width()
   if edit.cursorx < edit.shiftx:
     edit.invalid = true
 
-proc nextWord(edit: LineEdit, check = opt(BoundaryFunction)) {.jsfunc.} =
+proc nextWord(edit: LineEdit) {.jsfunc.} =
   if edit.cursori >= edit.news.len:
     return
   let oc = edit.cursori
   var r: Rune
   fastRuneAt(edit.news, edit.cursori, r)
-  if r.breaksWord(check):
+  if r.breaksWord():
     edit.cursorx += r.width()
   else:
     edit.cursori = oc
   while edit.cursori < edit.news.len:
     let pc = edit.cursori
     fastRuneAt(edit.news, edit.cursori, r)
-    if r.breaksWord(check):
+    if r.breaksWord():
       edit.cursori = pc
       break
     edit.cursorx += r.width()
   if edit.cursorx >= edit.shiftx + edit.maxwidth:
     edit.invalid = true
 
-proc clearWord(edit: LineEdit, check = opt(BoundaryFunction)) {.jsfunc.} =
+proc clearWord(edit: LineEdit) {.jsfunc.} =
   let oc = edit.cursori
-  edit.prevWord(check)
+  edit.prevWord()
   if oc != edit.cursori:
     edit.news.delete(edit.cursori .. oc - 1)
     edit.invalid = true
 
-proc killWord(edit: LineEdit, check = opt(BoundaryFunction)) {.jsfunc.} =
+proc killWord(edit: LineEdit) {.jsfunc.} =
   if edit.cursori >= edit.news.len:
     return
   let oc = edit.cursori
diff --git a/src/local/container.nim b/src/local/container.nim
index 83ba16f0..fe6e30e5 100644
--- a/src/local/container.nim
+++ b/src/local/container.nim
@@ -655,28 +655,140 @@ proc cursorLineBegin(container: Container) {.jsfunc.} =
 proc cursorLineEnd(container: Container) {.jsfunc.} =
   container.setCursorX(container.currentLineWidth() - 1)
 
-proc cursorNextWord(container: Container) {.jsfunc.} =
+type BreakFunc = proc(r: Rune): BreakCategory {.nimcall.}
+
+proc cursorNextWord(container: Container, breakFunc: BreakFunc) =
   if container.numLines == 0: return
   var r: Rune
   var b = container.currentCursorBytes()
   var x = container.cursorx
+  # category of the rune under the cursor (space if past the line end)
+  let currentCat = if b < container.currentLine.len:
+    container.currentLine.runeAt(b).breakFunc()
+  else:
+    BREAK_SPACE
+  if currentCat != BREAK_SPACE:
+    # not in space, skip chars that have the same category
+    while b < container.currentLine.len:
+      let pb = b
+      fastRuneAt(container.currentLine, b, r)
+      if r.breakFunc() != currentCat:
+        b = pb
+        break
+      x += r.twidth(x)
+
+  # skip space
   while b < container.currentLine.len:
     let pb = b
     fastRuneAt(container.currentLine, b, r)
-    if r.breaksWord():
+    if r.breakFunc() != BREAK_SPACE:
       b = pb
       break
     x += r.twidth(x)
 
+  if b < container.currentLine.len:
+    container.setCursorX(x)
+  else:
+    if container.cursory < container.numLines - 1:
+      container.cursorDown()
+      container.cursorLineBegin()
+    else:
+      container.cursorLineEnd()
+
+proc cursorNextWord(container: Container) {.jsfunc.} =
+  container.cursorNextWord(breaksWordCat)
+
+proc cursorNextViWord(container: Container) {.jsfunc.} =
+  container.cursorNextWord(breaksViWordCat)
+
+proc cursorNextBigWord(container: Container) {.jsfunc.} =
+  container.cursorNextWord(breaksBigWordCat)
+
+proc cursorPrevWord(container: Container, breakFunc: BreakFunc) =
+  if container.numLines == 0: return
+  var b = container.currentCursorBytes()
+  var x = container.cursorx
+  if container.currentLine.len > 0:
+    b = min(b, container.currentLine.len - 1)
+    let currentCat = if b >= 0:
+      container.currentLine.runeAt(b).breakFunc()
+    else:
+      BREAK_SPACE
+    if currentCat != BREAK_SPACE:
+      # not in space, skip chars that have the same category
+      while b >= 0:
+        let (r, o) = lastRune(container.currentLine, b)
+        if r.breakFunc() != currentCat:
+          break
+        b -= o
+        x -= r.twidth(x)
+
+    # skip space
+    while b >= 0:
+      let (r, o) = lastRune(container.currentLine, b)
+      if r.breakFunc() != BREAK_SPACE:
+        break
+      b -= o
+      x -= r.twidth(x)
+  else:
+    b = -1
+
+  if b >= 0:
+    container.setCursorX(x)
+  else:
+    if container.cursory > 0:
+      container.cursorUp()
+      container.cursorLineEnd()
+    else:
+      container.cursorLineBegin()
+
+proc cursorPrevWord(container: Container) {.jsfunc.} =
+  container.cursorPrevWord(breaksWordCat)
+
+proc cursorPrevViWord(container: Container) {.jsfunc.} =
+  container.cursorPrevWord(breaksViWordCat)
+
+proc cursorPrevBigWord(container: Container) {.jsfunc.} =
+  container.cursorPrevWord(breaksBigWordCat)
+
+proc cursorWordEnd(container: Container, breakFunc: BreakFunc) =
+  if container.numLines == 0: return
+  var r: Rune
+  var b = container.currentCursorBytes()
+  var x = container.cursorx
+  var px = x
+  # if not in space, move to the right by one
+  if b < container.currentLine.len:
+    let pb = b
+    fastRuneAt(container.currentLine, b, r)
+    if r.breakFunc() == BREAK_SPACE:
+      b = pb
+    else:
+      px = x
+      x += r.twidth(x)
+
+  # skip space
   while b < container.currentLine.len:
     let pb = b
     fastRuneAt(container.currentLine, b, r)
-    if not r.breaksWord():
+    if r.breakFunc() != BREAK_SPACE:
       b = pb
       break
     x += r.twidth(x)
 
   if b < container.currentLine.len:
+    let currentCat = container.currentLine.runeAt(b).breakFunc()
+    while b < container.currentLine.len:
+      let pb = b
+      fastRuneAt(container.currentLine, b, r)
+      if r.breakFunc() != currentCat:
+        b = pb
+        break
+      px = x
+      x += r.twidth(x)
+    x = px
+
+  if b < container.currentLine.len:
     container.setCursorX(x)
   else:
     if container.cursory < container.numLines - 1:
@@ -685,25 +797,50 @@ proc cursorNextWord(container: Container) {.jsfunc.} =
     else:
       container.cursorLineEnd()
 
-proc cursorPrevWord(container: Container) {.jsfunc.} =
+proc cursorWordEnd(container: Container) {.jsfunc.} =
+  container.cursorWordEnd(breaksWordCat)
+
+proc cursorViWordEnd(container: Container) {.jsfunc.} =
+  container.cursorWordEnd(breaksViWordCat)
+
+proc cursorBigWordEnd(container: Container) {.jsfunc.} =
+  container.cursorWordEnd(breaksBigWordCat)
+
+proc cursorWordBegin(container: Container, breakFunc: BreakFunc) =
   if container.numLines == 0: return
   var b = container.currentCursorBytes()
   var x = container.cursorx
+  var px = x
   if container.currentLine.len > 0:
     b = min(b, container.currentLine.len - 1)
-    while b >= 0:
+    if b >= 0:
       let (r, o) = lastRune(container.currentLine, b)
-      if r.breaksWord():
-        break
-      b -= o
-      x -= r.twidth(x)
+      # if not in space, move to the left by one
+      if r.breakFunc() != BREAK_SPACE:
+        b -= o
+        px = x
+        x -= r.twidth(x)
 
+    # skip space
     while b >= 0:
       let (r, o) = lastRune(container.currentLine, b)
-      if not r.breaksWord():
+      if r.breakFunc() != BREAK_SPACE:
         break
       b -= o
       x -= r.twidth(x)
+
+    # move to the last char in the current category
+    if b >= 0:
+      let (r, _) = lastRune(container.currentLine, b)
+      let currentCat = r.breakFunc()
+      while b >= 0:
+        let (r, o) = lastRune(container.currentLine, b)
+        if r.breakFunc() != currentCat:
+          break
+        b -= o
+        px = x
+        x -= r.twidth(x)
+    x = px
   else:
     b = -1
 
@@ -716,6 +853,15 @@ proc cursorPrevWord(container: Container) {.jsfunc.} =
     else:
       container.cursorLineBegin()
 
+proc cursorWordBegin(container: Container) {.jsfunc.} =
+  container.cursorWordBegin(breaksWordCat)
+
+proc cursorViWordBegin(container: Container) {.jsfunc.} =
+  container.cursorWordBegin(breaksViWordCat)
+
+proc cursorBigWordBegin(container: Container) {.jsfunc.} =
+  container.cursorWordBegin(breaksBigWordCat)
+
 proc pageDown(container: Container, n = 1) {.jsfunc.} =
   container.setFromY(container.fromy + container.height * n)
   container.setCursorY(container.cursory + container.height * n)
diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim
index 32e8ca7f..5ce2ba52 100644
--- a/src/utils/strwidth.nim
+++ b/src/utils/strwidth.nim
@@ -2,8 +2,6 @@ import std/strutils
 import std/unicode
 
 import utils/proptable
-import js/error
-import types/opt
 import utils/charcategory
 import utils/map
 
@@ -101,15 +99,25 @@ func padToWidth*(str: string, size: int, schar = '$'): string =
 func isDigitAscii(r: Rune): bool =
   return uint32(r) < 128 and char(r) in AsciiDigit
 
+type BreakCategory* = enum
+  BREAK_ALPHA, BREAK_SPACE, BREAK_SYMBOL
+
 func breaksWord*(r: Rune): bool =
   return not (r.isDigitAscii() or r.width() == 0 or r.isAlpha())
 
-type BoundaryFunction* = proc(x: Rune): JSResult[bool]
-
-proc breaksWord*(r: Rune, check: Opt[BoundaryFunction]): bool =
-  if check.isSome:
-    let f = check.get()
-    let v = f(r)
-    if v.isSome: #TODO report error?
-      return v.get()
-  return r.breaksWord()
+func breaksViWordCat*(r: Rune): BreakCategory =
+  if r.isWhiteSpace():
+    return BREAK_SPACE
+  elif r.breaksWord():
+    return BREAK_SYMBOL
+  return BREAK_ALPHA
+
+func breaksWordCat*(r: Rune): BreakCategory =
+  if not r.breaksWord():
+    return BREAK_ALPHA
+  return BREAK_SPACE
+
+func breaksBigWordCat*(r: Rune): BreakCategory =
+  if not r.isWhiteSpace():
+    return BREAK_ALPHA
+  return BREAK_SPACE