man: rewrite in Nim

Depending on Perl just for this is silly. Now we use libregexp for filtering basically the same things as w3mman2html did. This required another patch to QuickJS to avoid pulling in the entire JS engine, but in return, we can now run regexes without a dummy JS context global variable. Also, man.nim now tries to find a man command on the system even if it's not in /usr/bin/man.
author: bptato <nincsnevem662@gmail.com> 2024-03-13 15:21:05 +0100
committer: bptato <nincsnevem662@gmail.com> 2024-03-13 15:21:05 +0100
commit: 9ee1dd6e5167d9c2054dee5f9241e3bba286706f (patch)
tree: 4c36afa6c45f33f581206583fde23ca389a04a23
parent: 73909b09756a3ae2c987b3ef05d02b49c4f37eaa (diff)
download: chawan-9ee1dd6e5167d9c2054dee5f9241e3bba286706f.tar.gz
10 files changed, 349 insertions, 312 deletions
diff --git a/Makefile b/Makefile
index 2add0049..fa0b6ddc 100644
--- a/Makefile
+++ b/Makefile
@@ -38,6 +38,8 @@ else ifeq ($(TARGET),release1)
 FLAGS += -d:release --debugger:native
 endif
 
+QJSOBJ = $(OBJDIR)/quickjs
+
 .PHONY: all
 all: $(OUTDIR_BIN)/cha $(OUTDIR_BIN)/mancha $(OUTDIR_CGI_BIN)/http \
 	$(OUTDIR_CGI_BIN)/gmifetch $(OUTDIR_LIBEXEC)/gmi2html \
@@ -107,9 +109,13 @@ $(OUTDIR_CGI_BIN)/cha-finger: adapter/protocol/cha-finger
 	@mkdir -p $(OUTDIR_CGI_BIN)
 	cp adapter/protocol/cha-finger $(OUTDIR_CGI_BIN)
 
-$(OUTDIR_CGI_BIN)/man: adapter/protocol/man
+$(OUTDIR_CGI_BIN)/man: adapter/protocol/man.nim $(QJSOBJ)/libregexp.o \
+		$(QJSOBJ)/libunicode.o $(QJSOBJ)/cutils.o src/js/regex.nim \
+		src/bindings/libregexp.nim src/types/opt.nim src/utils/twtstr.nim
 	@mkdir -p $(OUTDIR_CGI_BIN)
-	cp adapter/protocol/man $(OUTDIR_CGI_BIN)
+	$(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/man" \
+		--passL:"$(QJSOBJ)/libregexp.o $(QJSOBJ)/cutils.o $(QJSOBJ)/libunicode.o" \
+		-o:"$(OUTDIR_CGI_BIN)/man" adapter/protocol/man.nim
 
 $(OUTDIR_CGI_BIN)/spartan: adapter/protocol/spartan
 	@mkdir -p $(OUTDIR_CGI_BIN)
@@ -164,7 +170,6 @@ $(OUTDIR_LIBEXEC)/urlenc: adapter/tools/urlenc.nim src/utils/twtstr.nim
 		-o:"$(OUTDIR_LIBEXEC)/urlenc" adapter/tools/urlenc.nim
 
 CFLAGS = -fwrapv -g -Wall -O2 -DCONFIG_VERSION=\"$(shell cat lib/quickjs/VERSION)\"
-QJSOBJ = $(OBJDIR)/quickjs
 
 # Dependencies
 $(QJSOBJ)/cutils.o: lib/quickjs/cutils.h
diff --git a/README.md b/README.md
index 4abb7dd7..f479c7dd 100644
--- a/README.md
+++ b/README.md
@@ -35,14 +35,13 @@ supported yet.)
 6. If you want manpages, run `make manpage`. (This requires pandoc to be
    installed.)
 7. Finally, install using `make install` (e.g. `sudo make install`).
-8. (Optional): install Perl so that the man page viewer (`mancha`) works too.
 
 Then, try:
 
 ```bash
 $ cha -V # open in visual mode for a list of default keybindings
 $ cha example.org # open your favorite website directly from the shell
-$ mancha cha # read the cha(1) man page using `mancha' (requires Perl)
+$ mancha cha # read the cha(1) man page using `mancha'
 ```
 
 ## Features
@@ -66,7 +65,7 @@ Currently implemented features are:
 * supports several protocols: HTTP(S), FTP, Gopher, Gemini, Finger, etc.
 * can load user-defined protocols/file formats using [local CGI](doc/localcgi.md),
   [urimethodmap](doc/urimethodmap.md) and [mailcap](doc/mailcap.md)
-* man page viewer (based on w3mman)
+* man page viewer (like w3mman)
 * mouse support
 
 ...with a lot more [planned](todo).
diff --git a/adapter/protocol/man b/adapter/protocol/man
deleted file mode 100755
index c322a6d2..00000000
--- a/adapter/protocol/man
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/usr/bin/env perl
-#
-# From w3m.
-#
-# Note that this script has licensing terms different from those of Chawan.
-# See /res/license.html#w3m for details.
-#
-# Usage: install perl, then look up man pages using:
-#
-# $ cha man:cha # view in any manual (man cha)
-# $ cha 'man:cha(1)' # view in a specific manual (man -s 1 cha)
-# $ cha man-k:cha # search in any manual (man -k cha)
-# $ cha 'man-k:cha(1)' # search in a specific manual (man -k cha -s 1)
-#
-# You may also use the `mancha` wrapper.
-
-$MAN = $ENV{'MANCHA_MAN'} || '/usr/bin/man';
-$QUERY = $ENV{'QUERY_STRING'} || $ARGV[0];
-$SCRIPT_NAME = $ENV{'SCRIPT_NAME'} || $0;
-$CGI = "man:";
-$CGI2 = "file:";
-# $CGI2 = "file:///\$LIB/hlink.cgi?";
-$SQUEEZE = 1;
-$ENV{'PAGER'} = 'cat';
-
-if ($QUERY =~ /^man-k:/) {
-  $QUERY =~ s/^man-k://;
-  my $keyword = &form_decode($QUERY);
-  my $sectionopt = "";
-  if ($keyword =~ s/(.*)\((\w+)\)$//) {
-    $keyword = $1;
-    $sectionopt = "-s $2 ";
-  }
-  my $k = &html_quote($keyword);
-  print <<EOF;
-Content-Type: text/html
-
-<html>
-<head><title>man $sectionopt-k $k</title></head>
-<body>
-<h2>man $sectionopt-k <b>$k</b></h2>
-<ul>
-EOF
-    $keyword =~ s:([^-\w\200-\377.,])::g;
-    open(F, "$MAN $sectionopt -k $keyword 2> /dev/null |");
-    while(<F>) {
-      chop;
-      $_ = &html_quote($_);
-      s/(\s+-.*)$//;
-      $title = $1;
-      s@(\w[\w.\-]*(\s*\,\s*\w[\w.\-]*)*)\s*(\([\dn]\w*\))@&keyword_ref($1, $3)@ge;
-      print "<li>$_$title\n";
-    }
-    close(F);
-    print <<EOF;
-</ul>
-</body>
-</html>
-EOF
-  exit;
-} elsif ($QUERY =~ /^man-l:/) {
-  $QUERY =~ s/^man-l://;
-  $file = &form_decode($QUERY);
-  open(F, "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 $MAN $file 2> /dev/null |");
-} else {
-  $QUERY =~ s/^man://;
-  my $man = &form_decode($QUERY);
-  if ($man =~ s/\((\w+)\)$//) {
-    $section = $1;
-    $man_section = "$man($1)";
-  } else {
-    $section = "";
-    $man_section = "$man";
-  }
-
-  $section =~ s:([^-\w\200-\377.,])::g;
-  $man =~ s:([^-\w\200-\377.,])::g;
-  open(F, "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 $MAN $section $man 2> /dev/null |");
-}
-
-$ok = 0;
-undef $header;
-$blank = -1;
-$cmd = "";
-$prev = "";
-while(<F>) {
-  if (! defined($header)) {
-    /^\s*$/ && next;
-    $header = $_;
-    $space = $header;
-    chop $space;
-    $space =~ s/\S.*//;
-  } elsif ($_ eq $header) {		# delete header
-    $blank = -1;
-    next;
-  } elsif (!/\010/ && /^$space[\w\200-\377].*\s\S/o) {	# delete footer
-    $blank = -1;
-    next;
-  }
-  if ($SQUEEZE) {
-    if (/^\s*$/) {
-      $blank || $blank++;
-      next;
-    } elsif ($blank) {
-      $blank > 0 && print "\n";
-      $blank = 0;
-    }
-  }
-
-  s/\&/\&amp;/g;
-  s/\</\&lt;/g;
-  s/\>/\&gt;/g;
-  # non ASCII UTF-8 codepoint
-  my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}";
-
-  s@($utf8)(\010\1)+@<b>$1</b>@g;
-  s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g;
-  s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g;
-  s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g;
-  s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g;
-  s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g;
-  s@.\010(.)@$1@g;
-
-  s@\</b\>\</u\>\<b\>_\</b\>\<u\>\<b\>@_@g;
-  s@\</u\>\<b\>_\</b\>\<u\>@_@g;
-  s@\</u\>\<u\>@@g;
-  s@\</b\>\<b\>@@g;
-
-  if (! $ok) {
-    /^No/ && last;
-    print <<EOF;
-Content-Type: text/html
-
-<html>
-<head><title>man $man_section</title></head>
-<body>
-<pre>
-EOF
-    print;
-    $ok = 1;
-    next;
-  }
-
-  s@(https?|ftp)://[\w.\-/~]+[\w/]@<a href="$&">$&</a>@g;
-  s@\b(mailto:|)(\w[\w.\-]*\@\w[\w.\-]*\.[\w.\-]*\w)@<a href="mailto:$2">$1$2</a>@g;
-  s@(\W)(\~?/[\w.][\w.\-/~]*)@$1 . &file_ref($2)@ge;
-  s@(include(<\/?[bu]\>|\s)*\&lt;)([\w.\-/]+)@$1 . &include_ref($3)@ge;
-  if ($prev && m@^\s*(\<[bu]\>)*(\w[\w.\-]*)(\</[bu]\>)*(\([\dm]\w*\))@) {
-    $cmd .= "$2$4";
-    $prev =~ s@(\w[\w.\-]*-)((\</[bu]\>)*\s*)$@<a href="$CGI$cmd">$1</a>$2@;
-    print $prev;
-    $prev = '';
-    s@^(\s*(\<[bu]\>)*)(\w[\w.\-]*)@@;
-    print "$1<a href=\"$CGI$cmd\">$3</a>";
-  } elsif ($prev) {
-    print $prev;
-    $prev = '';
-  }
-  s@(\w[\w.\-]*)((\</[bu]\>)*)(\([\dm]\w*\))@<a href="$CGI$1$4">$1</a>$2$4@g;
-  if (m@(\w[\w.\-]*)-(\</[bu]\>)*\s*$@) {
-    $cmd = $1;
-    $prev = $_;
-    next;
-  }
-  print;
-}
-if ($prev) {
-  print $prev;
-}
-close(F);
-if (! $ok) {
-  if ($file) {
-    print "Cha-Control: ConnectionError 4 file $file not found";
-  } else {
-    print "Cha-Control: ConnectionError 4 no manual entry for $man_section";
-  }
-  exit 1;
-}
-print <<EOF;
-</pre>
-</body>
-</html>
-EOF
-
-sub is_command {
-  local($_) = @_;
-  local($p);
-
-  (! -d && -x) || return 0;
-  if (! %PATH) {
-    for $p (split(":", $ENV{'PATH'})) {
-      $p =~ s@/+$@@;
-      $PATH{$p} = 1;
-    }
-  }
-  s@/[^/]*$@@;
-  return defined($PATH{$_});
-}
-
-sub file_ref {
-  local($_) = @_;
-
-  if (&is_command($_)) {
-    ($man = $_) =~ s@.*/@@;
-    return "<a href=\"$CGI$man\">$_</a>";
-  }
-  if (/^\~/ || -f || -d) {
-    ($file = $_) =~ s/^\~/$ENV{"HOME"}/;
-    return "<a href=\"$CGI2$file\">$_</a>";
-  }
-  return $_;
-}
-
-sub include_ref {
-  local($_) = @_;
-  local($d);
-
-  for $d (
-	"/usr/include",
-	"/usr/local/include",
-	"/usr/X11R6/include",
-	"/usr/X11/include",
-	"/usr/X/include",
-	"/usr/include/X11"
-  ) {
-    -f "$d/$_" && return "<a href=\"$CGI2$d/$_\">$_</a>";
-  }
-  return $_;
-}
-
-sub keyword_ref {
-  local($_, $s) = @_;
-  local(@a) = ();
-
-  for (split(/\s*,\s*/)) {
-    push(@a, "<a href=\"$CGI$_$s\">$_</a>");
-  }
-  return join(", ", @a) . $s;
-}
-
-sub html_quote {
-  local($_) = @_;
-  local(%QUOTE) = (
-    '<', '&lt;',
-    '>', '&gt;',
-    '&', '&amp;',
-    '"', '&quot;',
-  );
-  s/[<>&"]/$QUOTE{$&}/g;
-  return $_;
-}
-
-sub form_decode {
-  local($_) = @_;
-  s/\+/ /g;
-  s/%([\da-f][\da-f])/pack('c', hex($1))/egi;
-  return $_;
-}
diff --git a/adapter/protocol/man.nim b/adapter/protocol/man.nim
new file mode 100644
index 00000000..4fc418d4
--- /dev/null
+++ b/adapter/protocol/man.nim
@@ -0,0 +1,308 @@
+import std/os
+import std/posix
+import std/strutils
+import std/unicode
+
+import bindings/libregexp
+import js/regex
+import types/opt
+import utils/twtstr
+
+# Stand-alone replacement for the stack-overflow hook that quickjs.c normally
+# defines for libregexp. This binary has no JS runtime to consult, so every
+# call reports "no overflow" (0).
+proc lre_check_stack_overflow(opaque: pointer; alloca_size: csize_t): cint
+    {.exportc.} =
+  result = cint(0)
+
+proc parseSection(query: string): tuple[page, section: string] =
+  ## Splits a query of the form `page(section)` into its two parts.
+  ##
+  ## When the query does not end in ')', has no '(' at all, or the parentheses
+  ## are empty, the whole query is returned as the page with an empty section.
+  result = (query, "")
+  if query.len == 0 or query[^1] != ')':
+    return
+  # The section is whatever sits between the last '(' and the final ')'.
+  let open = query.rfind('(')
+  if open < 0:
+    return
+  let sec = query.substr(open + 1, query.high - 1)
+  if sec != "":
+    result = (query.substr(0, open - 1), sec)
+
+func processBackspace(line: string): string =
+  ## Converts one line of man output that uses backspace overstriking into
+  ## HTML: overstruck characters are wrapped in <b>/<u> tags and '&', '<' and
+  ## '>' are replaced by their entities.
+  ##
+  ## The function keeps one "pending" character (`thiscs`, a byte range into
+  ## `line`) together with the style it should be printed in. The pending
+  ## character is only appended to the output once the next character that is
+  ## not an overstrike of it has been seen.
+  var s = ""
+  var i = 0
+  # byte range of the pending character; `0 .. -1` is the empty range
+  var thiscs = 0 .. -1
+  # true if a backspace has been seen since the pending character was set
+  var bspace = false
+  # tags that are currently open in the output
+  var inU = false
+  var inB = false
+  # style requested for the pending character
+  var pendingInU = false
+  var pendingInB = false
+  # Emits the tag transitions needed to reach the pending style, then the
+  # pending character (escaped), and makes the character at `i` the new
+  # pending one. `n` is not defined here: it is resolved where the template is
+  # expanded (the current rune length inside the loop, 0 after the loop).
+  template flushChar =
+    if pendingInU != inU:
+      s &= (if inU: "</u>" else: "<u>")
+      inU = pendingInU
+    if pendingInB != inB:
+      s &= (if inB: "</b>" else: "<b>")
+      inB = pendingInB
+    if thiscs.len > 0:
+      let cs = case line[thiscs.a]
+      of '&': "&amp;"
+      of '<': "&lt;"
+      of '>': "&gt;"
+      else: line.substr(thiscs.a, thiscs.b)
+      s &= cs
+    thiscs = i ..< i + n
+    pendingInU = false
+    pendingInB = false
+  while i < line.len:
+    # this is the same "sometimes works" algorithm as in ansi2html
+    if line[i] == '\b' and thiscs.len > 0:
+      bspace = true
+      inc i
+      continue
+    let n = line.runeLenAt(i)
+    if thiscs.len == 0:
+      # nothing pending yet: this character becomes the pending one
+      thiscs = i ..< i + n
+      i += n
+      continue
+    if bspace and thiscs.len > 0:
+      # the current character overstrikes the pending one
+      if line[i] == '_' and not pendingInU and line[thiscs.a] != '_':
+        # character first, then underscore: underline
+        pendingInU = true
+      elif line[thiscs.a] == '_' and not pendingInU and line[i] != '_':
+        # underscore comes first; set thiscs to the current charseq
+        thiscs = i ..< i + n
+        pendingInU = true
+      elif line[i] == '_' and line[thiscs.a] == '_':
+        # underscore over underscore is ambiguous: the first overstrike is
+        # treated as bold, a second one as underline
+        if pendingInB:
+          pendingInU = true
+        else:
+          pendingInB = true
+      elif not pendingInB:
+        # any other overstrike is treated as bold; the two characters are not
+        # compared, and the first one stays the pending character
+        pendingInB = true
+      bspace = false
+    else:
+      flushChar
+    i += n
+  # `i` is now line.len, so with n = 0 the new pending range is empty
+  let n = 0
+  # emit the last pending character
+  flushChar
+  # nothing is pending any more; flush once more to close any open tags
+  pendingInU = false
+  pendingInB = false
+  flushChar
+  return s
+
+proc isCommand(paths: seq[string]; s: string): bool =
+  ## Returns true if `s` is the path of an existing file that lives directly
+  ## in one of the directories listed in `paths`.
+  ##
+  ## `paths` entries are expected to end in '/', which is how processManpage
+  ## builds them from $PATH. `s` is a complete path such as "/usr/bin/ls".
+  ##
+  ## Previously this tested `fileExists(p & s)`. Since `s` already contains
+  ## the directory, that produced paths like "/usr/bin//usr/bin/ls" and the
+  ## check never succeeded.
+  let slash = s.rfind('/')
+  if slash < 0:
+    # no directory part, so it cannot be matched against a PATH entry
+    return false
+  # keep the trailing slash so that the result compares equal to `paths` items
+  let dir = s.substr(0, slash)
+  dir in paths and fileExists(s)
+
+iterator myCaptures(captures: var seq[RegexCapture]; target: int;
+    offset: var int): RegexCapture =
+  ## Yields every capture whose group index equals `target`, after shifting its
+  ## start and end by the current value of `offset`. The shift is written back
+  ## into `captures`. `offset` is re-read for each capture, so the loop body
+  ## may update it between iterations.
+  for idx in 0 .. captures.high:
+    if captures[idx].i != target:
+      continue
+    captures[idx].s += offset
+    captures[idx].e += offset
+    yield captures[idx]
+
+proc processManpage(file: File) =
+  ## Reads formatted man output from `file` line by line, converts it to HTML
+  ## with processBackspace, turns URLs, mail addresses, existing file paths,
+  ## #include targets and `page(section)` references into links, and writes
+  ## the result to stdout.
+  ##
+  ## Runs of consecutive empty lines are collapsed into a single empty line.
+  # Compiles a pattern; on failure the pattern and the error message are
+  # written to stdout and processManpage returns.
+  template re(s: static string): Regex =
+    let r = s.compileRegex({LRE_FLAG_GLOBAL, LRE_FLAG_UTF16})
+    if r.isNone:
+      stdout.write(s & ": " & r.error)
+      return
+    r.get
+  # regexes partially from w3mman2html
+  let linkRe = re"(https?|ftp)://[\w/~.-]+"
+  let mailRe = re"(mailto:|)(\w[\w.-]*@[\w-]+\.[\w.-]*)"
+  let fileRe = re"(file:)?[/~][\w/~.-]+[\w/]"
+  let includeRe = re"#include(</?[bu]>|\s)*&lt;([\w./-]+)"
+  let manRe = re"(</?[bu]>)*(\w[\w.-]*)(</?[bu]>)*(\([0-9nlx]\w*\))"
+  # $PATH entries, each with trailing slashes trimmed and a single '/' appended
+  var paths: seq[string] = @[]
+  for p in getEnv("PATH").split(':'):
+    var i = p.high
+    while i > 0 and p[i] == '/':
+      dec i
+    paths.add(p.substr(0, i) & "/")
+  var line: string
+  var wasBlank = false
+  while file.readLine(line):
+    if line == "":
+      if wasBlank:
+        continue
+      wasBlank = true
+    else:
+      wasBlank = false
+    # from here on `line` is the HTML-escaped, tagged version of the input line
+    var line = line.processBackspace()
+    # In each pass below the capture positions refer to `line` as it was when
+    # exec ran. `offset` accumulates how much the replacements made so far in
+    # this pass have grown the line, so that later captures can be shifted.
+    if (var res = linkRe.exec(line); res.success):
+      var offset = 0
+      for cap in res.captures.myCaptures(0, offset):
+        let s = line[cap.s..<cap.e]
+        let link = "<a href='" & s & "'>" & s & "</a>"
+        line[cap.s..<cap.e] = link
+        offset += link.len - (cap.e - cap.s)
+    if (var res = mailRe.exec(line); res.success):
+      var offset = 0
+      # group 2 is the address without an optional "mailto:" prefix
+      for cap in res.captures.myCaptures(2, offset):
+        let s = line[cap.s..<cap.e]
+        let link = "<a href='mailto:" & s & "'>" & s & "</a>"
+        line[cap.s..<cap.e] = link
+        offset += link.len - (cap.e - cap.s)
+    if (var res = fileRe.exec(line); res.success):
+      var offset = 0
+      for cap in res.captures.myCaptures(0, offset):
+        let s = line[cap.s..<cap.e]
+        let target = s.expandTilde()
+        # only link paths that exist on this system
+        if not fileExists(target) and not symlinkExists(target) and
+            not dirExists(target):
+          continue
+        # commands link to their man page, everything else to the file itself
+        let link = if paths.isCommand(target):
+          "<a href='man:" & target.afterLast('/') & "'>" & s & "</a>"
+        else:
+          "<a href='file:" & target & "'>" & s & "</a>"
+        line[cap.s..<cap.e] = link
+        offset += link.len - (cap.e - cap.s)
+    if (var res = includeRe.exec(line); res.success):
+      var offset = 0
+      # group 2 is the header name following "#include &lt;"
+      for cap in res.captures.myCaptures(2, offset):
+        let s = line[cap.s..<cap.e]
+        const includePaths = [
+          "/usr/include/",
+          "/usr/local/include/",
+          "/usr/X11R6/include/",
+          "/usr/X11/include/",
+          "/usr/X/include/",
+          "/usr/include/X11/"
+        ]
+        # link to the first include directory that contains the header
+        for path in includePaths:
+          let file = path & s
+          if fileExists(file):
+            let link = "<a href='file:" & file & "'>" & s & "</a>"
+            line[cap.s..<cap.e] = link
+            offset += link.len - (cap.e - cap.s)
+            break
+    if (var res = manRe.exec(line); res.success):
+      var offset = 0
+      # exec appends one capture per group for every match, so for manRe each
+      # match occupies five consecutive entries (groups 0 to 4). Starting from
+      # the group 0 entry at index j, j + 2 is the page name and j + 4 is the
+      # parenthesized section.
+      for j, cap in res.captures.mpairs:
+        if cap.i == 0:
+          cap.s += offset
+          cap.e += offset
+          var manCap = res.captures[j + 2]
+          manCap.s += offset
+          manCap.e += offset
+          var secCap = res.captures[j + 4]
+          secCap.s += offset
+          secCap.e += offset
+          let man = line[manCap.s..<manCap.e]
+          let cat = man & line[secCap.s..<secCap.e]
+          # only the page name becomes the link text; the section stays as is
+          let link = "<a href='man:" & cat & "'>" & man & "</a>"
+          line[manCap.s..<manCap.e] = link
+          offset += link.len - (manCap.e - manCap.s)
+    stdout.write(line & "\n")
+
+proc doMan(man, keyword, section: string) =
+  ## Runs `man [-s section] keyword` and writes the page to stdout as an HTML
+  ## CGI response.
+  ##
+  ## `man` is the command used to invoke man(1); it is inserted into the shell
+  ## command verbatim (it may be "/usr/bin/env man"). `keyword` and `section`
+  ## come from the request URL. If man prints at most one line, that line is
+  ## reported as a `Cha-Control: ConnectionError 4` instead.
+  # The page name and section are attacker-controllable (any page can link to
+  # a man: URL), so they must be quoted before being handed to the shell.
+  # Surrounding whitespace is stripped first: unquoted, the shell's word
+  # splitting used to discard it (e.g. "cha " from a `man -k` result link).
+  let page = strutils.strip(keyword)
+  let sectionOpt = if section == "": "" else: " -s " & quoteShellPosix(section)
+  let cmd = "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 " &
+    man & sectionOpt & " " & quoteShellPosix(page) & " 2>&1"
+  let file = popen(cstring(cmd), "r")
+  if file == nil:
+    stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd)
+    return
+  var line0: string
+  # stderr is merged into stdout above, so an error shows up as a single line
+  if not file.readLine(line0) or file.endOfFile():
+    discard file.pclose()
+    if line0.startsWith("man: "):
+      line0 = line0.after(' ')
+    stdout.write("Cha-Control: ConnectionError 4 " & line0)
+    return
+  var manword = page
+  if section != "":
+    manword &= '(' & section & ')'
+  # escape the title: it is built from the request URL
+  stdout.write("""Content-Type: text/html
+
+<title>man """ & manword.htmlEscape() & """</title>
+<pre>""" & line0 & "\n")
+  file.processManpage()
+  discard file.pclose()
+
+proc doLocal(man, keyword: string) =
+  ## Runs `man -l keyword` (format a local man page file) and writes the
+  ## result to stdout as an HTML CGI response.
+  ##
+  ## `man` is the command used to invoke man(1) and is inserted verbatim;
+  ## `keyword` is the file path taken from the request URL.
+  # The path comes from the URL, so quote it before passing it to the shell.
+  let cmd = "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 " &
+    man & " -l " & quoteShellPosix(keyword) & " 2>/dev/null"
+  let file = popen(cstring(cmd), "r")
+  if file == nil:
+    stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd)
+    return
+  # ...and escape it before embedding it in the generated markup.
+  let shown = keyword.htmlEscape()
+  stdout.write("""Content-Type: text/html
+
+<title>man -l """ & shown & """</title>
+<h1>man -l <b>""" & shown & """</b></h1>
+<pre>""")
+  file.processManpage()
+  discard file.pclose()
+
+proc doKeyword(man, keyword, section: string) =
+  ## Runs `man [-s section] -k keyword` and writes the results to stdout as an
+  ## HTML list in which every page name links to the matching `man:` URL.
+  ##
+  ## `man` is the command used to invoke man(1) and is inserted verbatim;
+  ## `keyword` and `section` come from the request URL. Each output line is
+  ## expected to look like `name[, name...] (section) - description`. On the
+  ## first line that does not fit this shape an error is printed and
+  ## processing stops.
+  # keyword and section come from the URL: quote them for the shell...
+  let sectionOpt = if section == "": "" else: " -s " & quoteShellPosix(section)
+  let cmd = man & sectionOpt & " -k " & quoteShellPosix(keyword) &
+    " 2>/dev/null"
+  let file = popen(cstring(cmd), "r")
+  if file == nil:
+    stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd)
+    return
+  # close the pipe on every exit path, including `die`
+  defer: discard file.pclose()
+  # ...and escape them for the markup.
+  let shownOpt = (if section == "": "" else: " -s " & section).htmlEscape()
+  let shownKeyword = keyword.htmlEscape()
+  stdout.write("Content-Type: text/html\n\n")
+  stdout.write("<title>man" & shownOpt & " -k " & shownKeyword & "</title>\n")
+  stdout.write("<h1>man" & shownOpt & " -k <b>" & shownKeyword & "</b></h1>\n")
+  stdout.write("<ul>")
+  var line: string
+  template die =
+    stdout.write("Error parsing line! " & line.htmlEscape())
+    return
+  while file.readLine(line):
+    if line.len == 0:
+      stdout.write("\n")
+      continue
+    # collect titles
+    var titles: seq[string] = @[]
+    var i = 0
+    while true:
+      let title = line.until({'(', ','}, i)
+      i += title.len
+      titles.add(title)
+      if i >= line.len or line[i] == '(':
+        break
+      i = line.skipBlanks(i + 1)
+    # collect section
+    # The loop above also ends when the line runs out without a '(', in which
+    # case `i` is not a valid index.
+    if i >= line.len: die
+    let closeIdx = line.find(')', i)
+    if closeIdx < 0: die
+    let sectionText = line.substr(i, closeIdx)
+    i += sectionText.len
+    # create line
+    # for multiple sections, take first
+    var linkSection = sectionText.until(',')
+    if linkSection[^1] != ')':
+      linkSection &= ')'
+    let hrefSection = linkSection.htmlEscape()
+    var s = "<li>"
+    for idx, title in titles:
+      # The name usually carries the blank that precedes "(section)". Keep it
+      # out of the link target and text, but preserve it in the output.
+      let name = strutils.strip(title).htmlEscape()
+      let tail = title.substr(strutils.strip(title, leading = false).len)
+      s &= "<a href='man:" & name & hrefSection & "'>" & name & "</a>" & tail
+      if idx < titles.high:
+        s &= ", "
+    # the remaining text is raw man output, so it is escaped as well
+    s &= sectionText.htmlEscape()
+    s &= line.substr(i).htmlEscape()
+    s &= "\n"
+    stdout.write(s)
+  stdout.write("</ul>\n")
+
+proc main() =
+  ## CGI entry point: picks the man command to use, decodes QUERY_STRING and
+  ## dispatches on its scheme (`man:`, `man-k:` or `man-l:`).
+  var man = getEnv("MANCHA_MAN")
+  if man == "":
+    # No override given: use the first well-known location that exists, and
+    # fall back to a $PATH lookup through env.
+    man = "/usr/bin/env man"
+    for candidate in ["/usr/bin/man", "/bin/man", "/usr/local/bin/man"]:
+      if fileExists(candidate):
+        man = candidate
+        break
+    doAssert getAppFilename() != man # don't accidentally fork bomb ourselves
+  let query = percentDecode(getEnv("QUERY_STRING"))
+  if query.startsWith("man:"):
+    let parsed = parseSection(query.after(':'))
+    doMan(man, parsed.page, parsed.section)
+  elif query.startsWith("man-k:"):
+    let parsed = parseSection(query.after(':'))
+    doKeyword(man, parsed.page, parsed.section)
+  elif query.startsWith("man-l:"):
+    doLocal(man, query.after(':'))
+  else:
+    stdout.write("Cha-Control: ConnectionError 1 invalid invocation")
+
+main()
diff --git a/doc/cha.1 b/doc/cha.1
index 5af4319d..b5943465 100644
--- a/doc/cha.1
+++ b/doc/cha.1
@@ -1,4 +1,4 @@
-.TH CHA 1
+.TH cha 1
 .SH NAME
 cha - the Chawan text-mode browser
 .SH SYNOPSIS
diff --git a/doc/protocols.md b/doc/protocols.md
index 2f1c7afd..84a8dde1 100644
--- a/doc/protocols.md
+++ b/doc/protocols.md
@@ -125,8 +125,8 @@ and `about:license`.
 `man:`, `man-k:` and `man-l:` are wrappers around the commands `man`, `man -k`
 and `man -l`. These look up man pages using `/usr/bin/man` and turn on-page
 references into links. A wrapper command `mancha` also exists; this has an
-interface similar to `man`. Note that Perl is required for these protocols
-to work.
+interface similar to `man`. Note: this used to be based on w3mman2html.cgi, but
+it has been rewritten in Nim (and therefore no longer depends on Perl either).
 
 `data:` decodes a data URL as defined in RFC 2397.
 
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c
index a5e51d19..9c5e948c 100644
--- a/lib/quickjs/quickjs.c
+++ b/lib/quickjs/quickjs.c
@@ -43960,16 +43960,20 @@ fail:
 
 BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)
 {
+    if (!opaque)
+        return 0;
     JSContext *ctx = opaque;
     return js_check_stack_overflow(ctx->rt, alloca_size);
 }
 
+#if 0
 void *lre_realloc(void *opaque, void *ptr, size_t size)
 {
     JSContext *ctx = opaque;
     /* No JS exception is raised here */
     return js_realloc_rt(ctx->rt, ptr, size);
 }
+#endif
 
 static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
                               int argc, JSValueConst *argv)
diff --git a/res/license.html b/res/license.html
index e58a4876..405d6e2d 100644
--- a/res/license.html
+++ b/res/license.html
@@ -23,7 +23,6 @@ Table of contents:
 <LI><A HREF="#chawan">Chawan</A>
 <LI><A HREF="#quickjs">QuickJS</A>
 <LI><A HREF="#punycode">Punycode library</A>
-<LI><A HREF="#w3mman">w3mman</A>
 </UL>
 <H2 id=chawan>Chawan</H2>
 <PRE>
@@ -115,22 +114,5 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 </PRE>
 
-<H2 id=w3mman>w3mman</H2>
-Chawan uses a customized version of w3mman2html.cgi from w3m for viewing man
-pages. This script is distributed under the following terms:
-<PRE>
-(C) Copyright 1994-2002 by Akinori Ito
-(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai
-
-Use, modification and redistribution of this software is hereby granted,
-provided that this entire copyright notice is included on any copies of
-this software and applications and derivations thereof.
-
-This software is provided on an "as is" basis, without warranty of any
-kind, either expressed or implied, as to any matter including, but not
-limited to warranty of fitness of purpose, or merchantability, or
-results obtained from use of this software.
-</PRE>
-
 </BODY>
 </HTML>
diff --git a/src/js/regex.nim b/src/js/regex.nim
index fbbaf55f..e189b430 100644
--- a/src/js/regex.nim
+++ b/src/js/regex.nim
@@ -2,7 +2,6 @@
 import std/unicode
 
 import bindings/libregexp
-import bindings/quickjs
 import types/opt
 import utils/twtstr
 
@@ -13,35 +12,37 @@ type
     bytecode: seq[uint8]
     buf: string
 
+  RegexCapture* = tuple # start, end, index
+    s, e: int
+    i: int32
+
   RegexResult* = object
     success*: bool
-    captures*: seq[tuple[s, e: int]] # start, end
+    captures*: seq[RegexCapture]
 
   RegexReplace* = object
     regex: Regex
     rule: string
     global: bool
 
-var dummyRuntime = JS_NewRuntime()
-var dummyContext = JS_NewContextRaw(dummyRuntime)
-
 func `$`*(regex: Regex): string =
   regex.buf
 
-proc compileRegex*(buf: string, flags: LREFlags = {}): Result[Regex, string] =
-  var error_msg_size = 64
-  var error_msg = newString(error_msg_size)
-  prepareMutation(error_msg)
+# Allocator hook exported for libregexp. The call to it is hardcoded into
+# quickjs (whose own definition in quickjs.c is compiled out by this commit),
+# so we must override it here. `opaque` is ignored; the request is forwarded
+# to Nim's realloc, which is why compileRegex releases the bytecode with
+# dealloc.
+proc lre_realloc(opaque, p: pointer; size: csize_t): pointer {.exportc.} =
+  return realloc(p, size)
+
+proc compileRegex*(buf: string; flags: LREFlags = {}): Result[Regex, string] =
+  var errorMsg = newString(64)
   var plen: cint
-  let bytecode = lre_compile(addr plen, cstring(error_msg),
-    cint(error_msg_size), cstring(buf), csize_t(buf.len), flags.toCInt,
-    dummyContext)
+  let bytecode = lre_compile(addr plen, cstring(errorMsg), cint(errorMsg.len),
+    cstring(buf), csize_t(buf.len), flags.toCInt, nil)
   if bytecode == nil:
-    return err(error_msg.until('\0')) # Failed to compile.
+    return err(errorMsg.until('\0')) # Failed to compile.
   assert plen > 0
   var bcseq = newSeqUninitialized[uint8](plen)
   copyMem(addr bcseq[0], bytecode, plen)
-  dummyRuntime.js_free_rt(bytecode)
+  dealloc(bytecode)
   let regex = Regex(
     buf: buf,
     bytecode: bcseq
@@ -83,7 +84,7 @@ proc compileMatchRegex*(buf: string): Result[Regex, string] =
   buf2 &= "$"
   return compileRegex(buf2)
 
-proc compileSearchRegex*(str: string, defaultFlags: LREFlags):
+proc compileSearchRegex*(str: string; defaultFlags: LREFlags):
     Result[Regex, string] =
   # Emulate vim's \c/\C: override defaultFlags if one is found, then remove it
   # from str.
@@ -108,13 +109,13 @@ proc compileSearchRegex*(str: string, defaultFlags: LREFlags):
   flags.incl(LRE_FLAG_GLOBAL) # for easy backwards matching
   return compileRegex(s, flags)
 
-proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): RegexResult =
+proc exec*(regex: Regex; str: string; start = 0; length = -1; nocaps = false):
+    RegexResult =
   let length = if length == -1:
     str.len
   else:
     length
-  assert 0 <= start and start <= length
-
+  assert start in 0 .. length
   let bytecode = unsafeAddr regex.bytecode[0]
   let captureCount = lre_get_capture_count(bytecode)
   var capture: ptr UncheckedArray[int] = nil
@@ -126,7 +127,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R
   var start = start
   while true:
     let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode,
-      cast[ptr uint8](cstr), cint(start), cint(length), cint(3), dummyContext)
+      cast[ptr uint8](cstr), cint(start), cint(length), cint(3), nil)
     if ret != 1: #TODO error handling? (-1)
       break
     result.success = true
@@ -138,7 +139,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R
     for i in 0 ..< captureCount:
       let s = capture[i * 2] - cstrAddress
       let e = capture[i * 2 + 1] - cstrAddress
-      result.captures.add((s, e))
+      result.captures.add((s, e, i))
     if LRE_FLAG_GLOBAL notin flags:
       break
     if start >= str.len:
@@ -148,5 +149,5 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R
   if captureCount > 0:
     dealloc(capture)
 
-proc match*(regex: Regex, str: string, start = 0, length = str.len): bool =
+proc match*(regex: Regex; str: string; start = 0; length = str.len): bool =
   return regex.exec(str, start, length, nocaps = true).success
diff --git a/todo b/todo
index 6205e475..0dc4736d 100644
--- a/todo
+++ b/todo
@@ -80,12 +80,8 @@ images:
 - incremental decoding (maybe implement streams first?)
 - separate image decoder process? or just run on a different thread?
 man:
-- mancha:
-	* detect man directory automatically
-	* eventually rewrite in Nim
-- man pages:
-	* add a DOM -> man page converter so that we do not depend on pandoc
-	  for man page conversion
+* add a DOM -> man page converter so that we do not depend on pandoc
+  for man page conversion
 gmifetch:
 - rewrite in Nim
 etc:
author	bptato <nincsnevem662@gmail.com>	2024-03-13 15:21:05 +0100
committer	bptato <nincsnevem662@gmail.com>	2024-03-13 15:21:05 +0100
commit	9ee1dd6e5167d9c2054dee5f9241e3bba286706f (patch)
tree	4c36afa6c45f33f581206583fde23ca389a04a23
parent	73909b09756a3ae2c987b3ef05d02b49c4f37eaa (diff)
download	chawan-9ee1dd6e5167d9c2054dee5f9241e3bba286706f.tar.gz