diff options
author | bptato <nincsnevem662@gmail.com> | 2024-03-13 15:21:05 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-03-13 15:21:05 +0100 |
commit | 9ee1dd6e5167d9c2054dee5f9241e3bba286706f (patch) | |
tree | 4c36afa6c45f33f581206583fde23ca389a04a23 | |
parent | 73909b09756a3ae2c987b3ef05d02b49c4f37eaa (diff) | |
download | chawan-9ee1dd6e5167d9c2054dee5f9241e3bba286706f.tar.gz |
man: rewrite in Nim
Depending on Perl just for this is silly. Now we use libregexp for filtering basically the same things as w3mman2html did. This required another patch to QuickJS to avoid pulling in the entire JS engine, but in return, we can now run regexes without a dummy JS context global variable. Also, man.nim now tries to find a man command on the system even if it's not in /usr/bin/man.
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | README.md | 5 | ||||
-rwxr-xr-x | adapter/protocol/man | 258 | ||||
-rw-r--r-- | adapter/protocol/man.nim | 308 | ||||
-rw-r--r-- | doc/cha.1 | 2 | ||||
-rw-r--r-- | doc/protocols.md | 4 | ||||
-rw-r--r-- | lib/quickjs/quickjs.c | 4 | ||||
-rw-r--r-- | res/license.html | 18 | ||||
-rw-r--r-- | src/js/regex.nim | 43 | ||||
-rw-r--r-- | todo | 8 |
10 files changed, 349 insertions, 312 deletions
diff --git a/Makefile b/Makefile index 2add0049..fa0b6ddc 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,8 @@ else ifeq ($(TARGET),release1) FLAGS += -d:release --debugger:native endif +QJSOBJ = $(OBJDIR)/quickjs + .PHONY: all all: $(OUTDIR_BIN)/cha $(OUTDIR_BIN)/mancha $(OUTDIR_CGI_BIN)/http \ $(OUTDIR_CGI_BIN)/gmifetch $(OUTDIR_LIBEXEC)/gmi2html \ @@ -107,9 +109,13 @@ $(OUTDIR_CGI_BIN)/cha-finger: adapter/protocol/cha-finger @mkdir -p $(OUTDIR_CGI_BIN) cp adapter/protocol/cha-finger $(OUTDIR_CGI_BIN) -$(OUTDIR_CGI_BIN)/man: adapter/protocol/man +$(OUTDIR_CGI_BIN)/man: adapter/protocol/man.nim $(QJSOBJ)/libregexp.o \ + $(QJSOBJ)/libunicode.o $(QJSOBJ)/cutils.o src/js/regex.nim \ + src/bindings/libregexp.nim src/types/opt.nim src/utils/twtstr.nim @mkdir -p $(OUTDIR_CGI_BIN) - cp adapter/protocol/man $(OUTDIR_CGI_BIN) + $(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/man" \ + --passL:"$(QJSOBJ)/libregexp.o $(QJSOBJ)/cutils.o $(QJSOBJ)/libunicode.o" \ + -o:"$(OUTDIR_CGI_BIN)/man" adapter/protocol/man.nim $(OUTDIR_CGI_BIN)/spartan: adapter/protocol/spartan @mkdir -p $(OUTDIR_CGI_BIN) @@ -164,7 +170,6 @@ $(OUTDIR_LIBEXEC)/urlenc: adapter/tools/urlenc.nim src/utils/twtstr.nim -o:"$(OUTDIR_LIBEXEC)/urlenc" adapter/tools/urlenc.nim CFLAGS = -fwrapv -g -Wall -O2 -DCONFIG_VERSION=\"$(shell cat lib/quickjs/VERSION)\" -QJSOBJ = $(OBJDIR)/quickjs # Dependencies $(QJSOBJ)/cutils.o: lib/quickjs/cutils.h diff --git a/README.md b/README.md index 4abb7dd7..f479c7dd 100644 --- a/README.md +++ b/README.md @@ -35,14 +35,13 @@ supported yet.) 6. If you want manpages, run `make manpage`. (This requires pandoc to be installed.) 7. Finally, install using `make install` (e.g. `sudo make install`). -8. (Optional): install Perl so that the man page viewer (`mancha`) works too. 
Then, try: ```bash $ cha -V # open in visual mode for a list of default keybindings $ cha example.org # open your favorite website directly from the shell -$ mancha cha # read the cha(1) man page using `mancha' (requires Perl) +$ mancha cha # read the cha(1) man page using `mancha' ``` ## Features @@ -66,7 +65,7 @@ Currently implemented features are: * supports several protocols: HTTP(S), FTP, Gopher, Gemini, Finger, etc. * can load user-defined protocols/file formats using [local CGI](doc/localcgi.md), [urimethodmap](doc/urimethodmap.md) and [mailcap](doc/mailcap.md) -* man page viewer (based on w3mman) +* man page viewer (like w3mman) * mouse support ...with a lot more [planned](todo). diff --git a/adapter/protocol/man b/adapter/protocol/man deleted file mode 100755 index c322a6d2..00000000 --- a/adapter/protocol/man +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env perl -# -# From w3m. -# -# Note that this script has licensing terms different from those of Chawan. -# See /res/license.html#w3m for details. -# -# Usage: install perl, then look up man pages using: -# -# $ cha man:cha # view in any manual (man cha) -# $ cha 'man:cha(1)' # view in a specific manual (man -s 1 cha) -# $ cha man-k:cha # search in any manual (man -k cha) -# $ cha 'man-k:cha(1)' # search in a specific manual (man -k cha -s 1) -# -# You may also use the `mancha` wrapper. 
- -$MAN = $ENV{'MANCHA_MAN'} || '/usr/bin/man'; -$QUERY = $ENV{'QUERY_STRING'} || $ARGV[0]; -$SCRIPT_NAME = $ENV{'SCRIPT_NAME'} || $0; -$CGI = "man:"; -$CGI2 = "file:"; -# $CGI2 = "file:///\$LIB/hlink.cgi?"; -$SQUEEZE = 1; -$ENV{'PAGER'} = 'cat'; - -if ($QUERY =~ /^man-k:/) { - $QUERY =~ s/^man-k://; - my $keyword = &form_decode($QUERY); - my $sectionopt = ""; - if ($keyword =~ s/(.*)\((\w+)\)$//) { - $keyword = $1; - $sectionopt = "-s $2 "; - } - my $k = &html_quote($keyword); - print <<EOF; -Content-Type: text/html - -<html> -<head><title>man $sectionopt-k $k</title></head> -<body> -<h2>man $sectionopt-k <b>$k</b></h2> -<ul> -EOF - $keyword =~ s:([^-\w\200-\377.,])::g; - open(F, "$MAN $sectionopt -k $keyword 2> /dev/null |"); - while(<F>) { - chop; - $_ = &html_quote($_); - s/(\s+-.*)$//; - $title = $1; - s@(\w[\w.\-]*(\s*\,\s*\w[\w.\-]*)*)\s*(\([\dn]\w*\))@&keyword_ref($1, $3)@ge; - print "<li>$_$title\n"; - } - close(F); - print <<EOF; -</ul> -</body> -</html> -EOF - exit; -} elsif ($QUERY =~ /^man-l:/) { - $QUERY =~ s/^man-l://; - $file = &form_decode($QUERY); - open(F, "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 $MAN $file 2> /dev/null |"); -} else { - $QUERY =~ s/^man://; - my $man = &form_decode($QUERY); - if ($man =~ s/\((\w+)\)$//) { - $section = $1; - $man_section = "$man($1)"; - } else { - $section = ""; - $man_section = "$man"; - } - - $section =~ s:([^-\w\200-\377.,])::g; - $man =~ s:([^-\w\200-\377.,])::g; - open(F, "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 $MAN $section $man 2> /dev/null |"); -} - -$ok = 0; -undef $header; -$blank = -1; -$cmd = ""; -$prev = ""; -while(<F>) { - if (! 
defined($header)) { - /^\s*$/ && next; - $header = $_; - $space = $header; - chop $space; - $space =~ s/\S.*//; - } elsif ($_ eq $header) { # delete header - $blank = -1; - next; - } elsif (!/\010/ && /^$space[\w\200-\377].*\s\S/o) { # delete footer - $blank = -1; - next; - } - if ($SQUEEZE) { - if (/^\s*$/) { - $blank || $blank++; - next; - } elsif ($blank) { - $blank > 0 && print "\n"; - $blank = 0; - } - } - - s/\&/\&amp;/g; - s/\</\&lt;/g; - s/\>/\&gt;/g; - # non ASCII UTF-8 codepoint - my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}"; - - s@($utf8)(\010\1)+@<b>$1</b>@g; - s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g; - s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g; - s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g; - s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g; - s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g; - s@.\010(.)@$1@g; - - s@\</b\>\</u\>\<b\>_\</b\>\<u\>\<b\>@_@g; - s@\</u\>\<b\>_\</b\>\<u\>@_@g; - s@\</u\>\<u\>@@g; - s@\</b\>\<b\>@@g; - - if (! $ok) { - /^No/ && last; - print <<EOF; -Content-Type: text/html - -<html> -<head><title>man $man_section</title></head> -<body> -<pre> -EOF - print; - $ok = 1; - next; - } - - s@(https?|ftp)://[\w.\-/~]+[\w/]@<a href="$&">$&</a>@g; - s@\b(mailto:|)(\w[\w.\-]*\@\w[\w.\-]*\.[\w.\-]*\w)@<a href="mailto:$2">$1$2</a>@g; - s@(\W)(\~?/[\w.][\w.\-/~]*)@$1 . &file_ref($2)@ge; - s@(include(<\/?[bu]\>|\s)*\<)([\w.\-/]+)@$1 . 
&include_ref($3)@ge; - if ($prev && m@^\s*(\<[bu]\>)*(\w[\w.\-]*)(\</[bu]\>)*(\([\dm]\w*\))@) { - $cmd .= "$2$4"; - $prev =~ s@(\w[\w.\-]*-)((\</[bu]\>)*\s*)$@<a href="$CGI$cmd">$1</a>$2@; - print $prev; - $prev = ''; - s@^(\s*(\<[bu]\>)*)(\w[\w.\-]*)@@; - print "$1<a href=\"$CGI$cmd\">$3</a>"; - } elsif ($prev) { - print $prev; - $prev = ''; - } - s@(\w[\w.\-]*)((\</[bu]\>)*)(\([\dm]\w*\))@<a href="$CGI$1$4">$1</a>$2$4@g; - if (m@(\w[\w.\-]*)-(\</[bu]\>)*\s*$@) { - $cmd = $1; - $prev = $_; - next; - } - print; -} -if ($prev) { - print $prev; -} -close(F); -if (! $ok) { - if ($file) { - print "Cha-Control: ConnectionError 4 file $file not found"; - } else { - print "Cha-Control: ConnectionError 4 no manual entry for $man_section"; - } - exit 1; -} -print <<EOF; -</pre> -</body> -</html> -EOF - -sub is_command { - local($_) = @_; - local($p); - - (! -d && -x) || return 0; - if (! %PATH) { - for $p (split(":", $ENV{'PATH'})) { - $p =~ s@/+$@@; - $PATH{$p} = 1; - } - } - s@/[^/]*$@@; - return defined($PATH{$_}); -} - -sub file_ref { - local($_) = @_; - - if (&is_command($_)) { - ($man = $_) =~ s@.*/@@; - return "<a href=\"$CGI$man\">$_</a>"; - } - if (/^\~/ || -f || -d) { - ($file = $_) =~ s/^\~/$ENV{"HOME"}/; - return "<a href=\"$CGI2$file\">$_</a>"; - } - return $_; -} - -sub include_ref { - local($_) = @_; - local($d); - - for $d ( - "/usr/include", - "/usr/local/include", - "/usr/X11R6/include", - "/usr/X11/include", - "/usr/X/include", - "/usr/include/X11" - ) { - -f "$d/$_" && return "<a href=\"$CGI2$d/$_\">$_</a>"; - } - return $_; -} - -sub keyword_ref { - local($_, $s) = @_; - local(@a) = (); - - for (split(/\s*,\s*/)) { - push(@a, "<a href=\"$CGI$_$s\">$_</a>"); - } - return join(", ", @a) . 
$s; -} - -sub html_quote { - local($_) = @_; - local(%QUOTE) = ( - '<', '&lt;', - '>', '&gt;', - '&', '&amp;', - '"', '&quot;', - ); - s/[<>&"]/$QUOTE{$&}/g; - return $_; -} - -sub form_decode { - local($_) = @_; - s/\+/ /g; - s/%([\da-f][\da-f])/pack('c', hex($1))/egi; - return $_; -} diff --git a/adapter/protocol/man.nim b/adapter/protocol/man.nim new file mode 100644 index 00000000..4fc418d4 --- /dev/null +++ b/adapter/protocol/man.nim @@ -0,0 +1,308 @@ +import std/os +import std/posix +import std/strutils +import std/unicode + +import bindings/libregexp +import js/regex +import types/opt +import utils/twtstr + +proc lre_check_stack_overflow(opaque: pointer; alloca_size: csize_t): cint + {.exportc.} = + return 0 + +proc parseSection(query: string): tuple[page, section: string] = + var section = "" + if query.len > 0 and query[^1] == ')': + for i in countdown(query.high, 0): + if query[i] == '(': + section = query.substr(i + 1, query.high - 1) + break + if section != "": + return (query.substr(0, query.high - 2 - section.len), section) + return (query, "") + +func processBackspace(line: string): string = + var s = "" + var i = 0 + var thiscs = 0 .. 
-1 + var bspace = false + var inU = false + var inB = false + var pendingInU = false + var pendingInB = false + template flushChar = + if pendingInU != inU: + s &= (if inU: "</u>" else: "<u>") + inU = pendingInU + if pendingInB != inB: + s &= (if inB: "</b>" else: "<b>") + inB = pendingInB + if thiscs.len > 0: + let cs = case line[thiscs.a] + of '&': "&amp;" + of '<': "&lt;" + of '>': "&gt;" + else: line.substr(thiscs.a, thiscs.b) + s &= cs + thiscs = i ..< i + n + pendingInU = false + pendingInB = false + while i < line.len: + # this is the same "sometimes works" algorithm as in ansi2html + if line[i] == '\b' and thiscs.len > 0: + bspace = true + inc i + continue + let n = line.runeLenAt(i) + if thiscs.len == 0: + thiscs = i ..< i + n + i += n + continue + if bspace and thiscs.len > 0: + if line[i] == '_' and not pendingInU and line[thiscs.a] != '_': + pendingInU = true + elif line[thiscs.a] == '_' and not pendingInU and line[i] != '_': + # underscore comes first; set thiscs to the current charseq + thiscs = i ..< i + n + pendingInU = true + elif line[i] == '_' and line[thiscs.a] == '_': + if pendingInB: + pendingInU = true + else: + pendingInB = true + elif not pendingInB: + pendingInB = true + bspace = false + else: + flushChar + i += n + let n = 0 + flushChar + pendingInU = false + pendingInB = false + flushChar + return s + +proc isCommand(paths: seq[string]; s: string): bool = + for p in paths: + if fileExists(p & s): + return true + false + +iterator myCaptures(captures: var seq[RegexCapture]; target: int; + offset: var int): RegexCapture = + for cap in captures.mitems: + if cap.i == target: + cap.s += offset + cap.e += offset + yield cap + +proc processManpage(file: File) = + template re(s: static string): Regex = + let r = s.compileRegex({LRE_FLAG_GLOBAL, LRE_FLAG_UTF16}) + if r.isNone: + stdout.write(s & ": " & r.error) + return + r.get + # regexes partially from w3mman2html + let linkRe = re"(https?|ftp)://[\w/~.-]+" + let mailRe = 
re"(mailto:|)(\w[\w.-]*@[\w-]+\.[\w.-]*)" + let fileRe = re"(file:)?[/~][\w/~.-]+[\w/]" + let includeRe = re"#include(</?[bu]>|\s)*<([\w./-]+)" + let manRe = re"(</?[bu]>)*(\w[\w.-]*)(</?[bu]>)*(\([0-9nlx]\w*\))" + var paths: seq[string] = @[] + for p in getEnv("PATH").split(':'): + var i = p.high + while i > 0 and p[i] == '/': + dec i + paths.add(p.substr(0, i) & "/") + var line: string + var wasBlank = false + while file.readLine(line): + if line == "": + if wasBlank: + continue + wasBlank = true + else: + wasBlank = false + var line = line.processBackspace() + if (var res = linkRe.exec(line); res.success): + var offset = 0 + for cap in res.captures.myCaptures(0, offset): + let s = line[cap.s..<cap.e] + let link = "<a href='" & s & "'>" & s & "</a>" + line[cap.s..<cap.e] = link + offset += link.len - (cap.e - cap.s) + if (var res = mailRe.exec(line); res.success): + var offset = 0 + for cap in res.captures.myCaptures(2, offset): + let s = line[cap.s..<cap.e] + let link = "<a href='mailto:" & s & "'>" & s & "</a>" + line[cap.s..<cap.e] = link + offset += link.len - (cap.e - cap.s) + if (var res = fileRe.exec(line); res.success): + var offset = 0 + for cap in res.captures.myCaptures(0, offset): + let s = line[cap.s..<cap.e] + let target = s.expandTilde() + if not fileExists(target) and not symlinkExists(target) and + not dirExists(target): + continue + let link = if paths.isCommand(target): + "<a href='man:" & target.afterLast('/') & "'>" & s & "</a>" + else: + "<a href='file:" & target & "'>" & s & "</a>" + line[cap.s..<cap.e] = link + offset += link.len - (cap.e - cap.s) + if (var res = includeRe.exec(line); res.success): + var offset = 0 + for cap in res.captures.myCaptures(2, offset): + let s = line[cap.s..<cap.e] + const includePaths = [ + "/usr/include/", + "/usr/local/include/", + "/usr/X11R6/include/", + "/usr/X11/include/", + "/usr/X/include/", + "/usr/include/X11/" + ] + for path in includePaths: + let file = path & s + if fileExists(file): + let link = 
"<a href='file:" & file & "'>" & s & "</a>" + line[cap.s..<cap.e] = link + offset += link.len - (cap.e - cap.s) + break + if (var res = manRe.exec(line); res.success): + var offset = 0 + for j, cap in res.captures.mpairs: + if cap.i == 0: + cap.s += offset + cap.e += offset + var manCap = res.captures[j + 2] + manCap.s += offset + manCap.e += offset + var secCap = res.captures[j + 4] + secCap.s += offset + secCap.e += offset + let man = line[manCap.s..<manCap.e] + let cat = man & line[secCap.s..<secCap.e] + let link = "<a href='man:" & cat & "'>" & man & "</a>" + line[manCap.s..<manCap.e] = link + offset += link.len - (manCap.e - manCap.s) + stdout.write(line & "\n") + +proc doMan(man, keyword, section: string) = + let sectionOpt = if section == "": "" else: " -s " & section + let cmd = "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 " & + man & sectionOpt & " " & keyword & " 2>&1" + let file = popen(cstring(cmd), "r") + if file == nil: + stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd) + return + var line0: string + if not file.readLine(line0) or file.endOfFile(): + discard file.pclose() + if line0.startsWith("man: "): + line0 = line0.after(' ') + stdout.write("Cha-Control: ConnectionError 4 " & line0) + return + var manword = keyword + if section != "": + manword &= '(' & section & ')' + stdout.write("""Content-Type: text/html + +<title>man """ & manword & """</title> +<pre>""" & line0 & "\n") + file.processManpage() + discard file.pclose() + +proc doLocal(man, keyword: string) = + let cmd = "GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 " & + man & " -l " & keyword & " 2>/dev/null" + let file = popen(cstring(cmd), "r") + if file == nil: + stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd) + return + stdout.write("""Content-Type: text/html + +<title>man -l """ & keyword & """</title> +<h1>man -l <b>""" & keyword & """</b></h1> +<pre>""") + file.processManpage() + discard file.pclose() + +proc doKeyword(man, keyword, section: string) = + let 
sectionOpt = if section == "": "" else: " -s " & section + let cmd = man & sectionOpt & " -k " & keyword & " 2>/dev/null" + let file = popen(cstring(cmd), "r") + if file == nil: + stdout.write("Cha-Control: ConnectionError 1 failed to run " & cmd) + return + stdout.write("Content-Type: text/html\n\n") + stdout.write("<title>man" & sectionOpt & " -k " & keyword & "</title>\n") + stdout.write("<h1>man" & sectionOpt & " -k <b>" & keyword & "</b></h1>\n") + stdout.write("<ul>") + var line: string + template die = + stdout.write("Error parsing line! " & line) + return + while file.readLine(line): + if line.len == 0: + stdout.write("\n") + continue + # collect titles + var titles: seq[string] = @[] + var i = 0 + while true: + let title = line.until({'(', ','}, i) + i += title.len + titles.add(title) + if i >= line.len or line[i] == '(': + break + i = line.skipBlanks(i + 1) + # collect section + if line[i] != '(': die + let sectionText = line.substr(i, line.find(')', i)) + i += sectionText.len + # create line + var section = sectionText.until(',') # for multiple sections, take first + if section[^1] != ')': + section &= ')' + var s = "<li>" + for i, title in titles: + let title = title.htmlEscape() + s &= "<a href='man:" & title & section & "'>" & title & "</a>" + if i < titles.high: + s &= ", " + s &= sectionText + s &= line.substr(i) + s &= "\n" + stdout.write(s) + +proc main() = + var man = getEnv("MANCHA_MAN") + if man == "": + man = "/usr/bin/man" + if not fileExists(man): + man = "/bin/man" + if not fileExists(man): + man = "/usr/local/bin/man" + if not fileExists(man): + man = "/usr/bin/env man" + doAssert getAppFilename() != man # don't accidentally fork bomb ourselves + let query = percentDecode(getEnv("QUERY_STRING")) + if query.startsWith("man:"): + let (keyword, section) = parseSection(query.after(':')) + doMan(man, keyword, section) + elif query.startsWith("man-k:"): + let (keyword, section) = parseSection(query.after(':')) + doKeyword(man, keyword, section) 
+ elif query.startsWith("man-l:"): + doLocal(man, query.after(':')) + else: + stdout.write("Cha-Control: ConnectionError 1 invalid invocation") + +main() diff --git a/doc/cha.1 b/doc/cha.1 index 5af4319d..b5943465 100644 --- a/doc/cha.1 +++ b/doc/cha.1 @@ -1,4 +1,4 @@ -.TH CHA 1 +.TH cha 1 .SH NAME cha - the Chawan text-mode browser .SH SYNOPSIS diff --git a/doc/protocols.md b/doc/protocols.md index 2f1c7afd..84a8dde1 100644 --- a/doc/protocols.md +++ b/doc/protocols.md @@ -125,8 +125,8 @@ and `about:license`. `man:`, `man-k:` and `man-l:` are wrappers around the commands `man`, `man -k` and `man -l`. These look up man pages using `/usr/bin/man` and turn on-page references into links. A wrapper command `mancha` also exists; this has an -interface similar to `man`. Note that Perl is required for these protocols -to work. +interface similar to `man`. Note: this used to be based on w3mman2html.cgi, but +it has been rewritten in Nim (and therefore no longer depends on Perl either). `data:` decodes a data URL as defined in RFC 2397. 
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c index a5e51d19..9c5e948c 100644 --- a/lib/quickjs/quickjs.c +++ b/lib/quickjs/quickjs.c @@ -43960,16 +43960,20 @@ fail: BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size) { + if (!opaque) + return 0; JSContext *ctx = opaque; return js_check_stack_overflow(ctx->rt, alloca_size); } +#if 0 void *lre_realloc(void *opaque, void *ptr, size_t size) { JSContext *ctx = opaque; /* No JS exception is raised here */ return js_realloc_rt(ctx->rt, ptr, size); } +#endif static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) diff --git a/res/license.html b/res/license.html index e58a4876..405d6e2d 100644 --- a/res/license.html +++ b/res/license.html @@ -23,7 +23,6 @@ Table of contents: <LI><A HREF="#chawan">Chawan</A> <LI><A HREF="#quickjs">QuickJS</A> <LI><A HREF="#punycode">Punycode library</A> -<LI><A HREF="#w3mman">w3mman</A> </UL> <H2 id=chawan>Chawan</H2> <PRE> @@ -115,22 +114,5 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. </PRE> -<H2 id=w3mman>w3mman</H2> -Chawan uses a customized version of w3mman2html.cgi from w3m for viewing man -pages. This script is distributed under the following terms: -<PRE> -(C) Copyright 1994-2002 by Akinori Ito -(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai - -Use, modification and redistribution of this software is hereby granted, -provided that this entire copyright notice is included on any copies of -this software and applications and derivations thereof. - -This software is provided on an "as is" basis, without warranty of any -kind, either expressed or implied, as to any matter including, but not -limited to warranty of fitness of purpose, or merchantability, or -results obtained from use of this software. 
-</PRE> - </BODY> </HTML> diff --git a/src/js/regex.nim b/src/js/regex.nim index fbbaf55f..e189b430 100644 --- a/src/js/regex.nim +++ b/src/js/regex.nim @@ -2,7 +2,6 @@ import std/unicode import bindings/libregexp -import bindings/quickjs import types/opt import utils/twtstr @@ -13,35 +12,37 @@ type bytecode: seq[uint8] buf: string + RegexCapture* = tuple # start, end, index + s, e: int + i: int32 + RegexResult* = object success*: bool - captures*: seq[tuple[s, e: int]] # start, end + captures*: seq[RegexCapture] RegexReplace* = object regex: Regex rule: string global: bool -var dummyRuntime = JS_NewRuntime() -var dummyContext = JS_NewContextRaw(dummyRuntime) - func `$`*(regex: Regex): string = regex.buf -proc compileRegex*(buf: string, flags: LREFlags = {}): Result[Regex, string] = - var error_msg_size = 64 - var error_msg = newString(error_msg_size) - prepareMutation(error_msg) +# this is hardcoded into quickjs, so we must override it here. +proc lre_realloc(opaque, p: pointer; size: csize_t): pointer {.exportc.} = + return realloc(p, size) + +proc compileRegex*(buf: string; flags: LREFlags = {}): Result[Regex, string] = + var errorMsg = newString(64) var plen: cint - let bytecode = lre_compile(addr plen, cstring(error_msg), - cint(error_msg_size), cstring(buf), csize_t(buf.len), flags.toCInt, - dummyContext) + let bytecode = lre_compile(addr plen, cstring(errorMsg), cint(errorMsg.len), + cstring(buf), csize_t(buf.len), flags.toCInt, nil) if bytecode == nil: - return err(error_msg.until('\0')) # Failed to compile. + return err(errorMsg.until('\0')) # Failed to compile. 
assert plen > 0 var bcseq = newSeqUninitialized[uint8](plen) copyMem(addr bcseq[0], bytecode, plen) - dummyRuntime.js_free_rt(bytecode) + dealloc(bytecode) let regex = Regex( buf: buf, bytecode: bcseq @@ -83,7 +84,7 @@ proc compileMatchRegex*(buf: string): Result[Regex, string] = buf2 &= "$" return compileRegex(buf2) -proc compileSearchRegex*(str: string, defaultFlags: LREFlags): +proc compileSearchRegex*(str: string; defaultFlags: LREFlags): Result[Regex, string] = # Emulate vim's \c/\C: override defaultFlags if one is found, then remove it # from str. @@ -108,13 +109,13 @@ proc compileSearchRegex*(str: string, defaultFlags: LREFlags): flags.incl(LRE_FLAG_GLOBAL) # for easy backwards matching return compileRegex(s, flags) -proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): RegexResult = +proc exec*(regex: Regex; str: string; start = 0; length = -1; nocaps = false): + RegexResult = let length = if length == -1: str.len else: length - assert 0 <= start and start <= length - + assert start in 0 .. length let bytecode = unsafeAddr regex.bytecode[0] let captureCount = lre_get_capture_count(bytecode) var capture: ptr UncheckedArray[int] = nil @@ -126,7 +127,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R var start = start while true: let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode, - cast[ptr uint8](cstr), cint(start), cint(length), cint(3), dummyContext) + cast[ptr uint8](cstr), cint(start), cint(length), cint(3), nil) if ret != 1: #TODO error handling? 
(-1) break result.success = true @@ -138,7 +139,7 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R for i in 0 ..< captureCount: let s = capture[i * 2] - cstrAddress let e = capture[i * 2 + 1] - cstrAddress - result.captures.add((s, e)) + result.captures.add((s, e, i)) if LRE_FLAG_GLOBAL notin flags: break if start >= str.len: @@ -148,5 +149,5 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R if captureCount > 0: dealloc(capture) -proc match*(regex: Regex, str: string, start = 0, length = str.len): bool = +proc match*(regex: Regex; str: string; start = 0; length = str.len): bool = return regex.exec(str, start, length, nocaps = true).success diff --git a/todo b/todo index 6205e475..0dc4736d 100644 --- a/todo +++ b/todo @@ -80,12 +80,8 @@ images: - incremental decoding (maybe implement streams first?) - separate image decoder process? or just run on a different thread? man: -- mancha: - * detect man directory automatically - * eventually rewrite in Nim -- man pages: - * add a DOM -> man page converter so that we do not depend on pandoc - for man page conversion +* add a DOM -> man page converter so that we do not depend on pandoc + for man page conversion gmifetch: - rewrite in Nim etc: |