From e88886243f2282e913d44006916397e076a76425 Mon Sep 17 00:00:00 2001 From: bptato Date: Sat, 7 Sep 2024 19:05:03 +0200 Subject: md2html: code, pre, inline fixes --- Makefile | 1 + adapter/format/md2html.nim | 313 ++++++++++++++++++++++++++++----------------- res/license.md | 2 +- src/utils/strwidth.nim | 2 +- src/utils/twtstr.nim | 4 +- 5 files changed, 198 insertions(+), 124 deletions(-) diff --git a/Makefile b/Makefile index 9c4753fb..42c85932 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,7 @@ $(OUTDIR_CGI_BIN)/canvas: src/img/bitmap.nim src/img/painter.nim \ $(OUTDIR_LIBEXEC)/urlenc: $(twtstr) $(OUTDIR_LIBEXEC)/gopher2html: adapter/gophertypes.nim $(twtstr) $(OUTDIR_LIBEXEC)/ansi2html: src/types/color.nim $(twtstr) +$(OUTDIR_LIBEXEC)/md2html: $(twtstr) $(OUTDIR_CGI_BIN)/%: adapter/protocol/%.nim @mkdir -p "$(OUTDIR_CGI_BIN)" diff --git a/adapter/format/md2html.nim b/adapter/format/md2html.nim index 60f4f013..5f617b05 100644 --- a/adapter/format/md2html.nim +++ b/adapter/format/md2html.nim @@ -1,53 +1,37 @@ import std/strutils -proc toggle[T](s: var set[T], t: T): bool = - result = t notin s - if result: - s.incl(t) - else: - s.excl(t) +import utils/twtstr type BracketState = enum - bsNone, bsInBracketRef, bsInBracket, bsAfterBracket, bsInParen, bsInImage - -const AsciiAlphaNumeric = {'0'..'9', 'A'..'Z', 'a'..'z'} -const AsciiWhitespace = {' ', '\n', '\r', '\t', '\f'} + bsNone, bsInBracket proc getId(line: openArray[char]): string = result = "" var i = 0 var bs = bsNone - var escape = false while i < line.len: - let c = line[i] - if bs == bsInParen: - if escape: - escape = false - inc i - continue - if c == ')': - bs = bsNone - elif c == '\\': - escape = true - inc i - continue - case c + case (let c = line[i]; c) of AsciiAlphaNumeric, '-', '_', '.': result &= c.toLowerAscii() of ' ': result &= '-' of '[': - if bs != bsNone: - bs = bsInBracket + bs = bsInBracket of ']': if bs == bsInBracket: - bs = bsAfterBracket - of '(': - if bs == bsAfterBracket: - bs = bsInParen + if i + 1 < line.len and line[i + 1] == '(': + inc i + while i < line.len: + let c = line[i] + if c == '\\': + inc i + elif c == ')': + break + inc i + bs = bsNone else: discard inc i -type InlineState = enum - isItalic, isBold, isDel +type InlineFlag = enum + ifItalic, ifBold, ifDel func startsWithScheme(s: string): bool = for i, c in s: @@ -61,21 +45,17 @@ type ParseInlineContext = object i: int bracketChars: string bs: BracketState + bracketRef: bool + flags: set[InlineFlag] proc parseInTag(ctx: var ParseInlineContext; line: openArray[char]) = var buf = "" - var i = ctx.i + var i = ctx.i + 1 while i < line.len: let c = line[i] if c == '>': # done if buf.startsWithScheme(): # link - var linkChars = "" - for c in buf: - if c == '\'': - linkChars &= "&apos" - else: - linkChars &= c - stdout.write("" & buf & "") + stdout.write("" & buf & "") else: # tag stdout.write('<' & buf & '>') buf = "" @@ -91,21 +71,21 @@ proc parseInTag(ctx: var ParseInlineContext; line: openArray[char]) = stdout.write(buf) ctx.i = i -type CommentState = enum - csNone, csDash, csDashDash - proc append(ctx: var ParseInlineContext; s: string) = - if ctx.bs in {bsInBracketRef, bsInBracket}: + if ctx.bs == bsInBracket: ctx.bracketChars &= s else: stdout.write(s) proc append(ctx: var ParseInlineContext; c: char) = - if ctx.bs in {bsInBracketRef, bsInBracket}: + if ctx.bs == bsInBracket: ctx.bracketChars &= c else: stdout.write(c) +type CommentState = enum + csNone, csDash, csDashDash + proc parseComment(ctx: var ParseInlineContext; line: openArray[char]) = var i = ctx.i var cs = csNone @@ -125,33 +105,147 @@ proc parseComment(ctx: var ParseInlineContext; line: openArray[char]) = ctx.i = i proc parseCode(ctx: var ParseInlineContext; line: openArray[char]) = - var i = ctx.i + let i = ctx.i + 1 + let j = line.toOpenArray(i, line.high).find('`') + if j != -1: + ctx.append("") + ctx.append(line.toOpenArray(i, i + j - 1).htmlEscape()) + ctx.append("") + ctx.i = i + j + else: + ctx.append('`') + +proc parseLinkDestination(url: var string; line: openArray[char]; i: int): int = + var i = i + var quote = false + var parens = 0 + let sc = line[i] + if sc == '<': + inc i while i < line.len: let c = line[i] - case c - of '<': ctx.append("<") - of '>': ctx.append(">") - of '"': ctx.append(""") - of '\'': ctx.append("'") - of '&': ctx.append("&") - of '`': - ctx.append("") + if quote: + quote = false + elif sc == '<' and c == '>' or sc != '<' and c in AsciiWhitespace + {')'}: break - else: ctx.append(c) + elif c in {'<', '\n'} or c in Controls and sc != '<': + return -1 + elif c == '\\': + quote = true + elif c == '(': + inc parens + url &= c + elif c == ')' and sc != '>': + if parens == 0: + break + dec parens + url &= c + else: + url &= c inc i - ctx.i = i + if sc != '>' and parens != 0 or quote: + return -1 + return line.skipBlanks(i) + +proc parseTitle(title: var string; line: openArray[char]; i: int): int = + let ec = line[i] + var i = i + 1 + var quote = false + while i < line.len: + let c = line[i] + if quote: + quote = false + elif c == '\\': + quote = true + elif c == ec: + inc i + break + else: + title &= c + inc i + return line.skipBlanks(i) + +proc parseLink(ctx: var ParseInlineContext; line: openArray[char]) = + let i = ctx.i + 1 + if i >= line.len or line[i] != '(': + #TODO reference links + stdout.write('[' & ctx.bracketChars & ']') + return + var url = "" + var j = url.parseLinkDestination(line, line.skipBlanks(i + 1)) + var title = "" + if j != -1 and j < line.len and line[j] in {'(', '"', '\''}: + j = title.parseTitle(line, j) + if j == -1 or j >= line.len or line[j] != ')': + stdout.write('[' & ctx.bracketChars & ']') + else: + let url = url.htmlEscape() + stdout.write("") + stdout.write(ctx.bracketChars) + stdout.write("") + ctx.i = j + +proc parseImageAlt(text: var string; line: openArray[char]; i: int): int = + var i = i + var brackets = 0 + while i < line.len: + let c = line[i] + if c == '\\': + inc i + elif c == '<': + while i < line.len and line[i] != '>': + text &= c + inc i + elif c == '[': + inc brackets + text &= c + elif line[i] == ']': + if brackets == 0: + break + dec brackets + text &= c + else: + text &= c + inc i + return i + +proc parseImage(ctx: var ParseInlineContext; line: openArray[char]) = + var text = "" + let i = text.parseImageAlt(line, ctx.i + 2) + if i == -1 or i + 1 >= line.len or line[i] != ']' or line[i + 1] != '(': + ctx.append("![") + return + var url = "" + var j = url.parseLinkDestination(line, line.skipBlanks(i + 2)) + var title = "" + if j != -1 and j < line.len and line[j] in {'(', '"', '\''}: + j = title.parseTitle(line, j) + if j == -1 or j >= line.len or line[j] != ')': + ctx.append("![") + else: + ctx.append("" & text.htmlEscape())
+    ctx.append("") + ctx.i = j + +proc appendToggle(ctx: var ParseInlineContext; f: InlineFlag; s, e: string) = + if f notin ctx.flags: + ctx.flags.incl(f) + ctx.append(s) + else: + ctx.flags.excl(f) + ctx.append(e) proc parseInline(line: openArray[char]) = - var state: set[InlineState] = {} var ctx = ParseInlineContext() - var image = false while ctx.i < line.len: let c = line[ctx.i] - if ctx.bs == bsAfterBracket and c != '(': - stdout.write("[" & ctx.bracketChars & "]") - ctx.bracketChars = "" - ctx.bs = bsNone - image = false if c == '\\': inc ctx.i if ctx.i < line.len: @@ -164,58 +258,35 @@ proc parseInline(line: openArray[char]) = ctx.i + 1 >= line.len or line[ctx.i + 1] notin AsciiAlphaNumeric + {'_'})): if ctx.i + 1 < line.len and line[ctx.i + 1] == c: - if state.toggle(isBold): - ctx.append("") - else: - ctx.append("") + ctx.appendToggle(ifBold, "", "") inc ctx.i else: - if state.toggle(isItalic): - stdout.write("") - else: - stdout.write("") + ctx.appendToggle(ifItalic, "", "") elif c == '`': - ctx.append("") - inc ctx.i ctx.parseCode(line) elif c == '~' and ctx.i + 1 < line.len and line[ctx.i + 1] == '~': - if state.toggle(isDel): - ctx.append("") - else: - ctx.append("") + ctx.appendToggle(ifDel, "", "") inc ctx.i - elif c == '!' and ctx.bs == bsNone and ctx.i + 1 < line.len and - line[ctx.i + 1] == '[': - image = true - elif c == '[' and ctx.bs == bsNone: + elif c == '!' and ctx.i + 1 < line.len and line[ctx.i + 1] == '[': + ctx.parseImage(line) + elif c == '[': + if ctx.bs == bsInBracket: + stdout.write('[' & ctx.bracketChars) + ctx.bracketChars = "" ctx.bs = bsInBracket - if ctx.i + 1 < line.len and line[ctx.i + 1] == '^': + ctx.bracketRef = ctx.i + 1 < line.len and line[ctx.i + 1] == '^' + if ctx.bracketRef: inc ctx.i - ctx.bs = bsInBracketRef - elif c == ']' and ctx.bs == bsInBracketRef: - let id = ctx.bracketChars.getId() - stdout.write("" & ctx.bracketChars & "") - ctx.bracketChars = "" elif c == ']' and ctx.bs == bsInBracket: - ctx.bs = bsAfterBracket - elif c == '(' and ctx.bs == bsAfterBracket: - if image: - stdout.write("" & ctx.bracketChars & "") else: - stdout.write("") - else: - stdout.write("'>" & ctx.bracketChars & "") - image = false + ctx.parseLink(line) ctx.bracketChars = "" + ctx.bracketRef = false ctx.bs = bsNone - elif c == '\'' and ctx.bs == bsInParen: - stdout.write("'") - elif c == '<' and ctx.bs == bsNone: - inc ctx.i + elif c == '<': ctx.parseInTag(line) elif ctx.i + 4 < line.len and line.toOpenArray(ctx.i, ctx.i + 3) == "