about summary refs log tree commit diff stats
path: root/adapter
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2024-01-30 22:09:53 +0100
committer: bptato <nincsnevem662@gmail.com> 2024-01-30 22:13:42 +0100
commit: 053e0be996d48e3aa550a3d8d46c92700e6ff469 (patch)
tree: 92faf4e9fa71d7a4b6dd1a6d5df9a892efcbedee /adapter
parent: d2feac8a4798fb09441376f107e9b97bc4141e52 (diff)
download: chawan-053e0be996d48e3aa550a3d8d46c92700e6ff469.tar.gz
Add default md2html converter
Diffstat (limited to 'adapter')
-rw-r--r-- adapter/format/md2html.nim 380
1 files changed, 380 insertions, 0 deletions
diff --git a/adapter/format/md2html.nim b/adapter/format/md2html.nim
new file mode 100644
index 00000000..1d68e7b9
--- /dev/null
+++ b/adapter/format/md2html.nim
@@ -0,0 +1,380 @@
+import std/strutils
+
+proc toggle[T](s: var set[T], t: T): bool =
+  ## Flips the membership of `t` in `s`.
+  ##
+  ## Returns true when `t` was absent and has now been added, false when
+  ## it was present and has now been removed.
+  if t in s:
+    s.excl(t)
+    return false
+  s.incl(t)
+  return true
+
+# State of the `[text](target)` / `[^ref]` scanner used by `getId` and
+# `parseInline`. `bsInImage` is not referenced by any code visible in
+# this file.
+type BracketState = enum
+  bsNone, bsInBracketRef, bsInBracket, bsAfterBracket, bsInParen, bsInImage
+
+proc getId(line: openArray[char]): string =
+  ## Builds an anchor id from `line`.
+  ##
+  ## ASCII letters are lowercased, spaces become `-`, and `-`, `_`, `.`
+  ## are kept; every other character is dropped. The `(target)` part of a
+  ## `[text](target)` link is skipped entirely, so only the link text
+  ## contributes to the id.
+  result = ""
+  var bs = bsNone
+  var escape = false
+  for c in line:
+    if bs == bsInParen:
+      # Inside the link target: ignore everything up to the first
+      # unescaped `)`.
+      if escape:
+        escape = false
+      elif c == ')':
+        bs = bsNone
+      elif c == '\\':
+        escape = true
+      continue
+    if bs == bsAfterBracket and c != '(':
+      # `[text]` was not followed by `(`, so there is no target to skip.
+      bs = bsNone
+    case c
+    of 'A'..'Z': result &= c.toLowerAscii()
+    of 'a'..'z', '-', '_', '.': result &= c
+    of ' ': result &= '-'
+    of '[':
+      # Only enter bracket state from the idle state. The previous
+      # `bs != bsNone` test could never succeed, which left the whole
+      # link-target skipping logic unreachable.
+      if bs == bsNone:
+        bs = bsInBracket
+    of ']':
+      if bs == bsInBracket:
+        bs = bsAfterBracket
+    of '(':
+      if bs == bsAfterBracket:
+        bs = bsInParen
+    else: discard
+
+# Inline contexts that can be active while `parseInline` scans a line;
+# the scanner keeps them in a `set[InlineState]`.
+type InlineState = enum
+  isItalic, isBold, isCode, isComment
+
+# Characters that `parseInline` accepts immediately before a `_` for the
+# underscore to be treated as an emphasis marker.
+const AsciiWhitespace = {' ', '\t', '\n', '\r'}
+proc isUnderscoreDelim(line: openArray[char], i: int, open: bool): bool =
+  ## Decides whether the `_` at `line[i]` is an emphasis marker rather than
+  ## part of a word such as `snake_case`.
+  ##
+  ## `open` tells whether the emphasis this underscore would toggle is
+  ## currently active.
+  if i == 0 or line[i - 1] in AsciiWhitespace:
+    return true # start of a word: may open (or close) emphasis
+  if not open:
+    return false
+  # Closing marker: the run of underscores must end the word.
+  var j = i
+  while j < line.len and line[j] == '_':
+    inc j
+  return j == line.len or line[j] notin {'A'..'Z', 'a'..'z', '0'..'9'}
+
+proc parseInline(line: openArray[char]) =
+  ## Converts the inline Markdown in `line` to HTML and writes it to stdout.
+  ##
+  ## Handles backslash escapes, `*`/`_` emphasis, backtick code spans,
+  ## `[text](target)` links, `![alt](src)` images, `[^ref]` references and
+  ## HTML comments. Emphasis and code spans still open at the end of the
+  ## input are closed, and unfinished bracket constructs are written back
+  ## as literal text.
+  var state: set[InlineState] = {}
+  var bs = bsNone
+  var i = 0
+  var bracketChars = ""
+  var quote = false
+  var image = false
+  template append(s: untyped) =
+    # Text between `[` and `]` is buffered until it is known whether the
+    # brackets form a link.
+    if bs in {bsInBracketRef, bsInBracket}:
+      bracketChars &= s
+    else:
+      stdout.write(s)
+  while i < line.len:
+    let c = line[i]
+    let doubled = i + 1 < line.len and line[i + 1] == c
+    let emphKind = if doubled: isBold else: isItalic
+    if bs == bsAfterBracket and c != '(':
+      # `[text]` without a target: emit it literally, including the `!`
+      # that was consumed when an image was suspected.
+      if image:
+        stdout.write('!')
+      stdout.write("[" & bracketChars & "]")
+      bracketChars = ""
+      bs = bsNone
+      image = false
+    if quote:
+      append c
+      quote = false # a backslash escapes exactly one character
+    elif isComment in state:
+      if i + 2 < line.len and line.toOpenArray(i, i + 2) == "-->":
+        state.excl(isComment)
+        append "-->"
+        i += 2
+      else:
+        append c
+    elif isCode in state:
+      case c
+      of '&': append "&amp;"
+      of '<': append "&lt;"
+      of '>': append "&gt;"
+      of '"': append "&quot;"
+      of '\'': append "&apos;"
+      of '`':
+        append "</CODE>"
+        state.excl(isCode)
+      else: append c
+    elif c == '\\':
+      quote = true
+    elif c == '*' or
+        (c == '_' and line.isUnderscoreDelim(i, emphKind in state)):
+      if doubled:
+        if state.toggle(isBold):
+          append "<B>"
+        else:
+          append "</B>"
+        inc i
+      else:
+        # Go through `append` so that emphasis inside link text stays
+        # inside the link instead of being written ahead of it.
+        if state.toggle(isItalic):
+          append "<I>"
+        else:
+          append "</I>"
+    elif c == '`':
+      state.incl(isCode)
+      append "<CODE>"
+    elif c == '!' and bs == bsNone and i + 1 < line.len and line[i + 1] == '[':
+      image = true
+    elif c == '[' and bs == bsNone:
+      bs = bsInBracket
+      if i + 1 < line.len and line[i + 1] == '^':
+        inc i
+        bs = bsInBracketRef
+    elif c == ']' and bs == bsInBracketRef:
+      let id = bracketChars.getId()
+      stdout.write("<A HREF='#" & id & "'>" & bracketChars & "</A>")
+      bracketChars = ""
+      # Leave reference mode; otherwise all following text would keep
+      # being buffered as if it were still inside the brackets.
+      bs = bsNone
+      image = false
+    elif c == ']' and bs == bsInBracket:
+      bs = bsAfterBracket
+    elif c == '(' and bs == bsAfterBracket:
+      if image:
+        stdout.write("<IMG SRC='")
+      else:
+        stdout.write("<A HREF='")
+      bs = bsInParen
+    elif c == ')' and bs == bsInParen:
+      if image:
+        stdout.write("' ALT='" & bracketChars & "'>")
+      else:
+        stdout.write("'>" & bracketChars & "</A>")
+      image = false
+      bracketChars = ""
+      bs = bsNone
+    elif c == '\'' and bs == bsInParen:
+      stdout.write("&apos;")
+    elif i + 3 < line.len and line.toOpenArray(i, i + 3) == "<!--":
+      append "<!--"
+      i += 3
+      state.incl(isComment)
+    else:
+      append c
+    inc i
+  # Flush whatever bracket construct was left unfinished.
+  case bs
+  of bsInBracket:
+    if image:
+      stdout.write('!')
+    stdout.write("[" & bracketChars)
+  of bsInBracketRef:
+    stdout.write("[^" & bracketChars)
+  of bsAfterBracket:
+    if image:
+      stdout.write('!')
+    stdout.write("[" & bracketChars & "]")
+  of bsInParen:
+    # The target was never closed; finish the tag so the attribute does
+    # not swallow the rest of the document.
+    if image:
+      stdout.write("' ALT='" & bracketChars & "'>")
+    else:
+      stdout.write("'>" & bracketChars & "</A>")
+  of bsNone, bsInImage:
+    discard
+  if isCode in state:
+    stdout.write("</CODE>")
+  if isBold in state:
+    stdout.write("</B>")
+  if isItalic in state:
+    stdout.write("</I>")
+
+proc parseHash(line: openArray[char]): bool =
+  ## Tries to render an ATX heading and reports whether it did.
+  ##
+  ## `line` is the heading line without its first `#` (that is how
+  ## `parseNone` calls it). The remaining `#` characters must be followed
+  ## by a space, otherwise false is returned and nothing is written.
+  ## A trailing run of `#` preceded by a space is stripped from the
+  ## heading text.
+  var start = -1
+  for i, c in line:
+    if c != '#':
+      if c != ' ':
+        return false
+      start = i + 1
+      break
+  if start == -1:
+    return false
+  # `start - 1` hashes were seen here plus the one the caller removed, so
+  # `start` is also the number of hashes. Only the heading level is
+  # clamped; clamping the content offset as well would leave a leading
+  # space in the text and id of headings written with more than six
+  # hashes.
+  let level = min(start, 6)
+  var last = line.high
+  for i in countdown(line.high, start):
+    if line[i] != '#':
+      if line[i] != ' ':
+        break
+      last = i - 1
+      break
+  last = max(start - 1, last)
+  let id = line.toOpenArray(start, last).getId()
+  stdout.write("<H" & $level & " id='" & id & "'>")
+  line.toOpenArray(start, last).parseInline()
+  stdout.write("</H" & $level & ">\n")
+  return true
+
+# Kind of list a marker introduces: ordered (`1.`) or unordered (`*`).
+type ListType = enum
+  ltOl, ltUl
+
+proc getListDepth(line: string): tuple[depth, len: int, ol: ListType] =
+  ## Recognizes a list item marker at the start of `line`.
+  ##
+  ## On success returns the indentation depth (a space counts 1, a tab 8),
+  ## the index of the whitespace character that follows the marker, and
+  ## the list type. Returns `(-1, -1, ltUl)` when the line does not start
+  ## with `* ` or with a number followed by `. `.
+  var depth = 0
+  for i, c in line:
+    case c
+    of '\t':
+      depth += 8
+    of ' ':
+      inc depth
+    of '*':
+      let next = i + 1
+      if next < line.len and line[next] in {'\t', ' '}:
+        return (depth, next, ltUl)
+      break
+    of '0'..'9':
+      # Accept the whole number, so that items from `10.` onwards are
+      # still recognized as list items.
+      var dot = i + 1
+      while dot < line.len and line[dot] in {'0'..'9'}:
+        inc dot
+      if dot + 1 < line.len and line[dot] == '.' and
+          line[dot + 1] in {'\t', ' '}:
+        return (depth, dot + 1, ltOl)
+      break
+    else:
+      break
+  return (-1, -1, ltUl)
+
+proc matchHTMLPreStart(line: string): bool =
+  ## Reports whether `line` begins with an opening `pre`, `script`,
+  ## `style` or `textarea` tag (compared case-insensitively).
+  ##
+  ## The tag name is read from just after `<` up to the first space, tab
+  ## or `>`; any non-letter inside the name makes the match fail.
+  if line.len == 0 or line[0] != '<':
+    return false
+  var name = ""
+  for idx in 1 ..< line.len:
+    let ch = line[idx]
+    if ch in {' ', '\t', '>'}:
+      break
+    if ch notin {'A'..'Z', 'a'..'z'}:
+      return false
+    name.add(ch.toLowerAscii())
+  result = name in ["pre", "script", "style", "textarea"]
+
+proc matchHTMLPreEnd(line: string): bool =
+  ## Reports whether `line` begins with a closing `pre`, `script`,
+  ## `style` or `textarea` tag (compared case-insensitively).
+  ##
+  ## The tag name is read from just after `</` up to the first space, tab
+  ## or `>`; any non-letter inside the name makes the match fail.
+  if not line.startsWith("</"):
+    return false
+  var name = ""
+  for idx in 2 ..< line.len:
+    let ch = line[idx]
+    if ch in {' ', '\t', '>'}:
+      break
+    if ch notin {'A'..'Z', 'a'..'z'}:
+      return false
+    name.add(ch.toLowerAscii())
+  result = name in ["pre", "script", "style", "textarea"]
+
+type
+  # Kind of block the parser is currently inside; `main` dispatches each
+  # input line to the matching `parse*` proc based on this value.
+  BlockType = enum
+    btNone, btPar, btList, btPre, btHTML, btHTMLPre, btComment
+
+  # Mutable state carried from one input line to the next.
+  ParseState = object
+    # Block type that decides which proc handles the next line.
+    blockType: BlockType
+    # Text collected for the current block until it is flushed.
+    blockData: string
+    # Depth reported by `getListDepth` for the most recent list item.
+    listDepth: int
+    # Stack of currently open lists; see `pushList` and `popList`.
+    lists: seq[ListType]
+    # Set when `parseNone` writes `<P>`; `parseHTML` and `parseHTMLPre`
+    # write `</P>` and clear it.
+    hasp: bool
+    # When true, `main` dispatches the current line again instead of
+    # reading a new one.
+    reprocess: bool
+
+proc pushList(state: var ParseState, t: ListType) =
+  ## Opens a new list of type `t`: writes the list's opening tag together
+  ## with the `<LI>` of its first item, then records the list on the
+  ## stack of open lists.
+  let opening =
+    case t
+    of ltOl: "<OL>\n<LI>"
+    of ltUl: "<UL>\n<LI>"
+  stdout.write(opening)
+  state.lists.add(t)
+
+proc popList(state: var ParseState) =
+  ## Removes the innermost open list from the stack and writes its
+  ## closing tag.
+  let closing =
+    case state.lists.pop()
+    of ltOl: "</OL>\n"
+    of ltUl: "</UL>\n"
+  stdout.write(closing)
+
+proc parseNone(state: var ParseState, line: string) =
+  ## Handles a line read while no block is open and decides which block,
+  ## if any, it starts.
+  ##
+  ## Checks are made in this order: empty line, heading, HTML comment,
+  ## HTML block, fenced code block, list item, and finally paragraph.
+  ## Where `reprocess` is set, the same line is dispatched again to the
+  ## proc of the newly selected block type.
+  if line.len == 0:
+    return
+  if line[0] == '#' and line.toOpenArray(1, line.high).parseHash():
+    return
+  if line.startsWith("<!--"):
+    state.blockType = btComment
+    state.reprocess = true
+    return
+  if line[0] == '<' and line.find('>') == line.high:
+    if line.matchHTMLPreStart():
+      state.blockType = btHTMLPre
+    else:
+      state.blockType = btHTML
+    state.reprocess = true
+    return
+  if line.startsWith("```"):
+    state.blockType = btPre
+    stdout.write("<PRE>")
+    return
+  let (depth, markerEnd, kind) = line.getListDepth()
+  if depth != -1:
+    state.blockType = btList
+    state.listDepth = depth
+    state.hasp = false
+    state.pushList(kind)
+    state.blockData = line.substr(markerEnd) & "\n"
+    return
+  # Anything else starts a paragraph.
+  state.blockType = btPar
+  state.hasp = true
+  stdout.write("<P>\n")
+  state.reprocess = true
+
+proc parsePre(state: var ParseState, line: string) =
+  ## Handles a line inside a fenced code block.
+  ##
+  ## A line starting with three backticks closes the block; every other
+  ## line is written inside the `<PRE>` element as literal text.
+  if line.startsWith("```"):
+    state.blockType = btNone
+    stdout.write("</PRE>\n")
+  else:
+    # Escape markup characters so that code such as `<div>` or `a && b`
+    # is displayed instead of being interpreted as HTML.
+    let escaped = line.multiReplace(
+      ("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
+    stdout.write(escaped & "\n")
+
+proc parseList(state: var ParseState, line: string) =
+  ## Handles a line inside a list block.
+  ##
+  ## An empty line flushes the pending item text and closes every open
+  ## list. A line with a list marker flushes the pending text and starts
+  ## a new item, opening a nested list when it is indented deeper than
+  ## the previous item and closing one list when it is indented less.
+  ## Any other line is appended to the pending item text.
+  if line == "":
+    state.blockData.parseInline()
+    state.blockData = ""
+    while state.lists.len > 0:
+      state.popList()
+    state.blockType = btNone
+  elif (let (n, len, t) = line.getListDepth(); n != -1):
+    state.blockData.parseInline()
+    state.blockData = ""
+    if n > state.listDepth:
+      # `pushList` already writes the `<LI>` of the first item, so no
+      # second `<LI>` must follow here.
+      state.pushList(t)
+    else:
+      # Only one level is closed per dedent, because a single depth value
+      # is tracked rather than one per open list.
+      if n < state.listDepth and state.lists.len > 0:
+        state.popList()
+      if state.lists.len == 0:
+        # Never write an item outside of a list element.
+        state.pushList(t)
+      else:
+        stdout.write("<LI>")
+    state.listDepth = n
+    state.blockData = line.substr(len) & "\n"
+  else:
+    state.blockData &= line & "\n"
+
+proc parsePar(state: var ParseState, line: string) =
+  ## Handles a line inside a paragraph.
+  ##
+  ## An empty line, a line consisting of a single HTML tag, or a code
+  ## fence ends the paragraph: the collected text is converted and
+  ## written, and the matching block type is selected. Every other line
+  ## is added to the collected text.
+  template flushPending() =
+    state.blockData.parseInline()
+    state.blockData = ""
+  if line == "":
+    flushPending()
+    state.blockType = btNone
+  elif line[0] == '<' and line.find('>') == line.high:
+    flushPending()
+    state.blockType =
+      if line.matchHTMLPreStart(): btHTMLPre
+      else: btHTML
+    # Let the HTML block parser see this same line.
+    state.reprocess = true
+  elif line.startsWith("```"):
+    flushPending()
+    state.blockType = btPre
+    state.hasp = false
+    stdout.write("<PRE>")
+  else:
+    state.blockData.add(line)
+    state.blockData.add('\n')
+
+proc parseHTML(state: var ParseState, line: string) =
+  ## Handles a line inside an HTML block.
+  ##
+  ## Closes a paragraph that is still marked open, then collects lines
+  ## until an empty line arrives, at which point the collected text is
+  ## passed through `parseInline` and the block ends.
+  if state.hasp:
+    stdout.write("</P>\n")
+    state.hasp = false
+  if line.len > 0:
+    state.blockData.add(line & "\n")
+    return
+  state.blockData.parseInline()
+  state.blockData = ""
+  state.blockType = btNone
+
+proc parseHTMLPre(state: var ParseState, line: string) =
+  ## Handles a line inside a raw HTML block (`pre`, `script`, `style` or
+  ## `textarea`).
+  ##
+  ## Closes a paragraph that is still marked open, then collects lines
+  ## until one starts with a matching kind of closing tag. The collected
+  ## lines and the closing line are then written unmodified.
+  if state.hasp:
+    state.hasp = false
+    stdout.write("</P>\n")
+  if line.matchHTMLPreEnd():
+    stdout.write(state.blockData)
+    # The closing tag line belongs to the output too; without it the
+    # element would stay open for the rest of the document.
+    stdout.write(line & "\n")
+    state.blockData = ""
+    state.blockType = btNone
+  else:
+    state.blockData &= line & "\n"
+
+proc parseComment(state: var ParseState, line: string) =
+  ## Handles a line inside an HTML comment.
+  ##
+  ## Lines are copied to stdout unchanged until one contains `-->`. That
+  ## line is copied up to and including the terminator, the comment block
+  ## ends, and the rest of the line is converted by `parseInline`.
+  let endPos = line.find("-->")
+  if endPos == -1:
+    stdout.write(line & "\n")
+    return
+  stdout.write(line[0 .. endPos + 2])
+  state.blockType = btNone
+  parseInline(line.substr(endPos + 3))
+
+proc main() =
+  ## Reads Markdown from stdin line by line and writes HTML to stdout.
+  ##
+  ## Each line is dispatched to the proc for the current block type. When
+  ## a proc sets `reprocess`, the same line is dispatched again instead
+  ## of reading a new one. At end of input the block that is still open
+  ## is finished.
+  var line: string
+  var state = ParseState(listDepth: -1)
+  while state.reprocess or stdin.readLine(line):
+    state.reprocess = false
+    case state.blockType
+    of btNone: state.parseNone(line)
+    of btPre: state.parsePre(line)
+    of btList: state.parseList(line)
+    of btPar: state.parsePar(line)
+    of btHTML: state.parseHTML(line)
+    of btHTMLPre: state.parseHTMLPre(line)
+    of btComment: state.parseComment(line)
+  # Input often ends without a trailing blank line, so finish the open
+  # block the same way its terminator would have.
+  case state.blockType
+  of btList:
+    # An empty line flushes the last item and closes all open lists.
+    state.parseList("")
+  of btPre:
+    stdout.write("</PRE>\n")
+  of btHTMLPre:
+    # Raw HTML must not be run through the inline converter.
+    stdout.write(state.blockData)
+  of btNone, btPar, btHTML, btComment:
+    state.blockData.parseInline()
+
+main()