about summary refs log tree commit diff stats
path: root/adapter/format/ansi2html.nim
diff options
context:
space:
mode:
Diffstat (limited to 'adapter/format/ansi2html.nim')
-rw-r--r--adapter/format/ansi2html.nim380
1 files changed, 380 insertions, 0 deletions
diff --git a/adapter/format/ansi2html.nim b/adapter/format/ansi2html.nim
new file mode 100644
index 00000000..a7242be7
--- /dev/null
+++ b/adapter/format/ansi2html.nim
@@ -0,0 +1,380 @@
+import std/options
+import std/selectors
+
+import io/posixstream
+import types/color
+import utils/twtstr
+
+type
+  # Text attributes that can be toggled on a run of output. Each member is
+  # paired with its SGR on/off codes in the FormatCodes table.
+  FormatFlag = enum
+    ffBold
+    ffItalic
+    ffUnderline
+    ffReverse
+    ffStrike
+    ffOverline
+    ffBlink
+
+  # Styling of a run of text. A default-constructed Format() is what
+  # flushFmt treats as "no styling" (no <span> is opened for it).
+  Format = object
+    fgcolor: CellColor # foreground color; emitted as CSS `color`
+    bgcolor: CellColor # background color; emitted as CSS `background-color`
+    flags: set[FormatFlag] # currently active text attributes
+
+# https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf
+type
+  # States of the per-byte escape/backspace state machine (parseAnsiCode):
+  # * acpsDone: idle; processData handles bytes as regular output.
+  # * acpsStart: entered via reset() after ESC; expects the introducer byte.
+  # * acpsParams: collecting parameter bytes (0x30..0x3F) into `params`.
+  # * acpsInterm: skipping intermediate bytes (0x20..0x2F).
+  # * acpsFinal: expects the final byte (0x40..0x7E) selecting the function.
+  # * acpsBackspace: a '\b' was seen; the next byte overstrikes the last char.
+  # * acpsInBackspaceTransition: inside the overstriking character.
+  # * acpsInBackspace: inside the character following the overstriking one.
+  AnsiCodeParseState = enum
+    acpsDone, acpsStart, acpsParams, acpsInterm, acpsFinal, acpsBackspace,
+    acpsInBackspaceTransition, acpsInBackspace
+
+  # Parser state kept between input bytes.
+  AnsiCodeParser = object
+    state: AnsiCodeParseState
+    params: string # raw parameter bytes of the sequence being parsed
+
+proc getParam(parser: AnsiCodeParser, i: var int, colon = false): string =
+  ## Returns the parameter text starting at index `i` of `parser.params`,
+  ## up to (not including) the next ';' -- or ':' as well when `colon` is set.
+  ## `i` is advanced past the text and past the separator, if one was found.
+  result = ""
+  while i < parser.params.len:
+    let ch = parser.params[i]
+    inc i
+    if ch == ';' or (colon and ch == ':'):
+      # separator consumed; the parameter ends here
+      break
+    result &= ch
+
+template getParamU8(parser: AnsiCodeParser, i: var int,
+    colon = false): uint8 =
+  # Reads the next parameter at `i` as an uint8 and advances `i` past it.
+  # When no parameter is left, or it is not a valid uint8, this makes the
+  # *calling* proc `return false`; it may therefore only be used inside
+  # procs that return bool.
+  # `colon` is forwarded to getParam so that ':' is accepted as a separator
+  # when the caller asks for it (previously the flag was silently ignored).
+  if i >= parser.params.len:
+    return false
+  let u = parseUInt8(parser.getParam(i, colon))
+  if u.isNone:
+    return false
+  u.get
+
+proc parseSGRDefColor(parser: AnsiCodeParser, format: var Format,
+    i: var int, isfg: bool): bool =
+  ## Parses the sub-parameters following SGR 38 (`isfg` true) or 48 (`isfg`
+  ## false) and stores the resulting color in `format`.
+  ##
+  ## Supported selectors:
+  ## * 2 followed by three values: RGB; 2 followed by a single value: gray.
+  ## * 5 followed by one value: 0..15 maps to ANSIColor, the rest to
+  ##   EightBitColor.
+  ##
+  ## `i` is advanced past everything consumed. Returns true when a color was
+  ## set, false on an unknown selector or a missing/invalid parameter (the
+  ## latter through getParamU8's early return).
+  template set_color(c: CellColor) =
+    if isfg:
+      format.fgcolor = c
+    else:
+      format.bgcolor = c
+  let u = parser.getParamU8(i, colon = true)
+  if u == 2:
+    let param0 = parser.getParamU8(i, colon = true)
+    if i < parser.params.len:
+      # more parameters follow: param0 is the red component
+      let r = param0
+      let g = parser.getParamU8(i, colon = true)
+      let b = parser.getParamU8(i, colon = true)
+      set_color cellColor(rgb(r, g, b))
+    else:
+      set_color cellColor(gray(param0))
+  elif u == 5:
+    let param0 = parser.getParamU8(i, colon = true)
+    if param0 in 0u8..15u8:
+      set_color cellColor(ANSIColor(param0))
+    else:
+      # 16..255; an uint8 cannot hold anything else
+      set_color cellColor(EightBitColor(param0))
+  else:
+    return false
+  # Report success so that parseSGR keeps processing the parameters that
+  # follow the color (e.g. the `1` in `38;5;196;1`).
+  return true
+
+proc parseSGRColor(parser: AnsiCodeParser, format: var Format,
+    i: var int, u: uint8): bool =
+  ## Applies the color-related SGR code `u` to `format`.
+  ## 38 and 48 consume further parameters starting at `i` (see
+  ## parseSGRDefColor). Returns false for codes that are not color codes.
+  case u
+  of 30u8..37u8:
+    format.fgcolor = cellColor(ANSIColor(u - 30))
+  of 38u8:
+    return parser.parseSGRDefColor(format, i, isfg = true)
+  of 39u8:
+    format.fgcolor = defaultColor
+  of 40u8..47u8:
+    format.bgcolor = cellColor(ANSIColor(u - 40))
+  of 48u8:
+    return parser.parseSGRDefColor(format, i, isfg = false)
+  of 49u8:
+    format.bgcolor = defaultColor
+  of 90u8..97u8:
+    # bright foreground: 90..97 -> colors 8..15
+    format.fgcolor = cellColor(ANSIColor(u - 82))
+  of 100u8..107u8:
+    # bright background: 100..107 -> colors 8..15
+    format.bgcolor = cellColor(ANSIColor(u - 92))
+  else:
+    return false
+  return true
+
+# SGR codes per flag: `s` switches the attribute on, `e` switches it off.
+const FormatCodes: array[FormatFlag, tuple[s, e: uint8]] = [
+  ffBold: (s: 1u8, e: 22u8),
+  ffItalic: (s: 3u8, e: 23u8),
+  ffUnderline: (s: 4u8, e: 24u8),
+  ffReverse: (s: 7u8, e: 27u8),
+  ffStrike: (s: 9u8, e: 29u8),
+  ffOverline: (s: 53u8, e: 55u8),
+  ffBlink: (s: 5u8, e: 25u8),
+]
+
+proc parseSGRAspect(parser: AnsiCodeParser, format: var Format,
+    i: var int): bool =
+  ## Consumes one SGR parameter at `i` and applies it to `format`:
+  ## 0 resets everything, a FormatCodes entry toggles a flag, anything else
+  ## is handed to parseSGRColor. Returns false when the parameter is missing,
+  ## malformed or not understood.
+  let code = parser.getParamU8(i)
+  if code == 0:
+    # no FormatCodes entry uses 0, so this can be tested up front
+    format = Format()
+    return true
+  for flag, codes in FormatCodes:
+    if code == codes.s:
+      format.flags.incl(flag)
+      return true
+    if code == codes.e:
+      format.flags.excl(flag)
+      return true
+  return parser.parseSGRColor(format, i, code)
+
+proc parseSGR(parser: AnsiCodeParser, format: var Format) =
+  ## Applies all collected SGR parameters to `format`. An empty parameter
+  ## list resets the format; processing stops at the first parameter that
+  ## parseSGRAspect rejects.
+  if parser.params.len == 0:
+    format = Format()
+    return
+  var pos = 0
+  while pos < parser.params.len and parser.parseSGRAspect(format, pos):
+    discard
+
+proc parseControlFunction(parser: var AnsiCodeParser, format: var Format,
+    f: char) =
+  ## Dispatches on the final byte `f` of a control sequence. Only 'm' (SGR)
+  ## is handled; every other function is ignored.
+  case f
+  of 'm': parser.parseSGR(format)
+  else: discard # unknown
+
+proc reset(parser: var AnsiCodeParser) =
+  ## Prepares the parser for a new escape sequence: clears the collected
+  ## parameters and moves to acpsStart.
+  parser.params = ""
+  parser.state = acpsStart
+
+# Converter state shared by the output helpers and the parser.
+type State = object
+  os: PosixStream # output stream; main() sets it to a stream over stdout
+  outbufIdx: int # number of bytes currently held in outbuf
+  outbuf: array[4096, char] # pending output, written out by flushOutbuf
+  parser: AnsiCodeParser
+  currentFmt: Format # format of the last span handled by flushFmt
+  pendingFmt: Format # format requested by the input, applied by flushFmt
+  tmpFlags: set[FormatFlag] # flags added temporarily for an overstrike
+  af: bool # not referenced in the visible part of this file
+  spanOpen: bool # true while a <span> written by flushFmt is unclosed
+  # set by processData for each byte reaching the output stage, cleared by
+  # flushFmt after it wrote a span tag; acpsBackspace only overstrikes when set
+  hasPrintingBuf: bool
+  backspaceDecay: int # not referenced in the visible part of this file
+
+proc flushOutbuf(state: var State) =
+  ## Sends the buffered output to `state.os` and empties the buffer.
+  ## The result of sendData is ignored. Does nothing on an empty buffer.
+  if state.outbufIdx <= 0:
+    return
+  discard state.os.sendData(addr state.outbuf[0], state.outbufIdx)
+  state.outbufIdx = 0
+
+proc putc(state: var State, c: char) {.inline.} =
+  ## Appends the byte `c` to the output buffer, flushing beforehand when
+  ## there is not enough room left.
+  ##
+  ## The headroom check (4 bytes, the maximum UTF-8 character length) is only
+  ## applied when `c` starts a character, i.e. is not a UTF-8 continuation
+  ## byte (0b10xxxxxx). Checking it on every byte could flush in the middle of
+  ## a character, leaving its continuation bytes at the start of the buffer
+  ## where the backwards scans in parseAnsiCode cannot find the lead byte.
+  let isContinuation = (int(c) and 0xC0) == 0x80
+  if state.outbufIdx >= state.outbuf.len or
+      ((not isContinuation) and state.outbufIdx + 4 >= state.outbuf.len):
+    # first condition: hard guard against overlong continuation runs in
+    # malformed input, which would otherwise overflow the buffer
+    state.flushOutbuf()
+  state.outbuf[state.outbufIdx] = c
+  inc state.outbufIdx
+
+proc puts(state: var State, s: string) =
+  ## Writes every byte of `s` to the output buffer through putc.
+  #TODO this is slower than it could be
+  for idx in 0 ..< s.len:
+    state.putc(s[idx])
+
+proc puts(state: var State, s: openArray[char]) =
+  ## Writes every byte of `s` to the output buffer through putc.
+  #TODO this is slower than it could be
+  for idx in 0 ..< s.len:
+    state.putc(s[idx])
+
+proc puts(state: var State, s: static string) {.inline.} =
+  ## Overload for compile-time known strings; writes each byte through putc.
+  for idx in 0 ..< s.len:
+    state.putc(s[idx])
+
+proc flushFmt(state: var State) =
+  ## Brings the emitted markup in line with `state.pendingFmt`.
+  ## Nothing happens when it equals `currentFmt`. Otherwise an open span is
+  ## closed and, unless the pending format is the default Format(), a new
+  ## <span style='...'> describing it is written.
+  if state.pendingFmt == state.currentFmt:
+    return
+  if state.spanOpen:
+    state.puts("</span>")
+  if state.pendingFmt == Format():
+    # back to unstyled text: no new span needed
+    state.currentFmt = state.pendingFmt
+    state.spanOpen = false
+    return
+  state.spanOpen = true
+  state.puts("<span style='")
+  let fmt = state.pendingFmt
+  var css = ""
+  # appends `prop` plus the CSS value of color `cc`, if one is set
+  template addColor(prop: string, cc: untyped) =
+    if cc.t != ctNone:
+      css &= prop
+      case cc.t
+      of ctNone: discard
+      of ctANSI: css &= "-cha-ansi(" & $cc.color & ")"
+      of ctRGB: css &= $cc
+      css &= ";"
+  addColor("color: ", fmt.fgcolor)
+  addColor("background-color: ", fmt.bgcolor)
+  var decorations = ""
+  if ffOverline in fmt.flags:
+    decorations &= "overline "
+  if ffUnderline in fmt.flags:
+    decorations &= "underline "
+  if ffStrike in fmt.flags:
+    decorations &= "line-through "
+  if ffBlink in fmt.flags:
+    decorations &= "blink "
+  if decorations.len > 0:
+    css &= "text-decoration: "
+    css &= decorations
+    css &= ";"
+  if ffBold in fmt.flags:
+    css &= "font-weight: bold;"
+  if ffItalic in fmt.flags:
+    css &= "font-style: italic;"
+  #TODO reverse
+  css &= "'>"
+  state.puts(css)
+  state.currentFmt = fmt
+  # the buffer now ends in markup, not in a printed character
+  state.hasPrintingBuf = false
+
+# Verdict of parseAnsiCode for one input byte: pacrProcess means processData
+# goes on to handle the byte as regular output, pacrSkip means it moves on to
+# the next byte.
+type ParseAnsiCodeResult = enum
+  pacrProcess, pacrSkip
+
+proc parseAnsiCode(state: var State, format: var Format, c: char):
+    ParseAnsiCodeResult =
+  ## Feeds the byte `c` to the escape-sequence / backspace state machine.
+  ## processData calls this only while `state.parser.state` is not acpsDone,
+  ## passing `state.pendingFmt` as `format`.
+  ##
+  ## Returns pacrProcess when the caller should go on to handle `c` as
+  ## regular output, pacrSkip when the byte was consumed here. Branches that
+  ## do not return early fall through to the end, where the pending format is
+  ## flushed and pacrSkip is returned.
+  case state.parser.state
+  of acpsStart:
+    # byte right after ESC: only '[' (CSI) starts a sequence we parse
+    if 0x40 <= int(c) and int(c) <= 0x5F:
+      if c != '[':
+        #C1, TODO?
+        state.parser.state = acpsDone
+      else:
+        state.parser.state = acpsParams
+    else:
+      state.parser.state = acpsDone
+      return pacrProcess
+  of acpsParams:
+    if 0x30 <= int(c) and int(c) <= 0x3F:
+      state.parser.params &= c
+    else:
+      # not a parameter byte: re-dispatch the same byte in the next state
+      state.parser.state = acpsInterm
+      return state.parseAnsiCode(format, c)
+  of acpsInterm:
+    if 0x20 <= int(c) and int(c) <= 0x2F:
+      discard
+    else:
+      # not an intermediate byte: re-dispatch the same byte as final byte
+      state.parser.state = acpsFinal
+      return state.parseAnsiCode(format, c)
+  of acpsFinal:
+    state.parser.state = acpsDone
+    if 0x40 <= int(c) and int(c) <= 0x7E:
+      state.parser.parseControlFunction(format, c)
+    else:
+      return pacrProcess
+  of acpsDone:
+    discard
+  of acpsBackspace:
+    # We used to emulate less here, but it seems to yield dubious benefits
+    # considering that
+    # a) the only place backspace-based formatting is used in is manpages
+    # b) we have w3mman now, which is superior in all respects, so this is
+    # pretty much never used
+    # c) if we drop generality, the output can be parsed much more efficiently
+    # (without having to buffer the entire line first)
+    #
+    # So we buffer only the last non-formatted UTF-8 char, and override it when
+    # necessary.
+    if not state.hasPrintingBuf:
+      state.parser.state = acpsDone
+      return pacrProcess
+    # find the first byte of the last character in the output buffer by
+    # skipping UTF-8 continuation bytes (0b10xxxxxx) backwards
+    var i = state.outbufIdx - 1
+    while true:
+      if i < 0:
+        # character start is no longer in the buffer: give up on overstrike
+        state.parser.state = acpsDone
+        return pacrProcess
+      if (int(state.outbuf[i]) and 0xC0) != 0x80:
+        break
+      dec i
+    if state.outbuf[i] == '_' or c == '_':
+      # underline for underscore overstrike
+      if ffUnderline notin state.pendingFmt.flags:
+        state.tmpFlags.incl(ffUnderline)
+        state.pendingFmt.flags.incl(ffUnderline)
+      elif c == '_' and ffBold notin state.pendingFmt.flags:
+        state.tmpFlags.incl(ffBold)
+        state.pendingFmt.flags.incl(ffBold)
+    else:
+      # represent *any* non-underline overstrike with bold.
+      # it is sloppy, but enough for our purposes.
+      if ffBold notin state.pendingFmt.flags:
+        state.tmpFlags.incl(ffBold)
+        state.pendingFmt.flags.incl(ffBold)
+    state.outbufIdx = i # move back output pointer
+    state.parser.state = acpsInBackspaceTransition
+    state.flushFmt()
+    return pacrProcess
+  of acpsInBackspaceTransition:
+    if (int(c) and 0xC0) != 0x80:
+      # backspace char end, next char begin
+      state.parser.state = acpsInBackspace
+    return pacrProcess
+  of acpsInBackspace:
+    if (int(c) and 0xC0) != 0x80:
+      # second char after backspaced char begin
+      if c == '\b':
+        # got backspace again, overstriking previous char. here we don't have to
+        # override anything
+        state.parser.state = acpsBackspace
+        return pacrProcess
+      # welp. we have to fixup the previous char's formatting
+      var i = state.outbufIdx - 1
+      while true:
+        # relies on the start of the previous char still being in outbuf
+        assert i >= 0
+        if (int(state.outbuf[i]) and 0xC0) != 0x80:
+          break
+        dec i
+      # take the previous char out, drop the temporary flags, then re-emit
+      # it after the format change
+      let s = state.outbuf[i..<state.outbufIdx]
+      state.outbufIdx = i
+      for flag in FormatFlag:
+        if flag in state.tmpFlags:
+          state.pendingFmt.flags.excl(flag)
+      state.tmpFlags = {}
+      state.flushFmt()
+      state.puts(s)
+      state.parser.state = acpsDone
+    return pacrProcess
+  state.flushFmt()
+  pacrSkip
+
+proc processData(state: var State, buf: openArray[char]) =
+  ## Converts the input bytes in `buf` to HTML in the output buffer.
+  ##
+  ## While an escape sequence or overstrike is in progress, each byte goes
+  ## through parseAnsiCode first, which may consume it. Remaining bytes are
+  ## written out with HTML-significant characters replaced by character
+  ## references; ESC and backspace switch the parser state instead of
+  ## producing output.
+  for c in buf:
+    if state.parser.state != acpsDone:
+      case state.parseAnsiCode(state.pendingFmt, c)
+      of pacrSkip: continue
+      of pacrProcess: discard
+    state.hasPrintingBuf = true
+    case c
+    of '<': state.puts("&lt;")
+    of '>': state.puts("&gt;")
+    # '&' must be escaped too, or input like "&lt;" would be interpreted as
+    # a character reference by the HTML consumer
+    of '&': state.puts("&amp;")
+    of '\'': state.puts("&apos;")
+    of '"': state.puts("&quot;")
+    of '\e': state.parser.reset()
+    of '\b': state.parser.state = acpsBackspace
+    of '\0': state.puts("\uFFFD") # HTML eats NUL, so replace it here
+    else: state.putc(c)
+
+proc main() =
+  ## Reads stdin in non-blocking mode, converts it with processData and
+  ## writes the HTML to stdout. When no input is available, buffered output
+  ## is flushed and the selector waits for stdin; the loop ends on end of
+  ## input or on an error event.
+  let input = newPosixStream(stdin.getFileHandle())
+  var state = State(os: newPosixStream(stdout.getFileHandle()))
+  state.puts("<!DOCTYPE html>\n<body><pre style='margin: 0'>")
+  input.setBlocking(false)
+  var chunk {.noinit.}: array[4096, char]
+  var selector = newSelector[int]()
+  block mainloop:
+    while true:
+      try:
+        let got = input.recvData(chunk.toOpenArrayByte(0, chunk.high))
+        if got == 0:
+          # end of input
+          break mainloop
+        state.processData(chunk.toOpenArray(0, got - 1))
+      except ErrorAgain:
+        # nothing to read right now: hand over what we have, then wait
+        state.flushOutbuf()
+        selector.registerHandle(input.fd, {Read}, 0)
+        for event in selector.select(-1):
+          if Error in event.events:
+            break mainloop
+        selector.unregister(input.fd)
+  state.flushOutbuf()
+
+main()