about summary refs log tree commit diff stats
path: root/adapter/format/ansi2html.nim
blob: 9f24dd0b3e719ffbd1c517cc29c88ca0a6342b63 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
import std/options
import std/os
import std/posix

import io/dynstream
import io/poll
import types/color
import utils/twtstr

type
  FormatFlag = enum
    ffBold
    ffItalic
    ffUnderline
    ffReverse
    ffStrike
    ffOverline
    ffBlink

  Format = object
    fgcolor: CellColor
    bgcolor: CellColor
    flags: set[FormatFlag]

# https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf
type
  AnsiCodeParseState = enum
    acpsDone, acpsStart, acpsParams, acpsInterm, acpsFinal, acpsBackspace,
    acpsInBackspaceTransition, acpsInBackspace

  AnsiCodeParser = object
    state: AnsiCodeParseState
    params: string

proc getParam(parser: AnsiCodeParser; i: var int; colon = false): string =
  while i < parser.params.len and
      not (parser.params[i] == ';' or colon and parser.params[i] == ':'):
    result &= parser.params[i]
    inc i
  if i < parser.params.len:
    inc i

template getParamU8(parser: AnsiCodeParser; i: var int; colon = false): uint8 =
  if i >= parser.params.len:
    return false
  let u = parseUInt8(parser.getParam(i), allowSign = false)
  if u.isNone:
    return false
  u.get

proc parseSGRDefColor(parser: AnsiCodeParser; format: var Format;
    i: var int; isfg: bool): bool =
  let u = parser.getParamU8(i, colon = true)
  template set_color(c: CellColor) =
    if isfg:
      format.fgcolor = c
    else:
      format.bgcolor = c
  if u == 2:
    let param0 = parser.getParamU8(i, colon = true)
    if i < parser.params.len:
      let r = param0
      let g = parser.getParamU8(i, colon = true)
      let b = parser.getParamU8(i, colon = true)
      set_color cellColor(rgb(r, g, b))
    else:
      set_color cellColor(gray(param0))
  elif u == 5:
    let param0 = parser.getParamU8(i, colon = true)
    if param0 in 0u8..15u8:
      set_color cellColor(ANSIColor(param0))
    elif param0 in 16u8..255u8:
      set_color cellColor(EightBitColor(param0))
  else:
    return false

proc parseSGRColor(parser: AnsiCodeParser; format: var Format;
    i: var int; u: uint8): bool =
  if u in 30u8..37u8:
    format.fgcolor = cellColor(ANSIColor(u - 30))
  elif u == 38:
    return parser.parseSGRDefColor(format, i, isfg = true)
  elif u == 39:
    format.fgcolor = defaultColor
  elif u in 40u8..47u8:
    format.bgcolor = cellColor(ANSIColor(u - 40))
  elif u == 48:
    return parser.parseSGRDefColor(format, i, isfg = false)
  elif u == 49:
    format.bgcolor = defaultColor
  elif u in 90u8..97u8:
    format.fgcolor = cellColor(ANSIColor(u - 82))
  elif u in 100u8..107u8:
    format.bgcolor = cellColor(ANSIColor(u - 92))
  else:
    return false
  return true

const FormatCodes: array[FormatFlag, tuple[s, e: uint8]] = [
  ffBold: (1u8, 22u8),
  ffItalic: (3u8, 23u8),
  ffUnderline: (4u8, 24u8),
  ffReverse: (7u8, 27u8),
  ffStrike: (9u8, 29u8),
  ffOverline: (53u8, 55u8),
  ffBlink: (5u8, 25u8),
]

proc parseSGRAspect(parser: AnsiCodeParser; format: var Format;
    i: var int): bool =
  let u = parser.getParamU8(i)
  for flag, (s, e) in FormatCodes:
    if u == s:
      format.flags.incl(flag)
      return true
    if u == e:
      format.flags.excl(flag)
      return true
  if u == 0:
    format = Format()
    return true
  else:
    return parser.parseSGRColor(format, i, u)

proc parseSGR(parser: AnsiCodeParser; format: var Format) =
  if parser.params.len == 0:
    format = Format()
  else:
    var i = 0
    while i < parser.params.len:
      if not parser.parseSGRAspect(format, i):
        break

proc parseControlFunction(parser: var AnsiCodeParser; format: var Format;
    f: char) =
  if f == 'm':
    parser.parseSGR(format)
  else:
    discard # unknown

proc reset(parser: var AnsiCodeParser) =
  parser.state = acpsStart
  parser.params = ""

type State = object
  outbufIdx: int
  outbuf: array[4096, char]
  parser: AnsiCodeParser
  currentFmt: Format
  pendingFmt: Format
  tmpFlags: set[FormatFlag]
  af: bool
  spanOpen: bool
  hasPrintingBuf: bool
  backspaceDecay: int

const STDIN_FILENO = 0
const STDOUT_FILENO = 1
proc flushOutbuf(state: var State) =
  if state.outbufIdx > 0:
    discard write(STDOUT_FILENO, addr state.outbuf[0], state.outbufIdx)
    state.outbufIdx = 0

proc putc(state: var State; c: char) {.inline.} =
  if state.outbufIdx + 4 >= state.outbuf.len: # max utf-8 char length
    state.flushOutbuf()
  state.outbuf[state.outbufIdx] = c
  inc state.outbufIdx

proc puts(state: var State; s: openArray[char]) {.inline.} =
  #TODO this is slower than it could be
  for c in s:
    state.putc(c)

proc flushFmt(state: var State) =
  if state.pendingFmt != state.currentFmt:
    if state.spanOpen:
      state.puts("</span>")
    if state.pendingFmt == Format():
      state.currentFmt = state.pendingFmt
      state.spanOpen = false
      return
    state.spanOpen = true
    state.puts("<span style='")
    let fmt = state.pendingFmt
    var buf = ""
    if fmt.fgcolor.t != ctNone:
      buf &= "color: "
      case fmt.fgcolor.t
      of ctNone: discard
      of ctANSI: buf &= "-cha-ansi(" & $fmt.fgcolor.color & ")"
      of ctRGB: buf &= $fmt.fgcolor
      buf &= ";"
    if fmt.bgcolor.t != ctNone:
      buf &= "background-color: "
      case fmt.bgcolor.t
      of ctNone: discard
      of ctANSI: buf &= "-cha-ansi(" & $fmt.bgcolor.color & ")"
      of ctRGB: buf &= $fmt.bgcolor
      buf &= ";"
    if ffOverline in fmt.flags or ffUnderline in fmt.flags or
        ffStrike in fmt.flags or ffBlink in fmt.flags:
      buf &= "text-decoration: "
      if ffOverline in fmt.flags:
        buf &= "overline "
      if ffUnderline in fmt.flags:
        buf &= "underline "
      if ffStrike in fmt.flags:
        buf &= "line-through "
      if ffBlink in fmt.flags:
        buf &= "blink "
      buf &= ";"
    if ffBold in fmt.flags:
      buf &= "font-weight: bold;"
    if ffItalic in fmt.flags:
      buf &= "font-style: italic;"
    #TODO reverse
    buf &= "'>"
    state.puts(buf)
    state.currentFmt = fmt
    state.hasPrintingBuf = false

type ParseAnsiCodeResult = enum
  pacrProcess, pacrSkip

proc parseAnsiCode(state: var State; format: var Format; c: char):
    ParseAnsiCodeResult =
  case state.parser.state
  of acpsStart:
    if 0x40 <= int(c) and int(c) <= 0x5F:
      if c != '[':
        #C1, TODO?
        state.parser.state = acpsDone
      else:
        state.parser.state = acpsParams
    else:
      state.parser.state = acpsDone
      return pacrProcess
  of acpsParams:
    if 0x30 <= int(c) and int(c) <= 0x3F:
      state.parser.params &= c
    else:
      state.parser.state = acpsInterm
      return state.parseAnsiCode(format, c)
  of acpsInterm:
    if 0x20 <= int(c) and int(c) <= 0x2F:
      discard
    else:
      state.parser.state = acpsFinal
      return state.parseAnsiCode(format, c)
  of acpsFinal:
    state.parser.state = acpsDone
    if 0x40 <= int(c) and int(c) <= 0x7E:
      state.parser.parseControlFunction(format, c)
    else:
      return pacrProcess
  of acpsDone:
    discard
  of acpsBackspace:
    # We used to emulate less here, but it seems to yield dubious benefits
    # considering that
    # a) the only place backspace-based formatting is used in is manpages
    # b) we have w3mman now, which is superior in all respects, so this is
    # pretty much never used
    # c) if we drop generality, the output can be parsed much more efficiently
    # (without having to buffer the entire line first)
    #
    # So we buffer only the last non-formatted UTF-8 char, and override it when
    # necessary.
    if not state.hasPrintingBuf:
      state.parser.state = acpsDone
      return pacrProcess
    var i = state.outbufIdx - 1
    while true:
      if i < 0:
        state.parser.state = acpsDone
        return pacrProcess
      if (int(state.outbuf[i]) and 0xC0) != 0x80:
        break
      dec i
    if state.outbuf[i] == '_' or c == '_':
      # underline for underscore overstrike
      if ffUnderline notin state.pendingFmt.flags:
        state.tmpFlags.incl(ffUnderline)
        state.pendingFmt.flags.incl(ffUnderline)
      elif c == '_' and ffBold notin state.pendingFmt.flags:
        state.tmpFlags.incl(ffBold)
        state.pendingFmt.flags.incl(ffBold)
    else:
      # represent *any* non-underline overstrike with bold.
      # it is sloppy, but enough for our purposes.
      if ffBold notin state.pendingFmt.flags:
        state.tmpFlags.incl(ffBold)
        state.pendingFmt.flags.incl(ffBold)
    state.outbufIdx = i # move back output pointer
    state.parser.state = acpsInBackspaceTransition
    state.flushFmt()
    return pacrProcess
  of acpsInBackspaceTransition:
    if (int(c) and 0xC0) != 0x80:
      # backspace char end, next char begin
      state.parser.state = acpsInBackspace
    return pacrProcess
  of acpsInBackspace:
    if (int(c) and 0xC0) != 0x80:
      # second char after backspaced char begin
      if c == '\b':
        # got backspace again, overstriking previous char. here we don't have to
        # override anything
        state.parser.state = acpsBackspace
        return pacrProcess
      # welp. we have to fixup the previous char's formatting
      var i = state.outbufIdx - 1
      while true:
        assert i >= 0
        if (int(state.outbuf[i]) and 0xC0) != 0x80:
          break
        dec i
      let s = state.outbuf[i..<state.outbufIdx]
      state.outbufIdx = i
      for flag in FormatFlag:
        if flag in state.tmpFlags:
          state.pendingFmt.flags.excl(flag)
      state.tmpFlags = {}
      state.flushFmt()
      state.puts(s)
      state.parser.state = acpsDone
    return pacrProcess
  state.flushFmt()
  pacrSkip

proc processData(state: var State; buf: openArray[char]) =
  for c in buf:
    if state.parser.state != acpsDone:
      case state.parseAnsiCode(state.pendingFmt, c)
      of pacrSkip: continue
      of pacrProcess: discard
    state.hasPrintingBuf = true
    case c
    of '<': state.puts("&lt;")
    of '>': state.puts("&gt;")
    of '\'': state.puts("&apos;")
    of '"': state.puts("&quot;")
    of '&': state.puts("&amp;")
    of '\e': state.parser.reset()
    of '\b': state.parser.state = acpsBackspace
    of '\0': state.puts("\uFFFD") # HTML eats NUL, so replace it here
    else: state.putc(c)

proc usage() =
  stderr.write("Usage: ansihtml [-s] [-t title]\n")
  quit(1)

proc main() =
  var state = State()
  # parse args
  let H = paramCount()
  var i = 1
  var standalone = false
  var title = ""
  while i <= H:
    let s = paramStr(i)
    if s == "":
      inc i
    if s[0] != '-':
      usage()
    for j in 1 ..< s.len:
      case s[j]
      of 's':
        standalone = true
      of 't':
        inc i
        if i > H: usage()
        title = paramStr(i).percentDecode()
      else:
        usage()
    inc i
  if standalone:
    state.puts("<!DOCTYPE html>\n")
  if title != "":
    state.puts("<title>" & title.htmlEscape() & "</title>\n")
  if standalone:
    state.puts("<body>\n")
  state.puts("<pre style='margin: 0'>\n")
  let ps = newPosixStream(STDIN_FILENO)
  ps.setBlocking(false)
  var buffer {.noinit.}: array[4096, char]
  var pollData = PollData()
  while true:
    try:
      let n = ps.recvData(buffer)
      if n == 0:
        break
      state.processData(buffer.toOpenArray(0, n - 1))
    except ErrorAgain:
      state.flushOutbuf()
      pollData.register(ps.fd, POLLIN)
      pollData.poll(-1)
      pollData.unregister(ps.fd)
  if standalone:
    state.puts("</body>")
  state.flushOutbuf()

main()