diff options
author | bptato <nincsnevem662@gmail.com> | 2023-12-22 00:53:14 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2023-12-22 00:53:55 +0100 |
commit | e58016bff983c043bb533e0bf07c0e5b3df5677e (patch) | |
tree | 5b2696ac5c20c5cffd78754fcc5e9680422cc4df /adapter | |
parent | 6ae8b1bbb0f172cb8c096b3e6792fc3766dbdb2d (diff) | |
download | chawan-e58016bff983c043bb533e0bf07c0e5b3df5677e.tar.gz |
gmi2html: rewrite
* Rewrite in Nim
* This time, do not use a state machine (it was a very bad idea)
* Do not emit `<br>` for every line; use CSS instead
* Avoid double-newline caused by margins using CSS
* Properly support list items
Diffstat (limited to 'adapter')
-rw-r--r-- | adapter/format/gmi2html.c | 241 | ||||
-rw-r--r-- | adapter/format/gmi2html.nim | 69 |
2 files changed, 69 insertions, 241 deletions
diff --git a/adapter/format/gmi2html.c b/adapter/format/gmi2html.c deleted file mode 100644 index 2c8c2bf6..00000000 --- a/adapter/format/gmi2html.c +++ /dev/null @@ -1,241 +0,0 @@ -/* This file is dedicated to the public domain. - * - * Convert gemtext to HTML. Only accepts input on stdin. - */ - -#include <stdio.h> -#include <stdlib.h> - -typedef enum { - STATE_NORMAL, - STATE_BLOCKQUOTE, - STATE_NEWLINE, - STATE_NEWLINE_EQUALS, - STATE_NEWLINE_EQUALS_ARROW, - STATE_BEFORE_URL, - STATE_IN_URL, - STATE_BEFORE_URL_NAME, - STATE_URL_NAME, - STATE_SINGLE_BACKTICK, - STATE_DOUBLE_BACKTICK, - STATE_PRE_START, - STATE_IN_PRE, - STATE_PRE_SINGLE_BACKTICK, - STATE_PRE_DOUBLE_BACKTICK, - STATE_SKIP_LINE, - STATE_HASH, - STATE_DOUBLE_HASH, - STATE_AFTER_HASH, - STATE_AFTER_DOUBLE_HASH, - STATE_AFTER_TRIPLE_HASH -} ParseState; - -static ParseState state = STATE_NEWLINE; -static ParseState prev_state = STATE_NORMAL; - -int main(void) { - int c; -#define BUFSIZE 4096 - char urlbuf[BUFSIZE + 1]; - char *urlp; - - urlp = urlbuf; - printf("<!DOCTYPE html>"); -#define SET_STATE(s) do { \ - prev_state = state; \ - state = s; \ - } while (0) -#define REDO_NORMAL do { \ - SET_STATE(STATE_NORMAL); \ - goto normal; \ - } while (0) - while ((c = getc(stdin)) != EOF) { - switch (state) { - case STATE_NORMAL: - case STATE_BLOCKQUOTE: - case STATE_IN_PRE: - case STATE_PRE_START: - case STATE_SKIP_LINE: - case STATE_URL_NAME: - case STATE_AFTER_HASH: - case STATE_AFTER_DOUBLE_HASH: - case STATE_AFTER_TRIPLE_HASH: -normal: switch (c) { - case '\r': break; - case '\n': - if (state == STATE_BLOCKQUOTE) { - fputs("</blockquote>", stdout); - } else if (state == STATE_PRE_START) { - fputs("\">", stdout); - SET_STATE(STATE_IN_PRE); - } else if (state == STATE_URL_NAME) { - fputs("</a>", stdout); - fputs("<br>", stdout); - } else if (state == STATE_AFTER_HASH) { - fputs("</h1>", stdout); - } else if (state == STATE_AFTER_DOUBLE_HASH) { - fputs("</h2>", stdout); - } else if (state == 
STATE_AFTER_TRIPLE_HASH) { - fputs("</h3>", stdout); - } else if (state == STATE_SKIP_LINE) { - } else { - fputs("<br>", stdout); - } - SET_STATE(STATE_NEWLINE); - break; - case '<': - fputs("<", stdout); - break; - case '>': - fputs(">", stdout); - break; - case '&': - fputs("&", stdout); - break; - default: - if (state != STATE_SKIP_LINE) - putchar(c); - break; - } - break; - case STATE_NEWLINE: - if (prev_state == STATE_IN_PRE) { - if (c == '`') { - SET_STATE(STATE_PRE_SINGLE_BACKTICK); - break; - } else { - SET_STATE(STATE_IN_PRE); - goto normal; - } - } - switch (c) { - case '=': - SET_STATE(STATE_NEWLINE_EQUALS); - break; - case '>': - SET_STATE(STATE_BLOCKQUOTE); - printf("<blockquote>"); - break; - case '`': - SET_STATE(STATE_SINGLE_BACKTICK); - break; - case '#': - SET_STATE(STATE_HASH); - break; - default: - REDO_NORMAL; - } - break; - case STATE_NEWLINE_EQUALS: - if (c == '>') { - SET_STATE(STATE_NEWLINE_EQUALS_ARROW); - } else { - putchar('='); - REDO_NORMAL; - } - break; - case STATE_NEWLINE_EQUALS_ARROW: - if (c == ' ') { - state = STATE_BEFORE_URL; - } else { - putchar('='); - REDO_NORMAL; - } - break; - case STATE_BEFORE_URL: - if (c == ' ') { - continue; - break; - } else { - fputs("<a href=\"", stdout); - SET_STATE(STATE_IN_URL); - urlp = urlbuf; - } - /* fall through */ - case STATE_IN_URL: - switch (c) { - case '"': - fputs("%22", stdout); - if (urlp < &urlbuf[BUFSIZE]) - *urlp++ = '"'; - break; - case ' ': - case '\t': - fputs("\">", stdout); - *urlp = '\0'; - SET_STATE(STATE_BEFORE_URL_NAME); - break; - case '\n': - *urlp = '\0'; - fputs("\">", stdout); - fputs(urlbuf, stdout); - fputs("</a><br>", stdout); - SET_STATE(STATE_NEWLINE); - break; - default: - if (urlp < &urlbuf[BUFSIZE] && c != '>' - && c != '<') - *urlp++ = c; - putchar(c); - } - break; - case STATE_BEFORE_URL_NAME: - if (c != ' ' && c != '\t') { - SET_STATE(STATE_URL_NAME); - goto normal; - } - break; - case STATE_SINGLE_BACKTICK: - case STATE_PRE_SINGLE_BACKTICK: - if (c == 
'`') { - SET_STATE(state == STATE_SINGLE_BACKTICK ? - STATE_DOUBLE_BACKTICK : - STATE_PRE_DOUBLE_BACKTICK); - } else { - putchar('`'); - REDO_NORMAL; - } - break; - case STATE_DOUBLE_BACKTICK: - case STATE_PRE_DOUBLE_BACKTICK: - if (c == '`') { - if (state == STATE_DOUBLE_BACKTICK) { - SET_STATE(STATE_PRE_START); - fputs("<pre title=\"", stdout); - } else { - fputs("</pre>", stdout); - SET_STATE(STATE_SKIP_LINE); - } - } else { - fputs("``", stdout); - if (state == STATE_DOUBLE_BACKTICK) { - REDO_NORMAL; - } else { - SET_STATE(STATE_IN_PRE); - goto normal; - } - } - break; - case STATE_HASH: - if (c == '#') { - SET_STATE(STATE_DOUBLE_HASH); - } else { - fputs("<h1>", stdout); - SET_STATE(STATE_AFTER_HASH); - goto normal; - } - break; - case STATE_DOUBLE_HASH: - if (c == '#') { - fputs("<h3>", stdout); - SET_STATE(STATE_AFTER_TRIPLE_HASH); - } else { - fputs("<h2>", stdout); - SET_STATE(STATE_AFTER_DOUBLE_HASH); - goto normal; - } - break; - } - } - exit(0); -} diff --git a/adapter/format/gmi2html.nim b/adapter/format/gmi2html.nim new file mode 100644 index 00000000..ee3c06bd --- /dev/null +++ b/adapter/format/gmi2html.nim @@ -0,0 +1,69 @@ +import std/strutils + +import utils/twtstr + +proc main() = + # We use `display: block' for anchors because they are supposed to be + # presented on separate lines per standard. + # We use `white-space: pre-line' on the entire body so that we do not have + # to emit a <br> character for each paragraph. ("Why not p?" Because gemini + # does not allow collapsing newlines, so we would have to use <br> or empty + # <p> tags for them. Neither make a lot more sense semantically than the + # simplest and most efficient solution, which is just using newlines.) 
+ stdout.write(""" +<!DOCTYPE html> +<style> +a { display: block } +body { white-space: pre-line } +a, pre, ul, blockquote, li, h1, h2, h3 { margin-top: 0; margin-bottom: 0 } +</style> +""") + var inpre = false + var inul = false + while not stdin.endOfFile: + let line = stdin.readLine() + if inpre and not line.startsWith("```"): + stdout.write(line.htmlEscape() & "\n") + continue + if inul and not line.startsWith("* "): + stdout.write("</ul>") + inul = false + if line.len == 0: + stdout.write("\n") + continue + if line.startsWith("=>"): # link + let i = line.skipBlanks(2) + let url = line.until(AsciiWhitespace, i) + let text = if i + url.len < line.len: + let j = line.skipBlanks(i + url.len) + line.substr(j).htmlEscape() + else: + url.htmlEscape() + stdout.write("<a href='" & url.htmlEscape() & "'>" & text & "</a>") + elif line.startsWith("```"): # preformatting toggle + inpre = not inpre + let title = line.substr(3).htmlEscape() + if inpre: + stdout.write("<pre title='" & title & "'>") + else: + stdout.write("</pre>") + elif line.startsWith("#"): # heading line + var i = 1 + while i < line.len and i < 3 and line[i] == '#': + inc i + let h = "h" & $i + i = line.skipBlanks(i) # ignore whitespace after # + stdout.write("<" & h & ">" & line.substr(i).htmlEscape() & "</" & h & ">") + elif line.startsWith("* "): # unordered list item + if not inul: + inul = true + stdout.write("<ul>") + stdout.write("<li>" & line.substr(2).htmlEscape() & "</li>") + elif line.startsWith(">"): # quote + stdout.write("<blockquote>") + stdout.write(line.substr(1).htmlEscape()) + stdout.write("</blockquote>") + else: + stdout.write(line.htmlEscape() & "\n") + +main() |