about summary refs log tree commit diff stats
path: root/adapter
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2023-12-22 00:53:14 +0100
committer bptato <nincsnevem662@gmail.com> 2023-12-22 00:53:55 +0100
commit e58016bff983c043bb533e0bf07c0e5b3df5677e (patch)
tree 5b2696ac5c20c5cffd78754fcc5e9680422cc4df /adapter
parent 6ae8b1bbb0f172cb8c096b3e6792fc3766dbdb2d (diff)
download chawan-e58016bff983c043bb533e0bf07c0e5b3df5677e.tar.gz
gmi2html: rewrite
* Rewrite in Nim
* This time, do not use a state machine (it was a very bad idea)
* Do not emit <br> for every line; use CSS instead
* Avoid double-newline caused by margins using CSS
* Properly support list items
Diffstat (limited to 'adapter')
-rw-r--r-- adapter/format/gmi2html.c 241
-rw-r--r-- adapter/format/gmi2html.nim 69
2 files changed, 69 insertions, 241 deletions
diff --git a/adapter/format/gmi2html.c b/adapter/format/gmi2html.c
deleted file mode 100644
index 2c8c2bf6..00000000
--- a/adapter/format/gmi2html.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/* This file is dedicated to the public domain.
- *
- * Convert gemtext to HTML. Only accepts input on stdin.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-/* States of the character-at-a-time gemtext parser. main() switches on the
- * current state for every byte read from stdin. */
-typedef enum {
-	STATE_NORMAL,
-	STATE_BLOCKQUOTE,
-	STATE_NEWLINE,
-	STATE_NEWLINE_EQUALS,
-	STATE_NEWLINE_EQUALS_ARROW,
-	STATE_BEFORE_URL,
-	STATE_IN_URL,
-	STATE_BEFORE_URL_NAME,
-	STATE_URL_NAME,
-	STATE_SINGLE_BACKTICK,
-	STATE_DOUBLE_BACKTICK,
-	STATE_PRE_START,
-	STATE_IN_PRE,
-	STATE_PRE_SINGLE_BACKTICK,
-	STATE_PRE_DOUBLE_BACKTICK,
-	STATE_SKIP_LINE,
-	STATE_HASH,
-	STATE_DOUBLE_HASH,
-	STATE_AFTER_HASH,
-	STATE_AFTER_DOUBLE_HASH,
-	STATE_AFTER_TRIPLE_HASH
-} ParseState;
-
-/* Current parser state; parsing starts as if a newline had just been read. */
-static ParseState state = STATE_NEWLINE;
-/* State that was active before the last SET_STATE. main() consults it in
- * STATE_NEWLINE to tell whether the new line is inside a preformatted block. */
-static ParseState prev_state = STATE_NORMAL;
-
-/* Read gemtext from stdin one character at a time and write the HTML
- * translation to stdout, driven by the ParseState machine. Always exits
- * with status 0 once EOF is reached. */
-int main(void) {
-	int c;
-#define BUFSIZE 4096
-	/* Copy of the current link's URL, written out as the link text when the
-	 * link line has no name. The extra byte leaves room for the terminating
-	 * NUL, since writes are guarded by urlp < &urlbuf[BUFSIZE]. */
-	char urlbuf[BUFSIZE + 1];
-	char *urlp;
-
-	urlp = urlbuf;
-	printf("<!DOCTYPE html>");
-/* Switch to state s, remembering the state being left in prev_state. */
-#define SET_STATE(s) do { \
-		prev_state = state; \
-		state = s; \
-	} while (0)
-/* Switch to STATE_NORMAL and reprocess the current character there. */
-#define REDO_NORMAL do { \
-		SET_STATE(STATE_NORMAL); \
-		goto normal; \
-	} while (0)
-	while ((c = getc(stdin)) != EOF) {
-		switch (state) {
-		/* All "inside a line" states share the same character handling;
-		 * they differ only in what a newline closes. */
-		case STATE_NORMAL:
-		case STATE_BLOCKQUOTE:
-		case STATE_IN_PRE:
-		case STATE_PRE_START:
-		case STATE_SKIP_LINE:
-		case STATE_URL_NAME:
-		case STATE_AFTER_HASH:
-		case STATE_AFTER_DOUBLE_HASH:
-		case STATE_AFTER_TRIPLE_HASH:
-normal:			switch (c) {
-			/* Carriage returns are dropped. */
-			case '\r': break;
-			case '\n':
-				/* End of line: close whatever construct this line
-				 * opened, or emit <br> for a plain text line. */
-				if (state == STATE_BLOCKQUOTE) {
-					fputs("</blockquote>", stdout);
-				} else if (state == STATE_PRE_START) {
-					/* The text after the opening ``` was written into
-					 * the title attribute; terminate the tag here. */
-					fputs("\">", stdout);
-					SET_STATE(STATE_IN_PRE);
-				} else if (state == STATE_URL_NAME) {
-					fputs("</a>", stdout);
-					fputs("<br>", stdout);
-				} else if (state == STATE_AFTER_HASH) {
-					fputs("</h1>", stdout);
-				} else if (state == STATE_AFTER_DOUBLE_HASH) {
-					fputs("</h2>", stdout);
-				} else if (state == STATE_AFTER_TRIPLE_HASH) {
-					fputs("</h3>", stdout);
-				} else if (state == STATE_SKIP_LINE) {
-				} else {
-					fputs("<br>", stdout);
-				}
-				/* When coming from STATE_PRE_START, prev_state is now
-				 * STATE_IN_PRE, which STATE_NEWLINE checks for. */
-				SET_STATE(STATE_NEWLINE);
-				break;
-			case '<':
-				fputs("&lt;", stdout);
-				break;
-			case '>':
-				fputs("&gt;", stdout);
-				break;
-			case '&':
-				fputs("&amp;", stdout);
-				break;
-			default:
-				if (state != STATE_SKIP_LINE)
-					putchar(c);
-				break;
-			}
-			break;
-		case STATE_NEWLINE:
-			/* First character of a line. Inside a preformatted block only
-			 * a backtick is special; everything else is passed through. */
-			if (prev_state == STATE_IN_PRE) {
-				if (c == '`') {
-					SET_STATE(STATE_PRE_SINGLE_BACKTICK);
-					break;
-				} else {
-					SET_STATE(STATE_IN_PRE);
-					goto normal;
-				}
-			}
-			switch (c) {
-			case '=':
-				SET_STATE(STATE_NEWLINE_EQUALS);
-				break;
-			case '>':
-				SET_STATE(STATE_BLOCKQUOTE);
-				printf("<blockquote>");
-				break;
-			case '`':
-				SET_STATE(STATE_SINGLE_BACKTICK);
-				break;
-			case '#':
-				SET_STATE(STATE_HASH);
-				break;
-			default:
-				REDO_NORMAL;
-			}
-			break;
-		case STATE_NEWLINE_EQUALS:
-			if (c == '>') {
-				SET_STATE(STATE_NEWLINE_EQUALS_ARROW);
-			} else {
-				/* Not a link line: emit the '=' that was held back. */
-				putchar('=');
-				REDO_NORMAL;
-			}
-			break;
-		case STATE_NEWLINE_EQUALS_ARROW:
-			if (c == ' ') {
-				/* Assigned directly, so prev_state is not updated here. */
-				state = STATE_BEFORE_URL;
-			} else {
-				/* NOTE(review): only '=' is re-emitted; the '>' consumed
-				 * in the previous state is never written. */
-				putchar('=');
-				REDO_NORMAL;
-			}
-			break;
-		case STATE_BEFORE_URL:
-			if (c == ' ') {
-				/* Skip further spaces before the URL. The break after
-				 * continue is unreachable. */
-				continue;
-				break;
-			} else {
-				fputs("<a href=\"", stdout);
-				SET_STATE(STATE_IN_URL);
-				urlp = urlbuf;
-			}
-			/* fall through */
-		case STATE_IN_URL:
-			switch (c) {
-			case '"':
-				/* Percent-encode the quote inside the href attribute, but
-				 * keep the raw character in the saved copy. */
-				fputs("%22", stdout);
-				if (urlp < &urlbuf[BUFSIZE])
-					*urlp++ = '"';
-				break;
-			case ' ':
-			case '\t':
-				fputs("\">", stdout);
-				*urlp = '\0';
-				SET_STATE(STATE_BEFORE_URL_NAME);
-				break;
-			case '\n':
-				/* No link name on this line: use the saved URL as the
-				 * link text. */
-				*urlp = '\0';
-				fputs("\">", stdout);
-				fputs(urlbuf, stdout);
-				fputs("</a><br>", stdout);
-				SET_STATE(STATE_NEWLINE);
-				break;
-			default:
-				/* '<' and '>' are kept out of urlbuf, which is later
-				 * written as link text without escaping; they are still
-				 * written to the href as-is. Characters beyond BUFSIZE
-				 * are likewise not saved. */
-				if (urlp < &urlbuf[BUFSIZE] && c != '>'
-						&& c != '<')
-					*urlp++ = c;
-				putchar(c);
-			}
-			break;
-		case STATE_BEFORE_URL_NAME:
-			/* Skip blanks between the URL and the link name. */
-			if (c != ' ' && c != '\t') {
-				SET_STATE(STATE_URL_NAME);
-				goto normal;
-			}
-			break;
-		case STATE_SINGLE_BACKTICK:
-		case STATE_PRE_SINGLE_BACKTICK:
-			if (c == '`') {
-				SET_STATE(state == STATE_SINGLE_BACKTICK ?
-					STATE_DOUBLE_BACKTICK :
-					STATE_PRE_DOUBLE_BACKTICK);
-			} else {
-				/* A lone backtick is ordinary text. */
-				putchar('`');
-				REDO_NORMAL;
-			}
-			break;
-		case STATE_DOUBLE_BACKTICK:
-		case STATE_PRE_DOUBLE_BACKTICK:
-			if (c == '`') {
-				/* Third backtick: open a preformatted block (the rest of
-				 * the line goes into its title attribute), or close the
-				 * current one and discard the rest of the line. */
-				if (state == STATE_DOUBLE_BACKTICK) {
-					SET_STATE(STATE_PRE_START);
-					fputs("<pre title=\"", stdout);
-				} else {
-					fputs("</pre>", stdout);
-					SET_STATE(STATE_SKIP_LINE);
-				}
-			} else {
-				/* Only two backticks: emit them as text and resume in the
-				 * mode the line started in. */
-				fputs("``", stdout);
-				if (state == STATE_DOUBLE_BACKTICK) {
-					REDO_NORMAL;
-				} else {
-					SET_STATE(STATE_IN_PRE);
-					goto normal;
-				}
-			}
-			break;
-		case STATE_HASH:
-			/* One '#' seen so far; a second one defers the decision. */
-			if (c == '#') {
-				SET_STATE(STATE_DOUBLE_HASH);
-			} else {
-				fputs("<h1>", stdout);
-				SET_STATE(STATE_AFTER_HASH);
-				goto normal;
-			}
-			break;
-		case STATE_DOUBLE_HASH:
-			/* Two '#' seen: a third makes an h3, anything else an h2. */
-			if (c == '#') {
-				fputs("<h3>", stdout);
-				SET_STATE(STATE_AFTER_TRIPLE_HASH);
-			} else {
-				fputs("<h2>", stdout);
-				SET_STATE(STATE_AFTER_DOUBLE_HASH);
-				goto normal;
-			}
-			break;
-		}
-	}
-	exit(0);
-}
diff --git a/adapter/format/gmi2html.nim b/adapter/format/gmi2html.nim
new file mode 100644
index 00000000..ee3c06bd
--- /dev/null
+++ b/adapter/format/gmi2html.nim
@@ -0,0 +1,69 @@
+import std/strutils
+
+import utils/twtstr
+
+proc main() =
+  ## Convert gemtext read from stdin to HTML written to stdout, one input
+  ## line at a time.
+  ##
+  ## Two flags carry state across lines: `inpre` is set between
+  ## preformatting toggles, and `inul` while consecutive list items are
+  ## being emitted. A list or preformatted block that is still open when
+  ## the input ends is closed before returning.
+  # We use `display: block' for anchors because they are supposed to be
+  # presented on separate lines per standard.
+  # We use `white-space: pre-line' on the entire body so that we do not have
+  # to emit a <br> element for each paragraph. ("Why not p?" Because gemini
+  # does not allow collapsing newlines, so we would have to use <br> or empty
+  # <p> tags for them. Neither makes a lot more sense semantically than the
+  # simplest and most efficient solution, which is just using newlines.)
+  stdout.write("""
+<!DOCTYPE html>
+<style>
+a { display: block }
+body { white-space: pre-line }
+a, pre, ul, blockquote, li, h1, h2, h3 { margin-top: 0; margin-bottom: 0 }
+</style>
+""")
+  var inpre = false
+  var inul = false
+  while not stdin.endOfFile:
+    let line = stdin.readLine()
+    # Inside a preformatted block everything except the closing toggle is
+    # passed through verbatim (escaped), including blank lines.
+    if inpre and not line.startsWith("```"):
+      stdout.write(line.htmlEscape() & "\n")
+      continue
+    # Any line that is not a list item ends the current list.
+    if inul and not line.startsWith("* "):
+      stdout.write("</ul>")
+      inul = false
+    if line.len == 0:
+      stdout.write("\n")
+      continue
+    if line.startsWith("=>"): # link
+      let i = line.skipBlanks(2)
+      let url = line.until(AsciiWhitespace, i)
+      # The label is whatever follows the blanks after the URL. If nothing
+      # is left (no label, or only trailing whitespace), show the URL itself
+      # so that the link never ends up with empty, invisible text.
+      let j = line.skipBlanks(i + url.len)
+      let text = if j < line.len: line.substr(j) else: url
+      # NOTE(review): href is single-quoted, so this relies on htmlEscape
+      # also escaping `'` -- confirm in utils/twtstr.
+      stdout.write("<a href='" & url.htmlEscape() & "'>" &
+        text.htmlEscape() & "</a>")
+    elif line.startsWith("```"): # preformatting toggle
+      inpre = not inpre
+      if inpre:
+        # Text after the opening toggle becomes the block's title.
+        let title = line.substr(3).htmlEscape()
+        stdout.write("<pre title='" & title & "'>")
+      else:
+        stdout.write("</pre>")
+    elif line.startsWith("#"): # heading line
+      # Count up to three leading '#' characters; the count is the level.
+      var i = 1
+      while i < line.len and i < 3 and line[i] == '#':
+        inc i
+      let h = "h" & $i
+      i = line.skipBlanks(i) # ignore whitespace after #
+      stdout.write("<" & h & ">" & line.substr(i).htmlEscape() & "</" & h & ">")
+    elif line.startsWith("* "): # unordered list item
+      if not inul:
+        inul = true
+        stdout.write("<ul>")
+      stdout.write("<li>" & line.substr(2).htmlEscape() & "</li>")
+    elif line.startsWith(">"): # quote
+      stdout.write("<blockquote>")
+      stdout.write(line.substr(1).htmlEscape())
+      stdout.write("</blockquote>")
+    else:
+      stdout.write(line.htmlEscape() & "\n")
+  # Close whatever is still open at end of input. At most one of the two
+  # flags can be set: opening a preformatted block first ends any list.
+  if inul:
+    stdout.write("</ul>")
+  if inpre:
+    stdout.write("</pre>")
+
+main()