about summary refs log tree commit diff stats
path: root/src/LYCharUtils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/LYCharUtils.c')
-rw-r--r-- src/LYCharUtils.c | 127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c
index bb5e8224..d4df1e23 100644
--- a/src/LYCharUtils.c
+++ b/src/LYCharUtils.c
@@ -3827,3 +3827,130 @@ PUBLIC BOOLEAN LYCheckForCSI ARGS2(
     StrAllocCopy(*url, anchor->address);
     return TRUE;
 }
+
+/*
+**  This function is called from the SGML parser to look at comments
+**  and see whether we should collect some info from them.  Currently
+**  it only looks for comments with Message-Id and Subject info, in the
+**  exact form generated by MHonArc for archived mailing lists.  If found,
+**  the info is stored in the document's HTParentAnchor.  It can later be
+**  used for generating a mail response.
+**
+**  We are extra picky here because there isn't any official definition
+**  for these kinds of comments - we might (and still can) misinterpret
+**  arbitrary comments as something they aren't.
+**
+**  If something doesn't look right, for example invalid characters, the
+**  strings are not stored.  Mail responses will use something else as
+**  the subject, probably the document URL, and will not have an
+**  In-Reply-To header.
+**
+**  All this is a hack - to do this the right way, mailing list archivers
+**  would have to agree on some better mechanism to make this kind of info
+**  from original mail headers available, for example using LINK.  - kw
+**
+**  On entry:
+**	anchor	 the document's parent anchor; nothing is done unless it
+**		 and its address are non-NULL.
+**	comment	 the comment text.  It is only recognized if it starts with
+**		 "!--X-Message-Id: " or "!--X-Subject: " and ends with " --".
+**		 It is never modified; we work on a private copy.
+**
+**  Returns TRUE if a value was recognized and the HTAnchor_set* function
+**  reported success, FALSE in every other case.  The private copy is
+**  always freed before returning, including when translation fails.
+*/
+PUBLIC BOOLEAN LYCommentHacks ARGS2(
+	HTParentAnchor *,	anchor,
+	CONST char *,		comment)
+{
+    static CONST char msgid_tag[] = "!--X-Message-Id: ";
+    static CONST char subject_tag[] = "!--X-Subject: ";
+    static CONST char trailer[] = " --";
+    size_t msgid_tag_len = sizeof(msgid_tag) - 1;
+    size_t subject_tag_len = sizeof(subject_tag) - 1;
+    size_t trailer_len = sizeof(trailer) - 1;
+    CONST char *cp;
+    char *p;
+    size_t len;
+    BOOLEAN stored;
+
+    if (comment == NULL)
+	return FALSE;
+
+    if (!(anchor && anchor->address))
+	return FALSE;
+
+    if (strncmp(comment, msgid_tag, msgid_tag_len) == 0) {
+	char *messageid = NULL;
+
+	/*
+	 * Cheap check before allocating anything: a run of graphic 7-bit
+	 * characters, followed by exactly the trailer and nothing else.
+	 */
+	for (cp = comment + msgid_tag_len; *cp; cp++) {
+	    if ((unsigned char)*cp >= 127 || !isgraph((unsigned char)*cp)) {
+		break;
+	    }
+	}
+	if (strcmp(cp, trailer) != 0) {
+	    return FALSE;
+	}
+	StrAllocCopy(messageid, comment + msgid_tag_len);
+	/* This should be ok - message-id should only contain 7-bit ASCII */
+	if (!LYUCFullyTranslateString(&messageid, 0, 0, NO, NO, YES, st_URL)) {
+	    FREE(messageid);	/* don't leak our copy on failure */
+	    return FALSE;
+	}
+	/* Translation may have changed the text, so check it once more. */
+	for (p = messageid; *p; p++) {
+	    if ((unsigned char)*p >= 127 || !isgraph((unsigned char)*p)) {
+		break;
+	    }
+	}
+	if (strcmp(p, trailer) != 0) {
+	    FREE(messageid);
+	    return FALSE;
+	}
+	/* Require an '@' with at least one character after it. */
+	p = strchr(messageid, '@');
+	if (p == NULL || p[1] == '\0') {
+	    FREE(messageid);
+	    return FALSE;
+	}
+	len = strlen(messageid);
+	if (len < 8 || strcmp(&messageid[len - trailer_len], trailer) != 0) {
+	    FREE(messageid);
+	    return FALSE;
+	}
+	messageid[len - trailer_len] = '\0';	/* cut off the trailer */
+
+	stored = HTAnchor_setMessageID(anchor, messageid) ? TRUE : FALSE;
+	FREE(messageid);
+	return stored;
+    }
+    if (strncmp(comment, subject_tag, subject_tag_len) == 0) {
+	char *subject = NULL;
+
+	/* Reject raw 8-bit or non-printable characters right away. */
+	for (cp = comment + subject_tag_len; *cp; cp++) {
+	    if ((unsigned char)*cp >= 127 || !isprint((unsigned char)*cp)) {
+		return FALSE;
+	    }
+	}
+	StrAllocCopy(subject, comment + subject_tag_len);
+	/* @@@
+	 * This may not be the right thing for the subject - but mail
+	 * subjects shouldn't contain 8-bit characters in raw form anyway.
+	 * We have to unescape character entities, since that's what MHonArc
+	 * seems to generate.  But if after that there are 8-bit characters
+	 * the string is rejected.  We would probably not know correctly
+	 * what charset to assume anyway - the mail sender's can differ from
+	 * the archive's.  And the code for sending mail cannot deal well
+	 * with 8-bit characters - we should not put them in the Subject
+	 * header in raw form, but don't have MIME encoding implemented.
+	 * Someone may want to do more about this...  - kw
+	 */
+	if (!LYUCFullyTranslateString(&subject, 0, 0, NO, YES, NO, st_HTML)) {
+	    FREE(subject);	/* don't leak our copy on failure */
+	    return FALSE;
+	}
+	/* Unescaping entities may have produced characters we don't want. */
+	for (p = subject; *p; p++) {
+	    if ((unsigned char)*p >= 127 || !isprint((unsigned char)*p)) {
+		FREE(subject);
+		return FALSE;
+	    }
+	}
+	len = strlen(subject);
+	if (len < 4 || strcmp(&subject[len - trailer_len], trailer) != 0) {
+	    FREE(subject);
+	    return FALSE;
+	}
+	subject[len - trailer_len] = '\0';	/* cut off the trailer */
+
+	stored = HTAnchor_setSubject(anchor, subject) ? TRUE : FALSE;
+	FREE(subject);
+	return stored;
+    }
+
+    return FALSE;
+}