diff options
Diffstat (limited to 'src/LYCharUtils.c')
-rw-r--r-- | src/LYCharUtils.c | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c index bb5e8224..d4df1e23 100644 --- a/src/LYCharUtils.c +++ b/src/LYCharUtils.c @@ -3827,3 +3827,130 @@ PUBLIC BOOLEAN LYCheckForCSI ARGS2( StrAllocCopy(*url, anchor->address); return TRUE; } + +/* +** This function is called from the SGML parser to look at comments +** and see whether we should collect some info from them. Currently +** it only looks for comments with Message-Id and Subject info, in the +** exact form generated by MHonArc for archived mailing list. If found, +** the info is stored in the document's HTParentAnchor. It can later be +** used for generating a mail response. +** +** We are extra picky here because there isn't any official definition +** for these kinds of comments - we might (and still can) misinterpret +** arbitrary comments as something they aren't. +** +** If something doesn't look right, for example invalid characters, the +** strings are not stored. Mail responses will use something else as +** the subject, probably the document URL, and will not have an +** In-Reply-To header. +** +** All this is a hack - to do this the right way, mailing list archivers +** would have to agree on some better mechanism to make this kind of info +** from original mail headers available, for example using LINK. - kw +*/ +PUBLIC BOOLEAN LYCommentHacks ARGS2( + HTParentAnchor *, anchor, + CONST char *, comment) +{ + CONST char *cp = comment; + size_t len; + + if (comment == NULL) + return FALSE; + + if (!(anchor && anchor->address)) + return FALSE; + + if (strncmp(comment, "!--X-Message-Id: ", 17) == 0) { + char *messageid = NULL; + char *p; + for (cp = comment+17; *cp; cp++) { + if ((unsigned char)*cp >= 127 || !isgraph((unsigned char)*cp)) { + break; + } + } + if (strcmp(cp, " --")) { + return FALSE; + } + cp = comment + 17; + StrAllocCopy(messageid, cp); + /* This should be ok - message-id should only contain 7-bit ASCII */ + if (!LYUCFullyTranslateString(&messageid, 0, 0, NO, NO, YES, st_URL)) + return FALSE; + for (p = messageid; *p; p++) { + if ((unsigned char)*p >= 127 || !isgraph((unsigned char)*p)) { + break; + } + } + if (strcmp(p, " --")) { + FREE(messageid); + return FALSE; + } + if ((p = strchr(messageid, '@')) == NULL || p[1] == '\0') { + FREE(messageid); + return FALSE; + } + p = messageid; + if ((len = strlen(p)) >= 8 && !strcmp(&p[len-3], " --")) { + p[len-3] = '\0'; + } else { + FREE(messageid); + return FALSE; + } + if (HTAnchor_setMessageID(anchor, messageid)) { + FREE(messageid); + return TRUE; + } else { + FREE(messageid); + return FALSE; + } + } + if (strncmp(comment, "!--X-Subject: ", 14) == 0) { + char *subject = NULL; + char *p; + for (cp = comment+14; *cp; cp++) { + if ((unsigned char)*cp >= 127 || !isprint((unsigned char)*cp)) { + return FALSE; + } + } + cp = comment + 14; + StrAllocCopy(subject, cp); + /* @@@ + * This may not be the right thing for the subject - but mail + * subjects shouldn't contain 8-bit characters in raw form anyway. + * We have to unescape character entities, since that's what MHonArc + * seems to generate. But if after that there are 8-bit characters + * the string is rejected. We would probably not know correctly + * what charset to assume anyway - the mail sender's can differ from + * the archive's. And the code for sending mail cannot deal well + * with 8-bit characters - we should not put them in the Subject + * header in raw form, but don't have MIME encoding implemented. + * Someone may want to do more about this... - kw + */ + if (!LYUCFullyTranslateString(&subject, 0, 0, NO, YES, NO, st_HTML)) + return FALSE; + for (p = subject; *p; p++) { + if ((unsigned char)*p >= 127 || !isprint((unsigned char)*p)) { + FREE(subject); + return FALSE; + } + } + p = subject; + if ((len = strlen(p)) >= 4 && !strcmp(&p[len-3], " --")) { + p[len-3] = '\0'; + } else { + FREE(subject); + return FALSE; + } + if (HTAnchor_setSubject(anchor, subject)) { + FREE(subject); + return TRUE; + } else { + FREE(subject); + return FALSE; + } + } + + return FALSE; +} |