From 159e9403a44c5f95e8a9c8bf9ef45011c2f80baf Mon Sep 17 00:00:00 2001 From: "Thomas E. Dickey" Date: Fri, 7 Oct 2011 08:38:58 -0400 Subject: snapshot of project "lynx", label v2-8-8dev_9c --- CHANGES | 5 +- WWW/Library/Implementation/hdr_HTMLDTD.h | 13 +- WWW/Library/Implementation/src0_HTMLDTD.h | 4 +- WWW/Library/Implementation/src0_HTMLDTD.txt | 22 +-- WWW/Library/Implementation/src1_HTMLDTD.h | 4 +- WWW/Library/Implementation/src1_HTMLDTD.txt | 22 +-- src/LYCharUtils.c | 266 +++++++++++++++------------- 7 files changed, 183 insertions(+), 153 deletions(-) diff --git a/CHANGES b/CHANGES index 21dba669..193e0919 100644 --- a/CHANGES +++ b/CHANGES @@ -1,9 +1,10 @@ --- $LynxId: CHANGES,v 1.558 2011/10/07 00:50:29 tom Exp $ +-- $LynxId: CHANGES,v 1.559 2011/10/07 08:38:58 tom Exp $ =============================================================================== Changes since Lynx 2.8 release =============================================================================== -2011-10-06 (2.8.8dev.10) +2011-10-07 (2.8.8dev.10) +* add check for charset attribute on meta element -Kihara Hideto * eliminate ON/OFF macros, using TRUE/FALSE both to work around breakage from zlib 1.2.5.1 changes as well as because they were unnecessary (GenToo #383113) -Nikos Chantziaras, TD diff --git a/WWW/Library/Implementation/hdr_HTMLDTD.h b/WWW/Library/Implementation/hdr_HTMLDTD.h index a358caf1..9e4d47b0 100644 --- a/WWW/Library/Implementation/hdr_HTMLDTD.h +++ b/WWW/Library/Implementation/hdr_HTMLDTD.h @@ -1,4 +1,4 @@ -/* $LynxId: hdr_HTMLDTD.h,v 1.21 2011/05/19 23:49:11 tom Exp $ */ +/* $LynxId: hdr_HTMLDTD.h,v 1.22 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ #ifndef hdr_HTMLDTD_H #define hdr_HTMLDTD_H 1 @@ -670,11 +670,12 @@ extern "C" { #define HTML_MATH_TITLE 7 #define HTML_MATH_ATTRIBUTES 8 -#define HTML_META_CONTENT 0 -#define HTML_META_HTTP_EQUIV 1 -#define HTML_META_NAME 2 -#define HTML_META_SCHEME 3 -#define HTML_META_ATTRIBUTES 4 +#define HTML_META_CHARSET 0 +#define HTML_META_CONTENT 1 +#define HTML_META_HTTP_EQUIV 2 +#define HTML_META_NAME 3 +#define HTML_META_SCHEME 4 +#define HTML_META_ATTRIBUTES 5 #define HTML_NEXTID_N 0 #define HTML_NEXTID_ATTRIBUTES 1 diff --git a/WWW/Library/Implementation/src0_HTMLDTD.h b/WWW/Library/Implementation/src0_HTMLDTD.h index 000e739a..83884709 100644 --- a/WWW/Library/Implementation/src0_HTMLDTD.h +++ b/WWW/Library/Implementation/src0_HTMLDTD.h @@ -1,4 +1,4 @@ -/* $LynxId: src0_HTMLDTD.h,v 1.44 2011/05/19 23:49:11 tom Exp $ */ +/* $LynxId: src0_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ #ifndef src_HTMLDTD_H0 #define src_HTMLDTD_H0 1 @@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = { }; static const attr META_attr_list[] = { + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, @@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = { /* MATH attributes */ }; static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, diff --git a/WWW/Library/Implementation/src0_HTMLDTD.txt b/WWW/Library/Implementation/src0_HTMLDTD.txt index 923623d8..87bebeea 100644 --- a/WWW/Library/Implementation/src0_HTMLDTD.txt +++ b/WWW/Library/Implementation/src0_HTMLDTD.txt @@ -336,11 +336,12 @@ 0:0:BOX 1:0:CLEAR 40:META - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 41:NEXTID 1 attributes: 0:0:N @@ -2509,11 +2510,12 @@ flags: 75:META justify - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 1 attr_types META contents: SGML_EMPTY diff --git a/WWW/Library/Implementation/src1_HTMLDTD.h b/WWW/Library/Implementation/src1_HTMLDTD.h index 7ec80d6a..a56e4a09 100644 --- a/WWW/Library/Implementation/src1_HTMLDTD.h +++ b/WWW/Library/Implementation/src1_HTMLDTD.h @@ -1,4 +1,4 @@ -/* $LynxId: src1_HTMLDTD.h,v 1.44 2011/05/19 23:49:11 tom Exp $ */ +/* $LynxId: src1_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ #ifndef src_HTMLDTD_H1 #define src_HTMLDTD_H1 1 @@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = { }; static const attr META_attr_list[] = { + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, @@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = { /* MATH attributes */ }; static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, { "CONTENT" T(N) }, { "HTTP-EQUIV" T(N) }, { "NAME" T(N) }, diff --git a/WWW/Library/Implementation/src1_HTMLDTD.txt b/WWW/Library/Implementation/src1_HTMLDTD.txt index aef07fb7..6e7ac733 100644 --- a/WWW/Library/Implementation/src1_HTMLDTD.txt +++ b/WWW/Library/Implementation/src1_HTMLDTD.txt @@ -336,11 +336,12 @@ 0:0:BOX 1:0:CLEAR 40:META - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 41:NEXTID 1 attributes: 0:0:N @@ -2509,11 +2510,12 @@ flags: 75:META justify - 4 attributes: - 0:0:CONTENT - 1:0:HTTP-EQUIV - 2:0:NAME - 3:0:SCHEME + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME 1 attr_types META contents: SGML_EMPTY diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c index adc50300..5baa5a7f 100644 --- a/src/LYCharUtils.c +++ b/src/LYCharUtils.c @@ -1,5 +1,5 @@ /* - * $LynxId: LYCharUtils.c,v 1.113 2011/06/11 12:15:50 tom Exp $ + * $LynxId: LYCharUtils.c,v 1.114 2011/10/07 00:57:58 Kihara.Hideto Exp $ * * Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM * ========================================================================== @@ -2029,7 +2029,7 @@ void LYHandleMETA(HTStructured * me, const BOOL *present, const char **value, char **include GCC_UNUSED) { - char *http_equiv = NULL, *name = NULL, *content = NULL; + char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL; char *href = NULL, *id_string = NULL, *temp = NULL; char *cp, *cp0, *cp1 = NULL; int url_type = 0; @@ -2079,141 +2079,49 @@ void LYHandleMETA(HTStructured * me, const BOOL *present, FREE(content); } } + if (present[HTML_META_CHARSET] && + non_empty(value[HTML_META_CHARSET])) { + StrAllocCopy(charset, value[HTML_META_CHARSET]); + convert_to_spaces(charset, TRUE); + LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + if (*charset == '\0') { + FREE(charset); + } + } CTRACE((tfp, - "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n", + "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\" CHARSET=\"%s\"\n", NONNULL(http_equiv), NONNULL(name), - NONNULL(content))); + NONNULL(content), + NONNULL(charset))); /* - * Make sure we have META name/value pairs to handle. - FM + * Check for a text/html Content-Type with a charset directive, if we + * didn't already set the charset via a server's header. - AAC & FM */ - if (!(http_equiv || name) || !content) - goto free_META_copies; - - /* - * Check for a no-cache Pragma - * or Cache-Control directive. - FM - */ - if (!strcasecomp(NonNull(http_equiv), "Pragma") || - !strcasecomp(NonNull(http_equiv), "Cache-Control")) { - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - if (!strcasecomp(content, "no-cache")) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - - /* - * If we didn't get a Cache-Control MIME header, and the META has one, - * convert to lowercase, store it in the anchor element, and if we - * haven't yet set no_cache, check whether we should. - FM - */ - if ((!me->node_anchor->cache_control) && - !strcasecomp(NonNull(http_equiv), "Cache-Control")) { - LYLowerCase(content); - StrAllocCopy(me->node_anchor->cache_control, content); - if (me->node_anchor->no_cache == FALSE) { - cp0 = content; - while ((cp = strstr(cp0, "no-cache")) != NULL) { - cp += 8; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (*cp == '\0' || *cp == ';') { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - break; - } - cp0 = cp; - } - if (me->node_anchor->no_cache == TRUE) - goto free_META_copies; - cp0 = content; - while ((cp = strstr(cp0, "max-age")) != NULL) { - cp += 7; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (*cp == '=') { - cp++; - while (*cp != '\0' && WHITE(*cp)) - cp++; - if (isdigit(UCH(*cp))) { - cp0 = cp; - while (isdigit(UCH(*cp))) - cp++; - if (*cp0 == '0' && cp == (cp0 + 1)) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - break; - } - } - } - cp0 = cp; - } - } - } - - /* - * Check for an Expires directive. - FM - */ - } else if (!strcasecomp(NonNull(http_equiv), "Expires")) { - /* - * If we didn't get an Expires MIME header, store it in the anchor - * element, and if we haven't yet set no_cache, check whether we - * should. Note that we don't accept a Date header via META tags, - * because it's likely to be untrustworthy, but do check for a Date - * header from a server when making the comparison. - FM - */ - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - StrAllocCopy(me->node_anchor->expires, content); - if (me->node_anchor->no_cache == FALSE) { - if (!strcmp(content, "0")) { - /* - * The value is zero, which we treat as an absolute no-cache - * directive. - FM - */ - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } else if (me->node_anchor->date != NULL) { - /* - * We have a Date header, so check if the value is less than or - * equal to that. - FM - */ - if (LYmktime(content, TRUE) <= - LYmktime(me->node_anchor->date, TRUE)) { - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - } else if (LYmktime(content, FALSE) == 0) { - /* - * We don't have a Date header, and the value is in past for - * us. - FM - */ - me->node_anchor->no_cache = TRUE; - HText_setNoCache(me->text); - } - } - - /* - * Check for a text/html Content-Type with a charset directive, if we - * didn't already set the charset via a server's header. - AAC & FM - */ - } else if (isEmpty(me->node_anchor->charset) && - !strcasecomp(NonNull(http_equiv), "Content-Type")) { + if (isEmpty(me->node_anchor->charset) && + (charset || + (!strcasecomp(NonNull(http_equiv), "Content-Type") && content))) { LYUCcharset *p_in = NULL; LYUCcharset *p_out = NULL; - LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, - NO, NO, YES, st_other); - LYLowerCase(content); + if (charset) { + LYLowerCase(charset); + } else { + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + LYLowerCase(content); + } - if ((cp1 = strstr(content, "charset")) != NULL) { + if ((cp1 = charset) != NULL || + (cp1 = strstr(content, "charset")) != NULL) { BOOL chartrans_ok = NO; char *cp3 = NULL, *cp4; int chndl; - cp1 += 7; + if (!charset) + cp1 += 7; while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"') cp1++; @@ -2378,6 +2286,117 @@ void LYHandleMETA(HTStructured * me, const BOOL *present, * Set the kcode element based on the charset. - FM */ HText_setKcode(me->text, me->node_anchor->charset, p_in); + } + + /* + * Make sure we have META name/value pairs to handle. - FM + */ + if (!(http_equiv || name) || !content) + goto free_META_copies; + + /* + * Check for a no-cache Pragma + * or Cache-Control directive. - FM + */ + if (!strcasecomp(NonNull(http_equiv), "Pragma") || + !strcasecomp(NonNull(http_equiv), "Cache-Control")) { + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + if (!strcasecomp(content, "no-cache")) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + + /* + * If we didn't get a Cache-Control MIME header, and the META has one, + * convert to lowercase, store it in the anchor element, and if we + * haven't yet set no_cache, check whether we should. - FM + */ + if ((!me->node_anchor->cache_control) && + !strcasecomp(NonNull(http_equiv), "Cache-Control")) { + LYLowerCase(content); + StrAllocCopy(me->node_anchor->cache_control, content); + if (me->node_anchor->no_cache == FALSE) { + cp0 = content; + while ((cp = strstr(cp0, "no-cache")) != NULL) { + cp += 8; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '\0' || *cp == ';') { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + break; + } + cp0 = cp; + } + if (me->node_anchor->no_cache == TRUE) + goto free_META_copies; + cp0 = content; + while ((cp = strstr(cp0, "max-age")) != NULL) { + cp += 7; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '=') { + cp++; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (isdigit(UCH(*cp))) { + cp0 = cp; + while (isdigit(UCH(*cp))) + cp++; + if (*cp0 == '0' && cp == (cp0 + 1)) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + break; + } + } + } + cp0 = cp; + } + } + } + + /* + * Check for an Expires directive. - FM + */ + } else if (!strcasecomp(NonNull(http_equiv), "Expires")) { + /* + * If we didn't get an Expires MIME header, store it in the anchor + * element, and if we haven't yet set no_cache, check whether we + * should. Note that we don't accept a Date header via META tags, + * because it's likely to be untrustworthy, but do check for a Date + * header from a server when making the comparison. - FM + */ + LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); + StrAllocCopy(me->node_anchor->expires, content); + if (me->node_anchor->no_cache == FALSE) { + if (!strcmp(content, "0")) { + /* + * The value is zero, which we treat as an absolute no-cache + * directive. - FM + */ + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } else if (me->node_anchor->date != NULL) { + /* + * We have a Date header, so check if the value is less than or + * equal to that. - FM + */ + if (LYmktime(content, TRUE) <= + LYmktime(me->node_anchor->date, TRUE)) { + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + } else if (LYmktime(content, FALSE) == 0) { + /* + * We don't have a Date header, and the value is in past for + * us. - FM + */ + me->node_anchor->no_cache = TRUE; + HText_setNoCache(me->text); + } + } /* * Check for a Refresh directive. - FM @@ -2566,6 +2585,7 @@ void LYHandleMETA(HTStructured * me, const BOOL *present, FREE(http_equiv); FREE(name); FREE(content); + FREE(charset); } /* -- cgit 1.4.1-2-gfad0