about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- CHANGES 5
-rw-r--r-- WWW/Library/Implementation/hdr_HTMLDTD.h 13
-rw-r--r-- WWW/Library/Implementation/src0_HTMLDTD.h 4
-rw-r--r-- WWW/Library/Implementation/src0_HTMLDTD.txt 22
-rw-r--r-- WWW/Library/Implementation/src1_HTMLDTD.h 4
-rw-r--r-- WWW/Library/Implementation/src1_HTMLDTD.txt 22
-rw-r--r-- src/LYCharUtils.c 266
7 files changed, 183 insertions, 153 deletions
diff --git a/CHANGES b/CHANGES
index 21dba669..193e0919 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,9 +1,10 @@
--- $LynxId: CHANGES,v 1.558 2011/10/07 00:50:29 tom Exp $
+-- $LynxId: CHANGES,v 1.559 2011/10/07 08:38:58 tom Exp $
 ===============================================================================
 Changes since Lynx 2.8 release
 ===============================================================================
 
-2011-10-06 (2.8.8dev.10)
+2011-10-07 (2.8.8dev.10)
+* add check for charset attribute on meta element -Kihara Hideto
 * eliminate ON/OFF macros, using TRUE/FALSE both to work around breakage from
   zlib 1.2.5.1 changes as well as because they were unnecessary (GenToo
   #383113) -Nikos Chantziaras, TD
diff --git a/WWW/Library/Implementation/hdr_HTMLDTD.h b/WWW/Library/Implementation/hdr_HTMLDTD.h
index a358caf1..9e4d47b0 100644
--- a/WWW/Library/Implementation/hdr_HTMLDTD.h
+++ b/WWW/Library/Implementation/hdr_HTMLDTD.h
@@ -1,4 +1,4 @@
-/* $LynxId: hdr_HTMLDTD.h,v 1.21 2011/05/19 23:49:11 tom Exp $ */
+/* $LynxId: hdr_HTMLDTD.h,v 1.22 2011/10/07 00:54:36 Kihara.Hideto Exp $ */
 #ifndef hdr_HTMLDTD_H
 #define hdr_HTMLDTD_H 1
 
@@ -670,11 +670,12 @@ extern "C" {
 #define HTML_MATH_TITLE           7
 #define HTML_MATH_ATTRIBUTES      8
 
-#define HTML_META_CONTENT         0
-#define HTML_META_HTTP_EQUIV      1
-#define HTML_META_NAME            2
-#define HTML_META_SCHEME          3
-#define HTML_META_ATTRIBUTES      4
+#define HTML_META_CHARSET         0
+#define HTML_META_CONTENT         1
+#define HTML_META_HTTP_EQUIV      2
+#define HTML_META_NAME            3
+#define HTML_META_SCHEME          4
+#define HTML_META_ATTRIBUTES      5
 
 #define HTML_NEXTID_N             0
 #define HTML_NEXTID_ATTRIBUTES    1
diff --git a/WWW/Library/Implementation/src0_HTMLDTD.h b/WWW/Library/Implementation/src0_HTMLDTD.h
index 000e739a..83884709 100644
--- a/WWW/Library/Implementation/src0_HTMLDTD.h
+++ b/WWW/Library/Implementation/src0_HTMLDTD.h
@@ -1,4 +1,4 @@
-/* $LynxId: src0_HTMLDTD.h,v 1.44 2011/05/19 23:49:11 tom Exp $ */
+/* $LynxId: src0_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */
 #ifndef src_HTMLDTD_H0
 #define src_HTMLDTD_H0 1
 
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = {
 };
 
 static const attr META_attr_list[] = {
+	{ "CHARSET"       T(N) },
 	{ "CONTENT"       T(N) },
 	{ "HTTP-EQUIV"    T(N) },
 	{ "NAME"          T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {       /* MATH attributes */
 };
 
 static const attr META_attr[] = {       /* META attributes */
+	{ "CHARSET"       T(N) },
 	{ "CONTENT"       T(N) },
 	{ "HTTP-EQUIV"    T(N) },
 	{ "NAME"          T(N) },
diff --git a/WWW/Library/Implementation/src0_HTMLDTD.txt b/WWW/Library/Implementation/src0_HTMLDTD.txt
index 923623d8..87bebeea 100644
--- a/WWW/Library/Implementation/src0_HTMLDTD.txt
+++ b/WWW/Library/Implementation/src0_HTMLDTD.txt
@@ -336,11 +336,12 @@
 			0:0:BOX
 			1:0:CLEAR
 	40:META
-		4 attributes:
-			0:0:CONTENT
-			1:0:HTTP-EQUIV
-			2:0:NAME
-			3:0:SCHEME
+		5 attributes:
+			0:0:CHARSET
+			1:0:CONTENT
+			2:0:HTTP-EQUIV
+			3:0:NAME
+			4:0:SCHEME
 	41:NEXTID
 		1 attributes:
 			0:0:N
@@ -2509,11 +2510,12 @@
 		flags:
 	75:META
 		justify
-		4 attributes:
-			0:0:CONTENT
-			1:0:HTTP-EQUIV
-			2:0:NAME
-			3:0:SCHEME
+		5 attributes:
+			0:0:CHARSET
+			1:0:CONTENT
+			2:0:HTTP-EQUIV
+			3:0:NAME
+			4:0:SCHEME
 		1 attr_types
 			META
 		contents: SGML_EMPTY
diff --git a/WWW/Library/Implementation/src1_HTMLDTD.h b/WWW/Library/Implementation/src1_HTMLDTD.h
index 7ec80d6a..a56e4a09 100644
--- a/WWW/Library/Implementation/src1_HTMLDTD.h
+++ b/WWW/Library/Implementation/src1_HTMLDTD.h
@@ -1,4 +1,4 @@
-/* $LynxId: src1_HTMLDTD.h,v 1.44 2011/05/19 23:49:11 tom Exp $ */
+/* $LynxId: src1_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */
 #ifndef src_HTMLDTD_H1
 #define src_HTMLDTD_H1 1
 
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] = {
 };
 
 static const attr META_attr_list[] = {
+	{ "CHARSET"       T(N) },
 	{ "CONTENT"       T(N) },
 	{ "HTTP-EQUIV"    T(N) },
 	{ "NAME"          T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {       /* MATH attributes */
 };
 
 static const attr META_attr[] = {       /* META attributes */
+	{ "CHARSET"       T(N) },
 	{ "CONTENT"       T(N) },
 	{ "HTTP-EQUIV"    T(N) },
 	{ "NAME"          T(N) },
diff --git a/WWW/Library/Implementation/src1_HTMLDTD.txt b/WWW/Library/Implementation/src1_HTMLDTD.txt
index aef07fb7..6e7ac733 100644
--- a/WWW/Library/Implementation/src1_HTMLDTD.txt
+++ b/WWW/Library/Implementation/src1_HTMLDTD.txt
@@ -336,11 +336,12 @@
 			0:0:BOX
 			1:0:CLEAR
 	40:META
-		4 attributes:
-			0:0:CONTENT
-			1:0:HTTP-EQUIV
-			2:0:NAME
-			3:0:SCHEME
+		5 attributes:
+			0:0:CHARSET
+			1:0:CONTENT
+			2:0:HTTP-EQUIV
+			3:0:NAME
+			4:0:SCHEME
 	41:NEXTID
 		1 attributes:
 			0:0:N
@@ -2509,11 +2510,12 @@
 		flags:
 	75:META
 		justify
-		4 attributes:
-			0:0:CONTENT
-			1:0:HTTP-EQUIV
-			2:0:NAME
-			3:0:SCHEME
+		5 attributes:
+			0:0:CHARSET
+			1:0:CONTENT
+			2:0:HTTP-EQUIV
+			3:0:NAME
+			4:0:SCHEME
 		1 attr_types
 			META
 		contents: SGML_EMPTY
diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c
index adc50300..5baa5a7f 100644
--- a/src/LYCharUtils.c
+++ b/src/LYCharUtils.c
@@ -1,5 +1,5 @@
 /*
- * $LynxId: LYCharUtils.c,v 1.113 2011/06/11 12:15:50 tom Exp $
+ * $LynxId: LYCharUtils.c,v 1.114 2011/10/07 00:57:58 Kihara.Hideto Exp $
  *
  *  Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
  *  ==========================================================================
@@ -2029,7 +2029,7 @@ void LYHandleMETA(HTStructured * me, const BOOL *present,
 		  const char **value,
 		  char **include GCC_UNUSED)
 {
-    char *http_equiv = NULL, *name = NULL, *content = NULL;
+    char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL;
     char *href = NULL, *id_string = NULL, *temp = NULL;
     char *cp, *cp0, *cp1 = NULL;
     int url_type = 0;
@@ -2079,141 +2079,49 @@ void LYHandleMETA(HTStructured * me, const BOOL *present,
 	    FREE(content);
 	}
     }
+    if (present[HTML_META_CHARSET] &&
+	non_empty(value[HTML_META_CHARSET])) {
+	StrAllocCopy(charset, value[HTML_META_CHARSET]);
+	convert_to_spaces(charset, TRUE);
+	LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset,
+				NO, NO, YES, st_other);
+	if (*charset == '\0') {
+	    FREE(charset);
+	}
+    }
     CTRACE((tfp,
-	    "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n",
+	    "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\" CHARSET=\"%s\"\n",
 	    NONNULL(http_equiv),
 	    NONNULL(name),
-	    NONNULL(content)));
+	    NONNULL(content),
+	    NONNULL(charset)));
 
     /*
-     * Make sure we have META name/value pairs to handle.  - FM
+     * Check for a text/html Content-Type with a charset directive, if we
+     * didn't already set the charset via a server's header.  - AAC & FM
      */
-    if (!(http_equiv || name) || !content)
-	goto free_META_copies;
-
-    /*
-     * Check for a no-cache Pragma
-     * or Cache-Control directive. - FM
-     */
-    if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
-	!strcasecomp(NonNull(http_equiv), "Cache-Control")) {
-	LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-				NO, NO, YES, st_other);
-	if (!strcasecomp(content, "no-cache")) {
-	    me->node_anchor->no_cache = TRUE;
-	    HText_setNoCache(me->text);
-	}
-
-	/*
-	 * If we didn't get a Cache-Control MIME header, and the META has one,
-	 * convert to lowercase, store it in the anchor element, and if we
-	 * haven't yet set no_cache, check whether we should.  - FM
-	 */
-	if ((!me->node_anchor->cache_control) &&
-	    !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
-	    LYLowerCase(content);
-	    StrAllocCopy(me->node_anchor->cache_control, content);
-	    if (me->node_anchor->no_cache == FALSE) {
-		cp0 = content;
-		while ((cp = strstr(cp0, "no-cache")) != NULL) {
-		    cp += 8;
-		    while (*cp != '\0' && WHITE(*cp))
-			cp++;
-		    if (*cp == '\0' || *cp == ';') {
-			me->node_anchor->no_cache = TRUE;
-			HText_setNoCache(me->text);
-			break;
-		    }
-		    cp0 = cp;
-		}
-		if (me->node_anchor->no_cache == TRUE)
-		    goto free_META_copies;
-		cp0 = content;
-		while ((cp = strstr(cp0, "max-age")) != NULL) {
-		    cp += 7;
-		    while (*cp != '\0' && WHITE(*cp))
-			cp++;
-		    if (*cp == '=') {
-			cp++;
-			while (*cp != '\0' && WHITE(*cp))
-			    cp++;
-			if (isdigit(UCH(*cp))) {
-			    cp0 = cp;
-			    while (isdigit(UCH(*cp)))
-				cp++;
-			    if (*cp0 == '0' && cp == (cp0 + 1)) {
-				me->node_anchor->no_cache = TRUE;
-				HText_setNoCache(me->text);
-				break;
-			    }
-			}
-		    }
-		    cp0 = cp;
-		}
-	    }
-	}
-
-	/*
-	 * Check for an Expires directive. - FM
-	 */
-    } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
-	/*
-	 * If we didn't get an Expires MIME header, store it in the anchor
-	 * element, and if we haven't yet set no_cache, check whether we
-	 * should.  Note that we don't accept a Date header via META tags,
-	 * because it's likely to be untrustworthy, but do check for a Date
-	 * header from a server when making the comparison.  - FM
-	 */
-	LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-				NO, NO, YES, st_other);
-	StrAllocCopy(me->node_anchor->expires, content);
-	if (me->node_anchor->no_cache == FALSE) {
-	    if (!strcmp(content, "0")) {
-		/*
-		 * The value is zero, which we treat as an absolute no-cache
-		 * directive.  - FM
-		 */
-		me->node_anchor->no_cache = TRUE;
-		HText_setNoCache(me->text);
-	    } else if (me->node_anchor->date != NULL) {
-		/*
-		 * We have a Date header, so check if the value is less than or
-		 * equal to that.  - FM
-		 */
-		if (LYmktime(content, TRUE) <=
-		    LYmktime(me->node_anchor->date, TRUE)) {
-		    me->node_anchor->no_cache = TRUE;
-		    HText_setNoCache(me->text);
-		}
-	    } else if (LYmktime(content, FALSE) == 0) {
-		/*
-		 * We don't have a Date header, and the value is in past for
-		 * us.  - FM
-		 */
-		me->node_anchor->no_cache = TRUE;
-		HText_setNoCache(me->text);
-	    }
-	}
-
-	/*
-	 * Check for a text/html Content-Type with a charset directive, if we
-	 * didn't already set the charset via a server's header.  - AAC & FM
-	 */
-    } else if (isEmpty(me->node_anchor->charset) &&
-	       !strcasecomp(NonNull(http_equiv), "Content-Type")) {
+    if (isEmpty(me->node_anchor->charset) &&
+	(charset ||
+	 (!strcasecomp(NonNull(http_equiv), "Content-Type") && content))) {
 	LYUCcharset *p_in = NULL;
 	LYUCcharset *p_out = NULL;
 
-	LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-				NO, NO, YES, st_other);
-	LYLowerCase(content);
+	if (charset) {
+	    LYLowerCase(charset);
+	} else {
+	    LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+				    NO, NO, YES, st_other);
+	    LYLowerCase(content);
+	}
 
-	if ((cp1 = strstr(content, "charset")) != NULL) {
+	if ((cp1 = charset) != NULL ||
+	    (cp1 = strstr(content, "charset")) != NULL) {
 	    BOOL chartrans_ok = NO;
 	    char *cp3 = NULL, *cp4;
 	    int chndl;
 
-	    cp1 += 7;
+	    if (!charset)
+		cp1 += 7;
 	    while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')
 		cp1++;
 
@@ -2378,6 +2286,117 @@ void LYHandleMETA(HTStructured * me, const BOOL *present,
 	 * Set the kcode element based on the charset.  - FM
 	 */
 	HText_setKcode(me->text, me->node_anchor->charset, p_in);
+    }
+
+    /*
+     * Make sure we have META name/value pairs to handle.  - FM
+     */
+    if (!(http_equiv || name) || !content)
+	goto free_META_copies;
+
+    /*
+     * Check for a no-cache Pragma
+     * or Cache-Control directive. - FM
+     */
+    if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
+	!strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+	LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+				NO, NO, YES, st_other);
+	if (!strcasecomp(content, "no-cache")) {
+	    me->node_anchor->no_cache = TRUE;
+	    HText_setNoCache(me->text);
+	}
+
+	/*
+	 * If we didn't get a Cache-Control MIME header, and the META has one,
+	 * convert to lowercase, store it in the anchor element, and if we
+	 * haven't yet set no_cache, check whether we should.  - FM
+	 */
+	if ((!me->node_anchor->cache_control) &&
+	    !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+	    LYLowerCase(content);
+	    StrAllocCopy(me->node_anchor->cache_control, content);
+	    if (me->node_anchor->no_cache == FALSE) {
+		cp0 = content;
+		while ((cp = strstr(cp0, "no-cache")) != NULL) {
+		    cp += 8;
+		    while (*cp != '\0' && WHITE(*cp))
+			cp++;
+		    if (*cp == '\0' || *cp == ';') {
+			me->node_anchor->no_cache = TRUE;
+			HText_setNoCache(me->text);
+			break;
+		    }
+		    cp0 = cp;
+		}
+		if (me->node_anchor->no_cache == TRUE)
+		    goto free_META_copies;
+		cp0 = content;
+		while ((cp = strstr(cp0, "max-age")) != NULL) {
+		    cp += 7;
+		    while (*cp != '\0' && WHITE(*cp))
+			cp++;
+		    if (*cp == '=') {
+			cp++;
+			while (*cp != '\0' && WHITE(*cp))
+			    cp++;
+			if (isdigit(UCH(*cp))) {
+			    cp0 = cp;
+			    while (isdigit(UCH(*cp)))
+				cp++;
+			    if (*cp0 == '0' && cp == (cp0 + 1)) {
+				me->node_anchor->no_cache = TRUE;
+				HText_setNoCache(me->text);
+				break;
+			    }
+			}
+		    }
+		    cp0 = cp;
+		}
+	    }
+	}
+
+	/*
+	 * Check for an Expires directive. - FM
+	 */
+    } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
+	/*
+	 * If we didn't get an Expires MIME header, store it in the anchor
+	 * element, and if we haven't yet set no_cache, check whether we
+	 * should.  Note that we don't accept a Date header via META tags,
+	 * because it's likely to be untrustworthy, but do check for a Date
+	 * header from a server when making the comparison.  - FM
+	 */
+	LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+				NO, NO, YES, st_other);
+	StrAllocCopy(me->node_anchor->expires, content);
+	if (me->node_anchor->no_cache == FALSE) {
+	    if (!strcmp(content, "0")) {
+		/*
+		 * The value is zero, which we treat as an absolute no-cache
+		 * directive.  - FM
+		 */
+		me->node_anchor->no_cache = TRUE;
+		HText_setNoCache(me->text);
+	    } else if (me->node_anchor->date != NULL) {
+		/*
+		 * We have a Date header, so check if the value is less than or
+		 * equal to that.  - FM
+		 */
+		if (LYmktime(content, TRUE) <=
+		    LYmktime(me->node_anchor->date, TRUE)) {
+		    me->node_anchor->no_cache = TRUE;
+		    HText_setNoCache(me->text);
+		}
+	    } else if (LYmktime(content, FALSE) == 0) {
+		/*
+		 * We don't have a Date header, and the value is in past for
+		 * us.  - FM
+		 */
+		me->node_anchor->no_cache = TRUE;
+		HText_setNoCache(me->text);
+	    }
+	}
 
 	/*
 	 * Check for a Refresh directive.  - FM
@@ -2566,6 +2585,7 @@ void LYHandleMETA(HTStructured * me, const BOOL *present,
     FREE(http_equiv);
     FREE(name);
     FREE(content);
+    FREE(charset);
 }
 
 /*