about summary refs log tree commit diff stats
path: root/WWW/Library/Implementation/SGML.c
diff options
context:
space:
mode:
author Thomas E. Dickey <dickey@invisible-island.net> 2005-10-17 00:37:10 -0400
committer Thomas E. Dickey <dickey@invisible-island.net> 2005-10-17 00:37:10 -0400
commit 1876fe93dd4a772ba8a6894f0ba0780b0171a5f2 (patch)
tree faccb53146b913cf579194b46183f8798fb192ee /WWW/Library/Implementation/SGML.c
parent 956e895c75cc47e66b5ff6f43ce0e1e2cbdc194e (diff)
download lynx-snapshots-1876fe93dd4a772ba8a6894f0ba0780b0171a5f2.tar.gz
snapshot of project "lynx", label v2-8-6dev_14
Diffstat (limited to 'WWW/Library/Implementation/SGML.c')
-rw-r--r-- WWW/Library/Implementation/SGML.c 44
1 files changed, 40 insertions, 4 deletions
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c
index c73d312c..48e0587d 100644
--- a/WWW/Library/Implementation/SGML.c
+++ b/WWW/Library/Implementation/SGML.c
@@ -1605,9 +1605,45 @@ static void SGML_character(HTStream *context, char c_in)
      * If we want the raw input converted to Unicode, try that now.  - FM
      */
     if (context->T.trans_to_uni &&
-	((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) ||	/* S/390 -- gil -- 0744 */
-	 (unsign_c < ' ' && unsign_c != 0 &&
-	  context->T.trans_C0_to_uni))) {
+#ifdef EXP_JAPANESEUTF8_SUPPORT
+	((strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "euc-jp") == 0) ||
+	 (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0))) {
+	if (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0) {
+	    if (context->utf_count == 0) {
+		if (IS_SJIS_HI1((unsigned char) c) ||
+		    IS_SJIS_HI2((unsigned char) c)) {
+		    context->utf_buf[0] = c;
+		    context->utf_count = 1;
+		    clong = -11;
+		}
+	    } else {
+		if (IS_SJIS_LO((unsigned char) c)) {
+		    context->utf_buf[1] = c;
+		    clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl);
+		}
+		context->utf_count = 0;
+	    }
+	} else {
+	    if (context->utf_count == 0) {
+		if (IS_EUC_HI((unsigned char) c)) {
+		    context->utf_buf[0] = c;
+		    context->utf_count = 1;
+		    clong = -11;
+		}
+	    } else {
+		if (IS_EUC_LOX((unsigned char) c)) {
+		    context->utf_buf[1] = c;
+		    clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl);
+		}
+		context->utf_count = 0;
+	    }
+	}
+	goto top1;
+    } else if (context->T.trans_to_uni &&
+#endif
+	       ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) ||	/* S/390 -- gil -- 0744 */
+		(unsign_c < ' ' && unsign_c != 0 &&
+		 context->T.trans_C0_to_uni))) {
 	/*
 	 * Convert the octet to Unicode.  - FM
 	 */
@@ -4851,7 +4887,7 @@ unsigned char *TO_EUC(const unsigned char *jis,
 		}
 	    }
 	}
-	if (c == ESC) {
+	if (c == CH_ESC) {
 	    if (*s == to2B) {
 		if ((s[1] == 'B') || (s[1] == '@')) {
 		    jis_stat = 0x80;