diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 2000-02-15 04:10:19 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 2000-02-15 04:10:19 -0500 |
commit | 383b3f2ddbc1e2b64996958c1d9d944979fd190b (patch) | |
tree | 5c448e24f1b69593713b31529d1bdedf39a22a9d /WWW/Library/Implementation/SGML.c | |
parent | d61bc4e56055c372ab0b8d027f376c2f4a2ecfff (diff) | |
download | lynx-snapshots-383b3f2ddbc1e2b64996958c1d9d944979fd190b.tar.gz |
snapshot of project "lynx", label v2-8-3dev_19
Diffstat (limited to 'WWW/Library/Implementation/SGML.c')
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 35 |
1 files changed, 32 insertions, 3 deletions
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 03db38da..db7e9e40 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -39,9 +39,11 @@ # include <LYPrettySrc.h> #endif +#if 0 #ifdef CJK_EX /* 1997/12/12 (Fri) 16:54:58 */ extern HTkcode last_kcode; #endif +#endif #define INVALID (-1) @@ -157,6 +159,7 @@ struct _HTStream { S_esc_dq, S_dollar_dq, S_paren_dq, S_nonascii_text_dq, S_dollar_paren_dq, S_in_kanji, S_junk_tag, S_junk_pi} state; + unsigned char kanji_buf; #ifdef CALLERDATA void * callerData; #endif /* CALLERDATA */ @@ -1473,7 +1476,7 @@ PRIVATE void SGML_character ARGS2( ** we can revert back to the unchanged c_in. - KW */ #define unsign_c clong -#ifdef CJK_EX /* 1997/12/12 (Fri) 18:08:48 */ +#if 0 static unsigned char sjis_1st = '\0'; unsigned char sjis_hi, sjis_lo; #endif @@ -1690,6 +1693,9 @@ top1: HTCJK == NOCJK) goto after_switch; +#if 0 /* This JIS X0201 Kana to JIS X0208 Kana conversion is/should be + * done in the HTextAppendCharacter. -- TH + */ #ifdef CJK_EX /* 1998/11/24 (Tue) 17:02:31 */ if (HTCJK == JAPANESE && last_kcode == SJIS) { if (sjis_1st == '\0' && (IS_SJIS_HI1(c) || IS_SJIS_HI2(c))) { @@ -1708,6 +1714,7 @@ top1: } } #endif +#endif /* ** Ignore 127 if we don't have HTPassHighCtrlRaw @@ -1727,6 +1734,24 @@ top1: !(PASSHICTRL || HTCJK != NOCJK)) goto after_switch; + /* Almost all CJK characters are double byte but only Japanese + * JIS X0201 Kana is single byte. To prevent to fail SGML parsing + * we have to care them here. -- TH + */ + if ((HTCJK==JAPANESE) && (context->state==S_in_kanji) && + !IS_JAPANESE_2BYTE(context->kanji_buf,(unsigned char)c)) { +#ifdef CONV_JISX0201KANA_TO_JISX0208KANA + if (IS_SJIS_X0201KANA(context->kanji_buf)) { + JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo); + PUTC(sjis_hi); + PUTC(sjis_lo); + } + else +#endif + PUTC(context->kanji_buf); + context->state = S_text; + } + /* ** Handle character based on context->state. */ @@ -1744,6 +1769,7 @@ top1: ** (see below). - FM */ context->state = S_text; + PUTC(context->kanji_buf); PUTC(c); break; @@ -1772,7 +1798,7 @@ top1: ** to having raw mode off with CJK. - FM */ context->state = S_in_kanji; - PUTC(c); + context->kanji_buf = c; break; } else if (HTCJK != NOCJK && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ /* @@ -3514,7 +3540,7 @@ top1: PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); - } else + } else #endif if (context->current_tag->name) start_element(context); @@ -4097,6 +4123,8 @@ top1: context->state = S_esc; } PUTC(c); + if (c < 32) + context->state = S_text; break; case S_esc_sq: /* Expecting '$'or '(' following CJK ESC. */ @@ -4383,6 +4411,7 @@ PUBLIC HTStream* SGML_new ARGS3( /* context->extra_tags = dtd->tags + dtd->number_of_tags; */ context->current_tag = context->slashedtag = NULL; context->state = S_text; + context->kanji_buf = '\0'; context->element_stack = 0; /* empty */ context->inSELECT = FALSE; context->no_lynx_specialcodes = NO; /* special codes normally generated */ |