diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 2021-07-02 00:15:34 +0000 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 2021-07-02 00:15:34 +0000 |
commit | 35787b45f4cbf08d6e4d913e859a39a4e4369766 (patch) | |
tree | 01ee977a699ad9f78393fca6bef3888bc707b715 /WWW | |
parent | 811be0812233351687f2215e264eeb886a2a0060 (diff) | |
download | lynx-snapshots-35787b45f4cbf08d6e4d913e859a39a4e4369766.tar.gz |
snapshot of project "lynx", label v2-9-0dev_6l
Diffstat (limited to 'WWW')
-rw-r--r-- | WWW/Library/Implementation/HTCJK.h | 3 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 89 |
2 files changed, 76 insertions, 16 deletions
diff --git a/WWW/Library/Implementation/HTCJK.h b/WWW/Library/Implementation/HTCJK.h index 11a43c52..7edf50b4 100644 --- a/WWW/Library/Implementation/HTCJK.h +++ b/WWW/Library/Implementation/HTCJK.h @@ -1,5 +1,5 @@ /* - * $LynxId: HTCJK.h,v 1.21 2021/06/30 17:16:36 tom Exp $ + * $LynxId: HTCJK.h,v 1.22 2021/07/01 23:51:38 tom Exp $ * * CJK character converter HTCJK.h * ======================= @@ -43,6 +43,7 @@ extern "C" { #define IS_SJIS_2BYTE(hi,lo) (IS_SJIS_LO(lo) && (IS_SJIS_HI1(hi) || IS_SJIS_HI2(hi))) #define IS_SJIS_X0201KANA(lo) ((0xA1 <= (lo)) && ((lo) <= 0xDF)) +#define IS_EUC_LOS(lo) ((0x21 <= (lo)) && ((lo) <= 0x7E)) /* standard */ #define IS_EUC_LOX(lo) ((0xA1 <= (lo)) && ((lo) <= 0xFE)) /* extended */ #define IS_EUC_HI(hi) ((0xA1 <= (hi)) && ((hi) <= 0xFE)) #define IS_EUC_X0201KANA(hi,lo) (((hi) == 0x8E) && (0xA1 <= (lo)) && ((lo) <= 0xDF)) diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 7d1d44a8..97a25eee 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -1,5 +1,5 @@ /* - * $LynxId: SGML.c,v 1.172 2021/06/30 20:25:01 tom Exp $ + * $LynxId: SGML.c,v 1.176 2021/07/02 00:08:26 tom Exp $ * * General SGML Parser code SGML.c * ======================== @@ -1646,12 +1646,6 @@ static void SGML_character(HTStream *me, int c_in) c = UCH(c_in); clong = UCH(c); -#if 0 - CTRACE((tfp, "%s:%d PUTC %02x %c\n", - LYCharSet_UC[me->inUCLYhndl].MIMEname, me->T.do_cjk, c, (c > 32 && - c < 127) - ? c : '#')); -#endif if (me->T.decode_utf8) { switch (HTDecodeUTF8(&(me->U), &c_in, &clong)) { case dUTF8_ok: @@ -1761,12 +1755,11 @@ static void SGML_character(HTStream *me, int c_in) if (me->T.trans_to_uni && ((strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-cn") == 0))) { if (me->U.utf_count == 0) { - if (IS_GBK_HI(c) || - IS_GBK_HI(c)) { + if (IS_GBK_HI(c)) { me->U.utf_buf[0] = (char) c; me->U.utf_count = 1; clong = ucCannotConvert; - CTRACE((tfp, "Get EUC-CN: 0x%02X\n", c & 0xff)); + CTRACE((tfp, "Get EUC-CN: 0x%02X\n", UCH(c))); } } else { if (IS_GBK_LO(c)) { @@ -1774,13 +1767,77 @@ static void SGML_character(HTStream *me, int c_in) clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl); if (clong > 0) { CTRACE((tfp, "... second: [%02X%02X] U+%04lX\n", - me->U.utf_buf[0] & 0xff, - me->U.utf_buf[1] & 0xff, + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), + clong)); + } else { + CTRACE((tfp, "... second: [%02X%02X] %ld\n", + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), + clong)); + } + } + me->U.utf_count = 0; + } + goto top1; + } else +#endif /* EXP_CHINESEUTF8_SUPPORT */ +#ifdef EXP_CHINESEUTF8_SUPPORT + if (me->T.trans_to_uni && + ((strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-kr") == 0))) { + if (me->U.utf_count == 0) { + if (IS_EUC_HI(c)) { + me->U.utf_buf[0] = (char) c; + me->U.utf_count = 1; + clong = ucCannotConvert; + CTRACE((tfp, "Get EUC-KR: 0x%02X\n", UCH(c))); + } + } else { + if (IS_EUC_LOS(c) || + IS_EUC_LOX(c)) { + me->U.utf_buf[1] = (char) c; + clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl); + if (clong > 0) { + CTRACE((tfp, "... second: [%02X%02X] U+%04lX\n", + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), + clong)); + } else { + CTRACE((tfp, "... second: [%02X%02X] %ld\n", + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), + clong)); + } + } + me->U.utf_count = 0; + } + goto top1; + } else +#endif /* EXP_CHINESEUTF8_SUPPORT */ +#ifdef EXP_CHINESEUTF8_SUPPORT + if (me->T.trans_to_uni && + ((strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "big5") == 0))) { + if (me->U.utf_count == 0) { + if (IS_BIG5_HI(c)) { + me->U.utf_buf[0] = (char) c; + me->U.utf_count = 1; + clong = ucCannotConvert; + CTRACE((tfp, "Get BIG5: 0x%02X\n", UCH(c))); + } + } else { + if (IS_BIG5_LOS(c) || + IS_BIG5_LOX(c)) { + me->U.utf_buf[1] = (char) c; + clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl); + if (clong > 0) { + CTRACE((tfp, "... second: [%02X%02X] U+%04lX\n", + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), clong)); } else { CTRACE((tfp, "... second: [%02X%02X] %ld\n", - me->U.utf_buf[0] & 0xff, - me->U.utf_buf[1] & 0xff, + UCH(me->U.utf_buf[0]), + UCH(me->U.utf_buf[1]), clong)); } } @@ -1991,7 +2048,9 @@ static void SGML_character(HTStream *me, int c_in) case S_text: #ifdef EXP_CHINESEUTF8_SUPPORT if (IS_CJK_TTY && - !strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-cn")) { + (!strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-cn") || + !strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "big5") || + !strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-kr"))) { /* * Leave the case statement if we have not collected both of the * bytes for the EUC-CN character. If we have, then continue on |