diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 2008-12-26 01:32:23 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 2008-12-26 01:32:23 -0500 |
commit | ed2d970693bd42f56001960e78f70fc97c925491 (patch) | |
tree | 7ff880dda30d349acac8a5534473904efdd04298 /src | |
parent | 759155d6bf272ff2cc210cc3e1cc588366b220de (diff) | |
download | lynx-snapshots-ed2d970693bd42f56001960e78f70fc97c925491.tar.gz |
snapshot of project "lynx", label v2-8-7dev_11b
Diffstat (limited to 'src')
-rw-r--r-- | src/LYBookmark.c | 30 | ||||
-rw-r--r-- | src/LYMain.c | 20 | ||||
-rw-r--r-- | src/LYShowInfo.c | 4 | ||||
-rw-r--r-- | src/LYmktime.c | 39 | ||||
-rw-r--r-- | src/UCdomap.c | 265 | ||||
-rw-r--r-- | src/makefile.in | 32 | ||||
-rw-r--r-- | src/parsdate.c | 187 | ||||
-rw-r--r-- | src/parsdate.y | 107 |
8 files changed, 434 insertions, 250 deletions
diff --git a/src/LYBookmark.c b/src/LYBookmark.c index 2a00a1bc..8c744f08 100644 --- a/src/LYBookmark.c +++ b/src/LYBookmark.c @@ -975,28 +975,38 @@ void LYMBM_statusline(const char *text) */ static BOOLEAN havevisible(const char *Title) { + BOOLEAN result = FALSE; const char *p = Title; unsigned char c; long unicode; for (; *p; p++) { c = UCH(TOASCII(*p)); - if (c > 32 && c < 127) - return (TRUE); + if (c > 32 && c < 127) { + result = TRUE; + break; + } if (c <= 32 || c == 127) continue; - if (LYHaveCJKCharacterSet || !UCCanUniTranslateFrom(current_char_set)) - return (TRUE); + if (LYHaveCJKCharacterSet || !UCCanUniTranslateFrom(current_char_set)) { + result = TRUE; + break; + } unicode = UCTransToUni(*p, current_char_set); - if (unicode > 32 && unicode < 127) - return (TRUE); - if (unicode <= 32 || unicode == 0xa0 || unicode == 0xad) + if (unicode == ucNeedMore) continue; - if (unicode >= 0x2000 && unicode < 0x200f) + if (unicode > 32 && unicode < 127) { + result = TRUE; + break; + } + if (unicode <= 32 || unicode == 0xa0 || unicode == 0xad) continue; - return (TRUE); + if (unicode < 0x2000 || unicode >= 0x200f) { + result = TRUE; + break; + } } - return (FALSE); /* if we came here */ + return (result); } /* diff --git a/src/LYMain.c b/src/LYMain.c index ee40c047..f28db16e 100644 --- a/src/LYMain.c +++ b/src/LYMain.c @@ -1,5 +1,5 @@ /* - * $LynxId: LYMain.c,v 1.188 2008/12/14 18:07:56 tom Exp $ + * $LynxId: LYMain.c,v 1.190 2008/12/26 01:18:43 tom Exp $ */ #include <HTUtils.h> #include <HTTP.h> @@ -1106,7 +1106,7 @@ int main(int argc, LYAddPathToHome(LYTraceLogPath, LY_MAXPATH, cp); /* - * Act on -help NOW, so we only output the help and exit. - FM + * Act on -version, -trace and -trace-mask NOW. */ for (i = 1; i < argc; i++) { parse_arg(&argv[i], 1, &i); @@ -3308,11 +3308,11 @@ static Config_Type Arg_Table [] = "=MIMEname\ncharset for documents that don't specify it" ), PARSE_FUN( - "assume_local_charset", 4|NEED_FUNCTION_ARG,assume_local_charset_fun, + "assume_local_charset", 4|NEED_FUNCTION_ARG, assume_local_charset_fun, "=MIMEname\ncharset assumed for local files" ), PARSE_FUN( - "assume_unrec_charset", 4|NEED_FUNCTION_ARG,assume_unrec_charset_fun, + "assume_unrec_charset", 4|NEED_FUNCTION_ARG, assume_unrec_charset_fun, "=MIMEname\nuse this instead of unrecognized charsets" ), PARSE_FUN( @@ -3370,12 +3370,12 @@ outputs for -source dumps" ), #ifdef EXP_CMD_LOGGING PARSE_STR( - "cmd_log", 2|NEED_LYSTRING_ARG, lynx_cmd_logfile, - "=FILENAME\nlog keystroke commands to the given file" + "cmd_log", 2|NEED_LYSTRING_ARG, lynx_cmd_logfile, + "=FILENAME\nlog keystroke commands to the given file" ), PARSE_STR( - "cmd_script", 2|NEED_LYSTRING_ARG, lynx_cmd_script, - "=FILENAME\nread keystroke commands from the given file\n(see -cmd_log)" + "cmd_script", 2|NEED_LYSTRING_ARG, lynx_cmd_script, + "=FILENAME\nread keystroke commands from the given file\n(see -cmd_log)" ), #endif #ifdef USE_SLANG @@ -3400,7 +3400,7 @@ outputs for -source dumps" "=FILENAME\nspecifies a file to use to read cookies" ), PARSE_STR( - "cookie_save_file", 4|LYSTRING_ARG, LYCookieSaveFile, + "cookie_save_file", 4|LYSTRING_ARG, LYCookieSaveFile, "=FILENAME\nspecifies a file to use to store cookies" ), #endif /* USE_PERSISTENT_COOKIES */ @@ -3527,7 +3527,7 @@ soon as they are seen)" "send a HEAD request" ), PARSE_FUN( - "help", 1|FUNCTION_ARG, help_fun, + "help", 4|FUNCTION_ARG, help_fun, "print this usage message" ), PARSE_FUN( diff --git a/src/LYShowInfo.c b/src/LYShowInfo.c index f335a4a8..c24a3cd6 100644 --- a/src/LYShowInfo.c +++ b/src/LYShowInfo.c @@ -1,4 +1,4 @@ -/* $LynxId: LYShowInfo.c,v 1.66 2008/12/07 22:14:05 tom Exp $ */ +/* $LynxId: LYShowInfo.c,v 1.67 2008/12/25 14:35:50 tom Exp $ */ #include <HTUtils.h> #include <HTFile.h> #include <HTParse.h> @@ -210,7 +210,7 @@ int LYShowInfo(DocInfo *doc, buf[buf_size] = '\0'; } else { sprintf(buf, "%.*s", (int) sizeof(buf) - 1, - gettext("Unable to follow link")); + gettext("Unable to follow link")); } ADD_SS(gettext("Points to file:"), buf); } diff --git a/src/LYmktime.c b/src/LYmktime.c index 50caad3b..8116dad0 100644 --- a/src/LYmktime.c +++ b/src/LYmktime.c @@ -1,4 +1,4 @@ -/* $LynxId: LYmktime.c,v 1.7 2008/07/06 12:55:40 tom Exp $ */ +/* $LynxId: LYmktime.c,v 1.8 2008/12/25 00:42:09 tom Exp $ */ #include <LYStrings.h> #include <LYUtils.h> @@ -57,21 +57,36 @@ time_t LYmktime(char *string, BOOL absolute) { #if USE_PARSDATE - time_t result; + time_t result = 0; if (non_empty(string)) { - CTRACE((tfp, "LYmktime: Parsing '%s'\n", string)); - result = parsedate(string, 0); +#ifdef EBCDIC + int n; + char *copied = NULL; - if (!absolute) { - if ((time((time_t *) 0) - result) >= 0) - result = 0; - } - if (result != 0) { - CTRACE((tfp, "LYmktime: clock=%" PRI_time_t ", ctime=%s", - CAST_time_t(result), - ctime(&result))); + StrAllocCopy(copied, string); + if (copied != NULL) { + /* parsedate() expects ASCII input */ + for (n = 0; copied[n] != '\0'; ++n) + copied[n] = TOASCII(copied[n]); + string = copied; +#endif + CTRACE((tfp, "LYmktime: Parsing '%s'\n", string)); + result = parsedate(string, 0); + + if (!absolute) { + if ((time((time_t *) 0) - result) >= 0) + result = 0; + } + if (result != 0) { + CTRACE((tfp, "LYmktime: clock=%" PRI_time_t ", ctime=%s", + CAST_time_t(result), + ctime(&result))); + } +#ifdef EBCDIC + free(copied); } +#endif } return result; #else diff --git a/src/UCdomap.c b/src/UCdomap.c index 48c77400..bc2dccda 100644 --- a/src/UCdomap.c +++ b/src/UCdomap.c @@ -1,5 +1,5 @@ /* - * $LynxId: UCdomap.c,v 1.67 2007/07/30 19:43:05 tom Exp $ + * $LynxId: UCdomap.c,v 1.70 2008/12/26 00:10:51 tom Exp $ * * UCdomap.c * ========= @@ -24,8 +24,11 @@ #include <LYGlobalDefs.h> #include <UCdomap.h> #include <UCMap.h> +#include <UCAux.h> #include <UCDefs.h> #include <LYCharSets.h> +#include <LYStrings.h> +#include <LYUtils.h> #if defined(USE_LOCALE_CHARSET) && defined(HAVE_LANGINFO_CODESET) #include <langinfo.h> @@ -89,10 +92,14 @@ int auto_display_charset = -1; #endif static const char *UC_GNsetMIMEnames[4] = -{"iso-8859-1", "x-dec-graphics", "cp437", "x-transparent"}; +{ + "iso-8859-1", "x-dec-graphics", "cp437", "x-transparent" +}; static int UC_GNhandles[4] = -{-1, -1, -1, -1}; +{ + -1, -1, -1, -1 +}; /* * Some of the code below, and some of the comments, are left in for @@ -453,7 +460,7 @@ static int con_insert_unipair(u16 unicode, u16 fontpos, int fordefault) else uni_pagedir[n] = p1; if (!p1) - return -1; + return ucError; for (i = 0; i < 32; i++) { p1[i] = NULL; @@ -463,7 +470,7 @@ static int con_insert_unipair(u16 unicode, u16 fontpos, int fordefault) if (!(p2 = p1[n = (unicode >> 6) & 0x1f])) { p2 = p1[n] = (u16 *) malloc(64 * sizeof(u16)); if (!p2) - return -1; + return ucError; for (i = 0; i < 64; i++) { p2[i] = 0xffff; /* No glyph for this character (yet) */ @@ -494,7 +501,7 @@ static int con_insert_unipair_str(u16 unicode, const char *replace_str, else uni_pagedir_str[n] = p1; if (!p1) - return -1; + return ucError; for (i = 0; i < 32; i++) { p1[i] = NULL; @@ -506,7 +513,7 @@ static int con_insert_unipair_str(u16 unicode, const char *replace_str, p1[n] = (char **) malloc(64 * sizeof(char *)); if (!p1[n]) - return -1; + return ucError; p2 = (const char **) p1[n]; for (i = 0; i < 64; i++) { @@ -639,7 +646,7 @@ static int UC_con_set_unimap(int UC_charset_out_hndl, if (!UC_valid_UC_charset(UC_charset_out_hndl)) { CTRACE((tfp, "UC_con_set_unimap: Invalid charset handle %d.\n", UC_charset_out_hndl)); - return -1; + return ucError; } p = UCInfo[UC_charset_out_hndl].unitable; @@ -713,12 +720,12 @@ static int conv_uni_to_pc(long ucs, /* * Not a printable character. */ - return -1; + return ucError; } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) { /* * Zero-width space. */ - return -2; + return ucZeroWidth; } else if ((ucs & ~UNI_DIRECT_MASK) == UNI_DIRECT_BASE) { /* * UNI_DIRECT_BASE indicates the start of the region in the @@ -731,11 +738,11 @@ static int conv_uni_to_pc(long ucs, if (usedefault) { if (!unidefault_contents_valid) - return -3; + return ucInvalidHash; p1 = unidefault_pagedir[ucs >> 11]; } else { if (!hashtable_contents_valid) - return -3; + return ucInvalidHash; p1 = uni_pagedir[ucs >> 11]; } @@ -748,7 +755,7 @@ static int conv_uni_to_pc(long ucs, /* * Not found. */ - return -4; + return ucNotFound; } /* @@ -777,21 +784,21 @@ static int conv_uni_to_str(char *outbuf, /* * Not a printable character. */ - return -1; + return ucError; } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) { /* * Zero-width space. */ - return -2; + return ucZeroWidth; } if (usedefault) { if (!unidefault_str_contents_valid) - return -3; + return ucInvalidHash; p1 = unidefault_pagedir_str[ucs >> 11]; } else { if (!hashtable_str_contents_valid) - return -3; + return ucInvalidHash; p1 = uni_pagedir_str[ucs >> 11]; } @@ -805,7 +812,7 @@ static int conv_uni_to_str(char *outbuf, /* * Not found. */ - return -4; + return ucNotFound; } int UCInitialized = 0; @@ -837,7 +844,7 @@ int UCTransUniChar(long unicode, if (LYCharSet_UC[charset_out].codepage < 0) return (unicode < 128) ? (int) unicode : LYCharSet_UC[charset_out].codepage; if ((UChndl_out = default_UChndl) < 0) - return -12; + return ucCannotOutput; isdefault = 1; } else { isdefault = UCInfo[UChndl_out].replacedesc.isdefault; @@ -886,13 +893,13 @@ int UCTransUniCharStr(char *outbuf, const u16 *ut; if (buflen < 2) - return -13; + return ucBufferTooSmall; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) { if (LYCharSet_UC[charset_out].codepage < 0) return LYCharSet_UC[charset_out].codepage; if ((UChndl_out = default_UChndl) < 0) - return -12; + return ucCannotOutput; isdefault = 1; } else { isdefault = UCInfo[UChndl_out].replacedesc.isdefault; @@ -938,8 +945,8 @@ int UCTransUniCharStr(char *outbuf, } if (isdefault || trydefault) { #ifdef EXP_JAPANESEUTF8_SUPPORT - if ((strcmp(LYCharSet_UC[charset_out].MIMEname, "shift_jis") == 0) || - (strcmp(LYCharSet_UC[charset_out].MIMEname, "euc-jp") == 0)) { + if (LYCharSet_UC[charset_out].codepage == 0 && + LYCharSet_UC[charset_out].codepoints == 0) { iconv_t cd; char str[3], *pin, *pout; size_t inleft, outleft; @@ -951,22 +958,41 @@ int UCTransUniCharStr(char *outbuf, pin = str; inleft = 2; pout = outbuf, outleft = buflen; + /* + * Try TRANSLIT first, since it is an extension which can provide + * translations when there is no available exact translation to + * the target character set. + */ HTSprintf0(&tocode, "%s//TRANSLIT", LYCharSet_UC[charset_out].MIMEname); cd = iconv_open(tocode, "UTF-16BE"); + if (cd == (iconv_t) - 1) { + /* + * Try again, without TRANSLIT + */ + HTSprintf0(&tocode, "%s", LYCharSet_UC[charset_out].MIMEname); + cd = iconv_open(tocode, "UTF-16BE"); + + if (cd == (iconv_t) - 1) { + CTRACE((tfp, + "Warning: Cannot transcode form charset %s to %s!\n", + "UTF-16BE", tocode)); + } + } FREE(tocode); - if (cd == (iconv_t) (-1)) - cd = iconv_open(LYCharSet_UC[charset_out].MIMEname, "UTF-16BE"); - rc = iconv(cd, (ICONV_CONST char **) &pin, &inleft, &pout, &outleft); - iconv_close(cd); - if ((pout - outbuf) == 3) { - CTRACE((tfp, - "It seems to be a JIS X 0201 code(%ld). Not supported.\n", unicode)); - pin = str; - inleft = 2; - pout = outbuf, outleft = buflen; - } else if (rc >= 0) { - *pout = '\0'; - return (strlen(outbuf)); + + if (cd != (iconv_t) - 1) { + rc = iconv(cd, (ICONV_CONST char **) &pin, &inleft, &pout, &outleft); + iconv_close(cd); + if ((pout - outbuf) == 3) { + CTRACE((tfp, + "It seems to be a JIS X 0201 code(%ld). Not supported.\n", unicode)); + pin = str; + inleft = 2; + pout = outbuf, outleft = buflen; + } else if (rc >= 0) { + *pout = '\0'; + return (strlen(outbuf)); + } } } #endif @@ -994,7 +1020,7 @@ int UCTransUniCharStr(char *outbuf, } return rc; } - return -4; + return ucNotFound; } static int UC_lastautoGN = 0; @@ -1042,32 +1068,30 @@ int UCTransChar(char ch_in, int charset_out) { int unicode, Gn; - int rc = -4; + int rc = ucNotFound; int UChndl_in, UChndl_out; int isdefault, trydefault = 0; const u16 *ut; int upd = 0; -#ifndef UC_NO_SHORTCUTS if (charset_in == charset_out) return UCH(ch_in); -#endif /* UC_NO_SHORTCUTS */ if (charset_in < 0) - return -11; + return ucCannotConvert; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) - return -11; + return ucCannotConvert; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) { if (LYCharSet_UC[charset_out].codepage < 0) return LYCharSet_UC[charset_out].codepage; if ((UChndl_out = default_UChndl) < 0) - return -12; + return ucCannotOutput; isdefault = 1; } else { isdefault = UCInfo[UChndl_out].replacedesc.isdefault; trydefault = UCInfo[UChndl_out].replacedesc.trydefault; } if (!UCInfo[UChndl_in].num_uni) - return -11; + return ucCannotConvert; if ((Gn = UCInfo[UChndl_in].GN) < 0) { Gn = UC_MapGN(UChndl_in, 0); upd = 1; @@ -1127,27 +1151,58 @@ long int UCTransJPToUni(char *inbuf, if ((ilen == 0) && (olen == 0)) { return (((unsigned char) outbuf[0]) << 8) + (unsigned char) outbuf[1]; } - return -11; + return ucCannotConvert; } #endif +/* + * Translate a character to Unicode. If additional bytes are needed, this + * returns ucNeedMore, based on its internal state. To reset the state, + * call this with charset_in < 0. + */ long int UCTransToUni(char ch_in, int charset_in) { + static char buffer[10]; + static unsigned inx = 0; + int unicode, Gn; - unsigned char ch_iu; + unsigned char ch_iu = UCH(ch_in); int UChndl_in; - ch_iu = UCH(ch_in); -#ifndef UC_NO_SHORTCUTS - if (charset_in == LATIN1) + /* + * Reset saved-state. + */ + if (charset_in < 0) { + inx = 0; + return ucCannotConvert; + } else if (charset_in == LATIN1) { return ch_iu; + } else if (charset_in == UTF8_handle) { + if (is8bits(ch_in)) { + unsigned need; + char *ptr; + + buffer[inx++] = ch_iu; + buffer[inx] = '\0'; + need = utf8_length(TRUE, buffer); + if (need && (need + 1) == inx) { + inx = 0; + ptr = buffer; + return UCGetUniFromUtf8String(&ptr); + } else if (inx < sizeof(buffer) - 1) { + return ucNeedMore; + } else { + inx = 0; + } + } else { + inx = 0; + } + } #ifdef EXP_JAPANESEUTF8_SUPPORT if ((strcmp(LYCharSet_UC[charset_in].MIMEname, "shift_jis") == 0) || (strcmp(LYCharSet_UC[charset_in].MIMEname, "euc-jp") == 0)) { - static char buffer[3]; char obuffer[3], *pin, *pout; - static int inx = 0; size_t rc, ilen, olen; iconv_t cd; @@ -1156,14 +1211,14 @@ long int UCTransToUni(char ch_in, ilen = olen = 2; if (strcmp(LYCharSet_UC[charset_in].MIMEname, "shift_jis") == 0) { if (inx == 0) { - if (IS_SJIS_HI1((unsigned char) ch_in) || - IS_SJIS_HI2((unsigned char) ch_in)) { + if (IS_SJIS_HI1(ch_iu) || + IS_SJIS_HI2(ch_iu)) { buffer[0] = ch_in; inx = 1; - return -11; + return ucNeedMore; } } else { - if (IS_SJIS_LO((unsigned char) ch_in)) { + if (IS_SJIS_LO(ch_iu)) { buffer[1] = ch_in; buffer[2] = 0; @@ -1172,21 +1227,20 @@ long int UCTransToUni(char ch_in, iconv_close(cd); inx = 0; if ((ilen == 0) && (olen == 0)) { - return (((unsigned char) obuffer[0]) << 8) - + (unsigned char) obuffer[1]; + return (UCH(obuffer[0]) << 8) + UCH(obuffer[1]); } } } } if (strcmp(LYCharSet_UC[charset_in].MIMEname, "euc-jp") == 0) { if (inx == 0) { - if (IS_EUC_HI((unsigned char) ch_in)) { + if (IS_EUC_HI(ch_iu)) { buffer[0] = ch_in; inx = 1; - return -11; + return ucNeedMore; } } else { - if (IS_EUC_LOX((unsigned char) ch_in)) { + if (IS_EUC_LOX(ch_iu)) { buffer[1] = ch_in; buffer[2] = 0; @@ -1195,8 +1249,7 @@ long int UCTransToUni(char ch_in, iconv_close(cd); inx = 0; if ((ilen == 0) && (olen == 0)) { - return (((unsigned char) obuffer[0]) << 8) - + (unsigned char) obuffer[1]; + return (UCH(obuffer[0]) << 8) + UCH(obuffer[1]); } } } @@ -1204,27 +1257,27 @@ long int UCTransToUni(char ch_in, inx = 0; } #endif - if (UCH(ch_in) < 128 && UCH(ch_in) >= 32) + if (ch_iu < 128 && ch_iu >= 32) return ch_iu; -#endif /* UC_NO_SHORTCUTS */ - if (charset_in < 0) - return -11; - if (UCH(ch_in) < 32 && - LYCharSet_UC[charset_in].enc != UCT_ENC_8BIT_C0) + + if (ch_iu < 32 && + LYCharSet_UC[charset_in].enc != UCT_ENC_8BIT_C0) { /* * Don't translate C0 chars except for specific charsets. */ return ch_iu; - if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) - return -11; - if (!UCInfo[UChndl_in].num_uni) - return -11; + } else if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) { + return ucCannotConvert; + } else if (!UCInfo[UChndl_in].num_uni) { + return ucCannotConvert; + } + if ((Gn = UCInfo[UChndl_in].GN) < 0) { Gn = UC_MapGN(UChndl_in, 1); } UC_translate = set_translate(Gn); - unicode = UC_translate[UCH(ch_in)]; + unicode = UC_translate[ch_iu]; return unicode; } @@ -1234,29 +1287,27 @@ int UCReverseTransChar(char ch_out, int charset_out) { int Gn; - int rc = -1; + int rc = ucError; int UChndl_in, UChndl_out; int isdefault; int i_ch = UCH(ch_out); const u16 *ut; -#ifndef UC_NO_SHORTCUTS if (charset_in == charset_out) return UCH(ch_out); -#endif /* UC_NO_SHORTCUTS */ if (charset_in < 0) - return -11; + return ucCannotConvert; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) - return -11; + return ucCannotConvert; if (!UCInfo[UChndl_in].num_uni) - return -11; + return ucCannotConvert; if (charset_out < 0) - return -12; + return ucCannotOutput; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) { if (LYCharSet_UC[charset_out].codepage < 0) return LYCharSet_UC[charset_out].codepage; if ((UChndl_out = default_UChndl) < 0) - return -12; + return ucCannotOutput; isdefault = 1; } else { isdefault = UCInfo[UChndl_out].replacedesc.isdefault; @@ -1304,25 +1355,23 @@ int UCTransCharStr(char *outbuf, int upd = 0; if (buflen < 2) - return -13; -#ifndef UC_NO_SHORTCUTS + return ucBufferTooSmall; if (chk_single_flag && charset_in == charset_out) { outbuf[0] = ch_in; outbuf[1] = '\0'; return 1; } -#endif /* UC_NO_SHORTCUTS */ if (charset_in < 0) - return -11; + return ucCannotConvert; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) - return -11; + return ucCannotConvert; if (!UCInfo[UChndl_in].num_uni) - return -11; + return ucCannotConvert; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) { if (LYCharSet_UC[charset_out].codepage < 0) return LYCharSet_UC[charset_out].codepage; if ((UChndl_out = default_UChndl) < 0) - return -12; + return ucCannotOutput; isdefault = 1; } else { isdefault = UCInfo[UChndl_out].replacedesc.isdefault; @@ -1406,7 +1455,7 @@ int UCTransCharStr(char *outbuf, } return rc; } - return -4; + return ucNotFound; } static int UC_FindGN_byMIME(const char *UC_MIMEcharset) @@ -1418,7 +1467,7 @@ static int UC_FindGN_byMIME(const char *UC_MIMEcharset) return i; } } - return -1; + return ucError; } int UCGetRawUniMode_byLYhndl(int i) @@ -1437,7 +1486,7 @@ static int getLYhndl_byCP(const char *prefix, const char *codepage) { static int nested; - int result = -1; + int result = ucError; if (!nested++) { char *cptmp = NULL; @@ -1464,7 +1513,7 @@ int UCGetLYhndl_byMIME(const char *value) if (!value || !(*value)) { CTRACE((tfp, "UCGetLYhndl_byMIME: NULL argument instead of MIME name.\n")); - return -1; + return ucError; } for (i = 0; @@ -1488,14 +1537,19 @@ int UCGetLYhndl_byMIME(const char *value) return UCGetLYhndl_byMIME("utf-8"); } #endif + if (!strncasecomp(value, "iso", 3) && !strncmp(value + 3, "8859", 4)) { + return getLYhndl_byCP("iso-", value + 3); + } #if !NO_CHARSET_euc_jp - if (!strcasecomp(value, "x-euc-jp")) { + if (!strcasecomp(value, "x-euc-jp") || + !strcasecomp(value, "eucjp")) { return UCGetLYhndl_byMIME("euc-jp"); } #endif #if !NO_CHARSET_shift_jis if ((!strcasecomp(value, "x-shift-jis")) || - (!strcasecomp(value, "x-sjis"))) { + (!strcasecomp(value, "x-sjis")) || + (!strcasecomp(value, "pck"))) { return UCGetLYhndl_byMIME("shift_jis"); } #endif @@ -1542,6 +1596,11 @@ int UCGetLYhndl_byMIME(const char *value) return UCGetLYhndl_byMIME("windows-1252"); } #endif +#if !NO_CHARSET_windows_1251 + if (!strcasecomp(value, "ansi-1251")) { + return UCGetLYhndl_byMIME("windows-1251"); + } +#endif #if !NO_CHARSET_windows_1250 if (!strcasecomp(value, "iso-8859-2-windows-latin-2") || !strcasecomp(value, "cp1250") || @@ -1589,7 +1648,7 @@ int UCGetLYhndl_byMIME(const char *value) /* no more synonyms if come here... */ CTRACE((tfp, "UCGetLYhndl_byMIME: unrecognized MIME name \"%s\"\n", value)); - return -1; /* returns -1 if no charset found by that MIME name */ + return ucError; /* returns -1 if no charset found by that MIME name */ } /* @@ -1642,7 +1701,7 @@ static const char **UC_setup_LYCharSets_repl(int UC_charset_in_hndl, const char **p; char **prepl; const u16 *pp; - char **tp; + const char **tp; const char *s7; const char *s8; size_t i; @@ -1653,7 +1712,7 @@ static const char **UC_setup_LYCharSets_repl(int UC_charset_in_hndl, /* * Create a temporary table for reverse lookup of latin1 codes: */ - tp = (char **) malloc(96 * sizeof(char *)); + tp = (const char **) malloc(96 * sizeof(char *)); if (!tp) return NULL; @@ -1698,7 +1757,7 @@ static const char **UC_setup_LYCharSets_repl(int UC_charset_in_hndl, list = UCInfo[UC_charset_in_hndl].replacedesc.entries; while (ct--) { if ((k = list->unicode) >= 160 && k < 256) { - tp[k - 160] = (char *) list->replace_str; + tp[k - 160] = list->replace_str; } list++; } @@ -1813,7 +1872,7 @@ static int UC_Register_with_LYCharSets(int s, CTRACE((tfp, "UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.", UC_MIMEcharset, UC_LYNXcharset)); - return -1; + return ucError; } /* * Add to LYCharSets.c lists. @@ -1962,7 +2021,7 @@ static int UC_NoUctb_Register_with_LYCharSets(const char *UC_MIMEcharset, for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) { if (LYCharSet_UC[i].MIMEname && !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) { - return -1; + return ucError; } } @@ -1971,7 +2030,7 @@ static int UC_NoUctb_Register_with_LYCharSets(const char *UC_MIMEcharset, CTRACE((tfp, "UC_NoUctb_Register_with_LYCharSets: Too many. Ignoring %s/%s.", UC_MIMEcharset, UC_LYNXcharset)); - return -1; + return ucError; } /* * Add to LYCharSets.c lists. @@ -2027,7 +2086,7 @@ static void UC_Charset_NoUctb_Setup(const char *UC_MIMEcharset, * be returned immediately by UCTrans* functions. */ if (!trydefault && codepage == 0) - codepage = -12; /* if not already set; any negative should do. */ + codepage = ucCannotOutput; /* if not already set; any negative should do. */ UC_NoUctb_Register_with_LYCharSets(UC_MIMEcharset, UC_LYNXcharset, lowest_eight, @@ -2076,7 +2135,7 @@ static int CpOrdinal(const unsigned long cp, const int other) s = i; } if (s < 0) - return -1; + return ucError; /* Store the "real" charset info */ real_charsets[other != 0] = UCGetLYhndl_byMIME(mimeName); /* Duplicate the record. */ diff --git a/src/makefile.in b/src/makefile.in index d990cd97..89bdf027 100644 --- a/src/makefile.in +++ b/src/makefile.in @@ -1,4 +1,4 @@ -# $LynxId: makefile.in,v 1.53 2008/07/06 13:28:03 tom Exp $ +# $LynxId: makefile.in,v 1.54 2008/12/24 18:11:29 tom Exp $ # template-makefile for Lynx src directory SHELL = @CONFIG_SHELL@ @@ -34,6 +34,8 @@ BUILD_CFLAGS = @BUILD_CFLAGS@ BUILD_CPPFLAGS = @BUILD_CPPFLAGS@ @DEFS@ BUILD_EXEEXT = @BUILD_EXEEXT@ +YACC = @YACC@ + LIBS = @LIBS@ $(RESOLVLIB) $(WAISLIB) $(SITE_LIBS) LDFLAGS = @LDFLAGS@ @@ -68,7 +70,7 @@ COMPRESS_PROG =@COMPRESS_PROG@ COMPRESS_EXT =@COMPRESS_EXT@ CHARTRANS_OBJS = UCdomap$o UCAux$o UCAuto$o -OBJS = \ +OBJS = \ LYClean$o LYShowInfo$o LYEdit$o LYStrings$o LYMail$o \ HTAlert$o GridText$o LYGetFile$o LYMain$o LYMainLoop$o \ LYCurses$o LYBookmark$o LYmktime$o LYUtils$o LYOptions$o \ @@ -78,9 +80,12 @@ OBJS = \ LYLeaks$o LYexit$o LYJump$o LYList$o LYCgi$o \ LYTraversal$o LYEditmap$o LYCharSets$o LYCharUtils$o \ LYMap$o LYCookie$o LYStyle$o LYHash$o LYPrettySrc$o \ - TRSTable$o parsdate$o $(CHARTRANS_OBJS) @EXTRA_OBJS@ @LIBOBJS@ + TRSTable$o $(CHARTRANS_OBJS) @EXTRA_OBJS@ @LIBOBJS@ + +GEN_OBJS = parsdate$o -C_SRC = $(OBJS:$o=.c) +C_SRC = $(OBJS:$o=.c) +GEN_SRC = $(GEN_OBJS:$o=.c) all: lynx$x @@ -94,9 +99,9 @@ all: lynx$x @RULE_CC@ @ECHO_CC@$(CPP) -C $(CPP_OPTS) $(srcdir)/$*.c >$@ -lynx$x: message do_chartrans_stuff $(top_builddir)/LYHelp.h $(OBJS) $(WWWLIB) +lynx$x: message do_chartrans_stuff $(top_builddir)/LYHelp.h $(OBJS) $(GEN_OBJS) $(WWWLIB) @echo "Linking and creating Lynx executable" - $(CC) $(CC_OPTS) $(LDFLAGS) -o $@ $(OBJS) $(WWWLIB) $(INTLLIB) $(LDFLAGS) $(LIBS) + $(CC) $(CC_OPTS) $(LDFLAGS) -o $@ $(OBJS) $(GEN_OBJS) $(WWWLIB) $(INTLLIB) $(LDFLAGS) $(LIBS) @echo "Copying Lynx executable into top-level directory" rm -f $(top_builddir)/$@ cp $@ $(top_builddir)/ @@ -119,6 +124,7 @@ lint: clean: rm -f lynx$x core *.core *.leaks *.i *$o *.bak tags TAGS test_* + test -f parsdate.y || rm -f parsdate.c cd chrtrans && $(MAKE) clean tags: @@ -207,11 +213,19 @@ test_mktime: LYmktime.c parsdate.o $(CC) -o $@ $(CC_OPTS) -DTEST_DRIVER LYmktime.c parsdate.o # update generated source -parsdate.c : parsdate.y - yacc parsdate.y +parsdate.c : $(srcdir)/parsdate.y + $(YACC) $(srcdir)/parsdate.y mv y.tab.c $@ +# allow for regenerating parsdate.c, e.g., for non-ASCII systems, while +# handling configure --srcdir option. +parsdate$o: parsdate.c + @-rm -f $@ + -test -f parsdate.c && $(CC) $(CC_OPTS) -c parsdate.c + -test -f parsdate.c || $(CC) $(CC_OPTS) -c $(srcdir)/parsdate.c + test -f $@ + depend : $(TABLES) - makedepend -fmakefile -- $(CC_OPTS) -- $(C_SRC) + makedepend -fmakefile -- $(CC_OPTS) -- $(C_SRC) $(GEN_SRC) # DO NOT DELETE THIS LINE -- make depend depends on it. diff --git a/src/parsdate.c b/src/parsdate.c index e95869f5..bb692ee7 100644 --- a/src/parsdate.c +++ b/src/parsdate.c @@ -8,7 +8,7 @@ static const char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93"; #define YYBYACC 1 #define YYMAJOR 1 #define YYMINOR 9 -#define YYPATCH 20080827 +#define YYPATCH 20081224 #define YYEMPTY (-1) #define yyclearin (yychar = YYEMPTY) @@ -31,9 +31,9 @@ extern int YYPARSE_DECL(); static int yygrowstack(void); #define YYPREFIX "yy" -#line 2 "parsdate.y" +#line 2 "./parsdate.y" /* - * $LynxId: parsdate.c,v 1.4 2008/09/23 23:13:34 tom Exp $ + * $LynxId: parsdate.c,v 1.6 2008/12/24 21:13:01 tom Exp $ * * This module is adapted and extended from tin, to use for LYmktime(). * @@ -73,7 +73,16 @@ static int yygrowstack(void); */ #define ENDOF(array) (&array[ARRAY_SIZE(array)]) -#define CTYPE(isXXXXX, c) (((unsigned char)(c) < 128) && isXXXXX(((int)c))) +#ifdef EBCDIC +#define TO_ASCII(c) TOASCII(c) +#define TO_LOCAL(c) FROMASCII(c) +#else +#define TO_ASCII(c) (c) +#define TO_LOCAL(c) (c) +#endif + +#define IS7BIT(x) ((unsigned) TO_ASCII(x) < 128) +#define CTYPE(isXXXXX, c) (IS7BIT(c) && isXXXXX(((unsigned char)c))) typedef char *PD_STRING; @@ -96,7 +105,6 @@ extern int date_parse(void); #define LPAREN '(' #define RPAREN ')' -#define IS7BIT(x) ((unsigned int)(x) < 0200) /* @@ -156,12 +164,12 @@ date_error(const char GCC_UNUSED *s) /*NOTREACHED*/ } -#line 128 "parsdate.y" +#line 136 "./parsdate.y" typedef union { time_t Number; enum _MERIDIAN Meridian; } YYSTYPE; -#line 164 "y.tab.c" +#line 172 "y.tab.c" #define tDAY 257 #define tDAYZONE 258 #define tMERIDIAN 259 @@ -371,7 +379,7 @@ static short *yyss; static short *yysslim; static YYSTYPE *yyvs; static unsigned yystacksize; -#line 350 "parsdate.y" +#line 358 "./parsdate.y" /* ** An entry in the lexical lookup table. @@ -553,8 +561,7 @@ ToSeconds( if (Meridian == MER24) { if (Hours < 0 || Hours > 23) return -1; - } - else { + } else { if (Hours < 1 || Hours > 12) return -1; if (Hours == 12) @@ -614,9 +621,10 @@ Convert( } Julian = Day - 1 + (Year - EPOCH) * 365; - for (yp = LeapYears; yp < ENDOF(LeapYears); yp++, Julian++) + for (yp = LeapYears; yp < ENDOF(LeapYears); yp++, Julian++) { if (Year <= *yp) break; + } for (i = 1; i < Month; i++) Julian += *++mp; Julian *= SECSPERDAY; @@ -680,7 +688,7 @@ LookupWord( c = p[0]; /* See if we have an abbreviation for a month. */ - if (length == 3 || (length == 4 && p[3] == '.')) + if (length == 3 || (length == 4 && p[3] == '.')) { for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) { q = tp->name; if (c == q[0] && p[1] == q[1] && p[2] == q[2]) { @@ -688,48 +696,54 @@ LookupWord( return tp->type; } } - else - for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) + } else { + for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } + } /* Try for a timezone. */ - for (tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) + for (tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) { if (c == tp->name[0] && p[1] == tp->name[1] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } if (strcmp(buff, "dst") == 0) return tDST; /* Try the units table. */ - for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) + for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } /* Strip off any plural and try the units table again. */ if (--length > 0 && p[length] == 's') { p[length] = '\0'; - for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) + for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { p[length] = 's'; yylval.Number = tp->value; return tp->type; } + } p[length] = 's'; } length++; /* Drop out any periods. */ - for (p = buff, q = (PD_STRING)buff; *q; q++) + for (p = buff, q = (PD_STRING)buff; *q; q++) { if (*q != '.') *p++ = *q; + } *p = '\0'; /* Try the meridians. */ @@ -747,12 +761,13 @@ LookupWord( /* If we saw any periods, try the timezones again. */ if (p - buff != length) { c = buff[0]; - for (p = buff, tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) + for (p = buff, tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) { if (c == tp->name[0] && p[1] == tp->name[1] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } } /* Unknown word -- assume GMT timezone. */ @@ -761,34 +776,52 @@ LookupWord( } +/* + * This returns characters as-is (the ones that are not part of some token), + * and codes greater than 256 (the token values). + * + * yacc generates tables that may use the character value. In particular, + * byacc's yycheck[] table contains integer values for the expected codes from + * this function, which (unless byacc is run locally) are ASCII codes. + * + * The TO_LOCAL() function assumes its input is in ASCII, and the output is + * whatever native encoding is used on the machine, e.g., EBCDIC. + * + * The TO_ASCII() function is the inverse of TO_LOCAL(). + */ static int date_lex(void) { - int c; + int c; char *p; - char buff[20]; - int sign; - int i; - int nesting; + char buff[20]; + int sign; + int i; + int nesting; for(;;) { /* Get first character after the whitespace. */ for(;;) { - while (CTYPE(isspace, *yyInput)) + while (CTYPE(isspace, TO_LOCAL(*yyInput))) yyInput++; - c = *yyInput; + c = TO_LOCAL(*yyInput); /* Ignore RFC 822 comments, typically time zone names. */ if (c != LPAREN) break; - for (nesting = 1; (c = *++yyInput) != RPAREN || --nesting; ) - if (c == LPAREN) + for (nesting = 1; + (c = TO_LOCAL(*++yyInput)) != RPAREN || --nesting; + ) { + if (c == LPAREN) { nesting++; - else if (!IS7BIT(c) || c == '\0' || c == '\r' - || (c == '\\' && ((c = *++yyInput) == '\0' || !IS7BIT(c)))) { + } else if (!IS7BIT(c) || c == '\0' || c == '\r' + || (c == '\\' + && ((c = TO_LOCAL(*++yyInput)) == '\0' + || !IS7BIT(c)))) { /* Lexical error: bad comment. */ - return '?'; + return TO_ASCII('?'); } + } yyInput++; } @@ -797,16 +830,23 @@ date_lex(void) if (c == '-' || c == '+') { sign = c == '-' ? -1 : 1; yyInput++; - if (!CTYPE(isdigit, *yyInput)) { + if (!CTYPE(isdigit, TO_LOCAL(*yyInput))) { /* Return the isolated plus or minus sign. */ --yyInput; return *yyInput++; } - } - else + } else { sign = 0; - for (i = 0; (c = *yyInput++) != '\0' && CTYPE(isdigit, c); ) - i = 10 * i + c - '0'; + } + for (p = buff; + (c = TO_LOCAL(*yyInput++)) != '\0' && CTYPE(isdigit, c); + ) { + if (p < &buff[sizeof buff - 1]) + *p++ = c; + } + *p = '\0'; + i = atoi(buff); + yyInput--; yylval.Number = sign < 0 ? -i : i; return sign ? tSNUMBER : tUNUMBER; @@ -814,9 +854,12 @@ date_lex(void) /* A word? */ if (CTYPE(isalpha, c)) { - for (p = buff; (c = *yyInput++) == '.' || CTYPE(isalpha, c); ) + for (p = buff; + (c = TO_LOCAL(*yyInput++)) == '.' || CTYPE(isalpha, c); + ) { if (p < &buff[sizeof buff - 1]) *p++ = CTYPE(isupper, c) ? tolower(c) : c; + } *p = '\0'; yyInput--; return LookupWord(buff, p - buff); @@ -943,7 +986,7 @@ parsedate( * from the error return value. (Alternately could set errno on error.) */ return Start == -1 ? 0 : Start; } -#line 946 "y.tab.c" +#line 989 "y.tab.c" /* allocate initial stack or double stack size, up to YYMAXDEPTH */ static int yygrowstack(void) { @@ -1129,7 +1172,7 @@ yyreduce: switch (yyn) { case 3: -#line 146 "parsdate.y" +#line 154 "./parsdate.y" { yyHaveTime++; #if defined(lint) @@ -1141,27 +1184,27 @@ case 3: } break; case 4: -#line 155 "parsdate.y" +#line 163 "./parsdate.y" { yyHaveTime++; yyTimezone = yyvsp[0].Number; } break; case 5: -#line 159 "parsdate.y" +#line 167 "./parsdate.y" { yyHaveDate++; } break; case 6: -#line 162 "parsdate.y" +#line 170 "./parsdate.y" { yyHaveDate++; yyHaveTime++; } break; case 7: -#line 166 "parsdate.y" +#line 174 "./parsdate.y" { yyHaveDate++; yyHaveTime++; @@ -1169,13 +1212,13 @@ case 7: } break; case 8: -#line 171 "parsdate.y" +#line 179 "./parsdate.y" { yyHaveRel = 1; } break; case 9: -#line 176 "parsdate.y" +#line 184 "./parsdate.y" { if (yyvsp[-1].Number < 100) { yyHour = yyvsp[-1].Number; @@ -1190,7 +1233,7 @@ case 9: } break; case 10: -#line 188 "parsdate.y" +#line 196 "./parsdate.y" { yyHour = yyvsp[-3].Number; yyMinutes = yyvsp[-1].Number; @@ -1199,7 +1242,7 @@ case 10: } break; case 11: -#line 194 "parsdate.y" +#line 202 "./parsdate.y" { yyHour = yyvsp[-3].Number; yyMinutes = yyvsp[-1].Number; @@ -1209,7 +1252,7 @@ case 11: } break; case 12: -#line 201 "parsdate.y" +#line 209 "./parsdate.y" { yyHour = yyvsp[-5].Number; yyMinutes = yyvsp[-3].Number; @@ -1218,7 +1261,7 @@ case 12: } break; case 13: -#line 207 "parsdate.y" +#line 215 "./parsdate.y" { yyHour = yyvsp[-5].Number; yyMinutes = yyvsp[-3].Number; @@ -1229,28 +1272,28 @@ case 13: } break; case 14: -#line 217 "parsdate.y" +#line 225 "./parsdate.y" { yyval.Number = yyvsp[0].Number; yyDSTmode = DSToff; } break; case 15: -#line 221 "parsdate.y" +#line 229 "./parsdate.y" { yyval.Number = yyvsp[0].Number; yyDSTmode = DSTon; } break; case 16: -#line 225 "parsdate.y" +#line 233 "./parsdate.y" { yyTimezone = yyvsp[-1].Number; yyDSTmode = DSTon; } break; case 17: -#line 229 "parsdate.y" +#line 237 "./parsdate.y" { /* Only allow "GMT+300" and "GMT-0800" */ if (yyvsp[-1].Number != 0) { @@ -1261,14 +1304,14 @@ case 17: } break; case 18: -#line 237 "parsdate.y" +#line 245 "./parsdate.y" { yyval.Number = yyvsp[0].Number; yyDSTmode = DSToff; } break; case 19: -#line 243 "parsdate.y" +#line 251 "./parsdate.y" { int i; @@ -1290,14 +1333,14 @@ case 19: } break; case 20: -#line 264 "parsdate.y" +#line 272 "./parsdate.y" { yyMonth = yyvsp[-2].Number; yyDay = yyvsp[0].Number; } break; case 21: -#line 268 "parsdate.y" +#line 276 "./parsdate.y" { if (yyvsp[-4].Number > 100) { yyYear = yyvsp[-4].Number; @@ -1312,14 +1355,14 @@ case 21: } break; case 22: -#line 280 "parsdate.y" +#line 288 "./parsdate.y" { yyMonth = yyvsp[-1].Number; yyDay = yyvsp[0].Number; } break; case 23: -#line 284 "parsdate.y" +#line 292 "./parsdate.y" { yyMonth = yyvsp[-3].Number; yyDay = yyvsp[-2].Number; @@ -1327,14 +1370,14 @@ case 23: } break; case 24: -#line 289 "parsdate.y" +#line 297 "./parsdate.y" { yyDay = yyvsp[-1].Number; yyMonth = yyvsp[0].Number; } break; case 25: -#line 293 "parsdate.y" +#line 301 "./parsdate.y" { yyDay = yyvsp[-2].Number; yyMonth = yyvsp[-1].Number; @@ -1342,7 +1385,7 @@ case 25: } break; case 26: -#line 298 "parsdate.y" +#line 306 "./parsdate.y" { yyDay = yyvsp[-2].Number; yyMonth = yyvsp[-1].Number; @@ -1350,7 +1393,7 @@ case 26: } break; case 27: -#line 303 "parsdate.y" +#line 311 "./parsdate.y" { yyDay = yyvsp[-3].Number; yyMonth = yyvsp[-1].Number; @@ -1358,7 +1401,7 @@ case 27: } break; case 28: -#line 308 "parsdate.y" +#line 316 "./parsdate.y" { yyDay = yyvsp[-2].Number; yyMonth = -yyvsp[-1].Number; @@ -1368,7 +1411,7 @@ case 28: } break; case 29: -#line 317 "parsdate.y" +#line 325 "./parsdate.y" { yyMonth = yyvsp[-7].Number; yyDay = yyvsp[-6].Number; @@ -1379,42 +1422,42 @@ case 29: } break; case 30: -#line 327 "parsdate.y" +#line 335 "./parsdate.y" { yyRelSeconds += yyvsp[-1].Number * yyvsp[0].Number; } break; case 31: -#line 330 "parsdate.y" +#line 338 "./parsdate.y" { yyRelSeconds += yyvsp[-1].Number * yyvsp[0].Number; } break; case 32: -#line 333 "parsdate.y" +#line 341 "./parsdate.y" { yyRelMonth += yyvsp[-1].Number * yyvsp[0].Number; } break; case 33: -#line 336 "parsdate.y" +#line 344 "./parsdate.y" { yyRelMonth += yyvsp[-1].Number * yyvsp[0].Number; } break; case 34: -#line 341 "parsdate.y" +#line 349 "./parsdate.y" { yyval.Meridian = MER24; } break; case 35: -#line 344 "parsdate.y" +#line 352 "./parsdate.y" { yyval.Meridian = yyvsp[0].Meridian; } break; -#line 1419 "y.tab.c" +#line 1460 "y.tab.c" } yyssp -= yym; yystate = *yyssp; diff --git a/src/parsdate.y b/src/parsdate.y index 77cc7e94..e4e2c751 100644 --- a/src/parsdate.y +++ b/src/parsdate.y @@ -1,6 +1,6 @@ %{ /* - * $LynxId: parsdate.y,v 1.6 2008/09/23 23:13:34 tom Exp $ + * $LynxId: parsdate.y,v 1.10 2008/12/24 21:12:49 tom Exp $ * * This module is adapted and extended from tin, to use for LYmktime(). * @@ -40,7 +40,16 @@ */ #define ENDOF(array) (&array[ARRAY_SIZE(array)]) -#define CTYPE(isXXXXX, c) (((unsigned char)(c) < 128) && isXXXXX(((int)c))) +#ifdef EBCDIC +#define TO_ASCII(c) TOASCII(c) +#define TO_LOCAL(c) FROMASCII(c) +#else +#define TO_ASCII(c) (c) +#define TO_LOCAL(c) (c) +#endif + +#define IS7BIT(x) ((unsigned) TO_ASCII(x) < 128) +#define CTYPE(isXXXXX, c) (IS7BIT(c) && isXXXXX(((unsigned char)c))) typedef char *PD_STRING; @@ -63,7 +72,6 @@ extern int date_parse(void); #define LPAREN '(' #define RPAREN ')' -#define IS7BIT(x) ((unsigned int)(x) < 0200) /* @@ -528,8 +536,7 @@ ToSeconds( if (Meridian == MER24) { if (Hours < 0 || Hours > 23) return -1; - } - else { + } else { if (Hours < 1 || Hours > 12) return -1; if (Hours == 12) @@ -589,9 +596,10 @@ Convert( } Julian = Day - 1 + (Year - EPOCH) * 365; - for (yp = LeapYears; yp < ENDOF(LeapYears); yp++, Julian++) + for (yp = LeapYears; yp < ENDOF(LeapYears); yp++, Julian++) { if (Year <= *yp) break; + } for (i = 1; i < Month; i++) Julian += *++mp; Julian *= SECSPERDAY; @@ -655,7 +663,7 @@ LookupWord( c = p[0]; /* See if we have an abbreviation for a month. */ - if (length == 3 || (length == 4 && p[3] == '.')) + if (length == 3 || (length == 4 && p[3] == '.')) { for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) { q = tp->name; if (c == q[0] && p[1] == q[1] && p[2] == q[2]) { @@ -663,48 +671,54 @@ LookupWord( return tp->type; } } - else - for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) + } else { + for (tp = MonthDayTable; tp < ENDOF(MonthDayTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } + } /* Try for a timezone. */ - for (tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) + for (tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) { if (c == tp->name[0] && p[1] == tp->name[1] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } if (strcmp(buff, "dst") == 0) return tDST; /* Try the units table. */ - for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) + for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } /* Strip off any plural and try the units table again. */ if (--length > 0 && p[length] == 's') { p[length] = '\0'; - for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) + for (tp = UnitsTable; tp < ENDOF(UnitsTable); tp++) { if (c == tp->name[0] && strcmp(p, tp->name) == 0) { p[length] = 's'; yylval.Number = tp->value; return tp->type; } + } p[length] = 's'; } length++; /* Drop out any periods. */ - for (p = buff, q = (PD_STRING)buff; *q; q++) + for (p = buff, q = (PD_STRING)buff; *q; q++) { if (*q != '.') *p++ = *q; + } *p = '\0'; /* Try the meridians. */ @@ -722,12 +736,13 @@ LookupWord( /* If we saw any periods, try the timezones again. */ if (p - buff != length) { c = buff[0]; - for (p = buff, tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) + for (p = buff, tp = TimezoneTable; tp < ENDOF(TimezoneTable); tp++) { if (c == tp->name[0] && p[1] == tp->name[1] && strcmp(p, tp->name) == 0) { yylval.Number = tp->value; return tp->type; } + } } /* Unknown word -- assume GMT timezone. */ @@ -736,34 +751,52 @@ LookupWord( } +/* + * This returns characters as-is (the ones that are not part of some token), + * and codes greater than 256 (the token values). + * + * yacc generates tables that may use the character value. In particular, + * byacc's yycheck[] table contains integer values for the expected codes from + * this function, which (unless byacc is run locally) are ASCII codes. + * + * The TO_LOCAL() function assumes its input is in ASCII, and the output is + * whatever native encoding is used on the machine, e.g., EBCDIC. + * + * The TO_ASCII() function is the inverse of TO_LOCAL(). + */ static int date_lex(void) { - int c; + int c; char *p; - char buff[20]; - int sign; - int i; - int nesting; + char buff[20]; + int sign; + int i; + int nesting; for(;;) { /* Get first character after the whitespace. */ for(;;) { - while (CTYPE(isspace, *yyInput)) + while (CTYPE(isspace, TO_LOCAL(*yyInput))) yyInput++; - c = *yyInput; + c = TO_LOCAL(*yyInput); /* Ignore RFC 822 comments, typically time zone names. */ if (c != LPAREN) break; - for (nesting = 1; (c = *++yyInput) != RPAREN || --nesting; ) - if (c == LPAREN) + for (nesting = 1; + (c = TO_LOCAL(*++yyInput)) != RPAREN || --nesting; + ) { + if (c == LPAREN) { nesting++; - else if (!IS7BIT(c) || c == '\0' || c == '\r' - || (c == '\\' && ((c = *++yyInput) == '\0' || !IS7BIT(c)))) { + } else if (!IS7BIT(c) || c == '\0' || c == '\r' + || (c == '\\' + && ((c = TO_LOCAL(*++yyInput)) == '\0' + || !IS7BIT(c)))) { /* Lexical error: bad comment. */ - return '?'; + return TO_ASCII('?'); } + } yyInput++; } @@ -772,16 +805,23 @@ date_lex(void) if (c == '-' || c == '+') { sign = c == '-' ? -1 : 1; yyInput++; - if (!CTYPE(isdigit, *yyInput)) { + if (!CTYPE(isdigit, TO_LOCAL(*yyInput))) { /* Return the isolated plus or minus sign. */ --yyInput; return *yyInput++; } - } - else + } else { sign = 0; - for (i = 0; (c = *yyInput++) != '\0' && CTYPE(isdigit, c); ) - i = 10 * i + c - '0'; + } + for (p = buff; + (c = TO_LOCAL(*yyInput++)) != '\0' && CTYPE(isdigit, c); + ) { + if (p < &buff[sizeof buff - 1]) + *p++ = c; + } + *p = '\0'; + i = atoi(buff); + yyInput--; yylval.Number = sign < 0 ? -i : i; return sign ? tSNUMBER : tUNUMBER; @@ -789,9 +829,12 @@ date_lex(void) /* A word? */ if (CTYPE(isalpha, c)) { - for (p = buff; (c = *yyInput++) == '.' || CTYPE(isalpha, c); ) + for (p = buff; + (c = TO_LOCAL(*yyInput++)) == '.' || CTYPE(isalpha, c); + ) { if (p < &buff[sizeof buff - 1]) *p++ = CTYPE(isupper, c) ? tolower(c) : c; + } *p = '\0'; yyInput--; return LookupWord(buff, p - buff); |