diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-11-17 14:36:49 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-11-17 14:36:49 -0500 |
commit | e47cfd5646f55de9688ff42df3055fd9c09b503f (patch) | |
tree | aab6f3c275e0f2ef58110654e4d87e5bc6676130 /src | |
parent | b63d287c6f3e67f8574ca2155c661288bc7dcd05 (diff) | |
download | lynx-snapshots-e47cfd5646f55de9688ff42df3055fd9c09b503f.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-95
Diffstat (limited to 'src')
50 files changed, 3655 insertions, 898 deletions
diff --git a/src/GridText.c b/src/GridText.c index 25ebf20b..2cfae408 100644 --- a/src/GridText.c +++ b/src/GridText.c @@ -6175,16 +6175,19 @@ PUBLIC char * HText_setLastOptionValue ARGS7( * No option items yet. */ if (text->last_anchor->input_field->type != F_OPTION_LIST_TYPE) { - if (TRACE) + if (TRACE) { fprintf(stderr, - "HText_setLastOptionValue: last input_field not OPTION_LIST_TYPE but %d, ignoring!\n", + "HText_setLastOptionValue: last input_field not F_OPTION_LIST_TYPE (%d)\n", + F_OPTION_LIST_TYPE); + fprintf(stderr, + " but %d, ignoring!\n", text->last_anchor->input_field->type); + } return NULL; } - new_ptr = text->last_anchor->input_field->select_list = - (OptionType *) calloc(1, sizeof(OptionType)); + (OptionType *)calloc(1, sizeof(OptionType)); if (new_ptr == NULL) outofmem(__FILE__, "HText_setLastOptionValue"); @@ -6197,7 +6200,7 @@ PUBLIC char * HText_setLastOptionValue ARGS7( number++; /* add one more */ op_ptr->next = new_ptr = - (OptionType *) calloc(1, sizeof(OptionType)); + (OptionType *)calloc(1, sizeof(OptionType)); if (new_ptr == NULL) outofmem(__FILE__, "HText_setLastOptionValue"); } @@ -8222,8 +8225,10 @@ PUBLIC void HText_setKcode ARGS3( !strcmp(charset, "euc-kr") || !strcmp(charset, "iso-2022-kr") || !strcmp(charset, "big5") || + !strcmp(charset, "cn-big5") || !strcmp(charset, "euc-cn") || !strcmp(charset, "gb2312") || + !strncmp(charset, "cn-gb", 5) || !strcmp(charset, "iso-2022-cn")) { text->kcode = EUC; } else { diff --git a/src/HTInit.c b/src/HTInit.c index 38856772..fdcd7d07 100644 --- a/src/HTInit.c +++ b/src/HTInit.c @@ -151,6 +151,7 @@ PUBLIC void HTFormatInit NOARGS HTSetConversion("text/sgml", "www/source", HTPlainPresent, 1.0, 0.0, 0.0, 0); HTSetConversion("text/sgml", "www/present", HTMLPresent, 1.0, 0.0, 0.0, 0); HTSetConversion("text/plain","www/present", HTPlainPresent, 1.0, 0.0, 0.0, 0); + HTSetConversion("text/plain","www/source", HTPlainPresent, 1.0, 0.0, 0.0, 0); HTSetConversion("text/html", "www/source", 
HTPlainPresent, 1.0, 0.0, 0.0, 0); HTSetConversion("text/html", "text/x-c", HTMLToC, 0.5, 0.0, 0.0, 0); HTSetConversion("text/html", "text/plain", HTMLToPlain, 0.5, 0.0, 0.0, 0); @@ -235,10 +236,12 @@ struct MailcapEntry { PRIVATE int ExitWithError PARAMS((char *txt)); PRIVATE int PassesTest PARAMS((struct MailcapEntry *mc)); -#define LINE_BUF_SIZE 2000 -#define TMPFILE_NAME_SIZE 256 +#define LINE_BUF_SIZE 2048 +#define TMPFILE_NAME_SIZE 256 -PRIVATE char *GetCommand ARGS2(char *,s, char **,t) +PRIVATE char *GetCommand ARGS2( + char *, s, + char **, t) { char *s2; int quoted = 0; @@ -257,7 +260,7 @@ PRIVATE char *GetCommand ARGS2(char *,s, char **,t) quoted = 0; } else { if (*s == ';') { - *s2 = 0; + *s2 = '\0'; return(++s); } if (*s == '\\') { @@ -268,12 +271,13 @@ PRIVATE char *GetCommand ARGS2(char *,s, char **,t) } } } - *s2 = 0; + *s2 = '\0'; return(NULL); } /* no leading or trailing space, all lower case */ -PRIVATE char *Cleanse ARGS1(char *,s) +PRIVATE char *Cleanse ARGS1( + char *, s) { char *tmp, *news; @@ -291,19 +295,21 @@ PRIVATE char *Cleanse ARGS1(char *,s) return(news); } -PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) +PRIVATE int ProcessMailcapEntry ARGS2( + FILE *, fp, + struct MailcapEntry *, mc) { int i, j; size_t rawentryalloc = 2000, len; char *rawentry, *s, *t, *LineBuf; - LineBuf = malloc(LINE_BUF_SIZE); + LineBuf = (char *)malloc(LINE_BUF_SIZE); if (!LineBuf) ExitWithError("Out of memory"); - rawentry = malloc(1 + rawentryalloc); + rawentry = (char *)malloc(1 + rawentryalloc); if (!rawentry) ExitWithError("Out of memory"); - *rawentry = 0; + *rawentry = '\0'; while (fgets(LineBuf, LINE_BUF_SIZE, fp)) { if (LineBuf[0] == '#') continue; @@ -311,7 +317,7 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) if (len == 0) continue; if (LineBuf[len-1] == '\n') - LineBuf[--len] = 0; + LineBuf[--len] = '\0'; if ((len + strlen(rawentry)) > rawentryalloc) { rawentryalloc += 2000; rawentry = 
realloc(rawentry, rawentryalloc+1); @@ -319,7 +325,7 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) ExitWithError("Out of memory"); } if (len > 0 && LineBuf[len-1] == '\\') { - LineBuf[len-1] = 0; + LineBuf[len-1] = '\0'; strcat(rawentry, LineBuf); } else { strcat(rawentry, LineBuf); @@ -340,20 +346,20 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) if (s == NULL) { if (TRACE) { fprintf(stderr, - "metamail: Ignoring invalid mailcap entry: %s\n", + "ProcessMailcapEntry: Ignoring invalid mailcap entry: %s\n", rawentry); } FREE(rawentry); return(0); } - *s++ = 0; + *s++ = '\0'; if (!strncasecomp(rawentry, "text/html", 9) || !strncasecomp(rawentry, "text/plain", 10)) { --s; *s = ';'; if (TRACE) { fprintf(stderr, - "metamail: Ignoring mailcap entry: %s\n", + "ProcessMailcapEntry: Ignoring mailcap entry: %s\n", rawentry); } FREE(rawentry); @@ -371,7 +377,7 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) mc->testcommand = NULL; mc->label = NULL; mc->printcommand = NULL; - mc->contenttype = malloc(1+strlen(rawentry)); + mc->contenttype = (char *)malloc(1 + strlen(rawentry)); if (!mc->contenttype) ExitWithError("Out of memory"); strcpy(mc->contenttype, rawentry); @@ -389,7 +395,9 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) t = GetCommand(s, &mallocd_string); arg = mallocd_string; eq = strchr(arg, '='); - if (eq) *eq++ = 0; + if (eq) { + *eq++ = '\0'; + } if (arg && *arg) { arg = Cleanse(arg); if (!strcmp(arg, "needsterminal")) { @@ -401,7 +409,7 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) StrAllocCopy(mc->testcommand, eq); if (TRACE) fprintf(stderr, - "[HTInit]: found testcommand:%s\n", + "ProcessMailcapEntry: Found testcommand:%s\n", mc->testcommand); } else if (eq && !strcmp(arg, "description")) { mc->label = eq; @@ -423,7 +431,8 @@ PRIVATE int ProcessMailcapEntry ARGS2(FILE *,fp, struct MailcapEntry *,mc) } else if 
(strcmp(arg, "notes")) { /* IGNORE notes field */ if (*arg && TRACE) fprintf(stderr, - "metamail: Ignoring mailcap flag: %s\n", arg); + "ProcessMailcapEntry: Ignoring mailcap flag '%s'.\n", + arg); } } @@ -437,7 +446,7 @@ assign_presentation: if (PassesTest(mc)) { if (TRACE) fprintf(stderr, - "[HTInit] Setting up conversion %s : %s\n", + "ProcessMailcapEntry Setting up conversion %s : %s\n", mc->contenttype, mc->command); HTSetPresentation(mc->contenttype, mc->command, mc->quality, 3.0, 0.0, mc->maxbytes); @@ -449,11 +458,11 @@ assign_presentation: } PRIVATE void BuildCommand ARGS5( - char **, pBuf, - size_t, Bufsize, - char *, controlstring, - char *, TmpFileName, - size_t, TmpFileLen) + char **, pBuf, + size_t, Bufsize, + char *, controlstring, + char *, TmpFileName, + size_t, TmpFileLen) { char *from, *to; int prefixed = 0; @@ -469,7 +478,7 @@ PRIVATE void BuildCommand ARGS5( case 'F': if (TRACE) { fprintf(stderr, - "metamail: Bad mailcap \"test\" clause: %s\n", + "BuildCommand: Bad mailcap \"test\" clause: %s\n", controlstring); } case 's': @@ -478,7 +487,9 @@ PRIVATE void BuildCommand ARGS5( *to = '\0'; if (TRACE) { fprintf(stderr, - "Too long mailcap \"test\" clause, ignoring: %s%s...\n", + "BuildCommand: Too long mailcap \"test\" clause,\n"); + fprintf(stderr, + " ignoring: %s%s...\n", *pBuf, TmpFileName); } **pBuf = '\0'; @@ -491,7 +502,7 @@ PRIVATE void BuildCommand ARGS5( default: if (TRACE) { fprintf(stderr, - "Ignoring unrecognized format code in mailcap file: %%%c\n", + "BuildCommand: Ignoring unrecognized format code in mailcap file '%%%c'.\n", *from); } break; @@ -505,17 +516,20 @@ PRIVATE void BuildCommand ARGS5( (*pBuf)[Bufsize - 1] = '\0'; if (TRACE) { fprintf(stderr, - "Too long mailcap \"test\" clause, ignoring: %s...\n", + "BuildCommand: Too long mailcap \"test\" clause,\n"); + fprintf(stderr, + " ignoring: %s...\n", *pBuf); } **pBuf = '\0'; return; } } - *to = 0; + *to = '\0'; } -PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) +PRIVATE 
int PassesTest ARGS1( + struct MailcapEntry *, mc) { int result; char *cmd, TmpFileName[TMPFILE_NAME_SIZE]; @@ -533,28 +547,30 @@ PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) if (0 == strcasecomp(mc->testcommand, "test -n \"$DISPLAY\"")) { FREE(mc->testcommand); if (TRACE) - fprintf(stderr,"Testing for XWINDOWS environment.\n"); + fprintf(stderr, + "PassesTest: Testing for XWINDOWS environment - "); if ((cp = getenv(DISPLAY)) != NULL && *cp != '\0') { if (TRACE) - fprintf(stderr,"[HTInit] Test passed!\n"); + fprintf(stderr,"passed!\n"); return(0 == 0); } else { if (TRACE) - fprintf(stderr,"[HTInit] Test failed!\n"); + fprintf(stderr,"failed!\n"); return(-1 == 0); } } if (0 == strcasecomp(mc->testcommand, "test -z \"$DISPLAY\"")) { FREE(mc->testcommand); if (TRACE) - fprintf(stderr,"Testing for NON_XWINDOWS environment.\n"); + fprintf(stderr, + "PassesTest: Testing for NON_XWINDOWS environment - "); if (!((cp = getenv(DISPLAY)) != NULL && *cp != '\0')) { if (TRACE) - fprintf(stderr,"[HTInit] Test passed!\n"); + fprintf(stderr,"passed!\n"); return(0 == 0); } else { if (TRACE) - fprintf(stderr,"[HTInit] Test failed!\n"); + fprintf(stderr,"failed!\n"); return(-1 == 0); } } @@ -565,8 +581,9 @@ PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) if (0 == strcasecomp(mc->testcommand, "test -n \"$LYNX_VERSION\"")){ FREE(mc->testcommand); if (TRACE) { - fprintf(stderr,"Testing for LYNX environment.\n"); - fprintf(stderr,"[HTInit] Test passed!\n"); + fprintf(stderr, + "PassesTest: Testing for LYNX environment - "); + fprintf(stderr,"passed, of course!\n"); } return(0 == 0); } else @@ -576,8 +593,9 @@ PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) if (0 == strcasecomp(mc->testcommand, "test -z \"$LYNX_VERSION\"")) { FREE(mc->testcommand); if (TRACE) { - fprintf(stderr,"Testing for non-LYNX environment.\n"); - fprintf(stderr,"[HTInit] Test failed!\n"); + fprintf(stderr, + "PassesTest: Testing for non-LYNX environment - "); + fprintf(stderr,"failed, of 
course!\n"); } return(-1 == 0); } @@ -591,9 +609,10 @@ PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) ExitWithError("Out of memory"); BuildCommand(&cmd, 1024, mc->testcommand, - TmpFileName, strlen(TmpFileName)); + TmpFileName, + strlen(TmpFileName)); if (TRACE) - fprintf(stderr,"Executing test command: %s\n", cmd); + fprintf(stderr,"PassesTest: Executing test command: %s\n", cmd); result = system(cmd); FREE(cmd); @@ -604,24 +623,28 @@ PRIVATE int PassesTest ARGS1(struct MailcapEntry *,mc) FREE(mc->testcommand); if (TRACE && result) - fprintf(stderr,"[HTInit] Test failed!\n"); + fprintf(stderr,"PassesTest: Test failed!\n"); else if (TRACE) - fprintf(stderr,"[HTInit] Test passed!\n"); + fprintf(stderr,"PassesTest: Test passed!\n"); return(result == 0); } -PRIVATE int ProcessMailcapFile ARGS1(char *,file) +PRIVATE int ProcessMailcapFile ARGS1( + char *, file) { struct MailcapEntry mc; FILE *fp; if (TRACE) - fprintf (stderr, "Loading types config file '%s'\n", file); - + fprintf(stderr, + "ProcessMailcapFile: Loading file '%s'.\n", + file); if ((fp = fopen(file, "r")) == NULL) { if (TRACE) - fprintf (stderr, "Could not open types config file '%s'\n",file); + fprintf(stderr, + "ProcessMailcapFile: Could not open '%s'.\n", + file); return(-1 == 0); } @@ -632,7 +655,8 @@ PRIVATE int ProcessMailcapFile ARGS1(char *,file) return(0 == 0); } -PRIVATE int ExitWithError ARGS1(char *,txt) +PRIVATE int ExitWithError ARGS1( + char *, txt) { if (txt) fprintf(stderr, "metamail: %s\n", txt); @@ -652,7 +676,8 @@ PRIVATE int ExitWithError ARGS1(char *,txt) } -PRIVATE int HTLoadTypesConfigFile ARGS1(char *,fn) +PRIVATE int HTLoadTypesConfigFile ARGS1( + char *, fn) { return ProcessMailcapFile(fn); } @@ -680,7 +705,8 @@ PUBLIC void HTFileInit NOARGS FILE *fp; if (TRACE) - fprintf (stderr, "@@@ Using default extension map\n"); + fprintf(stderr, + "HTFileInit: Loading default (HTInit) extension maps.\n"); /* default suffix interpretation */ HTSetSuffix("*", "text/plain", "7bit", 
1.0); @@ -908,13 +934,19 @@ PUBLIC void HTFileInit NOARGS /* -------------------- Extension config file reading --------------------- */ -/* The following is lifted from NCSA httpd 1.0a1, by Rob McCool; - NCSA httpd is in the public domain, as is this code. */ -/* modified Oct 97 - kw */ +/* + * The following is lifted from NCSA httpd 1.0a1, by Rob McCool; + * NCSA httpd is in the public domain, as is this code. + * + * Modified Oct 97 - KW + */ #define MAX_STRING_LEN 256 -PRIVATE int HTGetLine ARGS3(char *,s, int,n, FILE *,f) +PRIVATE int HTGetLine ARGS3( + char *, s, + int, n, + FILE *, f) { register int i = 0, r; @@ -941,7 +973,11 @@ PRIVATE int HTGetLine ARGS3(char *,s, int,n, FILE *,f) } } -PRIVATE void HTGetWord ARGS4(char *,word, char *,line, char ,stop, char ,stop2) +PRIVATE void HTGetWord ARGS4( + char *, word, + char *, line, + char , stop, + char , stop2) { int x = 0, y; @@ -960,18 +996,21 @@ PRIVATE void HTGetWord ARGS4(char *,word, char *,line, char ,stop, char ,stop2) return; } -PRIVATE int HTLoadExtensionsConfigFile ARGS1(char *,fn) +PRIVATE int HTLoadExtensionsConfigFile ARGS1( + char *, fn) { char l[MAX_STRING_LEN],w[MAX_STRING_LEN],*ct; FILE *f; int x, count = 0; if (TRACE) - fprintf (stderr, "Loading extensions config file '%s'\n", fn); + fprintf(stderr, + "HTLoadExtensionsConfigFile: Loading file '%s'.\n", fn); if ((f = fopen(fn,"r")) == NULL) { if (TRACE) - fprintf(stderr, "Could not open extensions config file '%s'\n",fn); + fprintf(stderr, + "HTLoadExtensionsConfigFile: Could not open '%s'.\n", fn); return count; } @@ -996,10 +1035,12 @@ PRIVATE int HTLoadExtensionsConfigFile ARGS1(char *,fn) for (x = 0; w[x]; x++) ext[x+1] = TOLOWER(w[x]); ext[0] = '.'; - ext[strlen(w)+1] = 0; + ext[strlen(w)+1] = '\0'; - if (TRACE) - fprintf (stderr, "SETTING SUFFIX '%s' to '%s'\n", ext, ct); + if (TRACE) { + fprintf (stderr, + "SETTING SUFFIX '%s' to '%s'.\n", ext, ct); + } if (strstr(ct, "tex") != NULL || strstr(ct, "postscript") != NULL || diff --git 
a/src/HTML.c b/src/HTML.c index a020ba56..283c375f 100644 --- a/src/HTML.c +++ b/src/HTML.c @@ -139,8 +139,10 @@ PRIVATE void actually_set_style ARGS1(HTStructured *, me) LYGetChartransInfo(me); UCSetTransParams(&me->T, me->UCLYhndl, me->UCI, - HTAnchor_getUCLYhndl(me->node_anchor,UCT_STAGE_HTEXT), - HTAnchor_getUCInfoStage(me->node_anchor,UCT_STAGE_HTEXT)); + HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_HTEXT), + HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_HTEXT)); #endif /* EXP_CHARTRANS */ me->text = HText_new2(me->node_anchor, me->target); HText_beginAppend(me->text); @@ -2793,7 +2795,15 @@ PRIVATE void HTML_start_element ARGS6( HTAnchor_setTitle(dest, title); if (dest && dest_ismap) dest->isISMAPScript = TRUE; - if (dest && dest_char_set >= 0) { + if (dest && dest_char_set >= 0) { + /* + ** Load the anchor's chartrans structures. + ** This should be done more intelligently + ** when setting up the structured object. - FM + */ + HTAnchor_setUCInfoStage(dest, dest_char_set, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); HTAnchor_setUCInfoStage(dest, dest_char_set, UCT_STAGE_PARSER, UCT_SETBY_LINK); @@ -4636,8 +4646,10 @@ PRIVATE void HTML_start_element ARGS6( * Not yet implemented. */ HTML_put_string(me,"[RANGE Input] (Not yet implemented.)"); +#ifdef NOTDEFINED if (me->inFORM) HText_DisableCurrentForm(); +#endif /* NOTDEFINED */ if (TRACE) fprintf(stderr, "HTML: Ignoring TYPE=\"range\"\n"); break; @@ -4657,8 +4669,10 @@ PRIVATE void HTML_start_element ARGS6( HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); } +#ifdef NOTDEFINED if (me->inFORM) HText_DisableCurrentForm(); +#endif /* NOTDEFINED */ if (TRACE) fprintf(stderr, "HTML: Ignoring TYPE=\"file\"\n"); break; @@ -5537,9 +5551,23 @@ PRIVATE void HTML_start_element ARGS6( /* * Not implemented. Just treat as a division * with respect to any ALIGN attribute, with - * a default of HT_LEFT. - FM + * a default of HT_LEFT, or leave as a PRE + * block if we are presently in one. 
- FM */ + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } me->inTABLE = TRUE; + if (me->sp[0].tag_number == HTML_PRE) { + UPDATE_STYLE; + LYCheckForID(me, present, value, (int)HTML_TABLE_ID); + break; + } if (me->Division_Level < (MAX_NESTING - 1)) { me->Division_Level++; } else if (TRACE) { @@ -5580,6 +5608,14 @@ PRIVATE void HTML_start_element ARGS6( * if needed, act on an ALIGN attribute if present, * and check for an ID link. - FM */ + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } UPDATE_STYLE; if (HText_LastLineSize(me->text, FALSE)) { HText_setLastChar(me->text, ' '); /* absorb white space */ @@ -5619,6 +5655,14 @@ PRIVATE void HTML_start_element ARGS6( /* * Not yet implemented. Just check for an ID link. - FM */ + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } UPDATE_STYLE; CHECK_ID(HTML_TR_ID); break; @@ -5628,11 +5672,27 @@ PRIVATE void HTML_start_element ARGS6( /* * Not yet implemented. Just check for an ID link. 
- FM */ + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } UPDATE_STYLE; CHECK_ID(HTML_COL_ID); break; case HTML_TH: + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } UPDATE_STYLE; CHECK_ID(HTML_TD_ID); /* @@ -5643,6 +5703,14 @@ PRIVATE void HTML_start_element ARGS6( break; case HTML_TD: + if (me->inA) { + SET_SKIP_STACK(HTML_A); + HTML_end_element(me, HTML_A, (char **)&include); + } + if (me->Underline_Level > 0) { + SET_SKIP_STACK(HTML_U); + HTML_end_element(me, HTML_U, (char **)&include); + } UPDATE_STYLE; CHECK_ID(HTML_TD_ID); /* @@ -6120,8 +6188,8 @@ PRIVATE void HTML_end_element ARGS3( * paragraph style's spaceAfter. Don't insert * spacing required for starting the next paragraph * as required by its style->spaceBefore, since we - * don't know yet whether the next structure element - * (if any) will be. If it is a another P, it will + * don't know yet what the next structure element + * (if any) will be. If it is another P, it will * take care of its leading space on its own. - kw */ if (me->List_Nesting_Level >= 0) { @@ -7107,6 +7175,9 @@ End_Object: case HTML_TABLE: me->inTABLE = FALSE; + if (me->sp[0].tag_number == HTML_PRE) { + break; + } if (me->Division_Level >= 0) me->Division_Level--; if (me->Division_Level >= 0) @@ -7816,10 +7887,98 @@ PUBLIC HTStructured* HTML_new ARGS3( #endif #ifdef EXP_CHARTRANS + +#ifdef NOTUSED_FOTEMODS + /* + ** If the anchor already has stage info, make sure that it is + ** appropriate for the current display charset. 
HTMIMEConvert() + ** does this for the http and https schemes, and HTCharsetFormat() + ** does it for the file and ftp schemes, but we need to do it, + ** if necessary, for the gateway schemes. - FM + */ + if (me->node_anchor->UCStages) { + if (HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_STRUCTURED) != current_char_set) { + /* + ** We are reloading due to a change in the display character + ** set. Free the stage info and let the stage info creation + ** mechanisms create a new UCStages structure appropriate for + ** the current display character set. - FM + */ + + FREE(anchor->UCStages); + } else if (HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_MIME) == current_char_set) { + /* + ** The MIME stage is set to the current display character + ** set. If it is CJK, and HTCJK does not point to a CJK + ** character set, assume we are reloading due to a raw + ** mode toggle and reset the MIME and PARSER stages to + ** an ISO Latin 1 default. - FM + */ + LYUCcharset *p_in = HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_MIME); + if (p_in->enc == UCT_ENC_CJK && HTCJK == NOCJK) { + HTAnchor_resetUCInfoStage(me->node_anchor, 0, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + HTAnchor_setUCInfoStage(me->node_anchor, 0, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + HTAnchor_resetUCInfoStage(me->node_anchor, 0, + UCT_STAGE_PARSER, + UCT_SETBY_DEFAULT); + HTAnchor_setUCInfoStage(me->node_anchor, 0, + UCT_STAGE_PARSER, + UCT_SETBY_DEFAULT); + } + } + } +#endif /* NOTUSED_FOTEMODS */ + + /* + ** Create a chartrans stage info structure for the anchor, + ** if it does not exist already (in which case the default + ** MIME stage info will be loaded as well), and load the + ** HTML stage info into me->UCI and me->UCLYhndl. - FM + */ LYGetChartransInfo(me); UCTransParams_clear(&me->T); #endif /* EXP_CHARTRANS */ + /* + ** Load the existing or default input charset info + ** into the holding elements. We'll believe what + ** is indicated for UCT_STAGE_PARSER. 
- FM + */ + me->inUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_PARSER); + if (me->inUCLYhndl < 0) { + me->inUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_MIME); + me->inUCI = HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_MIME); + } else { + me->inUCI = HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_PARSER); + } + + /* + ** Load the existing or default output charset info + ** into the holding elements, UCT_STAGE_STRUCTURED + ** should be the same as UCT_STAGE_HTEXT at this point, + ** but we could check, perhaps. - FM + */ + me->outUCI = HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_STRUCTURED); + me->outUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor, + UCT_STAGE_STRUCTURED); +#ifdef NOTUSED_FOTEMODS + UCSetTransParams(&me->T, + me->inUCLYhndl, me->inUCI, + me->outUCLYhndl, me->outUCI); +#endif + me->target = stream; if (stream) me->targetClass = *stream->isa; /* Copy pointers */ diff --git a/src/HTML.h b/src/HTML.h index c5a13c31..2a50d3b1 100644 --- a/src/HTML.h +++ b/src/HTML.h @@ -146,12 +146,46 @@ struct _HTStructured { BOOL needBoldH; -#ifdef EXP_CHARTRANS - LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ - int UCLYhndl; /* tells us what charset we are fed */ - UCTransParams T; + /* + ** UCI and UCLYhndl give the UCInfo and charset registered for + ** the HTML parser in the node_anchor's UCStages structure. It + ** indicates what is fed to the HTML parser as the stream of character + ** data (not necessarily tags and attributes). It should currently + ** always be set to be the same as UCI and UCLYhndl for the HTEXT stage + ** in the node_anchor's UCStages structure, since the HTML parser sends + ** its input character data to the output without further charset + ** translation. + */ + LYUCcharset * UCI; + int UCLYhndl; + /* + ** inUCI and inUCLYhndl indicate the UCInfo and charset which the + ** HTML parser treats as the input charset. 
It is normally set + ** to the UCI and UCLYhndl for the SGML parser in the node_anchor's + ** UCStages structure (which may be a dummy, based on the MIME + ** parser's UCI and UCLYhndl in that structure, when we are handling + ** a local file or non-http(s) gateway). It could be changed + ** temporarily by the HTML parser, for conversions of attribute + ** strings, but should be reset once done. - FM + */ + LYUCcharset * inUCI; + int inUCLYhndl; + /* + ** outUCI and outUCLYhndl indicate the UCInfo and charset which + ** the HTML parser treats as the output charset. It is normally + ** set to its own UCI and UCLYhndl. It could be changed for + ** conversions of attribute strings, but should be reset once + ** done. - FM + */ + LYUCcharset * outUCI; + int outUCLYhndl; + /* + ** T holds the transformation rules for conversions of strings + ** between the input and output charsets by the HTML parser. - FM + */ + UCTransParams T; + int tag_charset; /* charset for attribute values etc. */ -#endif }; struct _HTStream { diff --git a/src/LYCharSets.c b/src/LYCharSets.c index 17fdd3af..78b25519 100644 --- a/src/LYCharSets.c +++ b/src/LYCharSets.c @@ -1,15 +1,13 @@ #include "HTUtils.h" #include "tcp.h" #include "HTCJK.h" +#include "HTMLDTD.h" #include "LYGlobalDefs.h" #include "UCMap.h" #include "UCDefs.h" #include "LYCharSets.h" #include "LYCharUtils.h" -#ifdef EXP_CHARTRANS -#include "UCMap.h" -#endif /* EXP_CHARTRANS */ #include "HTFont.h" #include "GridText.h" #include "LYCurses.h" @@ -25,31 +23,24 @@ extern BOOL HTPassHighCtrlNum; extern HTCJKlang HTCJK; PUBLIC HTkcode kanji_code = NOKANJI; PUBLIC BOOLEAN LYHaveCJKCharacterSet = FALSE; - -#ifdef EXP_CHARTRANS extern void UCInit NOARGS; extern int UCInitialized; -#else -#ifndef MAXCHARSETS -#define MAXCHARSETS -#endif -#ifndef MAXCHARSETSP -#define MAXCHARSETSP -#endif -#endif /* EXP_CHARTRANS */ +PUBLIC int LYNumCharsets = 0; /* Will be initialized later by UC_Register. */ -/* INSTRUCTIONS for adding new character sets !!!! 
- * - * Make up a character set and add it in the same - * style as the ISO_LATIN1 set below, giving it a unique name. +/* + * INSTRUCTIONS for adding new character sets which do not have + * Unicode tables. * - * Near the end of this file is a place marked "Add your character sets HERE". + * Make up a character set and add it in the same + * style as the ISO_LATIN1 set below, giving it a unique name. * - * Add the name of the set to LYCharSets at the bottom of this file, and - * also add it to the LYchar_set_names table below LYCharSets. - * LYCharSets and LYchar_set_names MUST have the same order. + * Near the end of this file is a place marked "Add your character sets HERE". * - * No string substitutions can exceed 5 characeters. + * Add the name of the set to LYCharSets at the bottom of this file, and + * also add it to the LYchar_set_names table below LYCharSets. + * Similarly add the appropriate information to LYCharSet_UC and to + * LYlowest_eightbit below that. + * These tables all MUST have the same order. */ /* Entity values -- for ISO Latin 1 local representation @@ -57,49 +48,49 @@ extern int UCInitialized; ** This MUST match exactly the table referred to in the DTD! 
*/ PRIVATE char * ISO_Latin1[] = { - "\306", /* capital AE diphthong (ligature) - AElig */ - "\301", /* capital A, acute accent - Aacute */ - "\302", /* capital A, circumflex accent - Acirc */ - "\300", /* capital A, grave accent - Agrave */ - "\305", /* capital A, ring - Aring */ - "\303", /* capital A, tilde - Atilde */ - "\304", /* capital A, dieresis or umlaut mark - Auml */ - "\307", /* capital C, cedilla - Ccedil */ - "\320", /* capital Eth or D with stroke - Dstrok */ - "\320", /* capital Eth, Icelandic - ETH */ - "\311", /* capital E, acute accent - Eacute */ - "\312", /* capital E, circumflex accent - Ecirc */ - "\310", /* capital E, grave accent - Egrave */ - "\313", /* capital E, dieresis or umlaut mark - Euml */ - "\315", /* capital I, acute accent - Iacute */ - "\316", /* capital I, circumflex accent - Icirc */ - "\314", /* capital I, grave accent - Igrave */ - "\317", /* capital I, dieresis or umlaut mark - Iuml */ - "\321", /* capital N, tilde - Ntilde */ - "\323", /* capital O, acute accent - Oacute */ - "\324", /* capital O, circumflex accent - Ocirc */ - "\322", /* capital O, grave accent - Ograve */ - "\330", /* capital O, slash - Oslash */ - "\325", /* capital O, tilde - Otilde */ - "\326", /* capital O, dieresis or umlaut mark - Ouml */ - "\336", /* capital THORN, Icelandic - THORN */ - "\332", /* capital U, acute accent - Uacute */ - "\333", /* capital U, circumflex accent - Ucirc */ - "\331", /* capital U, grave accent - Ugrave */ - "\334", /* capital U, dieresis or umlaut mark - Uuml */ - "\335", /* capital Y, acute accent - Yacute */ - "\341", /* small a, acute accent - aacute */ - "\342", /* small a, circumflex accent - acirc */ + "\306", /* capital AE diphthong (ligature) (Æ) - AElig */ + "\301", /* capital A, acute accent (Á) - Aacute */ + "\302", /* capital A, circumflex accent (Â) - Acirc */ + "\300", /* capital A, grave accent (À) - Agrave */ + "\305", /* capital A, ring - Aring (Å) */ + "\303", /* capital A, tilde - Atilde (Ã) */ + 
"\304", /* capital A, dieresis or umlaut mark (Ä) - Auml */ + "\307", /* capital C, cedilla - Ccedil (Ç) */ + "\320", /* capital Eth (Ð) or D with stroke - Dstrok */ + "\320", /* capital Eth, Icelandic (Ð) - ETH */ + "\311", /* capital E, acute accent (É) - Eacute */ + "\312", /* capital E, circumflex accent (Ê) - Ecirc */ + "\310", /* capital E, grave accent (È) - Egrave */ + "\313", /* capital E, dieresis or umlaut mark (Ë) - Euml */ + "\315", /* capital I, acute accent (Í) - Iacute */ + "\316", /* capital I, circumflex accent (Î) - Icirc */ + "\314", /* capital I, grave accent (È) - Igrave */ + "\317", /* capital I, dieresis or umlaut mark (Ï) - Iuml */ + "\321", /* capital N, tilde (Ñ) - Ntilde */ + "\323", /* capital O, acute accent (Ó) - Oacute */ + "\324", /* capital O, circumflex accent (Ô) - Ocirc */ + "\322", /* capital O, grave accent (Ò) - Ograve */ + "\330", /* capital O, slash (Ø) - Oslash */ + "\325", /* capital O, tilde (Õ) - Otilde */ + "\326", /* capital O, dieresis or umlaut mark (Ö) - Ouml */ + "\336", /* capital THORN, Icelandic (Þ) - THORN */ + "\332", /* capital U, acute accent (Ú) - Uacute */ + "\333", /* capital U, circumflex accent (Û) - Ucirc */ + "\331", /* capital U, grave accent (Ù) - Ugrave */ + "\334", /* capital U, dieresis or umlaut mark (Ü) - Uuml */ + "\335", /* capital Y, acute accent (Ý) - Yacute */ + "\341", /* small a, acute accent (á) - aacute */ + "\342", /* small a, circumflex accent (â) - acirc */ "\264", /* spacing acute (´) - acute */ - "\346", /* small ae diphthong (ligature) - aelig */ - "\340", /* small a, grave accent - agrave */ - "\046", /* ampersand - amp */ - "\345", /* small a, ring - aring */ - "\343", /* small a, tilde - atilde */ - "\344", /* small a, dieresis or umlaut mark - auml */ + "\346", /* small ae diphthong (ligature) (æ) - aelig */ + "\340", /* small a, grave accent (à) - agrave */ + "\046", /* ampersand (&) - amp */ + "\345", /* small a, ring (å) - aring */ + "\343", /* small a, tilde (ã) - atilde 
*/ + "\344", /* small a, dieresis or umlaut mark (ä) - auml */ "\246", /* broken vertical bar (¦) - brkbar */ "\246", /* broken vertical bar (¦) - brvbar */ - "\347", /* small c, cedilla - ccedil */ + "\347", /* small c, cedilla (ç) - ccedil */ "\270", /* spacing cedilla (¸) - cedil */ "\242", /* cent sign (¢) - cent */ "\251", /* copyright sign (©) - copy */ @@ -107,28 +98,28 @@ PRIVATE char * ISO_Latin1[] = { "\260", /* degree sign (°) - deg */ "\250", /* spacing diaresis (¨) - die */ "\367", /* division sign (÷) - divide */ - "\351", /* small e, acute accent - eacute */ - "\352", /* small e, circumflex accent - ecirc */ - "\350", /* small e, grave accent - egrave */ + "\351", /* small e, acute accent (é) - eacute */ + "\352", /* small e, circumflex accent (ê) - ecirc */ + "\350", /* small e, grave accent (è) - egrave */ "-", /* dash the width of emsp - emdash */ "\002", /* emsp, em space - not collapsed NEVER CHANGE THIS - emsp */ "-", /* dash the width of ensp - endash */ "\002", /* ensp, en space - not collapsed NEVER CHANGE THIS - ensp */ - "\360", /* small eth, Icelandic - eth */ - "\353", /* small e, dieresis or umlaut mark - euml */ + "\360", /* small eth, Icelandic (ð) - eth */ + "\353", /* small e, dieresis or umlaut mark (ë) - euml */ "\275", /* fraction 1/2 (½) - frac12 */ "\274", /* fraction 1/4 (¼) - frac14 */ "\276", /* fraction 3/4 (¾) - frac34 */ - "\076", /* greater than - gt */ + "\076", /* greater than (>) - gt */ "\257", /* spacing macron (¯) - hibar */ - "\355", /* small i, acute accent - iacute */ - "\356", /* small i, circumflex accent - icirc */ + "\355", /* small i, acute accent (í) - iacute */ + "\356", /* small i, circumflex accent (î) - icirc */ "\241", /* inverted exclamation mark (¡) - iexcl */ - "\354", /* small i, grave accent - igrave */ + "\354", /* small i, grave accent (ì) - igrave */ "\277", /* inverted question mark (¿) - iquest */ - "\357", /* small i, dieresis or umlaut mark - iuml */ + "\357", /* small i, dieresis or 
umlaut mark (ï) - iuml */ "\253", /* angle quotation mark, left («) - laquo */ - "\074", /* less than - lt */ + "\074", /* less than (<) - lt */ "\257", /* spacing macron (¯) - macr */ "-", /* dash the width of emsp - mdash */ "\265", /* micro sign (µ) - micro */ @@ -136,19 +127,19 @@ PRIVATE char * ISO_Latin1[] = { "\001", /* nbsp non-breaking space NEVER CHANGE THIS - nbsp */ "-", /* dash the width of ensp - ndash */ "\254", /* negation sign (¬) - not */ - "\361", /* small n, tilde - ntilde */ - "\363", /* small o, acute accent - oacute */ - "\364", /* small o, circumflex accent - ocirc */ - "\362", /* small o, grave accent - ograve */ + "\361", /* small n, tilde (ñ) - ntilde */ + "\363", /* small o, acute accent (ó) - oacute */ + "\364", /* small o, circumflex accent (ô) - ocirc */ + "\362", /* small o, grave accent (ò) - ograve */ "\252", /* feminine ordinal indicator (ª) - ordf */ "\272", /* masculine ordinal indicator (º) - ordm */ - "\370", /* small o, slash - oslash */ - "\365", /* small o, tilde - otilde */ - "\366", /* small o, dieresis or umlaut mark - ouml */ + "\370", /* small o, slash (ø) - oslash */ + "\365", /* small o, tilde (õ) - otilde */ + "\366", /* small o, dieresis or umlaut mark (ö) - ouml */ "\266", /* paragraph sign (¶) - para */ "\261", /* plus-or-minus sign (±) - plusmn */ "\243", /* pound sign (£) - pound */ - "\042", /* quote '"' - quot */ + "\042", /* quote '"' (") - quot */ "\273", /* angle quotation mark, right (») - raquo */ "\256", /* circled R registered sign (®) - reg */ "\247", /* section sign (§) - sect */ @@ -156,19 +147,19 @@ PRIVATE char * ISO_Latin1[] = { "\271", /* superscript 1 (¹) - sup1 */ "\262", /* superscript 2 (²) - sup2 */ "\263", /* superscript 3 (³) - sup3 */ - "\337", /* small sharp s, German (sz ligature) - szlig */ + "\337", /* small sharp s, German (sz ligature) (ß) - szlig */ "\002", /* thin space - not collapsed NEVER CHANGE THIS - thinsp */ - "\376", /* small thorn, Icelandic - thorn */ + "\376", /* small 
thorn, Icelandic (þ) - thorn */ "\327", /* multiplication sign (×) - times */ "(TM)", /* circled TM trade mark sign (™) - trade */ - "\372", /* small u, acute accent - uacute */ - "\373", /* small u, circumflex accent - ucirc */ - "\371", /* small u, grave accent - ugrave */ + "\372", /* small u, acute accent (ú) - uacute */ + "\373", /* small u, circumflex accent (û) - ucirc */ + "\371", /* small u, grave accent (ù) - ugrave */ "\250", /* spacing diaresis (¨) - uml */ - "\374", /* small u, dieresis or umlaut mark - uuml */ - "\375", /* small y, acute accent - yacute */ + "\374", /* small u, dieresis or umlaut mark (ü) - uuml */ + "\375", /* small y, acute accent (ý) - yacute */ "\245", /* yen sign (¥) - yen */ - "\377", /* small y, dieresis or umlaut mark - yuml */ + "\377", /* small y, dieresis or umlaut mark (ÿ) - yuml */ }; /* Entity values -- for ISO Latin 2 local representation @@ -1754,7 +1745,7 @@ PRIVATE char * Korean[] = { /* Entity values -- Taipei (Big5) ** -** This MUST match exactly the table referred to in the DTD! +** This MUST match exactly the table referred to in the DTD! */ PRIVATE char * Taipei[] = { "AE", /* capital AE diphthong (ligature) - AElig */ @@ -1827,7 +1818,7 @@ PRIVATE char * Taipei[] = { "e", /* small e, circumflex accent - ecirc */ "e", /* small e, grave accent - egrave */ "-", /* dash the width of emsp - emdash */ - "\002", /* emsp NEVER CHANGE THIS - emsp */ + "\002", /* emsp, em space - not collapsed NEVER CHANGE THIS - emsp */ "-", /* dash the width of ensp - endash */ "\002", /* ensp NEVER CHANGE THIS - ensp */ "dh", /* small eth, Icelandic eth */ @@ -1900,65 +1891,65 @@ PRIVATE char * Taipei[] = { ** This MUST match exactly the table referred to in the DTD! 
*/ PUBLIC char * SevenBitApproximations[] = { - "AE", /* capital AE diphthong (ligature) - AElig */ - "A", /* capital A, acute accent - Aacute */ - "A", /* capital A, circumflex accent - Acirc */ - "A", /* capital A, grave accent - Agrave */ - "A", /* capital A, ring - Aring */ - "A", /* capital A, tilde - Atilde */ + "AE", /* capital AE diphthong (ligature) (Æ) - AElig */ + "A", /* capital A, acute accent (Á) - Aacute */ + "A", /* capital A, circumflex accent (Â) - Acirc */ + "A", /* capital A, grave accent (À) - Agrave */ + "A", /* capital A, ring - Aring (Å) */ + "A", /* capital A, tilde - Atilde (Ã) */ #ifdef LY_UMLAUT - "Ae", /* capital A, dieresis or umlaut mark - Auml*/ + "Ae", /* capital A, dieresis or umlaut mark (Ä) - Auml*/ #else - "A", /* capital A, dieresis or umlaut mark - Auml*/ + "A", /* capital A, dieresis or umlaut mark (Ä) - Auml*/ #endif /* LY_UMLAUT */ - "C", /* capital C, cedilla - Ccedil */ - "Dj", /* capital D with stroke - Dstrok */ - "DH", /* capital Eth, Icelandic - ETH */ - "E", /* capital E, acute accent - Eacute */ - "E", /* capital E, circumflex accent - Ecirc */ - "E", /* capital E, grave accent - Egrave */ - "E", /* capital E, dieresis or umlaut mark - Euml */ - "I", /* capital I, acute accent - Iacute */ - "I", /* capital I, circumflex accent - Icirc */ - "I", /* capital I, grave accent - Igrave */ - "I", /* capital I, dieresis or umlaut mark - Iuml */ - "N", /* capital N, tilde - Ntilde */ - "O", /* capital O, acute accent - Oacute */ - "O", /* capital O, circumflex accent - Ocirc */ - "O", /* capital O, grave accent - Ograve */ - "O", /* capital O, slash - Oslash */ - "O", /* capital O, tilde - Otilde */ + "C", /* capital C, cedilla (Ç) - Ccedil */ + "Dj", /* capital D with stroke (Ð) - Dstrok */ + "DH", /* capital Eth, Icelandic (Ð) - ETH */ + "E", /* capital E, acute accent (É) - Eacute */ + "E", /* capital E, circumflex accent (Ê) - Ecirc */ + "E", /* capital E, grave accent (È) - Egrave */ + "E", /* capital E, dieresis or 
umlaut mark (Ë) - Euml */ + "I", /* capital I, acute accent (Í) - Iacute */ + "I", /* capital I, circumflex accent (Î) - Icirc */ + "I", /* capital I, grave accent (Ì) - Igrave */ + "I", /* capital I, dieresis or umlaut mark (Ï) - Iuml */ + "N", /* capital N, tilde - Ntilde (Ñ) */ + "O", /* capital O, acute accent (Ó) - Oacute */ + "O", /* capital O, circumflex accent (Ô) - Ocirc */ + "O", /* capital O, grave accent (Ò) - Ograve */ + "O", /* capital O, slash (Ø) - Oslash */ + "O", /* capital O, tilde (Õ) - Otilde */ #ifdef LY_UMLAUT - "Oe", /* capital O, dieresis or umlaut mark - Ouml */ + "Oe", /* capital O, dieresis or umlaut mark (Ö) - Ouml */ #else - "O", /* capital O, dieresis or umlaut mark - Ouml */ + "O", /* capital O, dieresis or umlaut mark (Ö) - Ouml */ #endif /* LY_UMLAUT */ - "P", /* capital THORN, Icelandic - THORN */ - "U", /* capital U, acute accent - Uacute */ - "U", /* capital U, circumflex accent - Ucirc */ - "U", /* capital U, grave accent - Ugrave */ + "P", /* capital THORN, Icelandic (Þ) - THORN */ + "U", /* capital U, acute accent (Ú) - Uacute */ + "U", /* capital U, circumflex accent (Û) - Ucirc */ + "U", /* capital U, grave accent (Ù) - Ugrave */ #ifdef LY_UMLAUT - "Ue", /* capital U, dieresis or umlaut mark - Uuml */ + "Ue", /* capital U, dieresis or umlaut mark (Ü) - Uuml */ #else - "U", /* capital U, dieresis or umlaut mark - Uuml */ + "U", /* capital U, dieresis or umlaut mark (Ü) - Uuml */ #endif /* LY_UMLAUT */ - "Y", /* capital Y, acute accent - Yacute */ - "a", /* small a, acute accent - aacute */ - "a", /* small a, circumflex accent - acirc */ + "Y", /* capital Y, acute accent (Ý) - Yacute */ + "a", /* small a, acute accent (á) - aacute */ + "a", /* small a, circumflex accent (â) - acirc */ "'", /* spacing acute (´) - acute */ - "ae", /* small ae diphthong (ligature) - aelig */ - "`a", /* small a, grave accent - agrave */ - "&", /* ampersand - amp */ - "a", /* small a, ring - aring */ - "a", /* small a, tilde - atilde */ + "ae", /* 
small ae diphthong (ligature) (æ) - aelig */ + "`a", /* small a, grave accent (è) - agrave */ + "&", /* ampersand (&) - amp */ + "a", /* small a, ring (å) - aring */ + "a", /* small a, tilde (ã) - atilde */ #ifdef LY_UMLAUT - "ae", /* small a, dieresis or umlaut mark - auml */ + "ae", /* small a, dieresis or umlaut mark (ä) - auml */ #else - "a", /* small a, dieresis or umlaut mark - auml */ + "a", /* small a, dieresis or umlaut mark (ä) - auml */ #endif /* LY_UMLAUT */ "|", /* broken vertical bar (¦) - brkbar */ "|", /* broken vertical bar (¦) - brvbar */ - "c", /* small c, cedilla - ccedil */ + "c", /* small c, cedilla (ç) - ccedil */ ",", /* spacing cedilla (¸) - cedil */ "-c-", /* cent sign (¢) - cent */ "(c)", /* copyright sign (©) - copy */ @@ -1966,28 +1957,28 @@ PUBLIC char * SevenBitApproximations[] = { "DEG", /* degree sign (°) - deg */ "\042", /* spacing diaresis (¨) - die */ "/", /* division sign (÷) - divide */ - "e", /* small e, acute accent - eacute */ - "e", /* small e, circumflex accent - ecirc */ - "e", /* small e, grave accent - egrave */ + "e", /* small e, acute accent (é) - eacute */ + "e", /* small e, circumflex accent (ê) - ecirc */ + "e", /* small e, grave accent (è) - egrave */ "-", /* dash the width of emsp - emdash */ "\002", /* emsp NEVER CHANGE THIS - emsp */ "-", /* dash the width of ensp - endash */ "\002", /* ensp NEVER CHANGE THIS - ensp */ - "dh", /* small eth, Icelandic eth */ - "e", /* small e, dieresis or umlaut mark - euml */ + "dh", /* small eth, Icelandic eth (ð) */ + "e", /* small e, dieresis or umlaut mark (ë) - euml */ " 1/2", /* fraction 1/2 (½) - frac12 */ " 1/4", /* fraction 1/4 (¼) - frac14 */ " 3/4", /* fraction 3/4 (¾) - frac34 */ - ">", /* greater than - gt */ + ">", /* greater than (>) - gt */ "-", /* spacing macron (¯) - hibar */ - "i", /* small i, acute accent - iacute */ - "i", /* small i, circumflex accent - icirc*/ + "i", /* small i, acute accent (í) - iacute */ + "i", /* small i, circumflex accent (î) - 
icirc*/ "!", /* inverted exclamation mark (¡) - iexcl */ - "`i", /* small i, grave accent - igrave */ + "`i", /* small i, grave accent (ì) - igrave */ "?", /* inverted question mark (¿) - iquest */ - "i", /* small i, dieresis or umlaut mark - iuml */ + "i", /* small i, dieresis or umlaut mark (ï) - iuml */ "<<", /* angle quotation mark, left («) - laquo */ - "<", /* less than - lt */ + "<", /* less than - lt (<) */ "-", /* spacing macron (¯) - macr */ "-", /* dash the width of emsp - mdash */ "u", /* micro sign (µ) - micro */ @@ -1995,23 +1986,23 @@ PUBLIC char * SevenBitApproximations[] = { "\001", /* nbsp non-breaking space NEVER CHANGE THIS - nbsp */ "-", /* dash the width of ensp - ndash */ "NOT", /* negation sign (¬) - not */ - "n", /* small n, tilde - ntilde */ - "o", /* small o, acute accent - oacute */ - "o", /* small o, circumflex accent - ocirc */ - "o", /* small o, grave accent - ograve */ + "n", /* small n, tilde (ñ) - ntilde */ + "o", /* small o, acute accent (ó) - oacute */ + "o", /* small o, circumflex accent (ô) - ocirc */ + "o", /* small o, grave accent (ò) - ograve */ "-a", /* feminine ordinal indicator (ª) - ordf */ "-o", /* masculine ordinal indicator (º) - ordm */ - "o", /* small o, slash - oslash */ - "o", /* small o, tilde - otilde */ + "o", /* small o, slash (ø) - oslash */ + "o", /* small o, tilde (õ) - otilde */ #ifdef LY_UMLAUT - "oe", /* small o, dieresis or umlaut mark - ouml */ + "oe", /* small o, dieresis or umlaut mark (ö) - ouml */ #else - "o", /* small o, dieresis or umlaut mark - ouml */ + "o", /* small o, dieresis or umlaut mark (ö) - ouml */ #endif /* LY_UMLAUT */ "P:", /* paragraph sign (¶) - para */ "+-", /* plus-or-minus sign (±) - plusmn */ "-L-", /* pound sign (£) - pound */ - "\"", /* quote '"' - quot */ + "\"", /* quote '"' (") - quot */ ">>", /* angle quotation mark, right (») - raquo */ "(R)", /* circled R registered sign (®) - reg */ "S:", /* section sign (§) - sect */ @@ -2019,51 +2010,52 @@ PUBLIC char * 
SevenBitApproximations[] = { "^1", /* superscript 1 (¹) - sup1 */ "^2", /* superscript 2 (²) - sup2 */ "^3", /* superscript 3 (³) - sup3 */ - "ss", /* small sharp s, German (sz ligature) - szlig */ + "ss", /* small sharp s, German (sz ligature) (ß) - szlig */ "\002", /* thin space - not collapsed NEVER CHANGE THIS - thinsp */ - "p", /* small thorn, Icelandic - thorn */ + "p", /* small thorn, Icelandic (þ) - thorn */ "*", /* multiplication sign (×) - times */ "(TM)", /* circled TM trade mark sign (™) - trade */ - "u", /* small u, acute accent - uacute */ - "u", /* small u, circumflex accent - ucirc */ - "u", /* small u, grave accent - ugrave */ + "u", /* small u, acute accent (ú) - uacute */ + "u", /* small u, circumflex accent (û) - ucirc */ + "u", /* small u, grave accent (ù) - ugrave */ "\042", /* spacing diaresis (¨) - uml */ #ifdef LY_UMLAUT - "ue", /* small u, dieresis or umlaut mark - uuml */ + "ue", /* small u, dieresis or umlaut mark (ü) - uuml */ #else - "u", /* small u, dieresis or umlaut mark - uuml */ + "u", /* small u, dieresis or umlaut mark (ü) - uuml */ #endif /* LY_UMLAUT */ - "y", /* small y, acute accent - yacute */ + "y", /* small y, acute accent (ý) - yacute */ "YEN", /* yen sign (¥) - yen */ - "y", /* small y, dieresis or umlaut mark - yuml */ + "y", /* small y, dieresis or umlaut mark (ÿ) - yuml */ }; - /* - * Add your new character sets HERE. - * No string substitutions can exceed 5 characters. + * Add your new character sets HERE (but only if you + * can't contruct Unicode tables for them). 
- FM */ /* * Add the array name to LYCharSets */ PUBLIC char ** LYCharSets[MAXCHARSETS]={ - ISO_Latin1, + ISO_Latin1, /* ISO Latin 1 */ ISO_Latin2, - ISO_LatinN, - DEC_Multinational, - PC_charset, - PC_850_charset, - Macintosh, - NeXT_Step, + ISO_LatinN, /* Other ISO Latin */ + SevenBitApproximations, /* WinLatin1 (cp1252) */ + DEC_Multinational, /* DEC Multinational */ + Macintosh, /* Macintosh (8 bit) */ + NeXT_Step, /* NeXT character set */ KOI8_R, - Chinese, - EUC_JP, - Shift_JIS, - Korean, - Taipei, - SevenBitApproximations, - ISO_Latin1 /* Maybe... - KW */ + Chinese, /* Chinese */ + EUC_JP, /* Japanese (EUC) */ + Shift_JIS, /* Japanese (SJIS) */ + Korean, /* Korean */ + Taipei, /* Taipei (Big5) */ + SevenBitApproximations, /* Vietnamese (VISCII) */ + SevenBitApproximations, /* 7 Bit Approximations */ + SevenBitApproximations, /* Transparent */ + PC_charset, /* DosLatinUS (cp437) */ + PC_850_charset /* DosLatin1 (cp850) */ }; /* @@ -2074,24 +2066,23 @@ PUBLIC CONST char * LYchar_set_names[MAXCHARSETSP]={ "ISO Latin 1 ", "ISO Latin 2 ", "Other ISO Latin ", + "WinLatin1 (cp1252) ", "DEC Multinational ", - "IBM PC character set", - "IBM PC codepage 850 ", "Macintosh (8 bit) ", "NeXT character set ", - "KOI8-R character set", - "Chinese ", + "KOI8-R Cyrillic ", + "Chinese ", "Japanese (EUC) ", "Japanese (SJIS) ", "Korean ", "Taipei (Big5) ", + "Vietnamese (VISCII) ", "7 bit approximations", "Transparent ", + "IBM PC character set", + "IBM PC codepage 850 ", (char *) 0 }; -#ifdef EXP_CHARTRANS - -PUBLIC int LYNumCharsets = 0; /* will be initialized later by UC_Register... */ /* * Associate additional pieces of info with each of the charsets listed @@ -2107,63 +2098,92 @@ PUBLIC int LYNumCharsets = 0; /* will be initialized later by UC_Register... */ */ PUBLIC LYUCcharset LYCharSet_UC[MAXCHARSETS]= { - {-1,"iso-8859-1", UCT_ENC_8BIT,UCT_REP_IS_LAT1,UCT_CP_IS_LAT1,UCT_R_LAT1, - UCT_R_LAT1}, + /* + * Zero position placeholder and HTMLGetEntityUCValue() reference. 
- FM + */ + {-1,"iso-8859-1", UCT_ENC_8BIT, + UCT_REP_IS_LAT1, + UCT_CP_IS_LAT1, UCT_R_LAT1,UCT_R_LAT1}, + /* + * Placeholder for Unicode table. + */ {-1,"iso-8859-2", UCT_ENC_8BIT,0,0, UCT_R_LAT1,UCT_R_8859S}, + /* + * For unknown iso-8859-# charsets. - FM + */ {-1,"x-iso-8859-other",UCT_ENC_8BIT,0,0, UCT_R_LAT1,UCT_R_8859S}, - {-1,"dec-mcs", UCT_ENC_8BIT,0,0, UCT_R_LAT1,UCT_R_8859S}, - {-1,"cp437", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, - {-1,"cp850", UCT_ENC_8BIT,UCT_REP_SUPERSETOF_LAT1,0, - UCT_R_8BIT,UCT_R_ASCII}, + + /* + * Placeholders for Unicode tables. - FM + */ + {-1,"cp1252", UCT_ENC_8BIT, + UCT_REP_SUPERSETOF_LAT1, + 0, UCT_R_8BIT,UCT_R_ASCII}, + {-1,"dec-mcs", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"macintosh", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"x-next", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"koi8-r", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, + /* * There is no strict correlation for the next five, since the tranfer * charset gets decoded into Display Char Set by the CJK code (separate - * from EXP_CHARTRANS mechanism). For now, just put something there for - * MIME charset name. + * from Unicode mechanism). For now we use the MIME name that describes + * what is output to the terminal. - KW */ {-1,"euc-cn", UCT_ENC_CJK,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"euc-jp", UCT_ENC_CJK,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"shift_jis", UCT_ENC_CJK,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"euc-kr", UCT_ENC_CJK,0,0, UCT_R_8BIT,UCT_R_ASCII}, {-1,"big5", UCT_ENC_CJK,0,0, UCT_R_8BIT,UCT_R_ASCII}, - {-1,"us-ascii", UCT_ENC_7BIT,UCT_REP_SUBSETOF_LAT1, - UCT_CP_SUBSETOF_LAT1, - UCT_R_ASCII,UCT_R_ASCII}, - {-1,"x-transparent", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII} -}; -#endif -#if defined(USE_SLANG) || defined(EXP_CHARTRANS) + /* + * Placeholders for Unicode tables. 
- FM + */ + {-1,"viscii", UCT_ENC_8BIT_C0,0,0, UCT_R_8BIT,UCT_R_ASCII}, + {-1,"us-ascii", UCT_ENC_7BIT, + UCT_REP_SUBSETOF_LAT1, + UCT_CP_SUBSETOF_LAT1, UCT_R_ASCII,UCT_R_ASCII}, + + /* + * Placeholder for non-translation mode. - FM + */ + {-1,"x-transparent", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, + + /* + * Placeholders for Unicode tables. + */ + {-1,"cp437", UCT_ENC_8BIT,0,0, UCT_R_8BIT,UCT_R_ASCII}, + {-1,"cp850", UCT_ENC_8BIT, + UCT_REP_SUPERSETOF_LAT1, + 0, UCT_R_8BIT,UCT_R_ASCII} +}; /* * Add the code of the the lowest character with the high bit set * that can be directly displayed. - * Used by SLANG and for EXP_CHARTRANS. + * Used by SLANG and for character translation. * The order of LYCharSets and LYlowest_eightbit MUST be the same. */ PUBLIC int LYlowest_eightbit[MAXCHARSETS]={ - 160, /* ISO Latin 1 */ + 160, /* ISO Latin 1 */ 160, /* ISO Latin 2 */ - 160, /* Other ISO Latin */ - 160, /* DEC Multinational */ - 128, /* IBM PC character set */ - 128, /* IBM PC codepage 850 */ - 128, /* Macintosh (8 bit) */ - 128, /* NeXT character set */ - 128, /* KOI8-R character set */ - 128, /* Chinese */ - 128, /* Japanese (EUC) */ - 128, /* Japanese (SJIS) */ - 128, /* Korean */ - 128, /* Taipei (Big5) */ + 160, /* Other ISO Latin */ + 130, /* WinLatin1 (cp1252) */ + 160, /* DEC Multinational */ + 128, /* Macintosh (8 bit) */ + 128, /* NeXT character set */ + 128, /* KOI8-R Cyrillic */ + 128, /* Chinese */ + 128, /* Japanese (EUC) */ + 128, /* Japanese (SJIS) */ + 128, /* Korean */ + 128, /* Taipei (Big5) */ + 128, /* Vietnamese (VISCII) */ 999, /* 7 bit approximations */ - 128 /* Transparent (???) */ + 128, /* Transparent (???) */ + 128, /* DosLatinUS (cp437) */ + 128 /* DosLatin1 (cp850) */ }; -#endif /* USE_SLANG || EXP_CHARTRANS */ - /* * The default character set. 
@@ -2181,9 +2201,9 @@ PUBLIC int current_char_set = 0; /* Index for tranaslation */ */ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) { -#ifdef EXP_CHARTRANS + int chndl = -2; if (LYCharSet_UC[i].enc != UCT_ENC_CJK) { - int chndl = 0; + chndl = 0; if (UCAssume_MIMEcharset) chndl = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); @@ -2211,9 +2231,9 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) } HTPassHighCtrlNum = FALSE; - } else -#endif /* EXP_CHARTRANS */ - if (!strncmp(LYchar_set_names[i], "ISO Latin 1", 11)) { + +#ifdef NOTDEFINED + } else if (!strncmp(LYchar_set_names[i], "ISO Latin 1", 11)) { HTCJK = NOCJK; kanji_code = NOKANJI; HTPassEightBitRaw = LYUseDefaultRawMode ? TRUE : FALSE; @@ -2222,7 +2242,7 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) HTPassHighCtrlRaw = FALSE; HTPassHighCtrlNum = FALSE; - } else if (!strncmp(LYchar_set_names[i], "KOI8-R character set", 20)) { + } else if (!strncmp(LYchar_set_names[i], "KOI8-R Cyrillic", 15)) { HTCJK = NOCJK; kanji_code = NOKANJI; HTPassEightBitRaw = LYUseDefaultRawMode ? FALSE : TRUE; @@ -2230,6 +2250,7 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) HTPassEightBitNum = FALSE; HTPassHighCtrlRaw = TRUE; HTPassHighCtrlNum = FALSE; +#endif /* NOTDEFINED */ } else if (!strncmp(LYchar_set_names[i], "Chinese", 7)) { HTCJK = LYUseDefaultRawMode ? 
CHINESE : NOCJK; @@ -2286,23 +2307,23 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) HTPassHighCtrlNum = FALSE; } -#ifdef EXP_CHARTRANS + if (LYRawMode) { + UCLYhndl_for_unspec = i; + } else { + if (chndl == -2) { + chndl = 0; - if (LYRawMode) { - UCLYhndl_for_unspec = i; - } else { - int chndl = 0; - if (UCAssume_MIMEcharset) - chndl = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); - if (chndl != i && chndl >= 0 && - (LYCharSet_UC[i].enc != UCT_ENC_CJK || - LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) { - UCLYhndl_for_unspec = chndl; - } else { - UCLYhndl_for_unspec = 0; - } - } -#endif /* EXP_CHARTRANS */ + if (UCAssume_MIMEcharset) + chndl = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); + } + if (chndl != i && chndl >= 0 && + (LYCharSet_UC[i].enc != UCT_ENC_CJK || + LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) { + UCLYhndl_for_unspec = chndl; + } else { + UCLYhndl_for_unspec = 0; + } + } #ifdef USE_SLANG if (LYlowest_eightbit[i] > 191) { @@ -2344,20 +2365,20 @@ PUBLIC void HTMLSetRawModeDefault ARGS1(int,i) * based on the selected character set and the * current LYRawMode value. - FM */ -PUBLIC void HTMLSetUseDefaultRawMode ARGS2(int,i, BOOLEAN,modeflag) +PUBLIC void HTMLSetUseDefaultRawMode ARGS2( + int, i, + BOOLEAN, modeflag) { -#ifdef EXP_CHARTRANS if (LYCharSet_UC[i].enc != UCT_ENC_CJK) { int chndl = 0; + if (UCAssume_MIMEcharset) chndl = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); if (i == chndl) LYUseDefaultRawMode = modeflag; else LYUseDefaultRawMode = (!modeflag); - } else -#endif /* EXP_CHARTRANS */ - if (!strncmp(LYchar_set_names[i], "ISO Latin 1", 11) || + } else if (!strncmp(LYchar_set_names[i], "ISO Latin 1", 11) || !strncmp(LYchar_set_names[i], "Chinese", 7) || !strncmp(LYchar_set_names[i], "Japanese (EUC)", 14) || !strncmp(LYchar_set_names[i], "Japanese (SJIS)", 15) || @@ -2403,7 +2424,7 @@ PUBLIC void HTMLSetHaveCJKCharacterSet ARGS1(int,i) * Must be in order of ascending value. 
*/ PUBLIC CONST char * LYEntityNames[] = { -/* NAME DECIMAL VALUE */ +/* NAME DECIMAL VALUE */ "nbsp", /* 160, non breaking space */ "iexcl", /* 161, inverted exclamation mark */ "cent", /* 162, cent sign */ @@ -2506,9 +2527,109 @@ PUBLIC CONST char * LYEntityNames[] = { * Function to return the entity names of * ISO-8859-1 8-bit characters. - FM */ -PUBLIC CONST char * HTMLGetEntityName ARGS1(int,i) +PUBLIC CONST char * HTMLGetEntityName ARGS1( + int, i) +{ +#define IntValue i + int MaxValue = ((sizeof(LYEntityNames)/sizeof(char **)) - 1); + + if (IntValue < 0 || IntValue > MaxValue) { + return ""; + } + + return LYEntityNames[IntValue]; +} + +/* + * Function to return the UCode_t (long int) value for entity names + * in the ISO_Latin1 and UC_entity_info extra_entities arrays. It + * returns 0 if not found. - FM + */ +PUBLIC UCode_t HTMLGetEntityUCValue ARGS1( + CONST char *, name) { - return LYEntityNames[i]; + UCode_t value = 0; + int i, high, low, diff = 0; + CONST char ** entities = HTML_dtd.entity_names; + CONST UC_entity_info * extra_entities = HTML_dtd.extra_entity_info; + + /* + * Make sure we have a non-zero length name. - FM + */ + if (!(name && *name)) + return(value); + + /* + * Handle names that have control characters + * or strings in ISO_Latin1[]. - FM + */ + if (!strcmp(name, "nbsp")) { + return(160); + } + if (!strcmp(name, "shy")) { + return(173); + } + if (!strcmp(name, "ensp")) { + return(8194); + } + if (!strcmp(name, "emsp")) { + return(8195); + } + if (!strcmp(name, "thinsp")) { + return(8201); + } + if (!strcmp(name, "ndash") || + !strcmp(name, "endash")) { + return(8211); + } + if (!strcmp(name, "mdash") || + !strcmp(name, "emdash")) { + return(8212); + } + if (!strcmp(name, "trade")) { + return(8482); + } + + /* + * Now try ISO_Latin1[]. - FM + */ + for (low = 0, high = HTML_dtd.number_of_entities; + high > low; + diff < 0 ? (low = i+1) : (high = i)) { + /* + ** Binary search. 
+ */ + i = (low + (high-low)/2); + diff = strcmp(entities[i], name); + if (diff == 0) { + if (strlen(ISO_Latin1[i]) == 1) { + value = (unsigned char)(ISO_Latin1[i][0]); + } + break; + } + } + if (value > 0) { + return(value); + } + + /* + * Not yet found, so try UC_entity_info extra_entities[]. - FM + */ + for (low = 0, high = HTML_dtd.number_of_extra_entities; + high > low; + diff < 0 ? (low = i+1) : (high = i)) { + /* + ** Binary search. + */ + i = (low + (high-low)/2); + diff = strcmp(extra_entities[i].name, name); + if (diff == 0) { + value = extra_entities[i].code; + break; + } + } + + return(value); } /* @@ -2519,7 +2640,7 @@ PUBLIC CONST char * HTMLGetEntityName ARGS1(int,i) * * Return '\0' to signal that there isn't a one-character * equivalent. Caller must check! and do whatever additional - * processing it wants to do instead. - kw + * processing it wants to do instead. - KW */ PUBLIC char HTMLGetLatinOneValue ARGS1(int,i) { @@ -2575,10 +2696,11 @@ PUBLIC int LYCharSetsDeclared NOPARAMS UCInit(); status = UCInitialized; - if (UCAssume_MIMEcharset && *UCAssume_MIMEcharset) + if (UCAssume_MIMEcharset && *UCAssume_MIMEcharset) { UCLYhndl_for_unspec = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); - else + } else { UCLYhndl_for_unspec = 0; + } if (UCAssume_localMIMEcharset && *UCAssume_localMIMEcharset) UCLYhndl_HTFile_for_unspec = UCGetLYhndl_byMIME(UCAssume_localMIMEcharset); diff --git a/src/LYCharSets.h b/src/LYCharSets.h index ca53bb79..bd873fb7 100644 --- a/src/LYCharSets.h +++ b/src/LYCharSets.h @@ -2,7 +2,13 @@ #ifndef LYCHARSETS_H #define LYCHARSETS_H -/* LYchar_set_name[current_char_set] points to the currently active set */ +#ifndef UCMAP_H +#include "UCMap.h" +#endif /* !UCMAP_H */ + +/* + * LYchar_set_name[current_char_set] points to the currently active set. 
+ */ extern int current_char_set; extern CONST char * LYchar_set_names[]; @@ -28,6 +34,7 @@ extern void HTMLSetUseDefaultRawMode PARAMS((int i, BOOLEAN modeflag)); extern void HTMLSetHaveCJKCharacterSet PARAMS((int i)); extern CONST char * LYEntityNames[]; extern CONST char * HTMLGetEntityName PARAMS((int i)); +extern UCode_t HTMLGetEntityUCValue PARAMS((CONST char *name)); extern char HTMLGetLatinOneValue PARAMS((int i)); extern void HTMLUseCharacterSet PARAMS((int i)); diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c index e41a9047..8b72ffac 100644 --- a/src/LYCharUtils.c +++ b/src/LYCharUtils.c @@ -1032,7 +1032,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( char * cp = 0; char cpe = 0; char *esc = NULL; - char replace_buf[64]; + char replace_buf [64]; int uck; int lowest_8; UCode_t code = 0; @@ -1042,15 +1042,15 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( int high, low, diff = 0, i; CONST char ** entities = HTML_dtd.entity_names; CONST UC_entity_info * extra_entities = HTML_dtd.extra_entity_info; - CONST char * name = 0; + CONST char * name = NULL; BOOLEAN no_bytetrans; UCTransParams T; BOOL from_is_utf8 = FALSE; char * puni; enum _state { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren, - S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_check_name, S_named, - S_recover, + S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_named, S_check_name, + S_check_name_trad, S_recover, S_got_oututf8, S_got_outstring, S_put_urlstring, S_got_outchar, S_put_urlchar, S_next_char, S_done} state = S_text; enum _parsing_what @@ -1261,11 +1261,11 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( case S_trans_byte: /* character translation goes here */ /* - ** Don't do anything if we have no string, - ** or if original AND target character sets - ** are both iso-8859-1, - ** or if we are in CJK mode. 
- */ + ** Don't do anything if we have no string, + ** or if original AND target character sets + ** are both iso-8859-1, + ** or if we are in CJK mode. + */ if (*p == '\0' || no_bytetrans) { state = S_got_outchar; break; @@ -1370,7 +1370,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( ** Check for a numeric entity. - FM */ if (*pp == '#' && len > 2 && - (unsigned char)*(pp+1) == 'x' && + (*(pp+1) == 'x' || *(pp+1) == 'X') && (unsigned char)*(pp+2) < 127 && isxdigit((unsigned char)*(pp+2))) { what = P_hex; @@ -1596,6 +1596,21 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( } break; /* + ** Ignore 8204 (zwnj), 8205 (zwj) + ** 8206 (lrm), and 8207 (rlm), + ** for now, if we got this far without + ** finding a representation for them. + */ + } else if (code == 8204 || code == 8205 || + code == 8206 || code == 8207) { + if (TRACE) { + fprintf(stderr, + "LYUCFullyTranslateString: Ignoring '%ld'.\n", code); + } + replace_buf[0] = '\0'; + state = S_got_outstring; + break; + /* ** Show the numeric entity if the value: ** (1) Is greater than 255 and unhandled Unicode. */ @@ -1635,7 +1650,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( } else { name = HTMLGetEntityName(code - 160); } - state = S_check_name; + state = S_check_name_trad; break; } @@ -1689,7 +1704,11 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( state = S_check_name; break; - case S_check_name: + case S_check_name_trad: + /* + * Check for an entity name in the traditional (LYCharSets.c) + * table. + */ for (low = 0, high = HTML_dtd.number_of_entities; high > low; diff < 0 ? (low = i+1) : (high = i)) { @@ -1704,53 +1723,6 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( LYCharSets[cs_to][i] : LYCharSets[0][i], sizeof(replace_buf)); replace_buf[sizeof(replace_buf) - 1] = '\0'; - if (hidden) { - /* - ** If it's hidden, use 160 for nbsp. 
- FM - */ - if (!strcmp("nbsp", entities[i]) || - (replace_buf[1] == '\0' && - replace_buf[0] == HT_NON_BREAK_SPACE)) { - replace_buf[0] = 160; - replace_buf[1] = '\0'; - state = S_got_outstring; - break; - /* - ** If it's hidden, use 173 for shy. - FM - */ - } else if (!strcmp("shy", entities[i]) || - (replace_buf[1] == '\0' && - replace_buf[0] == LY_SOFT_HYPHEN)) { - replace_buf[0] = 173; - replace_buf[1] = '\0'; - state = S_got_outstring; - break; - } - /* - ** Check whether we want a plain space for nbsp, - ** ensp, emsp or thinsp. - FM - */ - } else if (plain_space) { - if (!strcmp("nbsp", entities[i]) || - !strcmp("emsp", entities[i]) || - !strcmp("ensp", entities[i]) || - !strcmp("thinsp", entities[i]) || - (replace_buf[1] == '\0' && - replace_buf[0] == HT_EM_SPACE)) { - code = ' '; - state = S_got_outchar; - break; - /* - ** If plain_space is set, ignore shy. - FM - */ - } else if (!strcmp("shy", entities[i]) || - (replace_buf[1] == '\0' && - replace_buf[0] == LY_SOFT_HYPHEN)) { - replace_buf[0] = '\0'; - state = S_got_outstring; - break; - } - } /* ** Found the entity. If the length ** of the value exceeds the length of @@ -1765,19 +1737,65 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( } /* ** Entity name lookup failed (diff != 0). - ** No point in repeating for extra entities. - kw + ** Recover and continue. */ - if (what != P_named) { + state = S_recover; + break; + + case S_check_name: + /* + * Check for a name that was really given as a named + * entity. - kw + */ + if (hidden) { /* - ** Didn't find the entity. - ** Recover the "&#" and continue - ** from there. - FM + ** If it's hidden, use 160 for nbsp. - FM */ - state = S_recover; - break; + if (!strcmp("nbsp", name) || + (replace_buf[1] == '\0' && + replace_buf[0] == HT_NON_BREAK_SPACE)) { + replace_buf[0] = 160; + replace_buf[1] = '\0'; + state = S_got_outstring; + break; + /* + ** If it's hidden, use 173 for shy. 
- FM + */ + } else if (!strcmp("shy", name) || + (replace_buf[1] == '\0' && + replace_buf[0] == LY_SOFT_HYPHEN)) { + replace_buf[0] = 173; + replace_buf[1] = '\0'; + state = S_got_outstring; + break; + } + /* + ** Check whether we want a plain space for nbsp, + ** ensp, emsp or thinsp. - FM + */ + } else if (plain_space) { + if (!strcmp("nbsp", name) || + !strcmp("emsp", name) || + !strcmp("ensp", name) || + !strcmp("thinsp", name) || + (replace_buf[1] == '\0' && + replace_buf[0] == HT_EM_SPACE)) { + code = ' '; + state = S_got_outchar; + break; + /* + ** If plain_space is set, ignore shy. - FM + */ + } else if (!strcmp("shy", name) || + (replace_buf[1] == '\0' && + replace_buf[0] == LY_SOFT_HYPHEN)) { + replace_buf[0] = '\0'; + state = S_got_outstring; + break; + } } /* - ** Not found, repeat for extra entities. - FM + ** Not recognized specially, look up in extra entities table. */ for (low = 0, high = HTML_dtd.number_of_extra_entities; high > low; @@ -1802,11 +1820,21 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( } if (diff == 0) break; + + /* + ** Seek the Unicode value for the entity. + ** This could possibly replace all the rest of + ** `case S_check_name'. - kw + */ + if ((code = HTMLGetEntityUCValue(name)) > 0) { + state = S_check_uni; + break; + } /* ** Didn't find the entity. - ** Recover. + ** Check the traditional tables. */ - state = S_recover; + state = S_check_name_trad; break; /* * * O U T P U T S T A T E S * * */ @@ -2003,7 +2031,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( ** Check for a numeric entity. - FM */ if (*p == '#' && len > 2 && - (unsigned char)*(p+1) == 'x' && + TOLOWER((unsigned char)*(p+1)) == 'x' && (unsigned char)*(p+2) < 127 && isxdigit((unsigned char)*(p+2))) { isHex = TRUE; @@ -2302,7 +2330,6 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( ** the character as a named entity. - FM */ } else { - CONST char * name; if (code == 8482) { /* ** Trade mark sign falls through to here. 
- KW @@ -2322,11 +2349,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( diff = strcmp(entities[i], name); if (diff == 0) { /* - ** Found the entity. Assume that the length - ** of the value does not exceed the length of - ** the raw entity, so that the overall string - ** does not need to grow. Make sure this stays - ** true in the LYCharSets arrays. - FM + ** Found the entity. */ int j; for (j = 0; p_entity_values[i][j]; j++) @@ -3361,16 +3384,6 @@ PUBLIC void LYHandleMETA ARGS4( *cp4 = '\0'; cp4 = cp3; chndl = UCGetLYhndl_byMIME(cp3); - if (chndl < 0) { - if (!strcmp(cp4, "cn-big5")) { - cp4 += 3; - chndl = UCGetLYhndl_byMIME(cp4); - } else if (!strncmp(cp4, "cn-gb", 5)) { - StrAllocCopy(cp3, "gb2312"); - cp4 = cp3; - chndl = UCGetLYhndl_byMIME(cp4); - } - } if (UCCanTranslateFromTo(chndl, current_char_set)) { chartrans_ok = YES; StrAllocCopy(me->node_anchor->charset, cp4); @@ -3482,7 +3495,7 @@ PUBLIC void LYHandleMETA ARGS4( } else if (!strncmp(cp1, "koi8-r", 6) && !strncmp(LYchar_set_names[current_char_set], - "KOI8-R character set", 20)) { + "KOI8-R Cyrillic", 15)) { StrAllocCopy(me->node_anchor->charset, "koi8-r"); HTPassEightBitRaw = TRUE; @@ -3884,9 +3897,14 @@ PUBLIC void LYHandleSELECT ARGS5( } /* - * Too likely to cause a crash, so we'll ignore it. - FM - */ + * We should have covered all crash possibilities with the + * current TagSoup parser, so we'll allow it because some + * people with other browsers use SELECT for "information" + * popups, outside of FORM blocks, though no Lynx user + * would do anything that awful, right? - FM + *//*** return; + ***/ } /* @@ -4051,9 +4069,11 @@ PUBLIC void LYHandleSELECT ARGS5( /* * Add end option character. 
*/ - HText_appendCharacter(me->text, ']'); - HText_setLastChar(me->text, ']'); - me->in_word = YES; + if (!me->first_option) { + HText_appendCharacter(me->text, ']'); + HText_setLastChar(me->text, ']'); + me->in_word = YES; + } HText_setIgnoreExcess(me->text, FALSE); } HTChunkClear(&me->option); diff --git a/src/LYCurses.c b/src/LYCurses.c index b4738a0f..23daf95a 100644 --- a/src/LYCurses.c +++ b/src/LYCurses.c @@ -7,9 +7,8 @@ #include "LYSignal.h" #include "LYClean.h" #include "LYStrings.h" -#ifdef USE_SLANG #include "LYCharSets.h" -#endif /* USE_SLANG */ +#include "UCAux.h" #include "LYexit.h" #include "LYLeaks.h" @@ -67,10 +66,12 @@ PUBLIC int PHYSICAL_SLtt_Screen_Cols = 10; PUBLIC void LY_SLrefresh NOARGS { if (FullRefresh) { - SLsmg_touch_lines(0, LYlines); + SLsmg_suspend_smg(); + SLsmg_resume_smg(); FullRefresh = FALSE; + } else { + SLsmg_refresh(); } - SLsmg_refresh(); return; } @@ -171,6 +172,82 @@ PRIVATE void sl_suspend ARGS1( #endif /* SIGSTOP */ return; } + +#else /* Not slang: */ + +#ifdef VMS +/* +** This function boxes windows with graphic characters for +** VMS curses. Pass it the window, it's height, and it's +** width. - FM +*/ +PUBLIC void VMSbox ARGS3( + WINDOW *, win, + int, height, + int, width) +{ + int i; + + wmove(win, 0, 0); + waddstr(win, "\033)0\016l"); + for (i = 1; i < width; i++) + waddch(win, 'q'); + waddch(win, 'k'); + for (i = 1; i < height-1; i++) { + wmove(win, i, 0); + waddch(win, 'x'); + wmove(win, i, width-1); + waddch(win, 'x'); + } + wmove(win, i, 0); + waddch(win, 'm'); + for (i = 1; i < width; i++) + waddch(win, 'q'); + waddstr(win, "j\017"); +} +#else +/* +** This function boxes windows for non-VMS (n)curses. +** Pass it the window. - FM +*/ +PUBLIC void LYbox ARGS2( + WINDOW *, win, + BOOLEAN, formfield) +{ + /* + * If the terminal is in UTF-8 mode, it probably cannot understand + * box drawing characters as (n)curses handles them. 
(This may also + * be true for other display character sets, but isn't currently + * checked.) In that case, substitute ASCII characters for BOXVERT + * and BOXHORI if they were defined to 0 for automatic use of box + * drawing characters. They'll stay as they are otherwise. - KW & FM + */ + int boxvert, boxhori; + + UCSetBoxChars(current_char_set, &boxvert, &boxhori, BOXVERT, BOXHORI); +#ifdef CSS + if (formfield) + wcurses_css(win, "frame", ABS_ON); +#endif + /* + * If we don't have explicitly specified characters for either + * vertical or horizontal lines, the characters that box() would + * use for the corners probably also won't work well. So we + * specifiy our own ASCII characters for the corners and call + * wborder() instead of box(). - kw + */ + if (!boxvert || !boxhori) + box(win, boxvert, boxhori); + else if (boxvert == '*' || boxhori == '*') + wborder(win, boxvert, boxvert, boxhori, boxhori, '*', '*', '*', '*'); + else + wborder(win, boxvert, boxvert, boxhori, boxhori, '/', '\\', '\\', '/'); +#ifdef CSS + if (formfield) + wcurses_css(win, "frame", ABS_OFF); +#endif +} +#endif /* VMS */ #endif /* USE_SLANG */ #if defined(USE_COLOR_STYLE) @@ -912,6 +989,27 @@ PUBLIC BOOLEAN setup ARGS1( { static char term_putenv[120]; char buffer[120]; +#if !defined(NO_SIZECHANGEHACK) +#if defined(HAVE_SIZECHANGE) && !defined(USE_SLANG) +/* + * Hack to deal with a problem in sysV curses, that screen can't be + * resized to greater than the size used by initscr, which can only + * be called once. So set environment variables LINES and COLUMNS + * to some suitably large size to force initscr to allocate enough + * space. Later we get the real window size for setting LYlines + * and LYcols. 
- AJL & FM + */ + char *lines_putenv = NULL; + char *cols_putenv = NULL; + + if (getenv("LINES") == NULL && getenv("COLUMNS") == NULL) { + StrAllocCopy(lines_putenv, "LINES=120"); + (void) putenv(lines_putenv); + StrAllocCopy(cols_putenv, "COLUMNS=240"); + (void) putenv(cols_putenv); + } +#endif /* !NO_SIZECHANGE && !USE_SLANG */ +#endif /* !NO_SIZECHANGEHACK */ /* * If the display was not set by a command line option then @@ -958,8 +1056,26 @@ PUBLIC BOOLEAN setup ARGS1( } #endif /* HAVE_TTYTYPE */ +#if defined(HAVE_SIZECHANGE) && !defined(USE_SLANG) && !defined(NO_SIZECHANGEHACK) + if (lines_putenv != NULL) { + /* + * Use SIGWINCH handler to set the true window size. - AJL && FM + */ + size_change(0); + lines_putenv[6] = '\0'; + (void) putenv(lines_putenv); + cols_putenv[8] = '\0'; + (void) putenv(cols_putenv); + FREE(lines_putenv); + FREE(cols_putenv); + } else { + LYlines = LINES; + LYcols = COLS; + } +#else LYlines = LINES; LYcols = COLS; +#endif /* !NO_SIZECHANGE && !USE_SLANG && !NO_SIZECHANGEHACK */ if (LYlines <= 0) LYlines = 24; if (LYcols <= 0) @@ -1522,37 +1638,6 @@ PUBLIC int DCLsystem ARGS1( */ return(status); } - -#ifndef USE_SLANG -/* -** This function boxes windows with graphic characters for curses. -** Pass it the window, it's height, and it's width. 
- FM -*/ -PUBLIC void VMSbox ARGS3( - WINDOW *, win, - int, height, - int, width) -{ - int i; - - wmove(win, 0, 0); - waddstr(win, "\033)0\016l"); - for (i = 1; i < width; i++) - waddch(win, 'q'); - waddch(win, 'k'); - for (i = 1; i < height-1; i++) { - wmove(win, i, 0); - waddch(win, 'x'); - wmove(win, i, width-1); - waddch(win, 'x'); - } - wmove(win, i, 0); - waddch(win, 'm'); - for (i = 1; i < width; i++) - waddch(win, 'q'); - waddstr(win, "j\017"); -} -#endif /* !USE_SLANG */ #endif /* VMS */ PUBLIC void lynx_force_repaint NOARGS diff --git a/src/LYCurses.h b/src/LYCurses.h index 89d64c5e..7247b7e5 100644 --- a/src/LYCurses.h +++ b/src/LYCurses.h @@ -24,6 +24,9 @@ #if defined(UNIX) && !defined(unix) #define unix #endif /* UNIX && !unix */ +#ifdef va_start +#undef va_start /* not used, undef to avoid warnings on some systems */ +#endif /* va_start */ #include <slang.h> #else /* Using curses: */ @@ -74,6 +77,11 @@ extern void LYsubwindow PARAMS((WINDOW * param)); # endif /* VMS && __GNUC__ */ #endif /* HAVE_CONFIG_H */ +#ifdef VMS +extern void VMSbox PARAMS((WINDOW *win, int height, int width)); +#else +extern void LYbox PARAMS((WINDOW *win, BOOLEAN formfield)); +#endif /* VMS */ #endif /* USE_SLANG */ @@ -120,9 +128,6 @@ extern int ttopen(); extern int ttclose(); extern int ttgetc(); extern void *VMSsignal PARAMS((int sig, void (*func)())); -#ifndef USE_SLANG -extern void VMSbox PARAMS((WINDOW *win, int height, int width)); -#endif /* !USE_SLANG */ #endif /* VMS */ #if defined(USE_COLOR_STYLE) diff --git a/src/LYEditmap.c b/src/LYEditmap.c index 4cba958a..cf42ddd2 100644 --- a/src/LYEditmap.c +++ b/src/LYEditmap.c @@ -28,7 +28,7 @@ LYE_NOP, LYE_ENTER, LYE_FORWW, LYE_ABORT, LYE_BACKW, LYE_NOP, LYE_DELN, LYE_NOP, /* ^P XON ^R XOFF */ -LYE_UPPER, LYE_ERASE, LYE_NOP, LYE_NOP, +LYE_UPPER, LYE_ERASE, LYE_LINKN, LYE_NOP, /* ^T ^U ^V ^W */ LYE_ERASE, LYE_NOP, LYE_NOP, LYE_NOP, diff --git a/src/LYForms.c b/src/LYForms.c index d9dee701..df0b3b1c 100644 --- a/src/LYForms.c 
+++ b/src/LYForms.c @@ -119,14 +119,14 @@ PUBLIC int change_form_link ARGS6( form->value_cs = opt_ptr->value_cs; } #if defined(FANCY_CURSES) || defined(USE_SLANG) - if (!enable_scrollback) + if (!enable_scrollback) #if defined(VMS) && !defined(USE_SLANG) - if (form->num_value == OrigNumValue) - c = DO_NOTHING; - else + if (form->num_value == OrigNumValue) + c = DO_NOTHING; + else #endif /* VMS && !USE_SLANG*/ c = 23; /* CTRL-W refresh without clearok */ - else + else #endif /* FANCY_CURSES || USE_SLANG */ c = 12; /* CTRL-L for repaint */ break; @@ -358,6 +358,12 @@ again: action = EditBinding(ch); if (action == LYE_ENTER) break; + if (action == LYE_LINKN) { + if ((ch = LYReverseKeymap(LYK_F_LINK_NUM)) <= 0) { + ch = DO_NOTHING; + } + break; + } if (action == LYE_AIX && (HTCJK == NOCJK && LYlowest_eightbit[current_char_set] > 0x97)) break; @@ -369,7 +375,7 @@ again: return(DO_NOTHING); } if (keymap[ch + 1] == LYK_REFRESH) - goto breakfor; + break; switch (ch) { case DNARROW: case UPARROW: @@ -749,17 +755,7 @@ redraw: #ifdef VMS VMSbox(form_window, (bottom - top), (width + 4)); #else - { - int boxvert, boxhori; - UCSetBoxChars(current_char_set, &boxvert, &boxhori, BOXVERT, BOXHORI); -#ifdef CSS - wcurses_css(form_window, "frame", ABS_ON); - box(form_window, boxvert, boxhori); - wcurses_css(form_window, "frame", ABS_OFF); -#else - box(form_window, boxvert, boxhori); -#endif - } + LYbox(form_window, TRUE); #endif /* VMS */ wrefresh(form_window); #endif /* USE_SLANG */ diff --git a/src/LYKeymap.c b/src/LYKeymap.c index 1ceef654..14b80ef1 100644 --- a/src/LYKeymap.c +++ b/src/LYKeymap.c @@ -761,7 +761,8 @@ PUBLIC void reset_numbers_as_arrows NOARGS did_number_keys = FALSE; } -PUBLIC char *key_for_func ARGS1 (int,func) +PUBLIC char *key_for_func ARGS1 ( + int, func) { static char buf[512]; size_t i; @@ -779,16 +780,34 @@ PUBLIC char *key_for_func ARGS1 (int,func) /* * This function returns TRUE if the ch is non-alphanumeric - * and maps to keyname (LYK_foo in the keymap[] 
array). - FM + * and maps to key_name (LYK_foo in the keymap[] array). - FM */ PUBLIC BOOL LYisNonAlnumKeyname ARGS2( int, ch, - int, keyname) + int, key_name) { if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'z') || ch < 0 || ch > 269) return (FALSE); - return(keymap[ch+1] == keyname); + return(keymap[ch+1] == key_name); +} + +/* + * This function returns the (int)ch mapped to the + * LYK_foo value passed to it as an argument. - FM + */ +PUBLIC int LYReverseKeymap ARGS1( + int, key_name) +{ + int i; + + for (i = 1; i < sizeof(keymap); i++) { + if (keymap[i] == key_name) { + return(i - 1); + } + } + + return(0); } diff --git a/src/LYKeymap.h b/src/LYKeymap.h index a426d2ba..133e1f2f 100644 --- a/src/LYKeymap.h +++ b/src/LYKeymap.h @@ -11,7 +11,8 @@ extern void set_numbers_as_arrows NOPARAMS; extern void reset_numbers_as_arrows NOPARAMS; extern void print_keymap PARAMS((char **newfile)); extern char *key_for_func PARAMS((int func)); -extern BOOLEAN LYisNonAlnumKeyname PARAMS((int ch, int Keyname)); +extern BOOLEAN LYisNonAlnumKeyname PARAMS((int ch, int key_name)); +extern int LYReverseKeymap PARAMS((int key_name)); extern char keymap[]; /* main keymap matrix */ diff --git a/src/LYMainLoop.c b/src/LYMainLoop.c index 11020387..d73b9a22 100644 --- a/src/LYMainLoop.c +++ b/src/LYMainLoop.c @@ -175,6 +175,7 @@ int mainloop NOARGS BOOLEAN FirstURLRecall = TRUE; char *temp = NULL; BOOLEAN ForcePush = FALSE; + BOOLEAN override_LYresubmit_posts = FALSE; unsigned int len; int i; @@ -332,10 +333,7 @@ try_again: } else if (!newdoc.address) { /* * If newdoc.address is empty then pop a file - * and load it. Force a no_cache override unless - * it's a bookmark file, or it has POST content - * and LYresubmit_posts is set without safe also - * set. + * and load it. 
- FM */ LYpop(&newdoc); popped_doc = TRUE; @@ -366,15 +364,27 @@ try_again: try_internal = TRUE; } else #endif /* TRACK_INTERNAL_LINKS */ + /* + * Force a no_cache override unless + * it's a bookmark file, or it has POST content + * and LYresubmit_posts is set without safe also + * set, and we are not going to another position + * in the current document or restoring the previous + * document due to a NOT_FOUND or NULLFILE return + * value from getfile(). - FM + */ if ((newdoc.bookmark != NULL) || - (newdoc.post_data != NULL && !newdoc.safe && + (newdoc.post_data != NULL && + !newdoc.safe && LYresubmit_posts && + !override_LYresubmit_posts && NO_INTERNAL_OR_DIFFERENT(&curdoc, &newdoc))) { LYoverride_no_cache = FALSE; } else { LYoverride_no_cache = TRUE; } } + override_LYresubmit_posts = FALSE; if (HEAD_request) { /* @@ -709,6 +719,10 @@ try_again: */ if (history[nhist - 1].post_data && !history[nhist - 1].safe) { + if (HText_POSTReplyLoaded((document *)&history[(nhist - 1)])) { + override_LYresubmit_posts = TRUE; + goto try_again; + } /* Set newdoc fields, just in case the PREV_DOC * gets cancelled. 
- kw */ if (!curdoc.address) { @@ -740,6 +754,7 @@ try_again: cmd = LYK_PREV_DOC; goto new_cmd; } + override_LYresubmit_posts = TRUE; goto try_again; break; @@ -939,8 +954,9 @@ try_again: } /* end switch */ if (TRACE) { - if (!LYTraceLogFP || trace_mode_flag) + if (!LYTraceLogFP || trace_mode_flag) { sleep(AlertSecs); /* allow me to look at the results */ + } } /* @@ -1149,8 +1165,9 @@ try_again: } if (TRACE) { refresh_screen = TRUE; - if (!LYTraceLogFP || trace_mode_flag) + if (!LYTraceLogFP || trace_mode_flag) { sleep(AlertSecs); + } } } @@ -1905,7 +1922,7 @@ new_cmd: /* LYUCPushAssumed(HTMainAnchor); HTOutputFormat = WWW_SOURCE; } - HTuncache_current_document(); + LYforce_no_cache = TRUE; FREE(curdoc.address); /* so it doesn't get pushed */ break; @@ -1929,7 +1946,11 @@ new_cmd: /* HTOutputFormat = WWW_SOURCE; } HEAD_request = HTLoadedDocumentIsHEAD(); - HTuncache_current_document(); + if (real_cmd == LYK_RELOAD) { + HTuncache_current_document(); + } else { + LYforce_no_cache = TRUE; + } #ifdef NO_ASSUME_SAME_DOC /* * Don't assume the reloaded document will be the same. - FM @@ -3606,7 +3627,6 @@ check_goto_URL: sleep(InfoSecs); } else { - LYforce_no_cache = TRUE; StrAllocCopy(newdoc.address, curdoc.address); if (((strcmp(CurrentUserAgent, (LYUserAgent ? LYUserAgent : "")) || @@ -3637,7 +3657,6 @@ check_goto_URL: HTOutputFormat = WWW_SOURCE; } HEAD_request = HTLoadedDocumentIsHEAD(); - HTuncache_current_document(); #ifdef NO_ASSUME_SAME_DOC newdoc.line = 1; newdoc.link = 0; @@ -3647,7 +3666,8 @@ check_goto_URL: newdoc.link = ((curdoc.link > -1) ? curdoc.link : 0); #endif /* NO_ASSUME_SAME_DOC */ - FREE(curdoc.address); + LYforce_no_cache = TRUE; + FREE(curdoc.address); /* So it doesn't get pushed. 
*/ } } keypad_mode_flag = keypad_mode; @@ -3974,8 +3994,7 @@ check_goto_URL: if (local_create(&curdoc)) { HTuncache_current_document(); StrAllocCopy(newdoc.address, curdoc.address); - FREE(curdoc.address); - FREE(curdoc.address); + FREE(curdoc.address); FREE(newdoc.post_data); FREE(newdoc.post_content_type); FREE(newdoc.bookmark); diff --git a/src/LYMap.c b/src/LYMap.c index 1c2a194e..db491280 100644 --- a/src/LYMap.c +++ b/src/LYMap.c @@ -429,7 +429,11 @@ PRIVATE int LYLoadIMGmap ARGS4 ( if (!theList) { if (anAnchor->post_data && !WWWDoc.safe && ((underlying && underlying->document && !LYforce_no_cache) || - HTConfirm("LYNXIMGMAP: " CONFIRM_POST_RESUBMISSION) != TRUE)) { + HTConfirm( +#if __STDC__ + "LYNXIMGMAP: " +#endif + CONFIRM_POST_RESUBMISSION) != TRUE)) { HTAlert("Image map from POST response not available!"); return(HT_NOT_LOADED); } @@ -466,7 +470,11 @@ PRIVATE int LYLoadIMGmap ARGS4 ( if (!(theMap && theMap->elements)) { if (anAnchor->post_data && !WWWDoc.safe && ((underlying && underlying->document && !LYforce_no_cache) || - HTConfirm("LYNXIMGMAP: " CONFIRM_POST_RESUBMISSION) != TRUE)) { + HTConfirm( +#if __STDC__ + "LYNXIMGMAP: " +#endif + CONFIRM_POST_RESUBMISSION) != TRUE)) { HTAlert("Image map from POST response not available!"); return(HT_NOT_LOADED); } diff --git a/src/LYOptions.c b/src/LYOptions.c index 8eaa64a0..8cc9886c 100644 --- a/src/LYOptions.c +++ b/src/LYOptions.c @@ -2328,11 +2328,7 @@ redraw: #ifdef VMS VMSbox(form_window, (bottom - top), (Lnum + width + 4)); #else - { - int boxvert, boxhori; - UCSetBoxChars(current_char_set, &boxvert, &boxhori, BOXVERT, BOXHORI); - box(form_window, boxvert, boxhori); - } + LYbox(form_window, FALSE); #endif /* VMS */ wrefresh(form_window); #endif /* USE_SLANG */ diff --git a/src/LYStrings.c b/src/LYStrings.c index f11b2e7f..04cad25f 100644 --- a/src/LYStrings.c +++ b/src/LYStrings.c @@ -153,6 +153,37 @@ PUBLIC char * LYmbcsstrncpy ARGS5(char *,dst, CONST char *,src, int,n_bytes, #endif /* EXP_CHARTRANS 
*/ /* + * LYmbcs_skip_glyphs() skips a given number of display positions + * in a string and returns the resulting pointer. It takes account + * of UTF-8 encoded characters. - kw + */ +PUBLIC char * LYmbcs_skip_glyphs ARGS3( + char *, data, + int, n_glyphs, + BOOL, utf_flag) +{ + int i_glyphs = 0; + + if (n_glyphs < 0) + n_glyphs = 0; + + if (!data) + return NULL; + if (!utf_flag) + return (data + n_glyphs); + + while(*data) { + if (IS_NEW_GLYPH(*data)) { + if (i_glyphs++ >= n_glyphs) { + return data; + } + } + data++; + } + return data; +} + +/* * LYmbcsstrlen() returns the printable length of a string * that might contain IsSpecial or multibyte (CJK or UTF8) * characters. - FM @@ -190,6 +221,7 @@ PUBLIC int LYmbcsstrlen ARGS2( return(len); } + #undef GetChar #ifdef USE_SLANG @@ -506,8 +538,9 @@ re_read: default: if (TRACE) { fprintf(stderr,"Unknown key sequence: %d:%d:%d\n",c,b,a); - if (!LYTraceLogFP) + if (!LYTraceLogFP) { sleep(MessageSecs); + } } } if (isdigit(a) && (b == '[' || c == 155) && d != -1 && d != '~') @@ -1053,8 +1086,16 @@ again: return(-1); break; + case LYE_LINKN: + /* + * Used only in form_getstr() for invoking + * the LYK_F_LINK_NUM prompt when in form + * text fields. 
- FM + */ + break; + default: - LYLineEdit(&MyEdit,ch, FALSE); + LYLineEdit(&MyEdit, ch, FALSE); } } } diff --git a/src/LYStrings.h b/src/LYStrings.h index 5dc64c06..e6d7f178 100644 --- a/src/LYStrings.h +++ b/src/LYStrings.h @@ -38,6 +38,10 @@ extern char * LYno_attr_mbcs_case_strstr PARAMS(( extern int LYmbcsstrlen PARAMS(( char * str, BOOL utf_flag)); +extern char * LYmbcs_skip_glyphs PARAMS(( + char * data, + int n_glyphs, + BOOL utf_flag)); extern char * LYmbcsstrncpy PARAMS(( char * dst, CONST char * src, @@ -140,7 +144,9 @@ typedef struct _EditFieldData { #define LYE_LOWER (LYE_BACKW +1) /* Lower case the line */ #define LYE_UPPER (LYE_LOWER +1) /* Upper case the line */ -#define LYE_AIX (LYE_UPPER +1) /* Hex 97 */ +#define LYE_LINKN (LYE_UPPER +1) /* Invoke F_LINK_NUM */ + +#define LYE_AIX (LYE_LINKN +1) /* Hex 97 */ extern void LYSetupEdit PARAMS(( EditFieldData * edit, diff --git a/src/LYUtils.c b/src/LYUtils.c index 47a2fca1..f4ba7728 100644 --- a/src/LYUtils.c +++ b/src/LYUtils.c @@ -526,6 +526,13 @@ PUBLIC void highlight ARGS3( * within the hightext. - FM */ data = (Data + (offset - Offset)); + if (!utf_flag) { + data = Data + (offset - Offset); + } else { + data = LYmbcs_skip_glyphs(Data, + (offset - Offset), + utf_flag); + } if ((case_sensitive ? (cp = LYno_attr_mbcs_strstr(data, target, @@ -574,7 +581,14 @@ highlight_hit_within_hightext: (flag == ON ? (hLen - 1) : hLen)) { goto highlight_search_hightext2; } - data += (Offset - offset); + if (!utf_flag) { + data += (Offset - offset); + } else { + refresh(); + data = LYmbcs_skip_glyphs(data, + (Offset - offset), + utf_flag); + } offset = Offset; itmp = 0; written = 0; @@ -793,7 +807,13 @@ highlight_hit_within_hightext: * See if we have another hit that starts * within the hightext. - FM */ - data = (Data + (offset - Offset)); + if (!utf_flag) { + data = Data + (offset - Offset); + } else { + data = LYmbcs_skip_glyphs(Data, + (offset - Offset), + utf_flag); + } if ((case_sensitive ? 
(cp = LYno_attr_mbcs_strstr(data, target, @@ -826,7 +846,14 @@ highlight_hit_within_hightext: if ((HitOffset + (offset + tLen)) >= (hoffset + hLen)) { offset = (HitOffset + offset); - data = (buffer + (offset - hoffset)); + if (!utf_flag) { + data = buffer + (offset - hoffset); + } else { + refresh(); + data = LYmbcs_skip_glyphs(buffer, + (offset - hoffset), + utf_flag); + } move(hLine, offset); itmp = 0; written = 0; @@ -1233,6 +1260,13 @@ highlight_search_hightext2: * See if we have another hit that starts * within the hightext2. - FM */ + if (!utf_flag) { + data = Data + (offset - Offset); + } else { + data = LYmbcs_skip_glyphs(Data, + (offset - Offset), + utf_flag); + } data = (Data + (offset - Offset)); if ((case_sensitive ? (cp = LYno_attr_mbcs_strstr(data, @@ -1282,7 +1316,14 @@ highlight_hit_within_hightext2: (flag == ON ? (hLen - 1) : hLen)) { goto highlight_search_done; } - data += (Offset - offset); + if (!utf_flag) { + data += (Offset - offset); + } else { + refresh(); + data = LYmbcs_skip_glyphs(data, + (Offset - offset), + utf_flag); + } offset = Offset; itmp = 0; written = 0; @@ -1501,7 +1542,13 @@ highlight_hit_within_hightext2: * See if we have another hit that starts * within the hightext2. - FM */ - data = (Data + (offset - Offset)); + if (!utf_flag) { + data = (Data + (offset - Offset)); + } else { + data = LYmbcs_skip_glyphs(Data, + (offset - Offset), + utf_flag); + } if ((case_sensitive ? (cp = LYno_attr_mbcs_strstr(data, target, @@ -1534,7 +1581,14 @@ highlight_hit_within_hightext2: if ((HitOffset + (offset + tLen)) >= (hoffset + hLen)) { offset = (HitOffset + offset); - data = (buffer + (offset - hoffset)); + if (!utf_flag) { + data = buffer + (offset - hoffset); + } else { + refresh(); + data = LYmbcs_skip_glyphs(buffer, + (offset - hoffset), + utf_flag); + } move(hLine, offset); itmp = 0; written = 0; @@ -1851,7 +1905,13 @@ PUBLIC void statusline ARGS1( FREE(temp); } else { /* - * Strip any escapes, and shorten text if necessary. 
- FM + * Strip any escapes, and shorten text if necessary. Note + * that we don't deal with the possibility of UTF-8 characters + * in the string. This is unlikely, but if strings with such + * characters are used in LYMessages_en.h, a compilation + * symbol of HAVE_UTF8_STATUSLINES could be added there, and + * code added here for determining the displayed string length, + * as we do above for CJK. - FM */ for (i = 0, len = 0; text[i] != '\0' && len < max_length; i++) { if (text[i] != '\033') { @@ -1881,12 +1941,12 @@ PUBLIC void statusline ARGS1( move(LYlines-1, 0); } clrtoeol(); - if (text != NULL) { -#ifdef EXP_CHARTRANS + if (text != NULL && text[0] != '\0') { +#ifdef HAVE_UTF8_STATUSLINES if (LYCharSet_UC[current_char_set].enc == UCT_ENC_UTF8) { refresh(); } -#endif +#endif /* HAVE_UTF8_STATUSLINES */ #ifndef USE_COLOR_STYLE lynx_start_status_color (); addstr (buffer); @@ -2576,6 +2636,7 @@ PUBLIC int is_url ARGS1( for (i = 0; i < 10; i++) cp[i] = TOUPPER(cp[i]); } + (void)is_url(&cp[11]); return(LYNXIMGMAP_URL_TYPE); } else if (!strncasecomp(cp, "LYNXCOOKIE:", 11)) { @@ -2873,6 +2934,9 @@ PUBLIC BOOLEAN inlocaldomain NOARGS PUBLIC void size_change ARGS1( int, sig) { + int old_lines = LYlines; + int old_cols = LYcols; + #ifdef USE_SLANG SLtt_get_screen_size(); LYlines = SLtt_Screen_Rows; @@ -2925,7 +2989,17 @@ PUBLIC void size_change ARGS1( LYcols = 80; #endif /* USE_SLANG */ - recent_sizechange = TRUE; + /* + * Check if the screen size has actually changed. 
- AJL + */ + if (LYlines != old_lines || LYcols != old_cols) { + recent_sizechange = TRUE; + } + if (TRACE) { + fprintf(stderr, + "Window size changed from (%d,%d) to (%d,%d)\n", + old_lines, old_cols, LYlines, LYcols); + } #ifdef SIGWINCH (void)signal (SIGWINCH, size_change); #endif /* SIGWINCH */ diff --git a/src/Makefile.old b/src/Makefile.old index 3b325018..8435f4bb 100644 --- a/src/Makefile.old +++ b/src/Makefile.old @@ -56,9 +56,28 @@ LYCharSets.o: ../userdefs.h CHRTR= chrtrans/ -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)koi8r_uni.h \ + $(CHRTR)viscii_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1250_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)cp1253_uni.h \ + $(CHRTR)cp1255_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ @@ -67,15 +86,6 @@ TABLES= $(CHRTR)iso02_uni.h \ $(CHRTR)iso08_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \ $(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ diff --git a/src/UCAux.c b/src/UCAux.c index f03bbfa9..98fb9e37 100644 --- a/src/UCAux.c +++ b/src/UCAux.c @@ -21,6 +21,7 @@ PUBLIC UCTQ_t UCCanUniTranslateFrom ARGS1( return TQ_EXCELLENT; return ((LYCharSet_UC[from].UChndl >= 0) ? 
TQ_GOOD : TQ_NO); } + PUBLIC UCTQ_t UCCanTranslateUniTo ARGS1( int, to) { @@ -36,6 +37,7 @@ PUBLIC UCTQ_t UCCanTranslateUniTo ARGS1( return TQ_GOOD; return TQ_GOOD; /* at least some characters, we don't know more */ } + PUBLIC UCTQ_t UCCanTranslateFromTo ARGS2( int, from, int, to) @@ -59,14 +61,28 @@ PUBLIC UCTQ_t UCCanTranslateFromTo ARGS2( return TQ_GOOD; } if (LYCharSet_UC[from].enc == UCT_ENC_CJK) { - if (HTCJK == NOCJK) /* use that global flag, for now */ + /* + ** CJK mode may be off (i.e., HTCJK == NOCJK) because + ** the current document is not CJK, but the check may + ** be for capability in relation to another document, + ** for which CJK mode might be turned on when retrieved. + ** Thus, when the from charset is CJK, check if the to + ** charset is CJK, and return TQ_NO or TQ_GOOD depending on + ** that. - FM + */ + if (LYCharSet_UC[to].enc != UCT_ENC_CJK) return TQ_NO; - if (HTCJK == JAPANESE && + if ((!strcmp(toname, "euc-jp") || + !strcmp(toname, "shift_jis")) && (!strcmp(fromname, "euc-jp") || - !strncmp(fromname, "iso-2022-jp",11) || !strcmp(fromname, "shift_jis"))) return TQ_GOOD; - return TQ_NO; /* if not handled by (from == to) above */ + /* + ** The other charsets for CJK were handled + ** by the (from == to) above, so we need not + ** check those. - FM + **/ + return TQ_NO; } if (!strcmp(fromname, "koi8-r")) { /* @@ -90,70 +106,78 @@ PUBLIC UCTQ_t UCCanTranslateFromTo ARGS2( } } -/* Returns YES if no tranlation necessary (because charsets -** are equal, are equivalent, etc.) +/* +** Returns YES if no tranlation necessary (because +** charsets are equal, are equivalent, etc.). */ -PUBLIC BOOL UCNeedNotTranslate ARGS2(int, from, int, to) +PUBLIC BOOL UCNeedNotTranslate ARGS2( + int, from, + int, to) { CONST char *fromname; CONST char *toname; - if (from==to) + if (from == to) return YES; if (from < 0) return NO; /* ??? */ if (LYCharSet_UC[from].enc == UCT_ENC_7BIT) { - return YES; /* only 7bit chars */ + return YES; /* Only 7bit chars. 
*/ } fromname = LYCharSet_UC[from].MIMEname; - if (0==strcmp(fromname,"x-transparent") || - 0==strcmp(fromname,"us-ascii")) { + if (!strcmp(fromname, "x-transparent") || + !strcmp(fromname, "us-ascii")) { return YES; } if (to < 0) return NO; /* ??? */ - if (to==0) { + if (to == 0) { if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1)) return YES; } toname = LYCharSet_UC[to].MIMEname; - if (0==strcmp(toname,"x-transparent")) { + if (!strcmp(toname, "x-transparent")) { return YES; } if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) { return NO; } - if (from==0) { + if (from == 0) { if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1)) return YES; } if (LYCharSet_UC[from].enc == UCT_ENC_CJK) { - if (HTCJK == NOCJK) /* use that global flag, for now */ + if (HTCJK == NOCJK) /* Use that global flag, for now. */ return NO; - if (HTCJK == JAPANESE && ( - 0==strcmp(fromname,"euc-jp") || - 0==strncmp(fromname,"iso-2022-jp",11) || - 0==strcmp(fromname,"shift_jis") - )) + if (HTCJK == JAPANESE && + /* + ** Always strip the "x-" from "x-euc-jp", + ** or convert "x-shift-jis" to "shift_jis", + ** before calling this function, and so + ** don't check for them here. - FM + */ + (!strcmp(fromname, "euc-jp") || + !strncmp(fromname, "iso-2022-jp",11) || + !strcmp(fromname, "shift_jis"))) return YES; /* ??? */ - return NO; /* if not handled by (from==to) above */ + return NO; /* If not handled by (from == to) above. */ } return NO; } /* - * The idea here is that any stage of the stream pipe which is interested - * in some charset dependent processing will call this function. - * Given input and ouptput charsets, this function will set various flags - * in a UCTransParams structure that _suggest_ to the caller what to do. - * - * Should be called once when a stage starts processing text (and the - * input and output charsets are known), or whenever one of input or - * output charsets has changed (e.g. by SGML.c stage after HTML.c stage - * has processed a META tag). 
- * The global flags (LYRawMode, HTPassEightBitRaw etc.) are currently - * not taken into account here (except for HTCJK, somewhat), it's still - * up to the caller to do something about them. - */ +** The idea here is that any stage of the stream pipe which is interested +** in some charset dependent processing will call this function. +** Given input and ouptput charsets, this function will set various flags +** in a UCTransParams structure that _suggest_ to the caller what to do. +** +** Should be called once when a stage starts processing text (and the +** input and output charsets are known), or whenever one of input or +** output charsets has changed (e.g. by SGML.c stage after HTML.c stage +** has processed a META tag). +** The global flags (LYRawMode, HTPassEightBitRaw etc.) are currently +** not taken into account here (except for HTCJK, somewhat), it's still +** up to the caller to do something about them. - KW +*/ PUBLIC void UCSetTransParams ARGS5( UCTransParams *, pT, int, cs_in, @@ -161,13 +185,26 @@ PUBLIC void UCSetTransParams ARGS5( int, cs_out, CONST LYUCcharset*, p_out) { + /* + ** Initialize this element to FALSE, and set it TRUE + ** below if we're dealing with VISCII. - FM + */ pT->trans_C0_to_uni = FALSE; + + /* + ** The "transparent" display character set is a + ** "super raw mode". - FM + */ pT->transp = (!strcmp(p_in->MIMEname, "x-transparent") || !strcmp(p_out->MIMEname, "x-transparent")); + if (pT->transp) { + /* + ** Set up the structure for "transparent". - FM + */ pT->do_cjk = FALSE; pT->decode_utf8 = FALSE; - pT->output_utf8 = FALSE; /* we may, but won't know about it */ + pT->output_utf8 = FALSE; /* We may, but won't know about it. - KW */ pT->do_8bitraw = TRUE; pT->use_raw_char_in = TRUE; pT->strip_raw_char_in = FALSE; @@ -176,41 +213,120 @@ PUBLIC void UCSetTransParams ARGS5( pT->trans_C0_to_uni = (p_in->enc == UCT_ENC_8BIT_C0 || p_out->enc == UCT_ENC_8BIT_C0); } else { + /* + ** Initialize local flags. 
- FM + */ BOOL intm_ucs = FALSE; BOOL use_ucs = FALSE; + /* + ** Set this element if we want to treat + ** the input as CJK. - FM + */ pT->do_cjk = ((p_in->enc == UCT_ENC_CJK) && (HTCJK != NOCJK)); + /* + ** Set these elements based on whether + ** we are dealing with UTF-8. - FM + */ pT->decode_utf8 = (p_in->enc == UCT_ENC_UTF8); pT->output_utf8 = (p_out->enc == UCT_ENC_UTF8); if (pT->do_cjk) { + /* + ** Set up the structure for a CJK input with + ** a CJK output (HTCJK != NOCJK). - FM + */ intm_ucs = FALSE; pT->trans_to_uni = FALSE; use_ucs = FALSE; pT->do_8bitraw = FALSE; pT->pass_160_173_raw = TRUE; - pT->use_raw_char_in = FALSE; /* not used for CJK */ + pT->use_raw_char_in = FALSE; /* Not used for CJK. - KW */ pT->repl_translated_C0 = FALSE; - pT->trans_from_uni = FALSE; /* not used for CJK */ + pT->trans_from_uni = FALSE; /* Not used for CJK. - KW */ } else { + /* + ** Set up for all other charset combinations. + ** The intm_ucs flag is set TRUE if the input + ** charset is iso-8859-1 or UTF-8, or largely + ** equivalent to them, i.e. if we have UCS without + ** having to do a table translation. + */ intm_ucs = (cs_in == 0 || pT->decode_utf8 || (p_in->codepoints & (UCT_CP_SUBSETOF_LAT1|UCT_CP_SUBSETOF_UCS2))); + /* + ** pT->trans_to_uni is set TRUE if we do not have that as + ** input already, and we can translate to Unicode. Note + ** that UTF-8 always is converted to Unicode in functions + ** that use the transformation structure, so it is + ** treated as already Unicode here. + */ pT->trans_to_uni = (!intm_ucs && UCCanUniTranslateFrom(cs_in)); + /* + ** We set this if we are translating to Unicode and + ** what normally are low value control characters in + ** fact are encoding octets for the input charset + ** (presently, this applies to VISCII). - FM + */ pT->trans_C0_to_uni = (pT->trans_to_uni && p_in->enc == UCT_ENC_8BIT_C0); + /* + ** We set this, presently, for VISCII. 
- FM + */ pT->repl_translated_C0 = (p_out->enc == UCT_ENC_8BIT_C0); + /* + ** This is a flag for whether we are dealing with koi8-r + ** as the input, and could do 8th-bit stripping for other + ** output charsets. Note that this always sets 8th-bit + ** stripping if the input charset is KOI8-R and the output + ** charset needs it, i.e., regardless of the RawMode and + ** consequent HTPassEightBitRaw setting, so you can't look + ** at raw koi8-r without selecting that as the display + ** character set (or transparent). That's just as well, + ** but worth noting for developers - FM + */ pT->strip_raw_char_in = ((!intm_ucs || (p_out->enc == UCT_ENC_7BIT) || - (p_out->repertoire & - UCT_REP_SUBSETOF_LAT1)) && + (p_out->repertoire & + UCT_REP_SUBSETOF_LAT1)) && cs_in != cs_out && !strcmp(p_in->MIMEname, "koi8-r")); + /* + ** use_ucs should be set TRUE if we have or will create + ** Unicode values for input octets or UTF multibytes. - FM + */ use_ucs = (intm_ucs || pT->trans_to_uni); + /* + ** This is set TRUE if use_ucs was set FALSE. It is + ** parallel to the HTPassEightBitRaw flag, which + ** is set TRUE or FALSE elsewhere based on the raw mode + ** setting in relation to the current Display Character + ** Set. - FM + */ pT->do_8bitraw = (!use_ucs); + /* + ** This is set TRUE when 160 and 173 should not be treated + ** specially as nbsp and shy, respectively. - FM + */ pT->pass_160_173_raw = (!use_ucs && !(p_in->like8859 & UCT_R_8859SPECL)); - pT->use_raw_char_in = (!pT->output_utf8 && cs_in == cs_out && + /* + ** This is set when the input and output charsets match, + ** and they are not ones which should go through a Unicode + ** translation process anyway. - FM + */ + pT->use_raw_char_in = (!pT->output_utf8 && + cs_in == cs_out && !pT->trans_C0_to_uni); + /* + ** This should be set TRUE when we expect to have + ** done translation to Unicode or had the equivalent + ** as input, can translate it to our output charset, + ** and normally want to do so. 
The latter depends on + ** the pT->do_8bitraw and pT->use_raw_char_in values set + ** above, but also on HTPassEightBitRaw in any functions + ** which use the transformation structure.. - FM + */ pT->trans_from_uni = (use_ucs && !pT->do_8bitraw && !pT->use_raw_char_in && UCCanTranslateUniTo(cs_out)); @@ -218,6 +334,11 @@ PUBLIC void UCSetTransParams ARGS5( } } +/* +** This function initializes the transformation +** structure by setting all its elements to +** FALSE. - KW +*/ PUBLIC void UCTransParams_clear ARGS1( UCTransParams *, pT) { @@ -308,3 +429,60 @@ PUBLIC BOOL UCPutUtf8_charstring ARGS3( return NO; return YES; } + +/* +** This function converts a Unicode (UCode_t) value +** to a multibyte UTF-8 character, which is loaded +** into the buffer received as an argument. The +** buffer should be large enough to hold at least +** seven characters (but should be declared as 8 +** to minimize byte alignment problems with some +** compilers). - FM +*/ +PUBLIC BOOL UCConvertUniToUtf8 ARGS2( + UCode_t, code, + char *, buffer) +{ + char *ch = buffer; + + if (!ch) + return NO; + + if (code <= 0 || code > 0x7fffffffL) { + *ch = '\0'; + return NO; + } + + if (code < 0x800L) { + *ch++ = (char)(0xc0 | (code>>6)); + *ch++ = (char)(0x80 | (0x3f & (code))); + *ch = '\0'; + } else if (code < 0x10000L) { + *ch++ = (char)(0xe0 | (code>>12)); + *ch++ = (char)(0x80 | (0x3f & (code>>6))); + *ch++ = (char)(0x80 | (0x3f & (code))); + *ch = '\0'; + } else if (code < 0x200000L) { + *ch++ = (char)(0xf0 | (code>>18)); + *ch++ = (char)(0x80 | (0x3f & (code>>12))); + *ch++ = (char)(0x80 | (0x3f & (code>>6))); + *ch++ = (char)(0x80 | (0x3f & (code))); + *ch = '\0'; + } else if (code < 0x4000000L) { + *ch++ = (char)(0xf8 | (code>>24)); + *ch++ = (char)(0x80 | (0x3f & (code>>18))); + *ch++ = (char)(0x80 | (0x3f & (code>>12))); + *ch++ = (char)(0x80 | (0x3f & (code>>6))); + *ch++ = (char)(0x80 | (0x3f & (code))); + *ch = '\0'; + } else { + *ch++ = (char)(0xfc | (code>>30)); + *ch++ = (char)(0x80
| (0x3f & (code>>24))); + *ch++ = (char)(0x80 | (0x3f & (code>>18))); + *ch++ = (char)(0x80 | (0x3f & (code>>12))); + *ch++ = (char)(0x80 | (0x3f & (code>>6))); + *ch++ = (char)(0x80 | (0x3f & (code))); + *ch = '\0'; + } + return YES; +} diff --git a/src/UCdomap.c b/src/UCdomap.c index 344f3057..e81ad05c 100644 --- a/src/UCdomap.c +++ b/src/UCdomap.c @@ -33,56 +33,76 @@ * Include tables & parameters. */ #ifdef VMS -#include "[.chrtrans]def7_uni.h" -#include "[.chrtrans]iso01_uni.h" -#include "[.chrtrans]iso02_uni.h" -#include "[.chrtrans]cp437_uni.h" -#include "[.chrtrans]cp850_uni.h" -#include "[.chrtrans]koi8r_uni.h" -#include "[.chrtrans]cp852_uni.h" -#include "[.chrtrans]cp866_uni.h" -#include "[.chrtrans]cp1252_uni.h" -#include "[.chrtrans]cp1250_uni.h" -#include "[.chrtrans]cp1251_uni.h" -#include "[.chrtrans]viscii_uni.h" -#include "[.chrtrans]iso03_uni.h" -#include "[.chrtrans]iso04_uni.h" -#include "[.chrtrans]iso05_uni.h" -#include "[.chrtrans]iso06_uni.h" -#include "[.chrtrans]iso07_uni.h" -#include "[.chrtrans]iso08_uni.h" -#include "[.chrtrans]iso09_uni.h" -#include "[.chrtrans]iso10_uni.h" -#include "[.chrtrans]utf8_uni.h" -#include "[.chrtrans]rfc_suni.h" -#include "[.chrtrans]mnemonic_suni.h" +#include "[.chrtrans]def7_uni.h" /* 7 bit approximations */ +#include "[.chrtrans]iso01_uni.h" /* ISO Latin 1 */ +#include "[.chrtrans]iso02_uni.h" /* ISO Latin 2 */ +#include "[.chrtrans]cp1252_uni.h" /* WinLatin1 (cp1252) */ +#include "[.chrtrans]dmcs_uni.h" /* DEC Multinational */ +#include "[.chrtrans]mac_uni.h" /* Macintosh (8 bit) */ +#include "[.chrtrans]next_uni.h" /* NeXT character set */ +#include "[.chrtrans]koi8r_uni.h" /* KOI8-R Cyrillic */ +#include "[.chrtrans]viscii_uni.h" /* Vietnamese (VISCII) */ +#include "[.chrtrans]cp437_uni.h" /* DosLatinUS (cp437) */ +#include "[.chrtrans]cp850_uni.h" /* DosLatin1 (cp850) */ +#include "[.chrtrans]cp852_uni.h" /* DosLatin2 (cp852) */ +#include "[.chrtrans]cp866_uni.h" /* DosCyrillic (cp866) */ +#include 
"[.chrtrans]cp737_uni.h" /* DosGreek (cp737) */ +#include "[.chrtrans]cp869_uni.h" /* DosGreek2 (cp869) */ +#include "[.chrtrans]cp864_uni.h" /* DosArabic (cp864) */ +#include "[.chrtrans]cp862_uni.h" /* DosHebrew (cp862) */ +#include "[.chrtrans]cp1250_uni.h" /* WinLatin2 (cp1250) */ +#include "[.chrtrans]cp1251_uni.h" /* WinCyrillic (cp1251) */ +#include "[.chrtrans]cp1253_uni.h" /* WinGreek (cp1253) */ +#include "[.chrtrans]cp1255_uni.h" /* WinHebrew (cp1255) */ +#include "[.chrtrans]cp1256_uni.h" /* WinArabic (cp1256) */ +#include "[.chrtrans]iso03_uni.h" /* ISO Latin 3 */ +#include "[.chrtrans]iso04_uni.h" /* ISO Latin 4 */ +#include "[.chrtrans]iso05_uni.h" /* ISO Latin 5 Cyrillic */ +#include "[.chrtrans]iso06_uni.h" /* ISO 8859-6 Arabic */ +#include "[.chrtrans]iso07_uni.h" /* ISO 8859-7 Greek */ +#include "[.chrtrans]iso08_uni.h" /* ISO 8859-8 Hebrew */ +#include "[.chrtrans]iso09_uni.h" /* ISO 8859-9 (Latin 5) */ +#include "[.chrtrans]iso10_uni.h" /* ISO 8859-10 */ +#include "[.chrtrans]utf8_uni.h" /* UNICODE UTF 8 */ +#include "[.chrtrans]rfc_suni.h" /* RFC 1345 w/o Intro */ +#include "[.chrtrans]mnemonic_suni.h" /* RFC 1345 Mnemonic */ #ifdef NOTDEFINED #include "[.chrtrans]mnem_suni.h" #endif /* NOTDEFINED */ #else -#include "chrtrans/def7_uni.h" -#include "chrtrans/iso01_uni.h" -#include "chrtrans/iso02_uni.h" -#include "chrtrans/cp437_uni.h" -#include "chrtrans/cp850_uni.h" -#include "chrtrans/koi8r_uni.h" -#include "chrtrans/cp852_uni.h" -#include "chrtrans/cp866_uni.h" -#include "chrtrans/cp1250_uni.h" -#include "chrtrans/cp1251_uni.h" -#include "chrtrans/cp1252_uni.h" -#include "chrtrans/viscii_uni.h" -#include "chrtrans/iso03_uni.h" -#include "chrtrans/iso04_uni.h" -#include "chrtrans/iso05_uni.h" -#include "chrtrans/iso06_uni.h" -#include "chrtrans/iso07_uni.h" -#include "chrtrans/iso08_uni.h" -#include "chrtrans/iso09_uni.h" -#include "chrtrans/iso10_uni.h" -#include "chrtrans/utf8_uni.h" -#include "chrtrans/rfc_suni.h" -#include 
"chrtrans/mnemonic_suni.h" +#include "chrtrans/def7_uni.h" /* 7 bit approximations */ +#include "chrtrans/iso01_uni.h" /* ISO Latin 1 */ +#include "chrtrans/iso02_uni.h" /* ISO Latin 2 */ +#include "chrtrans/cp1252_uni.h" /* WinLatin1 (cp1252) */ +#include "chrtrans/dmcs_uni.h" /* DEC Multinational */ +#include "chrtrans/mac_uni.h" /* Macintosh (8 bit) */ +#include "chrtrans/next_uni.h" /* NeXT character set */ +#include "chrtrans/koi8r_uni.h" /* KOI8-R Cyrillic */ +#include "chrtrans/viscii_uni.h" /* Vietnamese (VISCII) */ +#include "chrtrans/cp437_uni.h" /* DosLatinUS (cp437) */ +#include "chrtrans/cp850_uni.h" /* DosLatin1 (cp850) */ +#include "chrtrans/cp852_uni.h" /* DosLatin2 (cp852) */ +#include "chrtrans/cp866_uni.h" /* DosCyrillic (cp866) */ +#include "chrtrans/cp737_uni.h" /* DosGreek (cp737) */ +#include "chrtrans/cp869_uni.h" /* DosGreek2 (cp869) */ +#include "chrtrans/cp864_uni.h" /* DosArabic (cp864) */ +#include "chrtrans/cp862_uni.h" /* DosHebrew (cp862) */ +#include "chrtrans/cp1250_uni.h" /* WinLatin2 (cp1250) */ +#include "chrtrans/cp1251_uni.h" /* WinCyrillic (cp1251) */ +#include "chrtrans/cp1253_uni.h" /* WinGreek (cp1253) */ +#include "chrtrans/cp1255_uni.h" /* WinHebrew (cp1255) */ +#include "chrtrans/cp1256_uni.h" /* WinArabic (cp1256) */ +#include "chrtrans/iso03_uni.h" /* ISO Latin 3 */ +#include "chrtrans/iso04_uni.h" /* ISO Latin 4 */ +#include "chrtrans/iso05_uni.h" /* ISO Latin 5 Cyrillic */ +#include "chrtrans/iso06_uni.h" /* ISO 8859-6 Arabic */ +#include "chrtrans/iso07_uni.h" /* ISO 8859-7 Greek */ +#include "chrtrans/iso08_uni.h" /* ISO 8859-8 Hebrew */ +#include "chrtrans/iso09_uni.h" /* ISO 8859-9 (Latin 5) */ +#include "chrtrans/iso10_uni.h" /* ISO 8859-10 */ +#include "chrtrans/utf8_uni.h" /* UNICODE UTF 8 */ +#include "chrtrans/rfc_suni.h" /* RFC 1345 w/o Intro */ +#include "chrtrans/mnemonic_suni.h" /* RFC 1345 Mnemonic */ #ifdef NOTDEFINED #include "chrtrans/mnem_suni.h" #endif /* NOTDEFINED */ @@ -1001,7 +1021,7 @@ PUBLIC 
int UCTransUniChar ARGS2( { int rc; int UChndl_out; - int isdefault, trydefault; + int isdefault, trydefault = 0; u16 * ut; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) { @@ -1049,9 +1069,9 @@ PUBLIC int UCTransUniCharStr ARGS5( int, charset_out, int, chk_single_flag) { - int rc, src = 0, ignore_err; + int rc = -14, src = 0, ignore_err; int UChndl_out; - int isdefault, trydefault; + int isdefault, trydefault = 0; struct unimapdesc_str * repl; u16 * ut; @@ -1162,9 +1182,11 @@ PRIVATE int UC_MapGN ARGS2( UCInfo[UChndl].GN = Gn; UC_GNhandles[Gn] = UChndl; } - if (TRACE) - fprintf(stderr,"UC_Map...... Using %i <- %i (%s)\n", + if (TRACE) { + fprintf(stderr, + "UC_MapGN: Using %i <- %i (%s)\n", Gn, UChndl, UCInfo[UChndl].MIMEname); + } UC_con_set_trans(UChndl,Gn,update_flag); return Gn; } @@ -1177,7 +1199,7 @@ PUBLIC int UCTransChar ARGS3( int unicode, Gn; int rc = -4; int UChndl_in, UChndl_out; - int isdefault, trydefault; + int isdefault, trydefault = 0; u16 * ut; int upd = 0; @@ -1276,7 +1298,10 @@ PUBLIC long int UCTransToUni ARGS2( return unicode; } -PUBLIC int UCReverseTransChar ARGS3(char, ch_out, int, charset_in, int, charset_out) +PUBLIC int UCReverseTransChar ARGS3( + char, ch_out, + int, charset_in, + int, charset_out) { int Gn; int rc; @@ -1332,7 +1357,7 @@ PUBLIC int UCReverseTransChar ARGS3(char, ch_out, int, charset_in, int, charset_ } return UCTransChar(ch_out, charset_out, charset_in); } - + /* * Returns string length, or negative value for error. */ @@ -1345,9 +1370,9 @@ PUBLIC int UCTransCharStr ARGS6( int, chk_single_flag) { int unicode, Gn; - int rc, src = 0, ignore_err; + int rc = -14, src = 0, ignore_err; int UChndl_in, UChndl_out; - int isdefault, trydefault; + int isdefault, trydefault = 0; struct unimapdesc_str * repl; u16 * ut; int upd = 0; @@ -1494,7 +1519,7 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1( } if (LYhndl < 0) { /* - * Not yet found, special treatment for several CJK charsets etc... 
+ * Not yet found, special treatment for several CJK charsets, etc. * Cheating here. Also recognize UTF-8 as synonym for * UNICODE-1-1-UTF-8 (The example file for now still uses the * long name, so that's what will be used internally.). @@ -1504,30 +1529,38 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1( } if (!strncmp(UC_MIMEcharset, "iso-2022-jp", 11) || !strcmp(UC_MIMEcharset, "x-euc-jp")) { - return UCGetLYhndl_byMIME("euc-jp"); + return UCGetLYhndl_byMIME("euc-jp"); } else if (!strcmp(UC_MIMEcharset, "iso-2022-kr")) { - return UCGetLYhndl_byMIME("euc-kr"); + return UCGetLYhndl_byMIME("euc-kr"); } else if (!strcmp(UC_MIMEcharset, "gb2312") || - !strncmp(UC_MIMEcharset, "cn-gb", 5)) { - return UCGetLYhndl_byMIME("euc-cn"); - } else if (!strcmp(UC_MIMEcharset, "iso-2022-cn")) { + !strncmp(UC_MIMEcharset, "cn-gb", 5) || + !strcmp(UC_MIMEcharset, "iso-2022-cn")) { return UCGetLYhndl_byMIME("euc-cn"); } else if (!strcmp(UC_MIMEcharset, "cn-big5")) { return UCGetLYhndl_byMIME("big5"); + } else if (!strcmp(UC_MIMEcharset, "x-mac-roman") || + !strcmp(UC_MIMEcharset, "mac-roman")) { + return UCGetLYhndl_byMIME("macintosh"); + } else if (!strcmp(UC_MIMEcharset, "next") || + !strcmp(UC_MIMEcharset, "nextstep") || + !strcmp(UC_MIMEcharset, "x-nextstep")) { + return UCGetLYhndl_byMIME("x-next"); } else if (!strcmp(UC_MIMEcharset, "windows-1252")) { /* * It's not my fault that Microsoft hasn't registered * the name people are using. - KW */ return UCGetLYhndl_byMIME("iso-8859-1-windows-3.1-latin-1"); - } else if (!strncmp(UC_MIMEcharset, "ibm", 3)) { + } else if (!strncmp(UC_MIMEcharset, "ibm", 3) || + !strncmp(UC_MIMEcharset, "cp-", 3)) { CONST char * cp = UC_MIMEcharset + 3; char * cptmp = NULL; if (*cp && isdigit(*cp) && *(cp++) && isdigit(*cp) && *(cp++) && isdigit(*cp)) { /* - * For "ibmNNN<...>", try "cpNNN<...>" if not yet found. - KW + * For "ibmNNN<...>" or "cp-NNN", try "cpNNN<...>" + * if not yet found. 
- KW & FM */ StrAllocCopy(cptmp, UC_MIMEcharset + 1); cptmp[0] = 'c'; @@ -1535,6 +1568,20 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1( LYhndl = UCGetLYhndl_byMIME(cptmp); FREE(cptmp); } + } else if (UC_MIMEcharset[0] == 'c' && + UC_MIMEcharset[1] == 'p' && + UC_MIMEcharset[2] == '1' && + strlen(UC_MIMEcharset) == 6 && + isdigit((unsigned char)UC_MIMEcharset[3]) && + isdigit((unsigned char)UC_MIMEcharset[4]) && + isdigit((unsigned char)UC_MIMEcharset[5])) { + char tmp[16]; + /* + * For "cpNNNN", try "windows-NNNN" + * if not yet found. - kw + */ + sprintf(tmp, "windows-%s", UC_MIMEcharset + 2); + return UCGetLYhndl_byMIME(tmp); } else if (!strcmp(UC_MIMEcharset, "koi-8")) { /* accentsoft bogosity */ return UCGetLYhndl_byMIME("koi8-r"); } @@ -1545,7 +1592,7 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1( /* * Function UC_setup_LYCharSets_repl() tries to set up a subtable in * LYCharSets[] appropriate for this new charset, for compatibility - * with the "old method". Maybe not nice (maybe not evene necessary + * with the "old method". Maybe not nice (maybe not even necessary * any more), but it works (as far as it goes..). 
* * We try to be conservative and only allocate new memory for this @@ -1910,31 +1957,46 @@ PUBLIC void UCInit NOARGS atexit(UCcleanup_mem); UCconsole_map_init(); - UC_CHARSET_SETUP; - UC_CHARSET_SETUP_iso_8859_1; - UC_CHARSET_SETUP_iso_8859_2; - UC_CHARSET_SETUP_cp437; - UC_CHARSET_SETUP_cp850; - UC_CHARSET_SETUP_koi8_r; - - UC_CHARSET_SETUP_cp852; - UC_CHARSET_SETUP_cp866; - UC_CHARSET_SETUP_iso_8859_1_windows_; - UC_CHARSET_SETUP_windows_1250; - UC_CHARSET_SETUP_windows_1251; - UC_CHARSET_SETUP_viscii; - UC_CHARSET_SETUP_iso_8859_3; - UC_CHARSET_SETUP_iso_8859_4; - UC_CHARSET_SETUP_iso_8859_5; - UC_CHARSET_SETUP_iso_8859_6; - UC_CHARSET_SETUP_iso_8859_7; - UC_CHARSET_SETUP_iso_8859_8; - UC_CHARSET_SETUP_iso_8859_9; - UC_CHARSET_SETUP_iso_8859_10; - - UC_CHARSET_SETUP_unicode_1_1_utf_8; - UC_CHARSET_SETUP_mnemonic_ascii_0; - UC_CHARSET_SETUP_mnemonic; + UC_CHARSET_SETUP; /* us-ascii */ /* 7 bit approximations */ + + UC_CHARSET_SETUP_iso_8859_1; /* ISO Latin 1 */ + UC_CHARSET_SETUP_iso_8859_2; /* ISO Latin 2 */ + + UC_CHARSET_SETUP_iso_8859_1_windows_; /* WinLatin1 (cp1252) */ + UC_CHARSET_SETUP_dec_mcs; /* DEC Multinational */ + UC_CHARSET_SETUP_macintosh; /* Macintosh (8 bit) */ + UC_CHARSET_SETUP_x_next; /* NeXT character set */ + UC_CHARSET_SETUP_koi8_r; /* KOI8-R Cyrillic */ + + UC_CHARSET_SETUP_viscii; /* Vietnamese (VISCII) */ + + UC_CHARSET_SETUP_cp437; /* DosLatinUS (cp437) */ + UC_CHARSET_SETUP_cp850; /* DosLatin1 (cp850) */ + + UC_CHARSET_SETUP_cp852; /* DosLatin2 (cp852) */ + UC_CHARSET_SETUP_cp866; /* DosCyrillic (cp866) */ + UC_CHARSET_SETUP_cp864; /* DosArabic (cp864) */ + UC_CHARSET_SETUP_cp737; /* DosGreek (cp737) */ + UC_CHARSET_SETUP_cp869; /* DosGreek2 (cp869) */ + UC_CHARSET_SETUP_cp862; /* DosHebrew (cp862) */ + + UC_CHARSET_SETUP_windows_1250; /* WinLatin2 (cp1250) */ + UC_CHARSET_SETUP_windows_1251; /* WinCyrillic (cp1251) */ + UC_CHARSET_SETUP_windows_1253; /* WinGreek (cp1253) */ + UC_CHARSET_SETUP_windows_1255; /* WinHebrew (cp1255) */ +
UC_CHARSET_SETUP_windows_1256; /* WinArabic (cp1256) */ + UC_CHARSET_SETUP_iso_8859_3; /* ISO Latin 3 */ + UC_CHARSET_SETUP_iso_8859_4; /* ISO Latin 4 */ + UC_CHARSET_SETUP_iso_8859_5; /* ISO 8859-5 Cyrillic */ + UC_CHARSET_SETUP_iso_8859_6; /* ISO 8859-6 Arabic */ + UC_CHARSET_SETUP_iso_8859_7; /* ISO 8859-7 Greek */ + UC_CHARSET_SETUP_iso_8859_8; /* ISO 8859-8 Hebrew */ + UC_CHARSET_SETUP_iso_8859_9; /* ISO 8859-9 (Latin 5) */ + UC_CHARSET_SETUP_iso_8859_10; /* ISO 8859-10 */ + + UC_CHARSET_SETUP_unicode_1_1_utf_8; /* UNICODE UTF 8 */ + UC_CHARSET_SETUP_mnemonic_ascii_0; /* RFC 1345 w/o Intro */ + UC_CHARSET_SETUP_mnemonic; /* RFC 1345 Mnemonic */ #ifdef NOTDEFINED UC_CHARSET_SETUP_mnem; #endif /* NOTDEFINED */ diff --git a/src/chrtrans/Makefile.old b/src/chrtrans/Makefile.old index 3b75ab08..7e6c9a99 100644 --- a/src/chrtrans/Makefile.old +++ b/src/chrtrans/Makefile.old @@ -22,26 +22,36 @@ FONTMAP_INC = iso01_uni.h# default, if not set by recursive call CHRTR= -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)viscii_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp1250_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)koi8r_uni.h \ $(CHRTR)iso06_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso07_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp1253_uni.h \ $(CHRTR)iso08_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1255_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \
$(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ @@ -57,9 +67,28 @@ makeuctb: makeuctb.c UCkd.h .tbl.h: ./makeuctb $*.tbl > $@ +def7_uni.h: def7_uni.tbl makeuctb iso01_uni.h: iso01_uni.tbl makeuctb iso02_uni.h: iso02_uni.tbl makeuctb -def7_uni.h: def7_uni.tbl makeuctb +cp1252_uni.h: cp1252_uni.tbl makeuctb +dmcs_uni.h: dmcs_uni.tbl makeuctb +mac_uni.h: mac_uni.tbl makeuctb +next_uni.h: next_uni.tbl makeuctb +koi8r_uni.h: koi8r_uni.tbl makeuctb +viscii_uni.h: viscii_uni.tbl makeuctb +cp437_uni.h: cp437_uni.tbl makeuctb +cp850_uni.h: cp850_uni.tbl makeuctb +cp852_uni.h: cp852_uni.tbl makeuctb +cp866_uni.h: cp866_uni.tbl makeuctb +cp737_uni.h: cp737_uni.tbl makeuctb +cp869_uni.h: cp869_uni.tbl makeuctb +cp864_uni.h: cp864_uni.tbl makeuctb +cp862_uni.h: cp862_uni.tbl makeuctb +cp1250_uni.h: cp1250_uni.tbl makeuctb +cp1251_uni.h: cp1251_uni.tbl makeuctb +cp1253_uni.h: cp1253_uni.tbl makeuctb +cp1255_uni.h: cp1255_uni.tbl makeuctb +cp1256_uni.h: cp1256_uni.tbl makeuctb iso03_uni.h: iso03_uni.tbl makeuctb iso04_uni.h: iso04_uni.tbl makeuctb iso05_uni.h: iso05_uni.tbl makeuctb @@ -68,17 +97,10 @@ iso07_uni.h: iso07_uni.tbl makeuctb iso08_uni.h: iso08_uni.tbl makeuctb iso09_uni.h: iso09_uni.tbl makeuctb iso10_uni.h: iso10_uni.tbl makeuctb -koi8r_uni.h: koi8r_uni.tbl makeuctb -cp437_uni.h: cp437_uni.tbl makeuctb -cp850_uni.h: cp850_uni.tbl makeuctb -cp852_uni.h: cp852_uni.tbl makeuctb -cp1250_uni.h: cp1250_uni.tbl makeuctb -cp1251_uni.h: cp1251_uni.tbl makeuctb -cp1252_uni.h: cp1252_uni.tbl makeuctb utf8_uni.h: utf8_uni.tbl makeuctb +rfc_suni.h: rfc_suni.tbl makeuctb mnemonic_suni.h: mnemonic_suni.tbl makeuctb mnem_suni.h: mnem_suni.tbl makeuctb -rfc_suni.h: rfc_suni.tbl makeuctb clean: rm -f makeuctb *.o *uni.h diff --git a/src/chrtrans/README.format b/src/chrtrans/README.format index 0ec556a2..8c21714c 100644 --- a/src/chrtrans/README.format +++ b/src/chrtrans/README.format @@ -34,12 +34,15 @@ b) directives: The name for this charset in MIME syntax (one word with digits and 
some other non-letters allowed, should be IANA registered) Default - This is the default (fallback) translation table, it will be used - for Unicode -> 8bit (or 7bit) translation if no translation is found - in the specific table. + If "Y[es]" or "1", this is the default (fallback) translation table, + it will be used for Unicode -> 8bit (or 7bit) translation if no + translation is found in the specific table. FallBack Whether to use the default table if no translation is found in - this table. Normally fallback is used, "FallBack NO" disables it. + this table. Normally fallback is used, "FallBack NO" or "FallBack 0" + disables it (actually, other values than "FallBack Y[es]" or + "FallBack 1" disable it). + RawOrEnc a number which flags some special property (encoding) for this charset [see utf8.uni for example, see UCDefs.h for details]. @@ -103,7 +106,7 @@ d) string replacement definitions: * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> * and <replace> any string not containing '\n' or '\0', taken verbatim - * and <C replace> any string, with backslash having the usual C meaning + * and <C replace> any string, with backslash having the usual C meaning. Motivation: diff --git a/src/chrtrans/build-chrtrans.com b/src/chrtrans/build-chrtrans.com index 23c9a2ae..5ddb2590 100644 --- a/src/chrtrans/build-chrtrans.com +++ b/src/chrtrans/build-chrtrans.com @@ -44,13 +44,13 @@ $ THEN $ CHRcompiler := "GNUC" $ v1 = f$verify(1) $! GNUC: -$ cc := gcc 'cc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) +$ cc := gcc 'CHRcc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) $ v1 = 'f$verify(0)' $ ELSE $ CHRcompiler := "VAXC" $ v1 = f$verify(1) $! VAXC: -$ cc := cc 'cc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) +$ cc := cc 'CHRcc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) $ v1 = 'f$verify(0)' $ ENDIF $ ENDIF @@ -65,7 +65,7 @@ $ v1 = f$verify(1) $! $! Link the Lynx [.SRC.CHRTRANS]makeuctb module. $! 
-$ link/exe=makeuctb.exe'link_opts' makeuctb, - +$ link/exe=makeuctb.exe'CHRlink_opts' makeuctb, - sys$disk:[-]'CHRcompiler'.opt/opt $ v1 = 'f$verify(0)' $! @@ -76,54 +76,74 @@ $! $! Create the Lynx [.SRC.CHRTRANS] header files. $! $ makeuctb := $'CHRwhere'makeuctb -$ define/user sys$output 'CHRwhere'iso01_uni.h +$ define/user sys$output 'CHRwhere'iso01_uni.h !ISO Latin 1 $ makeuctb iso01_uni.tbl -$ define/user sys$output 'CHRwhere'iso02_uni.h -$ makeuctb iso02_uni.tbl -$ define/user sys$output 'CHRwhere'def7_uni.h +$ define/user sys$output 'CHRwhere'cp850_uni.h ! cp850 +$ makeuctb cp850_uni.tbl +$ define/user sys$output 'CHRwhere'cp1252_uni.h +$ makeuctb cp1252_uni.tbl +$ define/user sys$output 'CHRwhere'cp437_uni.h ! cp437 +$ makeuctb cp437_uni.tbl +$ define/user sys$output 'CHRwhere'dmcs_uni.h !DEC Multinational +$ makeuctb dmcs_uni.tbl +$ define/user sys$output 'CHRwhere'mac_uni.h !Macintosh (8 bit) +$ makeuctb mac_uni.tbl +$ define/user sys$output 'CHRwhere'next_uni.h !NeXT character set +$ makeuctb next_uni.tbl +$ define/user sys$output 'CHRwhere'viscii_uni.h !Vietnamese (VISCII) +$ makeuctb viscii_uni.tbl +$ define/user sys$output 'CHRwhere'def7_uni.h !7 bit approximations $ makeuctb def7_uni.tbl -$ define/user sys$output 'CHRwhere'iso03_uni.h +$ define/user sys$output 'CHRwhere'iso02_uni.h !ISO Latin 2 +$ makeuctb iso02_uni.tbl +$ define/user sys$output 'CHRwhere'cp852_uni.h !DosLatin2 (cp852) +$ makeuctb cp852_uni.tbl +$ define/user sys$output 'CHRwhere'cp1250_uni.h !WinLatin2 (cp1250) +$ makeuctb cp1250_uni.tbl +$ define/user sys$output 'CHRwhere'iso03_uni.h !ISO Latin 3 $ makeuctb iso03_uni.tbl -$ define/user sys$output 'CHRwhere'iso04_uni.h +$ define/user sys$output 'CHRwhere'iso04_uni.h !ISO Latin 4 $ makeuctb iso04_uni.tbl -$ define/user sys$output 'CHRwhere'iso05_uni.h +$ define/user sys$output 'CHRwhere'iso05_uni.h !ISO Latin 5 Cyrillic $ makeuctb iso05_uni.tbl -$ define/user sys$output 'CHRwhere'iso06_uni.h +$ define/user sys$output 
'CHRwhere'cp866_uni.h !DosCyrillic (cp866) +$ makeuctb cp866_uni.tbl +$ define/user sys$output 'CHRwhere'cp1251_uni.h !WinCyrillic (cp1251) +$ makeuctb cp1251_uni.tbl +$ define/user sys$output 'CHRwhere'koi8r_uni.h !KOI8-R Cyrillic +$ makeuctb koi8r_uni.tbl +$ define/user sys$output 'CHRwhere'iso06_uni.h !ISO 8859-6 Arabic $ makeuctb iso06_uni.tbl -$ define/user sys$output 'CHRwhere'iso07_uni.h +$ define/user sys$output 'CHRwhere'cp864_uni.h !DosArabic (cp864) +$ makeuctb cp864_uni.tbl +$ define/user sys$output 'CHRwhere'cp1256_uni.h !WinArabic (cp1256) +$ makeuctb cp1256_uni.tbl +$ define/user sys$output 'CHRwhere'iso07_uni.h !ISO 8859-7 Greek $ makeuctb iso07_uni.tbl -$ define/user sys$output 'CHRwhere'iso08_uni.h +$ define/user sys$output 'CHRwhere'cp737_uni.h !DosGreek (cp737) +$ makeuctb cp737_uni.tbl +$ define/user sys$output 'CHRwhere'cp869_uni.h !DosGreek2 (cp869) +$ makeuctb cp869_uni.tbl +$ define/user sys$output 'CHRwhere'cp1253_uni.h !WinGreek (cp1253) +$ makeuctb cp1253_uni.tbl +$ define/user sys$output 'CHRwhere'iso08_uni.h !ISO 8859-8 Hebrew $ makeuctb iso08_uni.tbl -$ define/user sys$output 'CHRwhere'iso09_uni.h +$ define/user sys$output 'CHRwhere'cp862_uni.h !DosHebrew (cp862) +$ makeuctb cp862_uni.tbl +$ define/user sys$output 'CHRwhere'cp1255_uni.h !WinHebrew (cp1255) +$ makeuctb cp1255_uni.tbl +$ define/user sys$output 'CHRwhere'iso09_uni.h !ISO 8859-9 (Latin 5) $ makeuctb iso09_uni.tbl -$ define/user sys$output 'CHRwhere'iso10_uni.h +$ define/user sys$output 'CHRwhere'iso10_uni.h !ISO 8859-10 $ makeuctb iso10_uni.tbl -$ define/user sys$output 'CHRwhere'koi8r_uni.h -$ makeuctb koi8r_uni.tbl -$ define/user sys$output 'CHRwhere'cp437_uni.h -$ makeuctb cp437_uni.tbl -$ define/user sys$output 'CHRwhere'cp850_uni.h -$ makeuctb cp850_uni.tbl -$ define/user sys$output 'CHRwhere'cp852_uni.h -$ makeuctb cp852_uni.tbl -$ define/user sys$output 'CHRwhere'cp866_uni.h -$ makeuctb cp866_uni.tbl -$ define/user sys$output 'CHRwhere'cp1250_uni.h -$ makeuctb 
cp1250_uni.tbl -$ define/user sys$output 'CHRwhere'cp1251_uni.h -$ makeuctb cp1251_uni.tbl -$ define/user sys$output 'CHRwhere'cp1252_uni.h -$ makeuctb cp1252_uni.tbl -$ define/user sys$output 'CHRwhere'viscii_uni.h -$ makeuctb viscii_uni.tbl -$ define/user sys$output 'CHRwhere'utf8_uni.h +$ define/user sys$output 'CHRwhere'utf8_uni.h !UNICODE UTF 8 $ makeuctb utf8_uni.tbl -$ define/user sys$output 'CHRwhere'mnemonic_suni.h +$ define/user sys$output 'CHRwhere'rfc_suni.h !RFC 1345 w/o Intro +$ makeuctb rfc_suni.tbl +$ define/user sys$output 'CHRwhere'mnemonic_suni.h !RFC 1345 Mnemonic $ makeuctb mnemonic_suni.tbl -$ define/user sys$output 'CHRwhere'mnem_suni.h +$ define/user sys$output 'CHRwhere'mnem_suni.h !(not used) $ makeuctb mnem_suni.tbl -$ define/user sys$output 'CHRwhere'rfc_suni.h -$ makeuctb rfc_suni.tbl $ v1 = 'f$verify(0)' $ exit $! diff --git a/src/chrtrans/cp1250_uni.tbl b/src/chrtrans/cp1250_uni.tbl index 75416b67..6f148398 100644 --- a/src/chrtrans/cp1250_uni.tbl +++ b/src/chrtrans/cp1250_uni.tbl @@ -2,7 +2,7 @@ Mwindows-1250 #Name as a Display Charset (used on Options screen) -O MS Windows CP 1250 +OptionName WinLatin2 (cp1250) # # Name: cp1250_WinLatin2 to Unicode table diff --git a/src/chrtrans/cp1252_uni.tbl b/src/chrtrans/cp1252_uni.tbl index e9660295..22fa585f 100644 --- a/src/chrtrans/cp1252_uni.tbl +++ b/src/chrtrans/cp1252_uni.tbl @@ -1,14 +1,12 @@ #Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". +#There has to be exactly one table marked as "default". D0 # #The MIME name of this charset. 
Miso-8859-1-windows-3.1-latin-1 #Name as a Display Charset (used on Options screen) -O MS Windows CP 1252 +O WinLatin1 (cp1252) # # Name: cp1252_WinLatin1 to Unicode table diff --git a/src/chrtrans/cp1253_uni.tbl b/src/chrtrans/cp1253_uni.tbl new file mode 100644 index 00000000..176ba7e6 --- /dev/null +++ b/src/chrtrans/cp1253_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +MIMEname windows-1253 + +#Name as a Display Charset (used on Options screen) +OWinGreek (cp1253) + +# Name: cp1253_WinGreek to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1253_WinGreek code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1253_WinGreek order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +#0x88 #UNDEFINED +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +#0x9A #UNDEFINED +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+0385 #GREEK DIALYTIKA TONOS +0xA2 U+0386 #GREEK 
CAPITAL LETTER ALPHA WITH TONOS +0xA3 U+00A3 #POUND SIGN +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+2015 #HORIZONTAL BAR +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+0384 #GREEK TONOS +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0xB9 U+0389 #GREEK CAPITAL LETTER ETA WITH TONOS +0xBA U+038A #GREEK CAPITAL LETTER IOTA WITH TONOS +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+038C #GREEK CAPITAL LETTER OMICRON WITH TONOS +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+038E #GREEK CAPITAL LETTER UPSILON WITH TONOS +0xBF U+038F #GREEK CAPITAL LETTER OMEGA WITH TONOS +0xC0 U+0390 #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xC1 U+0391 #GREEK CAPITAL LETTER ALPHA +0xC2 U+0392 #GREEK CAPITAL LETTER BETA +0xC3 U+0393 #GREEK CAPITAL LETTER GAMMA +0xC4 U+0394 #GREEK CAPITAL LETTER DELTA +0xC5 U+0395 #GREEK CAPITAL LETTER EPSILON +0xC6 U+0396 #GREEK CAPITAL LETTER ZETA +0xC7 U+0397 #GREEK CAPITAL LETTER ETA +0xC8 U+0398 #GREEK CAPITAL LETTER THETA +0xC9 U+0399 #GREEK CAPITAL LETTER IOTA +0xCA U+039A #GREEK CAPITAL LETTER KAPPA +0xCB U+039B #GREEK CAPITAL LETTER LAMDA +0xCC U+039C #GREEK CAPITAL LETTER MU +0xCD U+039D #GREEK CAPITAL LETTER NU +0xCE U+039E #GREEK CAPITAL LETTER XI +0xCF U+039F #GREEK CAPITAL LETTER OMICRON +0xD0 U+03A0 #GREEK CAPITAL LETTER PI +0xD1 U+03A1 #GREEK CAPITAL LETTER RHO +#0xD2 #UNDEFINED +0xD3 U+03A3 #GREEK CAPITAL LETTER SIGMA +0xD4 U+03A4 #GREEK CAPITAL LETTER TAU +0xD5 U+03A5 #GREEK CAPITAL LETTER UPSILON +0xD6 U+03A6 #GREEK CAPITAL LETTER PHI +0xD7 U+03A7 #GREEK CAPITAL LETTER CHI +0xD8 
U+03A8 #GREEK CAPITAL LETTER PSI +0xD9 U+03A9 #GREEK CAPITAL LETTER OMEGA +0xDA U+03AA #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xDB U+03AB #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xDC U+03AC #GREEK SMALL LETTER ALPHA WITH TONOS +0xDD U+03AD #GREEK SMALL LETTER EPSILON WITH TONOS +0xDE U+03AE #GREEK SMALL LETTER ETA WITH TONOS +0xDF U+03AF #GREEK SMALL LETTER IOTA WITH TONOS +0xE0 U+03B0 #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xE1 U+03B1 #GREEK SMALL LETTER ALPHA +0xE2 U+03B2 #GREEK SMALL LETTER BETA +0xE3 U+03B3 #GREEK SMALL LETTER GAMMA +0xE4 U+03B4 #GREEK SMALL LETTER DELTA +0xE5 U+03B5 #GREEK SMALL LETTER EPSILON +0xE6 U+03B6 #GREEK SMALL LETTER ZETA +0xE7 U+03B7 #GREEK SMALL LETTER ETA +0xE8 U+03B8 #GREEK SMALL LETTER THETA +0xE9 U+03B9 #GREEK SMALL LETTER IOTA +0xEA U+03BA #GREEK SMALL LETTER KAPPA +0xEB U+03BB #GREEK SMALL LETTER LAMDA +0xEC U+03BC #GREEK SMALL LETTER MU +0xED U+03BD #GREEK SMALL LETTER NU +0xEE U+03BE #GREEK SMALL LETTER XI +0xEF U+03BF #GREEK SMALL LETTER OMICRON +0xF0 U+03C0 #GREEK SMALL LETTER PI +0xF1 U+03C1 #GREEK SMALL LETTER RHO +0xF2 U+03C2 #GREEK SMALL LETTER FINAL SIGMA +0xF3 U+03C3 #GREEK SMALL LETTER SIGMA +0xF4 U+03C4 #GREEK SMALL LETTER TAU +0xF5 U+03C5 #GREEK SMALL LETTER UPSILON +0xF6 U+03C6 #GREEK SMALL LETTER PHI +0xF7 U+03C7 #GREEK SMALL LETTER CHI +0xF8 U+03C8 #GREEK SMALL LETTER PSI +0xF9 U+03C9 #GREEK SMALL LETTER OMEGA +0xFA U+03CA #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xFB U+03CB #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xFC U+03CC #GREEK SMALL LETTER OMICRON WITH TONOS +0xFD U+03CD #GREEK SMALL LETTER UPSILON WITH TONOS +0xFE U+03CE #GREEK SMALL LETTER OMEGA WITH TONOS +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp1255_uni.tbl b/src/chrtrans/cp1255_uni.tbl new file mode 100644 index 00000000..eb446da8 --- /dev/null +++ b/src/chrtrans/cp1255_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. 
+MIMEname windows-1255 + +#Name as a Display Charset (used on Options screen). +OWinHebrew (cp1255) + +# Name: cp1255_WinHebrew to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1255_WinHebrew code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1255_WinHebrew order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 U+02DC #SPACING TILDE +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE +#0xA1 #UNDEFINED +0xA2 U+00A2 #CENT SIGN +0xA3 U+00A3 #POUND SIGN +0xA4 U+20AA #NEW SHEQEL SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #SPACING DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING 
MACRON +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PARAGRAPH SIGN +0xB7 U+00B7 #MIDDLE DOT +#0xB8 #UNDEFINED +0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE +#0xBA #UNDEFINED +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS +#0xBF #UNDEFINED +0xC0 U+05B0 #HEBREW POINT SHEVA +0xC1 U+05B1 #HEBREW POINT HATAF SEGOL +0xC2 U+05B2 #HEBREW POINT HATAF PATAH +0xC3 U+05B3 #HEBREW POINT HATAF QAMATS +0xC4 U+05B4 #HEBREW POINT HIRIQ +0xC5 U+05B5 #HEBREW POINT TSERE +0xC6 U+05B6 #HEBREW POINT SEGOL +0xC7 U+05B7 #HEBREW POINT PATAH +0xC8 U+05B8 #HEBREW POINT QAMATS +0xC9 U+05B9 #HEBREW POINT HOLAM +0xCA U+05BA #HEBREW POINT +0xCB U+05BB #HEBREW POINT QUBUTS +0xCC U+05BC #HEBREW POINT DAGESH +0xCD U+05BD #HEBREW POINT METEG +0xCE U+05BE #HEBREW PUNCTUATION MAQAF +0xCF U+05BF #HEBREW POINT RAFE +0xD0 U+05C0 #HEBREW POINT PASEQ +0xD1 U+05C1 #HEBREW POINT SHIN DOT +0xD2 U+05C2 #HEBREW POINT SIN DOT +0xD3 U+05C3 #HEBREW PUNCTUATION SOF PASUQ +0xD4 U+05F0 #HEBREW LETTER DOUBLE VAV +0xD5 U+05F1 #HEBREW LETTER VAV YOD +0xD6 U+05F2 #HEBREW LETTER DOUBLE YOD +#0xD7 #UNDEFINED +#0xD8 #UNDEFINED +#0xD9 #UNDEFINED +#0xDA #UNDEFINED +#0xDB #UNDEFINED +#0xDC #UNDEFINED +#0xDD #UNDEFINED +#0xDE #UNDEFINED +#0xDF #UNDEFINED +0xE0 U+05D0 #HEBREW LETTER ALEF +0xE1 U+05D1 #HEBREW LETTER BET +0xE2 U+05D2 #HEBREW LETTER GIMEL +0xE3 U+05D3 #HEBREW LETTER DALET +0xE4 U+05D4 #HEBREW LETTER HE +0xE5 U+05D5 #HEBREW LETTER VAV +0xE6 U+05D6 #HEBREW LETTER ZAYIN +0xE7 U+05D7 #HEBREW LETTER HET +0xE8 U+05D8 #HEBREW LETTER TET +0xE9 U+05D9 #HEBREW LETTER YOD +0xEA U+05DA #HEBREW LETTER FINAL KAF +0xEB U+05DB #HEBREW LETTER KAF +0xEC U+05DC #HEBREW LETTER LAMED +0xED U+05DD #HEBREW LETTER FINAL MEM +0xEE U+05DE #HEBREW LETTER MEM +0xEF U+05DF #HEBREW LETTER FINAL NUN +0xF0 
U+05E0 #HEBREW LETTER NUN +0xF1 U+05E1 #HEBREW LETTER SAMEKH +0xF2 U+05E2 #HEBREW LETTER AYIN +0xF3 U+05E3 #HEBREW LETTER FINAL PE +0xF4 U+05E4 #HEBREW LETTER PE +0xF5 U+05E5 #HEBREW LETTER FINAL TSADI +0xF6 U+05E6 #HEBREW LETTER TSADI +0xF7 U+05E7 #HEBREW LETTER QOF +0xF8 U+05E8 #HEBREW LETTER RESH +0xF9 U+05E9 #HEBREW LETTER SHIN +0xFA U+05EA #HEBREW LETTER TAV +#0xFB #UNDEFINED +#0xFC #UNDEFINED +0xFD U+200E #LEFT-TO-RIGHT MARK +0xFE U+200F #RIGHT-TO-LEFT MARK +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp1256_uni.tbl b/src/chrtrans/cp1256_uni.tbl new file mode 100644 index 00000000..900c72c3 --- /dev/null +++ b/src/chrtrans/cp1256_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +MIMEname windows-1256 + +#Name as a Display Charset (used on Options screen). +OWinArabic (cp1256) + +# Name: cp1256_WinArabic to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1256_WinArabic code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1256_WinArabic order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +0x81 U+067E #ARABIC TAA WITH THREE DOTS BELOW +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +0x8C U+0152 #LATIN CAPITAL LETTER O E +0x8D U+0686 #ARABIC HAA WITH MIDDLE THREE DOTS DOWNWARD +0x8E U+0698 #ARABIC RA WITH THREE DOTS ABOVE +#0x8F #UNDEFINED +0x90 U+06AF #ARABIC GAF +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET +0x9C U+0153 #LATIN SMALL LETTER O E +0x9D U+200C #ZERO WIDTH NON-JOINER +0x9E U+200D #ZERO WIDTH JOINER +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE +0xA1 U+060C #ARABIC COMMA +0xA2 U+00A2 #CENT SIGN +0xA3 U+00A3 #POUND SIGN +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #SPACING DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING MACRON +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 
#SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PARAGRAPH SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+00B8 #SPACING CEDILLA +0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE +0xBA U+061B #ARABIC SEMICOLON +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS +0xBF U+061F #ARABIC QUESTION MARK +#0xC0 #UNDEFINED +0xC1 U+0621 #ARABIC LETTER HAMZAH +0xC2 U+0622 #ARABIC LETTER MADDAH ON ALEF +0xC3 U+0623 #ARABIC LETTER HAMZAH ON ALEF +0xC4 U+0624 #ARABIC LETTER HAMZAH ON WAW +0xC5 U+0625 #ARABIC LETTER HAMZAH UNDER ALEF +0xC6 U+0626 #ARABIC LETTER HAMZAH ON YA +0xC7 U+0627 #ARABIC LETTER ALEF +0xC8 U+0628 #ARABIC LETTER BAA +0xC9 U+0629 #ARABIC LETTER TAA MARBUTAH +0xCA U+062A #ARABIC LETTER TAA +0xCB U+062B #ARABIC LETTER THAA +0xCC U+062C #ARABIC LETTER JEEM +0xCD U+062D #ARABIC LETTER HAA +0xCE U+062E #ARABIC LETTER KHAA +0xCF U+062F #ARABIC LETTER DAL +0xD0 U+0630 #ARABIC LETTER THAL +0xD1 U+0631 #ARABIC LETTER RA +0xD2 U+0632 #ARABIC LETTER ZAIN +0xD3 U+0633 #ARABIC LETTER SEEN +0xD4 U+0634 #ARABIC LETTER SHEEN +0xD5 U+0635 #ARABIC LETTER SAD +0xD6 U+0636 #ARABIC LETTER DAD +0xD7 U+00D7 #MULTIPLICATION SIGN +0xD8 U+0637 #ARABIC LETTER TAH +0xD9 U+0638 #ARABIC LETTER DHAH +0xDA U+0639 #ARABIC LETTER AIN +0xDB U+063A #ARABIC LETTER GHAIN +0xDC U+0640 #ARABIC TATWEEL +0xDD U+0641 #ARABIC LETTER FA +0xDE U+0642 #ARABIC LETTER QAF +0xDF U+0643 #ARABIC LETTER CAF +0xE0 U+00E0 #LATIN SMALL LETTER A GRAVE +0xE1 U+0644 #ARABIC LETTER LAM +0xE2 U+00E2 #LATIN SMALL LETTER A CIRCUMFLEX +0xE3 U+0645 #ARABIC LETTER MEEM +0xE4 U+0646 #ARABIC LETTER NOON +0xE5 U+0647 #ARABIC LETTER HA +0xE6 U+0648 #ARABIC LETTER WAW +0xE7 U+00E7 #LATIN SMALL LETTER C CEDILLA +0xE8 U+00E8 #LATIN SMALL LETTER E GRAVE +0xE9 U+00E9 #LATIN SMALL LETTER E ACUTE +0xEA U+00EA #LATIN SMALL LETTER E CIRCUMFLEX +0xEB U+00EB #LATIN SMALL LETTER E DIAERESIS +0xEC U+0649 #ARABIC LETTER ALEF MAQSURAH 
+0xED U+064A #ARABIC LETTER YA +0xEE U+00EE #LATIN SMALL LETTER I CIRCUMFLEX +0xEF U+00EF #LATIN SMALL LETTER I DIAERESIS +0xF0 U+064B #ARABIC FATHATAN +0xF1 U+064C #ARABIC DAMMATAN +0xF2 U+064D #ARABIC KASRATAN +0xF3 U+064E #ARABIC FATHAH +0xF4 U+00F4 #LATIN SMALL LETTER O CIRCUMFLEX +0xF5 U+064F #ARABIC DAMMAH +0xF6 U+0650 #ARABIC KASRAH +0xF7 U+00F7 #DIVISION SIGN +0xF8 U+0651 #ARABIC SHADDAH +0xF9 U+00F9 #LATIN SMALL LETTER U GRAVE +0xFA U+0652 #ARABIC SUKUN +0xFB U+00FB #LATIN SMALL LETTER U CIRCUMFLEX +0xFC U+00FC #LATIN SMALL LETTER U DIAERESIS +0xFD U+200E #LEFT-TO-RIGHT MARK +0xFE U+200F #RIGHT-TO-LEFT MARK +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl index 6bfbab22..56c937a5 100644 --- a/src/chrtrans/cp437_uni.tbl +++ b/src/chrtrans/cp437_uni.tbl @@ -18,7 +18,7 @@ OIBM PC character set # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1255_WinHebrew code (in hex) +# Column #1 is the cp437 code (in hex) # Column #2 is the Unicode (in hex as U+XXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # diff --git a/src/chrtrans/cp737_uni.tbl b/src/chrtrans/cp737_uni.tbl new file mode 100644 index 00000000..b1d44adf --- /dev/null +++ b/src/chrtrans/cp737_uni.tbl @@ -0,0 +1,158 @@ +#The MIME name of this charset. +Mcp737 + +#Name as a Display Charset (used on Options screen) +ODosGreek (cp737) + +# +# Name: cp737_DOSGreek to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp737_DOSGreek code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp737_DOSGreek order +# +################## + +0x20-0x7f idem +# +0x80 U+0391 #GREEK CAPITAL LETTER ALPHA +0x81 U+0392 #GREEK CAPITAL LETTER BETA +0x82 U+0393 #GREEK CAPITAL LETTER GAMMA +0x83 U+0394 #GREEK CAPITAL LETTER DELTA +0x84 U+0395 #GREEK CAPITAL LETTER EPSILON +0x85 U+0396 #GREEK CAPITAL LETTER ZETA +0x86 U+0397 #GREEK CAPITAL LETTER ETA +0x87 U+0398 #GREEK CAPITAL LETTER THETA +0x88 U+0399 #GREEK CAPITAL LETTER IOTA +0x89 U+039a #GREEK CAPITAL LETTER KAPPA +0x8a U+039b #GREEK CAPITAL LETTER LAMDA +0x8b U+039c #GREEK CAPITAL LETTER MU +0x8c U+039d #GREEK CAPITAL LETTER NU +0x8d U+039e #GREEK CAPITAL LETTER XI +0x8e U+039f #GREEK CAPITAL LETTER OMICRON +0x8f U+03a0 #GREEK CAPITAL LETTER PI +0x90 U+03a1 #GREEK CAPITAL LETTER RHO +0x91 U+03a3 #GREEK CAPITAL LETTER SIGMA +0x92 U+03a4 #GREEK CAPITAL LETTER TAU +0x93 U+03a5 #GREEK CAPITAL LETTER UPSILON +0x94 U+03a6 #GREEK CAPITAL LETTER PHI +0x95 U+03a7 #GREEK CAPITAL LETTER CHI +0x96 U+03a8 #GREEK CAPITAL LETTER PSI +0x97 U+03a9 #GREEK CAPITAL LETTER OMEGA +0x98 U+03b1 #GREEK SMALL LETTER ALPHA +0x99 U+03b2 #GREEK SMALL LETTER BETA +0x9a U+03b3 #GREEK SMALL LETTER GAMMA +0x9b U+03b4 #GREEK SMALL LETTER DELTA +0x9c U+03b5 #GREEK SMALL LETTER EPSILON +0x9d U+03b6 #GREEK SMALL LETTER ZETA +0x9e U+03b7 #GREEK SMALL LETTER ETA +0x9f U+03b8 #GREEK SMALL LETTER THETA +0xa0 U+03b9 #GREEK SMALL LETTER IOTA +0xa1 U+03ba #GREEK SMALL LETTER KAPPA +0xa2 U+03bb #GREEK SMALL LETTER LAMDA +0xa3 U+03bc #GREEK SMALL LETTER MU +0xa4 U+03bd #GREEK SMALL LETTER NU +0xa5 U+03be #GREEK SMALL LETTER XI +0xa6 U+03bf #GREEK SMALL LETTER OMICRON +0xa7 U+03c0 #GREEK SMALL LETTER PI +0xa8 U+03c1 #GREEK SMALL LETTER RHO +0xa9 U+03c3 #GREEK SMALL LETTER SIGMA 
+0xaa U+03c2 #GREEK SMALL LETTER FINAL SIGMA +0xab U+03c4 #GREEK SMALL LETTER TAU +0xac U+03c5 #GREEK SMALL LETTER UPSILON +0xad U+03c6 #GREEK SMALL LETTER PHI +0xae U+03c7 #GREEK SMALL LETTER CHI +0xaf U+03c8 #GREEK SMALL LETTER PSI +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 U+2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 U+2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 U+2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe U+255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 U+255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 U+2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 U+2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 U+2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 
+0xd3 U+2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 U+2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 U+2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 U+2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 U+256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 U+256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+258c #LEFT HALF BLOCK +0xde U+2590 #RIGHT HALF BLOCK +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03c9 #GREEK SMALL LETTER OMEGA +0xe1 U+03ac #GREEK SMALL LETTER ALPHA WITH TONOS +0xe2 U+03ad #GREEK SMALL LETTER EPSILON WITH TONOS +0xe3 U+03ae #GREEK SMALL LETTER ETA WITH TONOS +0xe4 U+03ca #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xe5 U+03af #GREEK SMALL LETTER IOTA WITH TONOS +0xe6 U+03cc #GREEK SMALL LETTER OMICRON WITH TONOS +0xe7 U+03cd #GREEK SMALL LETTER UPSILON WITH TONOS +0xe8 U+03cb #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xe9 U+03ce #GREEK SMALL LETTER OMEGA WITH TONOS +0xea U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS +0xeb U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0xec U+0389 #GREEK CAPITAL LETTER ETA WITH TONOS +0xed U+038a #GREEK CAPITAL LETTER IOTA WITH TONOS +0xee U+038c #GREEK CAPITAL LETTER OMICRON WITH TONOS +0xef U+038e #GREEK CAPITAL LETTER UPSILON WITH TONOS +0xf0 U+038f #GREEK CAPITAL LETTER OMEGA WITH TONOS +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+2265 #GREATER-THAN OR EQUAL TO +0xf3 U+2264 #LESS-THAN OR EQUAL TO +0xf4 U+03aa #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xf5 U+03ab #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+2248 #ALMOST EQUAL TO +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+2219 #BULLET OPERATOR +0xfa U+00b7 #MIDDLE DOT +0xfb U+221a #SQUARE ROOT +0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd U+00b2 #SUPERSCRIPT TWO +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: 
+U+2122:(TM) diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl index 96de277b..759bf950 100644 --- a/src/chrtrans/cp850_uni.tbl +++ b/src/chrtrans/cp850_uni.tbl @@ -1,7 +1,5 @@ #Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". +#There has to be exactly one table marked as "default". D0 # #The MIME name of this charset. diff --git a/src/chrtrans/cp862_uni.tbl b/src/chrtrans/cp862_uni.tbl new file mode 100644 index 00000000..f1a7dd02 --- /dev/null +++ b/src/chrtrans/cp862_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp862 + +#Name as a Display Charset (used on Options screen). +ODosHebrew (cp862) + +# Name: cp862_DOSHebrew to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp862_DOSHebrew code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp862_DOSHebrew order +# +################## + +0x20-0x7f idem +# +0x80 U+05d0 #HEBREW LETTER ALEF +0x81 U+05d1 #HEBREW LETTER BET +0x82 U+05d2 #HEBREW LETTER GIMEL +0x83 U+05d3 #HEBREW LETTER DALET +0x84 U+05d4 #HEBREW LETTER HE +0x85 U+05d5 #HEBREW LETTER VAV +0x86 U+05d6 #HEBREW LETTER ZAYIN +0x87 U+05d7 #HEBREW LETTER HET +0x88 U+05d8 #HEBREW LETTER TET +0x89 U+05d9 #HEBREW LETTER YOD +0x8a U+05da #HEBREW LETTER FINAL KAF +0x8b U+05db #HEBREW LETTER KAF +0x8c U+05dc #HEBREW LETTER LAMED +0x8d U+05dd #HEBREW LETTER FINAL MEM +0x8e U+05de #HEBREW LETTER MEM +0x8f U+05df #HEBREW LETTER FINAL NUN +0x90 U+05e0 #HEBREW LETTER NUN +0x91 U+05e1 #HEBREW LETTER SAMEKH +0x92 U+05e2 #HEBREW LETTER AYIN +0x93 U+05e3 
#HEBREW LETTER FINAL PE +0x94 U+05e4 #HEBREW LETTER PE +0x95 U+05e5 #HEBREW LETTER FINAL TSADI +0x96 U+05e6 #HEBREW LETTER TSADI +0x97 U+05e7 #HEBREW LETTER QOF +0x98 U+05e8 #HEBREW LETTER RESH +0x99 U+05e9 #HEBREW LETTER SHIN +0x9a U+05ea #HEBREW LETTER TAV +0x9b U+00a2 #CENT SIGN +0x9c U+00a3 #POUND SIGN +0x9d U+00a5 #YEN SIGN +0x9e U+20a7 #PESETA SIGN +0x9f U+0192 #LATIN SMALL LETTER F WITH HOOK +0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 U+00f1 #LATIN SMALL LETTER N WITH TILDE +0xa5 U+00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xa6 U+00aa #FEMININE ORDINAL INDICATOR +0xa7 U+00ba #MASCULINE ORDINAL INDICATOR +0xa8 U+00bf #INVERTED QUESTION MARK +0xa9 U+2310 #REVERSED NOT SIGN +0xaa U+00ac #NOT SIGN +0xab U+00bd #VULGAR FRACTION ONE HALF +0xac U+00bc #VULGAR FRACTION ONE QUARTER +0xad U+00a1 #INVERTED EXCLAMATION MARK +0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 U+2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 U+2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 U+2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe U+255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX 
DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 U+255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 U+2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 U+2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 U+2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 U+2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 U+2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 U+2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 U+2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 U+256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 U+256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+258c #LEFT HALF BLOCK +0xde U+2590 #RIGHT HALF BLOCK +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03b1 #GREEK SMALL LETTER ALPHA +0xe1 U+00df #LATIN SMALL LETTER SHARP S (GERMAN) +0xe2 U+0393 #GREEK CAPITAL LETTER GAMMA +0xe3 U+03c0 #GREEK SMALL LETTER PI +0xe4 U+03a3 #GREEK CAPITAL LETTER SIGMA +0xe5 U+03c3 #GREEK SMALL LETTER SIGMA +0xe6 U+00b5 #MICRO SIGN +0xe7 U+03c4 #GREEK SMALL LETTER TAU +0xe8 U+03a6 #GREEK CAPITAL LETTER PHI +0xe9 U+0398 #GREEK CAPITAL LETTER THETA +0xea U+03a9 #GREEK CAPITAL LETTER OMEGA +0xeb U+03b4 #GREEK SMALL LETTER DELTA +0xec U+221e #INFINITY +0xed U+03c6 #GREEK SMALL LETTER PHI +0xee U+03b5 #GREEK SMALL 
LETTER EPSILON +0xef U+2229 #INTERSECTION +0xf0 U+2261 #IDENTICAL TO +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+2265 #GREATER-THAN OR EQUAL TO +0xf3 U+2264 #LESS-THAN OR EQUAL TO +0xf4 U+2320 #TOP HALF INTEGRAL +0xf5 U+2321 #BOTTOM HALF INTEGRAL +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+2248 #ALMOST EQUAL TO +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+2219 #BULLET OPERATOR +0xfa U+00b7 #MIDDLE DOT +0xfb U+221a #SQUARE ROOT +0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd U+00b2 #SUPERSCRIPT TWO +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp864_uni.tbl b/src/chrtrans/cp864_uni.tbl new file mode 100644 index 00000000..14097a6e --- /dev/null +++ b/src/chrtrans/cp864_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp864 + +#Name as a Display Charset (used on Options screen). +ODosArabic (cp864) + +# Name: cp864_DOSArabic to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp864_DOSArabic code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp864_DOSArabic order +# +################## + +0x20-0x7f idem +# +0x80 U+00b0 #DEGREE SIGN +0x81 U+00b7 #MIDDLE DOT +0x82 U+2219 #BULLET OPERATOR +0x83 U+221a #SQUARE ROOT +0x84 U+2592 #MEDIUM SHADE +0x85 U+2500 #FORMS LIGHT HORIZONTAL +0x86 U+2502 #FORMS LIGHT VERTICAL +0x87 U+253c #FORMS LIGHT VERTICAL AND HORIZONTAL +0x88 U+2524 #FORMS LIGHT VERTICAL AND LEFT +0x89 U+252c #FORMS LIGHT DOWN AND HORIZONTAL +0x8a U+251c #FORMS LIGHT VERTICAL AND RIGHT +0x8b U+2534 #FORMS LIGHT UP AND HORIZONTAL +0x8c U+2510 #FORMS LIGHT DOWN AND LEFT +0x8d U+250c #FORMS LIGHT DOWN AND RIGHT +0x8e U+2514 #FORMS LIGHT UP AND RIGHT +0x8f U+2518 #FORMS LIGHT UP AND LEFT +0x90 U+03b2 #GREEK SMALL BETA +0x91 U+221e #INFINITY +0x92 U+03c6 #GREEK SMALL PHI +0x93 U+00b1 #PLUS-OR-MINUS SIGN +0x94 U+00bd #FRACTION 1/2 +0x95 U+00bc #FRACTION 1/4 +0x96 U+2248 #ALMOST EQUAL TO +0x97 U+00ab #LEFT POINTING GUILLEMET +0x98 U+00bb #RIGHT POINTING GUILLEMET +0x99 U+fef7 #ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM +0x9a U+fef8 #ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM +#0x9b #UNDEFINED +#0x9c #UNDEFINED +0x9d U+fefb #ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM +0x9e U+fefc #ARABIC LIGATURE LAM WITH ALEF FINAL FORM +#0x9f #UNDEFINED +0xa0 U+00a0 #NON-BREAKING SPACE +0xa1 U+00ad #SOFT HYPHEN +0xa2 U+fe82 #ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM +0xa3 U+00a3 #POUND SIGN +0xa4 U+00a4 #CURRENCY SIGN +0xa5 U+fe84 #ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM +#0xa6 #UNDEFINED +#0xa7 #UNDEFINED +0xa8 U+fe8e #ARABIC LETTER ALEF FINAL FORM +0xa9 U+fe8f #ARABIC LETTER BEH ISOLATED FORM +0xaa U+fe95 #ARABIC LETTER TEH ISOLATED FORM +0xab U+fe99 #ARABIC LETTER THEH ISOLATED FORM +0xac U+060c 
#ARABIC COMMA +0xad U+fe9d #ARABIC LETTER JEEM ISOLATED FORM +0xae U+fea1 #ARABIC LETTER HAH ISOLATED FORM +0xaf U+fea5 #ARABIC LETTER KHAH ISOLATED FORM +0xb0 U+0660 #ARABIC-INDIC DIGIT ZERO +0xb1 U+0661 #ARABIC-INDIC DIGIT ONE +0xb2 U+0662 #ARABIC-INDIC DIGIT TWO +0xb3 U+0663 #ARABIC-INDIC DIGIT THREE +0xb4 U+0664 #ARABIC-INDIC DIGIT FOUR +0xb5 U+0665 #ARABIC-INDIC DIGIT FIVE +0xb6 U+0666 #ARABIC-INDIC DIGIT SIX +0xb7 U+0667 #ARABIC-INDIC DIGIT SEVEN +0xb8 U+0668 #ARABIC-INDIC DIGIT EIGHT +0xb9 U+0669 #ARABIC-INDIC DIGIT NINE +0xba U+fed1 #ARABIC LETTER FEH ISOLATED FORM +0xbb U+061b #ARABIC SEMICOLON +0xbc U+feb1 #ARABIC LETTER SEEN ISOLATED FORM +0xbd U+feb5 #ARABIC LETTER SHEEN ISOLATED FORM +0xbe U+feb9 #ARABIC LETTER SAD ISOLATED FORM +0xbf U+061f #ARABIC QUESTION MARK +0xc0 U+00a2 #CENT SIGN +0xc1 U+fe80 #ARABIC LETTER HAMZA ISOLATED FORM +0xc2 U+fe81 #ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM +0xc3 U+fe83 #ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM +0xc4 U+fe85 #ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM +0xc5 U+feca #ARABIC LETTER AIN FINAL FORM +0xc6 U+fe8b #ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM +0xc7 U+fe8d #ARABIC LETTER ALEF ISOLATED FORM +0xc8 U+fe91 #ARABIC LETTER BEH INITIAL FORM +0xc9 U+fe93 #ARABIC LETTER TEH MARBUTA ISOLATED FORM +0xca U+fe97 #ARABIC LETTER TEH INITIAL FORM +0xcb U+fe9b #ARABIC LETTER THEH INITIAL FORM +0xcc U+fe9f #ARABIC LETTER JEEM INITIAL FORM +0xcd U+fea3 #ARABIC LETTER HAH INITIAL FORM +0xce U+fea7 #ARABIC LETTER KHAH INITIAL FORM +0xcf U+fea9 #ARABIC LETTER DAL ISOLATED FORM +0xd0 U+feab #ARABIC LETTER THAL ISOLATED FORM +0xd1 U+fead #ARABIC LETTER REH ISOLATED FORM +0xd2 U+feaf #ARABIC LETTER ZAIN ISOLATED FORM +0xd3 U+feb3 #ARABIC LETTER SEEN INITIAL FORM +0xd4 U+feb7 #ARABIC LETTER SHEEN INITIAL FORM +0xd5 U+febb #ARABIC LETTER SAD INITIAL FORM +0xd6 U+febf #ARABIC LETTER DAD INITIAL FORM +0xd7 U+fec1 #ARABIC LETTER TAH ISOLATED FORM +0xd8 U+fec5 #ARABIC LETTER ZAH ISOLATED FORM +0xd9 
U+fecb #ARABIC LETTER AIN INITIAL FORM +0xda U+fecf #ARABIC LETTER GHAIN INITIAL FORM +0xdb U+00a6 #BROKEN VERTICAL BAR +0xdc U+00ac #NOT SIGN +0xdd U+00f7 #DIVISION SIGN +0xde U+00d7 #MULTIPLICATION SIGN +0xdf U+fec9 #ARABIC LETTER AIN ISOLATED FORM +0xe0 U+0640 #ARABIC TATWEEL +0xe1 U+fed3 #ARABIC LETTER FEH INITIAL FORM +0xe2 U+fed7 #ARABIC LETTER QAF INITIAL FORM +0xe3 U+fedb #ARABIC LETTER KAF INITIAL FORM +0xe4 U+fedf #ARABIC LETTER LAM INITIAL FORM +0xe5 U+fee3 #ARABIC LETTER MEEM INITIAL FORM +0xe6 U+fee7 #ARABIC LETTER NOON INITIAL FORM +0xe7 U+feeb #ARABIC LETTER HEH INITIAL FORM +0xe8 U+feed #ARABIC LETTER WAW ISOLATED FORM +0xe9 U+feef #ARABIC LETTER ALEF MAKSURA ISOLATED FORM +0xea U+fef3 #ARABIC LETTER YEH INITIAL FORM +0xeb U+febd #ARABIC LETTER DAD ISOLATED FORM +0xec U+fecc #ARABIC LETTER AIN MEDIAL FORM +0xed U+fece #ARABIC LETTER GHAIN FINAL FORM +0xee U+fecd #ARABIC LETTER GHAIN ISOLATED FORM +0xef U+fee1 #ARABIC LETTER MEEM ISOLATED FORM +0xf0 U+fe7d #ARABIC SHADDA MEDIAL FORM +0xf1 U+0651 #ARABIC SHADDAH +0xf2 U+fee5 #ARABIC LETTER NOON ISOLATED FORM +0xf3 U+fee9 #ARABIC LETTER HEH ISOLATED FORM +0xf4 U+feec #ARABIC LETTER HEH MEDIAL FORM +0xf5 U+fef0 #ARABIC LETTER ALEF MAKSURA FINAL FORM +0xf6 U+fef2 #ARABIC LETTER YEH FINAL FORM +0xf7 U+fed0 #ARABIC LETTER GHAIN MEDIAL FORM +0xf8 U+fed5 #ARABIC LETTER QAF ISOLATED FORM +0xf9 U+fef5 #ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM +0xfa U+fef6 #ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM +0xfb U+fedd #ARABIC LETTER LAM ISOLATED FORM +0xfc U+fed9 #ARABIC LETTER KAF ISOLATED FORM +0xfd U+fef1 #ARABIC LETTER YEH ISOLATED FORM +0xfe U+25a0 #BLACK SQUARE +#0xff #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp866_uni.tbl b/src/chrtrans/cp866_uni.tbl index 2b109897..9de12f9d 100644 --- a/src/chrtrans/cp866_uni.tbl +++ b/src/chrtrans/cp866_uni.tbl @@ -23,7 +23,7 @@ ODosCyrillic (cp866) # 0x20-0x40 idem -# some mapppings of greek capital letters to 
latin letters added - kw +# Some mappings of Greek capital letters to Latin letters added. - KW 0x41 U+0041 U+0391 #LATIN CAPITAL LETTER A 0x42 U+0042 U+0392 #LATIN CAPITAL LETTER B 0x43 U+0043 #LATIN CAPITAL LETTER C diff --git a/src/chrtrans/cp869_uni.tbl b/src/chrtrans/cp869_uni.tbl new file mode 100644 index 00000000..412fb8a7 --- /dev/null +++ b/src/chrtrans/cp869_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp869 + +#Name as a Display Charset (used on Options screen) +ODosGreek2 (cp869) + +# Name: cp869_DOSGreek2 to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp869_DOSGreek2 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp869_DOSGreek2 order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +#0x82 #UNDEFINED +#0x83 #UNDEFINED +#0x84 #UNDEFINED +#0x85 #UNDEFINED +0x86 U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS +#0x87 #UNDEFINED +0x88 U+00b7 #MIDDLE DOT +0x89 U+00ac #NOT SIGN +0x8a U+00a6 #BROKEN BAR +0x8b U+2018 #LEFT SINGLE QUOTATION MARK +0x8c U+2019 #RIGHT SINGLE QUOTATION MARK +0x8d U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0x8e U+2015 #HORIZONTAL BAR +0x8f U+0389 #GREEK CAPITAL LETTER ETA WITH TONOS +0x90 U+038a #GREEK CAPITAL LETTER IOTA WITH TONOS +0x91 U+03aa #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0x92 U+038c #GREEK CAPITAL LETTER OMICRON WITH TONOS +#0x93 #UNDEFINED +#0x94 #UNDEFINED +0x95 U+038e #GREEK CAPITAL LETTER UPSILON WITH TONOS +0x96 U+03ab #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0x97 U+00a9 #COPYRIGHT SIGN +0x98 U+038f #GREEK CAPITAL LETTER OMEGA WITH TONOS +0x99 U+00b2 #SUPERSCRIPT TWO +0x9a U+00b3 #SUPERSCRIPT THREE +0x9b U+03ac #GREEK SMALL 
LETTER ALPHA WITH TONOS +0x9c U+00a3 #POUND SIGN +0x9d U+03ad #GREEK SMALL LETTER EPSILON WITH TONOS +0x9e U+03ae #GREEK SMALL LETTER ETA WITH TONOS +0x9f U+03af #GREEK SMALL LETTER IOTA WITH TONOS +0xa0 U+03ca #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xa1 U+0390 #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xa2 U+03cc #GREEK SMALL LETTER OMICRON WITH TONOS +0xa3 U+03cd #GREEK SMALL LETTER UPSILON WITH TONOS +0xa4 U+0391 #GREEK CAPITAL LETTER ALPHA +0xa5 U+0392 #GREEK CAPITAL LETTER BETA +0xa6 U+0393 #GREEK CAPITAL LETTER GAMMA +0xa7 U+0394 #GREEK CAPITAL LETTER DELTA +0xa8 U+0395 #GREEK CAPITAL LETTER EPSILON +0xa9 U+0396 #GREEK CAPITAL LETTER ZETA +0xaa U+0397 #GREEK CAPITAL LETTER ETA +0xab U+00bd #VULGAR FRACTION ONE HALF +0xac U+0398 #GREEK CAPITAL LETTER THETA +0xad U+0399 #GREEK CAPITAL LETTER IOTA +0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+039a #GREEK CAPITAL LETTER KAPPA +0xb6 U+039b #GREEK CAPITAL LETTER LAMDA +0xb7 U+039c #GREEK CAPITAL LETTER MU +0xb8 U+039d #GREEK CAPITAL LETTER NU +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+039e #GREEK CAPITAL LETTER XI +0xbe U+039f #GREEK CAPITAL LETTER OMICRON +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+03a0 #GREEK CAPITAL LETTER PI +0xc7 U+03a1 #GREEK CAPITAL LETTER RHO +0xc8 U+255a #BOX DRAWINGS DOUBLE UP 
AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+03a3 #GREEK CAPITAL LETTER SIGMA +0xd0 U+03a4 #GREEK CAPITAL LETTER TAU +0xd1 U+03a5 #GREEK CAPITAL LETTER UPSILON +0xd2 U+03a6 #GREEK CAPITAL LETTER PHI +0xd3 U+03a7 #GREEK CAPITAL LETTER CHI +0xd4 U+03a8 #GREEK CAPITAL LETTER PSI +0xd5 U+03a9 #GREEK CAPITAL LETTER OMEGA +0xd6 U+03b1 #GREEK SMALL LETTER ALPHA +0xd7 U+03b2 #GREEK SMALL LETTER BETA +0xd8 U+03b3 #GREEK SMALL LETTER GAMMA +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+03b4 #GREEK SMALL LETTER DELTA +0xde U+03b5 #GREEK SMALL LETTER EPSILON +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03b6 #GREEK SMALL LETTER ZETA +0xe1 U+03b7 #GREEK SMALL LETTER ETA +0xe2 U+03b8 #GREEK SMALL LETTER THETA +0xe3 U+03b9 #GREEK SMALL LETTER IOTA +0xe4 U+03ba #GREEK SMALL LETTER KAPPA +0xe5 U+03bb #GREEK SMALL LETTER LAMDA +0xe6 U+03bc #GREEK SMALL LETTER MU +0xe7 U+03bd #GREEK SMALL LETTER NU +0xe8 U+03be #GREEK SMALL LETTER XI +0xe9 U+03bf #GREEK SMALL LETTER OMICRON +0xea U+03c0 #GREEK SMALL LETTER PI +0xeb U+03c1 #GREEK SMALL LETTER RHO +0xec U+03c3 #GREEK SMALL LETTER SIGMA +0xed U+03c2 #GREEK SMALL LETTER FINAL SIGMA +0xee U+03c4 #GREEK SMALL LETTER TAU +0xef U+0384 #GREEK TONOS +0xf0 U+00ad #SOFT HYPHEN +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+03c5 #GREEK SMALL LETTER UPSILON +0xf3 U+03c6 #GREEK SMALL LETTER PHI +0xf4 U+03c7 #GREEK SMALL LETTER CHI +0xf5 U+00a7 #SECTION SIGN +0xf6 U+03c8 #GREEK SMALL LETTER PSI +0xf7 U+0385 #GREEK DIALYTIKA TONOS +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+00a8 #DIAERESIS +0xfa U+03c9 #GREEK SMALL LETTER OMEGA +0xfb U+03cb #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xfc 
U+03b0 #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xfd U+03ce #GREEK SMALL LETTER OMEGA WITH TONOS +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index c4bbef7c..42cca597 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -20,24 +20,24 @@ U+00a3:Pd U+00a4:CUR U+00a5:Ye U+00a6:| -U+00a7:SE +U+00a7:S: U+00a8:" -U+00a9:(C) +U+00a9:(c) U+00aa:-a U+00ab:<< -U+00ac:NO +U+00ac:NOT U+00ad:- U+00ae:(R) U+00af:- -U+00b0:DG +U+00b0:DEG U+00b1:+- U+00b2:^2 U+00b3:^3 U+00b4:' # My -> u -U+00b6:u -U+00b6:PI -U+00b7:.M +U+00b5:u +U+00b6:P: +U+00b7:. U+00b8:, U+00b9:^1 U+00ba:-o @@ -1325,9 +1325,9 @@ U+211e:Rx U+2120:(SM) U+2122:(TM) U+2126:Ohm -U+212E:est. 0x4b U+212A # Kelvin sign - K U+212b:Ang. +U+212E:est. U+2135:Aleph U+2153: 1/3 U+2154: 2/3 diff --git a/src/chrtrans/dmcs_uni.tbl b/src/chrtrans/dmcs_uni.tbl new file mode 100644 index 00000000..676b728d --- /dev/null +++ b/src/chrtrans/dmcs_uni.tbl @@ -0,0 +1,226 @@ +#The MIME name of this charset. 
+Mdec-mcs + +#Name as a Display Charset (used on Options screen) +ODEC Multinational + +# +# Name: DEC Multinational (dec-mcs) [to unicode] +# Date: 29 October 1997 +# Author: Fote +# +################## + +#0x20 U+0020 # SPACE +#0x21 U+0021 # EXCLAMATION MARK +#0x22 U+0022 # QUOTATION MARK +#0x23 U+0023 # NUMBER SIGN +#0x24 U+0024 # DOLLAR SIGN +#0x25 U+0025 # PERCENT SIGN +#0x26 U+0026 # AMPERSAND +#0x27 U+0027 # APOSTROPHE +#0x28 U+0028 # LEFT PARENTHESIS +#0x29 U+0029 # RIGHT PARENTHESIS +#0x2A U+002A # ASTERISK +#0x2B U+002B # PLUS SIGN +#0x2C U+002C # COMMA +#0x2D U+002D # HYPHEN-MINUS +#0x2E U+002E # FULL STOP +#0x2F U+002F # SOLIDUS +#0x30 U+0030 # DIGIT ZERO +#0x31 U+0031 # DIGIT ONE +#0x32 U+0032 # DIGIT TWO +#0x33 U+0033 # DIGIT THREE +#0x34 U+0034 # DIGIT FOUR +#0x35 U+0035 # DIGIT FIVE +#0x36 U+0036 # DIGIT SIX +#0x37 U+0037 # DIGIT SEVEN +#0x38 U+0038 # DIGIT EIGHT +#0x39 U+0039 # DIGIT NINE +#0x3A U+003A # COLON +#0x3B U+003B # SEMICOLON +#0x3C U+003C # LESS-THAN SIGN +#0x3D U+003D # EQUALS SIGN +#0x3E U+003E # GREATER-THAN SIGN +#0x3F U+003F # QUESTION MARK +#0x40 U+0040 # COMMERCIAL AT +#0x41 U+0041 # LATIN CAPITAL LETTER A +#0x42 U+0042 # LATIN CAPITAL LETTER B +#0x43 U+0043 # LATIN CAPITAL LETTER C +#0x44 U+0044 # LATIN CAPITAL LETTER D +#0x45 U+0045 # LATIN CAPITAL LETTER E +#0x46 U+0046 # LATIN CAPITAL LETTER F +#0x47 U+0047 # LATIN CAPITAL LETTER G +#0x48 U+0048 # LATIN CAPITAL LETTER H +#0x49 U+0049 # LATIN CAPITAL LETTER I +#0x4A U+004A # LATIN CAPITAL LETTER J +#0x4B U+004B # LATIN CAPITAL LETTER K +#0x4C U+004C # LATIN CAPITAL LETTER L +#0x4D U+004D # LATIN CAPITAL LETTER M +#0x4E U+004E # LATIN CAPITAL LETTER N +#0x4F U+004F # LATIN CAPITAL LETTER O +#0x50 U+0050 # LATIN CAPITAL LETTER P +#0x51 U+0051 # LATIN CAPITAL LETTER Q +#0x52 U+0052 # LATIN CAPITAL LETTER R +#0x53 U+0053 # LATIN CAPITAL LETTER S +#0x54 U+0054 # LATIN CAPITAL LETTER T +#0x55 U+0055 # LATIN CAPITAL LETTER U +#0x56 U+0056 # LATIN CAPITAL LETTER V +#0x57 U+0057 # LATIN 
CAPITAL LETTER W +#0x58 U+0058 # LATIN CAPITAL LETTER X +#0x59 U+0059 # LATIN CAPITAL LETTER Y +#0x5A U+005A # LATIN CAPITAL LETTER Z +#0x5B U+005B # LEFT SQUARE BRACKET +#0x5C U+005C # REVERSE SOLIDUS +#0x5D U+005D # RIGHT SQUARE BRACKET +#0x5E U+005E # CIRCUMFLEX ACCENT +#0x5F U+005F # LOW LINE +#0x60 U+0060 # GRAVE ACCENT +#0x61 U+0061 # LATIN SMALL LETTER A +#0x62 U+0062 # LATIN SMALL LETTER B +#0x63 U+0063 # LATIN SMALL LETTER C +#0x64 U+0064 # LATIN SMALL LETTER D +#0x65 U+0065 # LATIN SMALL LETTER E +#0x66 U+0066 # LATIN SMALL LETTER F +#0x67 U+0067 # LATIN SMALL LETTER G +#0x68 U+0068 # LATIN SMALL LETTER H +#0x69 U+0069 # LATIN SMALL LETTER I +#0x6A U+006A # LATIN SMALL LETTER J +#0x6B U+006B # LATIN SMALL LETTER K +#0x6C U+006C # LATIN SMALL LETTER L +#0x6D U+006D # LATIN SMALL LETTER M +#0x6E U+006E # LATIN SMALL LETTER N +#0x6F U+006F # LATIN SMALL LETTER O +#0x70 U+0070 # LATIN SMALL LETTER P +#0x71 U+0071 # LATIN SMALL LETTER Q +#0x72 U+0072 # LATIN SMALL LETTER R +#0x73 U+0073 # LATIN SMALL LETTER S +#0x74 U+0074 # LATIN SMALL LETTER T +#0x75 U+0075 # LATIN SMALL LETTER U +#0x76 U+0076 # LATIN SMALL LETTER V +#0x77 U+0077 # LATIN SMALL LETTER W +#0x78 U+0078 # LATIN SMALL LETTER X +#0x79 U+0079 # LATIN SMALL LETTER Y +#0x7A U+007A # LATIN SMALL LETTER Z +#0x7B U+007B # LEFT CURLY BRACKET +#0x7C U+007C # VERTICAL LINE +#0x7D U+007D # RIGHT CURLY BRACKET +#0x7E U+007E # TILDE +# +0x20-0x7f idem +# +0xA1 U+00A1 # inverted exclamation mark (¡) - iexcl +0xA2 U+00A2 # cent sign (¢) - cent +0xA3 U+00A3 # pound sign (£) - pound +# currency sign (¤) - curren +U+00A4:CUR +0xA5 U+00A5 # yen sign (¥) - yen +# broken vertical bar (¦) - brvbar, brkbar +U+00A6:| +0xA7 U+00A7 # section sign (§) - sect +0xA8 U+00A8 # spacing diaresis (¨) - uml, die +0xA9 U+00A9 # copyright sign (©) - copy +0xAA U+00AA # feminine ordinal indicator (ª) - ordf +0xAB U+00AB # angle quotation mark, left («) - laquo +# negation sign (¬); - not +U+00AC:NOT +# soft hyphen (­) - shy +#U+00AD 
+# circled R registered sign (®) - reg +U+00AE:(R) +# spacing macron (¯) - hibar, macr +U+00AF:- +0xB0 U+00B0 # degree sign (°) - deg +0xB1 U+00B1 # plus-or-minus sign (±) - plusmn +0xB2 U+00B2 # superscript 2 (²) - sup2 +0xB3 U+00B3 # superscript 3 (³) - sup3 +#spacing acute (´) - acute +U+00B4:' +0xB5 U+00B5 # micro sign (µ) - micro +0xB6 U+00B6 # paragraph sign (¶) - para +0xB7 U+00B7 # middle dot (·) - middot +# spacing cedilla (¸) - cedil +U+00B8:, +0xB9 U+00B9 # superscript 1 (¹) - sup1 +0xBA U+00BA # masculine ordinal indicator (º) - ordm +0xBB U+00BB # angle quotation mark, right (») - raquo +0xBC U+00BC # fraction 1/4 (¼) - frac14 +0xBD U+00BD # fraction 1/2 (½) - frac12 +# fraction 3/4 (¾) - frac34 +U+00BE: 3/4 +0xBF U+00BF # inverted question mark (¿) - iquest +0xC0 U+00C0 # capital A, grave accent (À) - Agrave +0xC1 U+00C1 # capital A, acute accent (Á) - Aacute +0xC2 U+00C2 # capital A, circumflex accent (Â) - Acirc +0xC3 U+00C3 # capital A, tilde (Ã) - Atilde +0xC4 U+00C4 # capital A, dieresis or umlaut mark (Ä) - Auml +0xC5 U+00C5 # capital A, ring (Å) - Aring +0xC6 U+00C6 # capital AE diphthong (ligature) (Æ) - AElig +0xC7 U+00C7 # capital C, cedilla (Ç) - Ccedil +0xC8 U+00C8 # capital E, grave accent (È) - Egrave +0xC9 U+00C9 # capital E, acute accent (É) - Eacute +0xCA U+00CA # capital E, circumflex accent (Ê) - Ecirc +0xCB U+00CB # capital E, dieresis or umlaut mark (Ë) - Euml +0xCC U+00CC # capital I, grave accent (Ì) - Igrave +0xCD U+00CD # capital I, acute accent (Í) - Iacute +0xCE U+00CE # capital I, circumflex accent (Î) - Icirc +0xCF U+00CF # capital I, dieresis or umlaut mark (Ï) - Iuml +# capital Eth, Icelandic (Ð) - ETH */ +U+00D0:DH +# Dj # capital D with stroke - Dstrok +0xD1 U+00D1 # capital N, tilde (Ñ) - Ntilde +0xD2 U+00D2 # capital O, grave accent (Ò) - Ograve +0xD3 U+00D3 # capital O, acute accent (Ó) - Oacute +0xD4 U+00D4 # capital O, circumflex accent (Ô) - Ocirc +0xD5 U+00D5 # capital O, tilde (Õ) - Otilde +0xD6 U+00D6 # 
capital O, dieresis or umlaut mark (Ö) - Ouml +# multiplication sign (×) - times +U+00D7:* +0xD8 U+00D8 # capital O, slash (Ø) - Oslash +0xD9 U+00D9 # capital U, grave accent (Ù) - Ugrave +0xDA U+00DA # capital U, acute accent (Ú) - Uacute +0xDB U+00DB # capital U, circumflex accent (Û) - Ucirc +0xDC U+00DC # capital U, dieresis or umlaut mark (Ü) - Uuml +0xDD U+00DD # capital Y, acute accent (Ý) - Yacute +# capital THORN, Icelandic (Þ) - THORN */ +U+00DE:P +0xDF U+00DF # small sharp s, German (sz ligature) (ß) - szlig +0xE0 U+00E0 # small a, grave accent (à) - agrave +0xE1 U+00E1 # small a, acute accent (á) - aacute +0xE2 U+00E2 # small a, circumflex accent (â) - acirc +0xE3 U+00E3 # small a, tilde (ã) - atilde +0xE4 U+00E4 # small a, dieresis or umlaut mark (ä) - auml +0xE5 U+00E5 # small a, ring (å) - aring +0xE6 U+00E6 # small ae diphthong (ligature) (æ) - aelig +0xE7 U+00E7 # small c, cedilla (ç) - ccedil +0xE8 U+00E8 # small e, grave accent (è) - egrave +0xE9 U+00E9 # small e, acute accent (é) - eacute +0xEA U+00EA # small e, circumflex accent (ê) - ecirc +0xEB U+00EB # small e, dieresis or umlaut mark (ë) - euml +0xEC U+00EC # small i, grave accent (ì) - igrave +0xED U+00ED # small i, acute accent (í) - iacute +0xEE U+00EE # small i, circumflex accent (î) - icirc +0xEF U+00EF # small i, dieresis or umlaut mark (ï) - iuml +# small eth, Icelandic (ð) - eth +U+00F0:dh +0xF1 U+00F1 # small n, tilde (ñ) - ntilde +0xF2 U+00F2 # small o, grave accent (ò) - ograve +0xF3 U+00F3 # small o, acute accent (ó) - oacute +0xF4 U+00F4 # small o, circumflex accent (ô) - ocirc +0xF5 U+00F5 # small o, tilde (õ) - otilde +0xF6 U+00F6 # small o, dieresis or umlaut mark (ö) - ouml +# division sign (÷) - divide +U+00F7:/ +0xF8 U+00F8 # small o, slash (ø) - oslash +0xF9 U+00F9 # small u, grave accent (ù) - ugrave +0xFA U+00FA # small u, acute accent (ú) - uacute +0xFB U+00FB # small u, circumflex accent (û) - ucirc +0xFC U+00FC # small u, dieresis or umlaut mark (ü) - uuml +0xFD 
U+00FF # small y, dieresis or umlaut mark (ÿ) - yuml +# small y, acute accent (ý) - yacute +U+00FD:y' +# small thorn, Icelandic (þ) - thorn +U+00FE:p +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/iso01_uni.tbl b/src/chrtrans/iso01_uni.tbl index d2147771..d2701f0e 100644 --- a/src/chrtrans/iso01_uni.tbl +++ b/src/chrtrans/iso01_uni.tbl @@ -75,4 +75,4 @@ U+2122:(TM) 0x27 U+2019-U+201b # various single quotation marks 0x22 U+201c-U+201f # various double quotation marks -U+2297 "(\327)" \ No newline at end of file +U+2297 "(\327)" diff --git a/src/chrtrans/iso01_uni.tbl.orig b/src/chrtrans/iso01_uni.tbl.orig deleted file mode 100644 index 14f71ff3..00000000 --- a/src/chrtrans/iso01_uni.tbl.orig +++ /dev/null @@ -1,78 +0,0 @@ -# -# Unicode mapping table for ISO 8859-1 fonts iso01.* -# [use: unicode_start iso01.f16 iso01] -# -#Shall this become the "default" translation? -#Meaning of that is currently not well defined. It is different -#from the default input or default output charset... -#but there has to be exactly one table marked as "default". -D0 -# -#The MIME name of this charset. 
-Miso-8859-1 - -#Name as a Display Charset (used on Options screen) -OISO Latin 1 - -0x20 U+0020 U+1360 -0x21-0x62 idem -# The following line is an example for mapping several accented versions -# of small letter 'c' to 'c': -0x63 U+0063 U+0107 U+0109 U+010B U+010D -0x64-0x7e idem -0xa0-0xff idem -#0x00 U+fffd # don't let failed char lookups return '\0' -# Mappings of C0 control chars from original, disabled -#0x01 U+263A -#0x02 U+263B -#0x03 U+2665 -#0x04 U+2666 -#0x05 U+2663 -#0x06 U+2660 -#0x07 U+2022 -#0x08 U+25D8 -#0x09 U+25CB -#0x0A U+25D9 -#0x0B U+2642 -#0x0C U+2640 -#0x0D U+266A -#0x0E U+266B -#0x0E U+266C -#0x0F U+263C -#0x10 U+25B6 -#0x10 U+25BA -#0x11 U+25C0 -#0x11 U+25C4 -#0x12 U+2195 -#0x13 U+203C -#0x14 U+00B6 -#0x15 U+00A7 -#0x16 U+25AC -#0x17 U+21A8 -#0x18 U+2191 -#0x19 U+2193 -#0x1A U+2192 -#0x1B U+2190 -#0x1C U+221F -#0x1C U+2319 -#0x1D U+2194 -#0x1E U+25B2 -#0x1F U+25BC -#0x7f U+2302 - -0xd0 U+0110 # Dstrok and ETH are nearly the same... - -# Dont wanna see these: -# POP DIRECTIONAL FORMATTING 202C -U+202c: -# LEFT-TO-RIGHT OVERRIDE 202D -U+202d: - -# TRADE MARK SIGN: -U+2122:(TM) - -0x60 U+2018 # left single quotation mark -0x27 U+2019-U+201b # various single quotation marks -0x22 U+201c-U+201f # various double quotation marks - -U+2297:(×) \ No newline at end of file diff --git a/src/chrtrans/iso06_uni.tbl b/src/chrtrans/iso06_uni.tbl index 46eb3709..fd3452da 100644 --- a/src/chrtrans/iso06_uni.tbl +++ b/src/chrtrans/iso06_uni.tbl @@ -109,4 +109,5 @@ U+2122:(TM) # Let's try to show a question mark for character that cannot # be shown. U+fffd is used for invalid characters. -U+fffd:? +# It works, but let's stick with UHHH representation. - FM +#U+fffd:? diff --git a/src/chrtrans/iso08_uni.tbl b/src/chrtrans/iso08_uni.tbl index d1c33b1d..bc2bb647 100644 --- a/src/chrtrans/iso08_uni.tbl +++ b/src/chrtrans/iso08_uni.tbl @@ -106,4 +106,5 @@ U+2122:(TM) # Let's try to show a question mark for character that cannot # be shown. 
U+fffd is used for invalid characters. -U+fffd:? +# It works, but let's stick with UHHH representation. - FM +#U+fffd:? diff --git a/src/chrtrans/koi8r_uni.tbl b/src/chrtrans/koi8r_uni.tbl index c4946a50..ebe4fe55 100644 --- a/src/chrtrans/koi8r_uni.tbl +++ b/src/chrtrans/koi8r_uni.tbl @@ -1,5 +1,5 @@ # Options screen name for this character set -OKOI8-R character set +OKOI8-R Cyrillic # MIME name for this charset Mkoi8-r diff --git a/src/chrtrans/mac_uni.tbl b/src/chrtrans/mac_uni.tbl new file mode 100644 index 00000000..61c630f3 --- /dev/null +++ b/src/chrtrans/mac_uni.tbl @@ -0,0 +1,342 @@ +#The MIME name of this charset. +Mmacintosh + +#Name as a Display Charset (used on Options screen) +OMacintosh (8 bit) + +# +# Name: MacOS_Roman [to Unicode] +# Unicode versions: 1.1, 2.0 +# Table version: 0.2 (from internal ufrm version <9>) +# Date: 15 April 1995 +# Author: Peter Edberg <edberg1@applelink.apple.com> +# +# Copyright (c) 1995 Apple Computer, Inc. All Rights reserved. +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple makes no warranty or representation, either express or +# implied, with respect to these tables, their quality, accuracy, or +# fitness for a particular purpose. In no event will Apple be liable +# for direct, indirect, special, incidental, or consequential damages +# resulting from any defect or inaccuracy in this document or the +# accompanying tables. +# +# These mapping tables and character lists are preliminary and +# subject to change. Updated tables will be available from the +# Unicode Inc. 
ftp site (unicode.org), the Apple Computer ftp site +# (ftp.info.apple.com), the Apple Computer World-Wide Web pages +# (http://www.info.apple.com), and possibly on diskette from APDA +# (Apple's mail-order distribution service for developers). +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the MacOS Roman code (in hex as 0xNN) +# Column #2 is the Unicode (in hex as 0xNNNN) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in MacOS Roman code order. +# +# One of these mappings requires the use of a corporate character +# (for the Apple logo character). See the file "MacOS-CorpCharList". +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Notes on MacOS Roman: +# --------------------- +# +# This character set is used for at least the following MacOS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of MacRoman are used for Croatian, Icelandic, +# Turkish, and Romanian. Separate mapping tables are available +# for these encodings. +# +# At least through System 7.5, the bitmap versions of the fonts +# Chicago, New York, Geneva, and Monaco do not implement the +# full Roman character set; they only support MacOS Roman character +# codes up to 0xD8. The TrueType versions of these fonts do +# implement the full character set, as do both the bitmap and +# TrueType versions of the other standard Roman fonts. +# +# In all MacOS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) 
have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +################## + +#0x20 U+0020 # SPACE +#0x21 U+0021 # EXCLAMATION MARK +#0x22 U+0022 # QUOTATION MARK +#0x23 U+0023 # NUMBER SIGN +#0x24 U+0024 # DOLLAR SIGN +#0x25 U+0025 # PERCENT SIGN +#0x26 U+0026 # AMPERSAND +#0x27 U+0027 # APOSTROPHE +#0x28 U+0028 # LEFT PARENTHESIS +#0x29 U+0029 # RIGHT PARENTHESIS +#0x2A U+002A # ASTERISK +#0x2B U+002B # PLUS SIGN +#0x2C U+002C # COMMA +#0x2D U+002D # HYPHEN-MINUS +#0x2E U+002E # FULL STOP +#0x2F U+002F # SOLIDUS +#0x30 U+0030 # DIGIT ZERO +#0x31 U+0031 # DIGIT ONE +#0x32 U+0032 # DIGIT TWO +#0x33 U+0033 # DIGIT THREE +#0x34 U+0034 # DIGIT FOUR +#0x35 U+0035 # DIGIT FIVE +#0x36 U+0036 # DIGIT SIX +#0x37 U+0037 # DIGIT SEVEN +#0x38 U+0038 # DIGIT EIGHT +#0x39 U+0039 # DIGIT NINE +#0x3A U+003A # COLON +#0x3B U+003B # SEMICOLON +#0x3C U+003C # LESS-THAN SIGN +#0x3D U+003D # EQUALS SIGN +#0x3E U+003E # GREATER-THAN SIGN +#0x3F U+003F # QUESTION MARK +#0x40 U+0040 # COMMERCIAL AT +#0x41 U+0041 # LATIN CAPITAL LETTER A +#0x42 U+0042 # LATIN CAPITAL LETTER B +#0x43 U+0043 # LATIN CAPITAL LETTER C +#0x44 U+0044 # LATIN CAPITAL LETTER D +#0x45 U+0045 # LATIN CAPITAL LETTER E +#0x46 U+0046 # LATIN CAPITAL LETTER F +#0x47 U+0047 # LATIN CAPITAL LETTER G +#0x48 U+0048 # LATIN CAPITAL LETTER H +#0x49 U+0049 # LATIN CAPITAL LETTER I +#0x4A U+004A # LATIN CAPITAL LETTER J +#0x4B U+004B # LATIN CAPITAL LETTER K +#0x4C U+004C # LATIN CAPITAL LETTER L +#0x4D U+004D # LATIN CAPITAL LETTER M +#0x4E U+004E # LATIN CAPITAL LETTER N +#0x4F U+004F # LATIN CAPITAL LETTER O +#0x50 U+0050 # LATIN CAPITAL LETTER P +#0x51 U+0051 # LATIN CAPITAL LETTER Q +#0x52 U+0052 # LATIN CAPITAL LETTER R +#0x53 U+0053 # LATIN CAPITAL 
LETTER S +#0x54 U+0054 # LATIN CAPITAL LETTER T +#0x55 U+0055 # LATIN CAPITAL LETTER U +#0x56 U+0056 # LATIN CAPITAL LETTER V +#0x57 U+0057 # LATIN CAPITAL LETTER W +#0x58 U+0058 # LATIN CAPITAL LETTER X +#0x59 U+0059 # LATIN CAPITAL LETTER Y +#0x5A U+005A # LATIN CAPITAL LETTER Z +#0x5B U+005B # LEFT SQUARE BRACKET +#0x5C U+005C # REVERSE SOLIDUS +#0x5D U+005D # RIGHT SQUARE BRACKET +#0x5E U+005E # CIRCUMFLEX ACCENT +#0x5F U+005F # LOW LINE +#0x60 U+0060 # GRAVE ACCENT +#0x61 U+0061 # LATIN SMALL LETTER A +#0x62 U+0062 # LATIN SMALL LETTER B +#0x63 U+0063 # LATIN SMALL LETTER C +#0x64 U+0064 # LATIN SMALL LETTER D +#0x65 U+0065 # LATIN SMALL LETTER E +#0x66 U+0066 # LATIN SMALL LETTER F +#0x67 U+0067 # LATIN SMALL LETTER G +#0x68 U+0068 # LATIN SMALL LETTER H +#0x69 U+0069 # LATIN SMALL LETTER I +#0x6A U+006A # LATIN SMALL LETTER J +#0x6B U+006B # LATIN SMALL LETTER K +#0x6C U+006C # LATIN SMALL LETTER L +#0x6D U+006D # LATIN SMALL LETTER M +#0x6E U+006E # LATIN SMALL LETTER N +#0x6F U+006F # LATIN SMALL LETTER O +#0x70 U+0070 # LATIN SMALL LETTER P +#0x71 U+0071 # LATIN SMALL LETTER Q +#0x72 U+0072 # LATIN SMALL LETTER R +#0x73 U+0073 # LATIN SMALL LETTER S +#0x74 U+0074 # LATIN SMALL LETTER T +#0x75 U+0075 # LATIN SMALL LETTER U +#0x76 U+0076 # LATIN SMALL LETTER V +#0x77 U+0077 # LATIN SMALL LETTER W +#0x78 U+0078 # LATIN SMALL LETTER X +#0x79 U+0079 # LATIN SMALL LETTER Y +#0x7A U+007A # LATIN SMALL LETTER Z +#0x7B U+007B # LEFT CURLY BRACKET +#0x7C U+007C # VERTICAL LINE +#0x7D U+007D # RIGHT CURLY BRACKET +#0x7E U+007E # TILDE +# +0x20-0x7f idem +# +0x80 U+00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 U+00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x82 U+00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 U+00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 U+00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 U+00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 U+00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 U+00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 
U+00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 U+00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A U+00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B U+00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C U+00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D U+00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E U+00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F U+00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 U+00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x91 U+00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 U+00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 U+00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 U+00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 U+00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 U+00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 U+00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 U+00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 U+00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A U+00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B U+00F5 # LATIN SMALL LETTER O WITH TILDE +0x9C U+00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D U+00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E U+00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F U+00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 U+2020 # DAGGER +0xA1 U+00B0 # DEGREE SIGN +0xA2 U+00A2 # CENT SIGN +0xA3 U+00A3 # POUND SIGN +0xA4 U+00A7 # SECTION SIGN +0xA5 U+2022 # BULLET +0xA6 U+00B6 # PILCROW SIGN +0xA7 U+00DF # LATIN SMALL LETTER SHARP S +0xA8 U+00AE # REGISTERED SIGN +0xA9 U+00A9 # COPYRIGHT SIGN +0xAA U+2122 # TRADE MARK SIGN +0xAB U+00B4 # ACUTE ACCENT +0xAC U+00A8 # DIAERESIS +0xAD U+2260 # NOT EQUAL TO +0xAE U+00C6 # LATIN CAPITAL LIGATURE AE +0xAF U+00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xB0 U+221E # INFINITY +0xB1 U+00B1 # PLUS-MINUS SIGN +0xB2 U+2264 # LESS-THAN OR EQUAL TO +0xB3 U+2265 # GREATER-THAN OR EQUAL TO +0xB4 U+00A5 # YEN SIGN +0xB5 U+00B5 # MICRO SIGN +0xB6 U+2202 # PARTIAL DIFFERENTIAL +0xB7 U+2211 # N-ARY SUMMATION +0xB8 U+220F # N-ARY PRODUCT +0xB9 U+03C0 # GREEK SMALL LETTER PI +0xBA U+222B # INTEGRAL 
+0xBB U+00AA # FEMININE ORDINAL INDICATOR +0xBC U+00BA # MASCULINE ORDINAL INDICATOR +0xBD U+2126 # OHM SIGN +0xBE U+00E6 # LATIN SMALL LIGATURE AE +0xBF U+00F8 # LATIN SMALL LETTER O WITH STROKE +0xC0 U+00BF # INVERTED QUESTION MARK +0xC1 U+00A1 # INVERTED EXCLAMATION MARK +0xC2 U+00AC # NOT SIGN +0xC3 U+221A # SQUARE ROOT +0xC4 U+0192 # LATIN SMALL LETTER F WITH HOOK +0xC5 U+2248 # ALMOST EQUAL TO +0xC6 U+2206 # INCREMENT +0xC7 U+00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC8 U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC9 U+2026 # HORIZONTAL ELLIPSIS +0xCA U+00A0 # NO-BREAK SPACE +0xCB U+00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xCC U+00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xCD U+00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xCE U+0152 # LATIN CAPITAL LIGATURE OE +0xCF U+0153 # LATIN SMALL LIGATURE OE +0xD0 U+2013 # EN DASH +0xD1 U+2014 # EM DASH +0xD2 U+201C # LEFT DOUBLE QUOTATION MARK +0xD3 U+201D # RIGHT DOUBLE QUOTATION MARK +0xD4 U+2018 # LEFT SINGLE QUOTATION MARK +0xD5 U+2019 # RIGHT SINGLE QUOTATION MARK +0xD6 U+00F7 # DIVISION SIGN +0xD7 U+25CA # LOZENGE +0xD8 U+00FF # LATIN SMALL LETTER Y WITH DIAERESIS +0xD9 U+0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xDA U+2044 # FRACTION SLASH +0xDB U+00A4 # CURRENCY SIGN +0xDC U+2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xDD U+203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xDE U+FB01 # LATIN SMALL LIGATURE FI +0xDF U+FB02 # LATIN SMALL LIGATURE FL +0xE0 U+2021 # DOUBLE DAGGER +0xE1 U+00B7 # MIDDLE DOT +0xE2 U+201A # SINGLE LOW-9 QUOTATION MARK +0xE3 U+201E # DOUBLE LOW-9 QUOTATION MARK +0xE4 U+2030 # PER MILLE SIGN +0xE5 U+00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xE6 U+00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xE7 U+00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xE8 U+00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xE9 U+00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xEA U+00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xEB U+00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xEC 
U+00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xED U+00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xEE U+00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xEF U+00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xF0 U+F8FF # Apple logo +0xF1 U+00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xF2 U+00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xF3 U+00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xF4 U+00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xF5 U+0131 # LATIN SMALL LETTER DOTLESS I +0xF6 U+02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xF7 U+02DC # SMALL TILDE +0xF8 U+00AF # MACRON +0xF9 U+02D8 # BREVE +0xFA U+02D9 # DOT ABOVE +0xFB U+02DA # RING ABOVE +0xFC U+00B8 # CEDILLA +0xFD U+02DD # DOUBLE ACUTE ACCENT +0xFE U+02DB # OGONEK +0xFF U+02C7 # CARON +# +# broken vertical bar (¦) - brvbar, brkbar +U+00A6:| +# superscript 3 (³) - sup3 +U+00B3:^3 +# superscript 2 (²) - sup2 +U+00B2:^2 +# superscript 1 (¹) - sup1 +U+00B9:^1 +# fraction 1/4 (¼) - frac14 +U+00BC: 1/4 +# fraction 1/2 (½) - frac12 +U+00BD: 1/2 +# fraction 3/4 (¾) - frac34 +U+00BE: 3/4 +# capital Eth, Icelandic (Ð) - ETH +U+00D0:DH +# Dj # capital D with stroke - Dstrok +# capital Y, acute accent (Ý) - Yacute +U+00DD:Y' +# capital THORN, Icelandic (Þ) - THORN +U+00DE:P +# multiplication sign (×) - times +U+00D7:* +# small eth, Icelandic (ð) - eth +U+00F0:dh +# small y, acute accent (ý) - yacute +U+00FD:y' +# small thorn, Icelandic (þ) - thorn +U+00FE:p +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/makefile.in b/src/chrtrans/makefile.in index 0122ecf1..cbb21618 100644 --- a/src/chrtrans/makefile.in +++ b/src/chrtrans/makefile.in @@ -9,6 +9,7 @@ SHELL = /bin/sh prefix = @prefix@ exec_prefix = @exec_prefix@ +top_srcdir = @top_srcdir@ srcdir = @srcdir@ VPATH = $(srcdir) @@ -18,7 +19,11 @@ SITE_DEFS = # FIXME: set in parent makefile CC = @CC@ CPP = @CPP@ CFLAGS = @CFLAGS@ -CPP_OPTS = @DEFS@ @CPPFLAGS@ -I.. -I../.. -I../../$(WWWINC) $(SITE_DEFS) +CPP_OPTS = @DEFS@ @CPPFLAGS@ \ + -I../.. 
\ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/$(WWWINC) \ + $(SITE_DEFS) CC_OPTS = $(CPP_OPTS) $(CFLAGS) # @@ -33,9 +38,28 @@ FONTMAP_INC = iso01_uni.h# default, if not set by recursive call CHRTR= -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)koi8r_uni.h \ + $(CHRTR)viscii_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1250_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)cp1253_uni.h \ + $(CHRTR)cp1255_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ @@ -44,15 +68,6 @@ TABLES= $(CHRTR)iso02_uni.h \ $(CHRTR)iso08_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \ $(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ @@ -62,8 +77,10 @@ default: $(FONTMAP_INC) tables: $(TABLES) -makeuctb: makeuctb.c UCkd.h - $(CC) $(CC_OPTS) -o makeuctb makeuctb.c +makeuctb: makeuctb.o + $(CC) $(CC_OPTS) -o makeuctb makeuctb.o + +makeuctb.o: $(srcdir)/UCkd.h $(srcdir)/makeuctb.c .SUFFIXES : .tbl .i @@ -76,34 +93,44 @@ makeuctb: makeuctb.c UCkd.h @ECHO_CC@$(CPP) -C $(CPP_OPTS) $*.c >$@ .tbl.h: - ./makeuctb $*.tbl > $@ + ./makeuctb $(srcdir)/$*.tbl > $@ # table files listed here once again to get the make dependencies # right, in case makeuctb was recompiled. 
-iso01_uni.h: iso01_uni.tbl makeuctb -iso02_uni.h: iso02_uni.tbl makeuctb -def7_uni.h: def7_uni.tbl makeuctb -iso03_uni.h: iso03_uni.tbl makeuctb -iso04_uni.h: iso04_uni.tbl makeuctb -iso05_uni.h: iso05_uni.tbl makeuctb -iso06_uni.h: iso06_uni.tbl makeuctb -iso07_uni.h: iso07_uni.tbl makeuctb -iso08_uni.h: iso08_uni.tbl makeuctb -iso09_uni.h: iso09_uni.tbl makeuctb -iso10_uni.h: iso10_uni.tbl makeuctb -koi8r_uni.h: koi8r_uni.tbl makeuctb -cp437_uni.h: cp437_uni.tbl makeuctb -cp850_uni.h: cp850_uni.tbl makeuctb -cp852_uni.h: cp852_uni.tbl makeuctb -cp852_uni.h: cp866_uni.tbl makeuctb -cp1250_uni.h: cp1250_uni.tbl makeuctb -cp1251_uni.h: cp1251_uni.tbl makeuctb -cp1252_uni.h: cp1252_uni.tbl makeuctb -viscii_uni.h: viscii_uni.tbl makeuctb -utf8_uni.h: utf8_uni.tbl makeuctb -mnemonic_suni.h: mnemonic_suni.tbl makeuctb -mnem_suni.h: mnem_suni.tbl makeuctb -rfc_suni.h: rfc_suni.tbl makeuctb +def7_uni.h: $(srcdir)/def7_uni.tbl makeuctb +iso01_uni.h: $(srcdir)/iso01_uni.tbl makeuctb +iso02_uni.h: $(srcdir)/iso02_uni.tbl makeuctb +cp1252_uni.h: $(srcdir)/cp1252_uni.tbl makeuctb +dmcs_uni.h: $(srcdir)/dmcs_uni.tbl makeuctb +mac_uni.h: $(srcdir)/mac_uni.tbl makeuctb +next_uni.h: $(srcdir)/next_uni.tbl makeuctb +koi8r_uni.h: $(srcdir)/koi8r_uni.tbl makeuctb +viscii_uni.h: $(srcdir)/viscii_uni.tbl makeuctb +cp437_uni.h: $(srcdir)/cp437_uni.tbl makeuctb +cp850_uni.h: $(srcdir)/cp850_uni.tbl makeuctb +cp852_uni.h: $(srcdir)/cp852_uni.tbl makeuctb +cp866_uni.h: $(srcdir)/cp866_uni.tbl makeuctb +cp737_uni.h: $(srcdir)/cp737_uni.tbl makeuctb +cp869_uni.h: $(srcdir)/cp869_uni.tbl makeuctb +cp864_uni.h: $(srcdir)/cp864_uni.tbl makeuctb +cp862_uni.h: $(srcdir)/cp862_uni.tbl makeuctb +cp1250_uni.h: $(srcdir)/cp1250_uni.tbl makeuctb +cp1251_uni.h: $(srcdir)/cp1251_uni.tbl makeuctb +cp1253_uni.h: $(srcdir)/cp1253_uni.tbl makeuctb +cp1255_uni.h: $(srcdir)/cp1255_uni.tbl makeuctb +cp1256_uni.h: $(srcdir)/cp1256_uni.tbl makeuctb +iso03_uni.h: $(srcdir)/iso03_uni.tbl makeuctb +iso04_uni.h: 
$(srcdir)/iso04_uni.tbl makeuctb +iso05_uni.h: $(srcdir)/iso05_uni.tbl makeuctb +iso06_uni.h: $(srcdir)/iso06_uni.tbl makeuctb +iso07_uni.h: $(srcdir)/iso07_uni.tbl makeuctb +iso08_uni.h: $(srcdir)/iso08_uni.tbl makeuctb +iso09_uni.h: $(srcdir)/iso09_uni.tbl makeuctb +iso10_uni.h: $(srcdir)/iso10_uni.tbl makeuctb +utf8_uni.h: $(srcdir)/utf8_uni.tbl makeuctb +mnemonic_suni.h: $(srcdir)/mnemonic_suni.tbl makeuctb +mnem_suni.h: $(srcdir)/mnem_suni.tbl makeuctb +rfc_suni.h: $(srcdir)/rfc_suni.tbl makeuctb clean: rm -f makeuctb *.o *uni.h diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index ad95c534..f1417cb7 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -256,7 +256,8 @@ PUBLIC int main ARGS2( if ((p = strchr(buffer, '\n')) != NULL) { *p = '\0'; } else { - fprintf(stderr, "%s: Warning: line too long or incomplete\n", + fprintf(stderr, + "%s: Warning: line too long or incomplete.\n", tblname); } @@ -276,7 +277,7 @@ PUBLIC int main ARGS2( * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> * and <replace> any string not containing '\n' or '\0' - * and <C replace> any string with C backslash escapes + * and <C replace> any string with C backslash escapes. */ p = buffer; while (*p == ' ' || *p == '\t') { @@ -416,19 +417,20 @@ PUBLIC int main ARGS2( continue; } - tbuf = (char *) malloc (4*strlen(p)); + tbuf = (char *)malloc(4*strlen(p)); + if (!(p1 = tbuf)) { fprintf(stderr, "%s: Out of memory\n", tblname); exit(EX_DATAERR); } if (*p == '"') { /* - * handle "<C replace>" + * Handle "<C replace>". * Copy chars verbatim until first '"' not \-escaped or - * end of buffer + * end of buffer. 
*/ int escaped = 0; - for (ch = *++p; (ch = *p) != '\0'; p++) { + for (ch = *(++p); (ch = *p) != '\0'; p++) { if (escaped) { escaped = 0; } else if (ch == '"') { @@ -444,12 +446,17 @@ PUBLIC int main ARGS2( if (escaped) *p1++ = '\n'; } - } else { /* we had ':' */ - for (ch = *++p; (ch = *p) != '\0'; p++, p1++) { + } else { + /* + * We had ':'. + */ + for (ch = *(++p); (ch = *p) != '\0'; p++, p1++) { if ((unsigned char)ch < 32 || ch == '\\' || ch == '\"' || (unsigned char)ch >= 127) { sprintf(p1, "\\%.3o", (unsigned char)ch); -/* fprintf(stderr, "%s\n", tbuf); */ +#ifdef NOTDEFINED + fprintf(stderr, "%s\n", tbuf); +#endif /* NOTDEFINED */ p1 += 3; } else { *p1 = ch; @@ -458,16 +465,19 @@ PUBLIC int main ARGS2( } *p1 = '\0'; for (i = un0; i <= un1; i++) { -/* printf("U+0x%x:%s\n", i, tbuf); */ +#ifdef NOTDEFINED + printf("U+0x%x:%s\n", i, tbuf); */ +#endif /* NOTDEFINED */ addpair_str(tbuf,i); } continue; } -/* Input line (after skipping spaces) doesn't start with one - of the specially recognized characters, so try to interpret - it as starting with a fontpos. -*/ + /* + * Input line (after skipping spaces) doesn't start with one + * of the specially recognized characters, so try to interpret + * it as starting with a fontpos. + */ fp0 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); diff --git a/src/chrtrans/next_uni.tbl b/src/chrtrans/next_uni.tbl new file mode 100644 index 00000000..95dbff8b --- /dev/null +++ b/src/chrtrans/next_uni.tbl @@ -0,0 +1,182 @@ +#The MIME name of this charset. +MIMEname x-next + +#Name as a Display Charset (used on Options screen) +ONeXT character set + +# Name: NextStep Encoding to Unicode +# Unicode version: 1.1 +# Table version: 0.1 +# Table format: Format A +# Date: 14 February 1995 +# Authors: Rick McGowan (rick@unicode.org) +# +# Copyright (c) 1991-1995 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). 
+# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on magnetic media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Recipient is granted the right to make copies in any form for +# internal distribution and to freely use the information supplied +# in the creation of products supporting Unicode. Unicode, Inc. +# specifically excludes the right to re-distribute this file directly +# to third parties or other organizations whether for profit or not. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# NextStep Encoding characters map into Unicode. Since the first +# 128 characters (0x0 - 0x7f) are identical to ASCII and Unicode, +# this table only maps the NextStep range from 0x80 - 0xFF. +# +# Format: Three tab-separated columns +# Column #1 is the NextStep code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as U+XXXX) +# Column #3 NextStep name, Unicode name (follows a comment sign, '#') +# +# The entries are in NextStep order +# +# Any comments or problems, contact rick@unicode.org +# +# +0x20-0x7f idem +# +0x80 U+00a0 # NO-BREAK SPACE +0x81 U+00c0 # LATIN CAPITAL LETTER A WITH GRAVE +0x82 U+00c1 # LATIN CAPITAL LETTER A WITH ACUTE +0x83 U+00c2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x84 U+00c3 # LATIN CAPITAL LETTER A WITH TILDE +0x85 U+00c4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x86 U+00c5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x87 U+00c7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x88 U+00c8 # LATIN CAPITAL LETTER E WITH GRAVE +0x89 U+00c9 # LATIN CAPITAL LETTER E WITH ACUTE +0x8a U+00ca # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x8b U+00cb # LATIN CAPITAL LETTER E WITH DIAERESIS +0x8c U+00cc # LATIN CAPITAL LETTER I WITH GRAVE +0x8d U+00cd # LATIN CAPITAL LETTER I 
WITH ACUTE +0x8e U+00ce # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x8f U+00cf # LATIN CAPITAL LETTER I WITH DIAERESIS +0x90 U+00d0 # LATIN CAPITAL LETTER ETH +0x91 U+00d1 # LATIN CAPITAL LETTER N WITH TILDE +0x92 U+00d2 # LATIN CAPITAL LETTER O WITH GRAVE +0x93 U+00d3 # LATIN CAPITAL LETTER O WITH ACUTE +0x94 U+00d4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0x95 U+00d5 # LATIN CAPITAL LETTER O WITH TILDE +0x96 U+00d6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x97 U+00d9 # LATIN CAPITAL LETTER U WITH GRAVE +0x98 U+00da # LATIN CAPITAL LETTER U WITH ACUTE +0x99 U+00db # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0x9a U+00dc # LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b U+00dd # LATIN CAPITAL LETTER Y WITH ACUTE +0x9c U+00de # LATIN CAPITAL LETTER THORN +0x9d U+00b5 # MICRO SIGN +0x9e U+00d7 # MULTIPLICATION SIGN +0x9f U+00f7 # DIVISION SIGN +0xa0 U+00a9 # COPYRIGHT SIGN +0xa1 U+00a1 # INVERTED EXCLAMATION MARK +0xa2 U+00a2 # CENT SIGN +0xa3 U+00a3 # POUND SIGN +0xa4 U+2044 # FRACTION SLASH +0xa5 U+00a5 # YEN SIGN +0xa6 U+0192 # LATIN SMALL LETTER F WITH HOOK +0xa7 U+00a7 # SECTION SIGN +0xa8 U+00a4 # CURRENCY SIGN +0xa9 U+2019 # RIGHT SINGLE QUOTATION MARK +0xaa U+201c # LEFT DOUBLE QUOTATION MARK +0xab U+00ab # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xac U+2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xad U+203a # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xae U+fb01 # LATIN SMALL LIGATURE FI +0xaf U+fb02 # LATIN SMALL LIGATURE FL +0xb0 U+00ae # REGISTERED SIGN +0xb1 U+2013 # EN DASH +0xb2 U+2020 # DAGGER +0xb3 U+2021 # DOUBLE DAGGER +0xb4 U+00b7 # MIDDLE DOT +0xb5 U+00a6 # BROKEN BAR +0xb6 U+00b6 # PILCROW SIGN +0xb7 U+2022 # BULLET +0xb8 U+201a # SINGLE LOW-9 QUOTATION MARK +0xb9 U+201e # DOUBLE LOW-9 QUOTATION MARK +0xba U+201d # RIGHT DOUBLE QUOTATION MARK +0xbb U+00bb # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xbc U+2026 # HORIZONTAL ELLIPSIS +0xbd U+2030 # PER MILLE SIGN +0xbe U+00ac # NOT SIGN +0xbf U+00bf # INVERTED QUESTION MARK +0xc0 U+00b9 # SUPERSCRIPT ONE +0xc1 U+02cb # MODIFIER LETTER 
GRAVE ACCENT +0xc2 U+00b4 # ACUTE ACCENT +0xc3 U+02c6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xc4 U+02dc # SMALL TILDE +0xc5 U+00af # MACRON +0xc6 U+02d8 # BREVE +0xc7 U+02d9 # DOT ABOVE +0xc8 U+00a8 # DIAERESIS +0xc9 U+00b2 # SUPERSCRIPT TWO +0xca U+02da # RING ABOVE +0xcb U+00b8 # CEDILLA +0xcc U+00b3 # SUPERSCRIPT THREE +0xcd U+02dd # DOUBLE ACUTE ACCENT +0xce U+02db # OGONEK +0xcf U+02c7 # CARON +0xd0 U+2014 # EM DASH +0xd1 U+00b1 # PLUS-MINUS SIGN +0xd2 U+00bc # VULGAR FRACTION ONE QUARTER +0xd3 U+00bd # VULGAR FRACTION ONE HALF +0xd4 U+00be # VULGAR FRACTION THREE QUARTERS +0xd5 U+00e0 # LATIN SMALL LETTER A WITH GRAVE +0xd6 U+00e1 # LATIN SMALL LETTER A WITH ACUTE +0xd7 U+00e2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xd8 U+00e3 # LATIN SMALL LETTER A WITH TILDE +0xd9 U+00e4 # LATIN SMALL LETTER A WITH DIAERESIS +0xda U+00e5 # LATIN SMALL LETTER A WITH RING ABOVE +0xdb U+00e7 # LATIN SMALL LETTER C WITH CEDILLA +0xdc U+00e8 # LATIN SMALL LETTER E WITH GRAVE +0xdd U+00e9 # LATIN SMALL LETTER E WITH ACUTE +0xde U+00ea # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xdf U+00eb # LATIN SMALL LETTER E WITH DIAERESIS +0xe0 U+00ec # LATIN SMALL LETTER I WITH GRAVE +0xe1 U+00c6 # LATIN CAPITAL LETTER AE +0xe2 U+00ed # LATIN SMALL LETTER I WITH ACUTE +0xe3 U+00aa # FEMININE ORDINAL INDICATOR +0xe4 U+00ee # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xe5 U+00ef # LATIN SMALL LETTER I WITH DIAERESIS +0xe6 U+00f0 # LATIN SMALL LETTER ETH +0xe7 U+00f1 # LATIN SMALL LETTER N WITH TILDE +0xe8 U+0141 # LATIN CAPITAL LETTER L WITH STROKE +0xe9 U+00d8 # LATIN CAPITAL LETTER O WITH STROKE +0xea U+0152 # LATIN CAPITAL LIGATURE OE +0xeb U+00ba # MASCULINE ORDINAL INDICATOR +0xec U+00f2 # LATIN SMALL LETTER O WITH GRAVE +0xed U+00f3 # LATIN SMALL LETTER O WITH ACUTE +0xee U+00f4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xef U+00f5 # LATIN SMALL LETTER O WITH TILDE +0xf0 U+00f6 # LATIN SMALL LETTER O WITH DIAERESIS +0xf1 U+00e6 # LATIN SMALL LETTER AE +0xf2 U+00f9 # LATIN SMALL LETTER U WITH 
GRAVE +0xf3 U+00fa # LATIN SMALL LETTER U WITH ACUTE +0xf4 U+00fb # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xf5 U+0131 # LATIN SMALL LETTER DOTLESS I +0xf6 U+00fc # LATIN SMALL LETTER U WITH DIAERESIS +0xf7 U+00fd # LATIN SMALL LETTER Y WITH ACUTE +0xf8 U+0142 # LATIN SMALL LETTER L WITH STROKE +0xf9 U+00f8 # LATIN SMALL LETTER O WITH STROKE +0xfa U+0153 # LATIN SMALL LIGATURE OE +0xfb U+00df # LATIN SMALL LETTER SHARP S +0xfc U+00fe # LATIN SMALL LETTER THORN +0xfd U+00ff # LATIN SMALL LETTER Y WITH DIAERESIS +#0xfe U+fffd # .notdef, REPLACEMENT CHARACTER +#0xff U+fffd # .notdef, REPLACEMENT CHARACTER +# +# degree sign (°) - deg +U+00B0:DEG +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/utf8_uni.tbl b/src/chrtrans/utf8_uni.tbl index 4cc4df97..61cdb259 100644 --- a/src/chrtrans/utf8_uni.tbl +++ b/src/chrtrans/utf8_uni.tbl @@ -2,17 +2,11 @@ # This one is not really much of a "translation table", it mostly just # tells Lynx that "unicode-1-1-utf-8" is Unicode/UCS2 encoded in UTF8... # -#Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". -D0 -# #The MIME name of this charset. -Municode-1-1-utf-8 +MIMEname unicode-1-1-utf-8 #Name as a Display Charset (used on Options screen) -O UNICODE UTF 8 +OptionName UNICODE UTF 8 # Some kind of raw Unicode? # Use 6 for for really "raw" 16bit UCS-2, 7 for UTF-8, ... @@ -30,4 +24,11 @@ O UNICODE UTF 8 R 7 +#Shall this become the "default" translation? +#There has to be exactly one table marked as "default". 
+Default NO + +# Don't fall back to default table for unicode -> 8bit +Fallback NO + 0x20-0x7f idem diff --git a/src/makefile.in b/src/makefile.in index e1f35c23..203e97b2 100644 --- a/src/makefile.in +++ b/src/makefile.in @@ -5,6 +5,7 @@ SHELL = /bin/sh @SET_MAKE@ prefix = @prefix@ exec_prefix = @exec_prefix@ +top_srcdir = @top_srcdir@ srcdir = @srcdir@ VPATH = $(srcdir) @@ -28,7 +29,12 @@ WAISLIB = # FIXME: set in parent makefile WWWINC = WWW/Library/Implementation WWWLIB = ../WWW/Library/unix/libwww.a -CPP_OPTS = $(DEFS) $(CPPFLAGS) -I.. -I../$(WWWINC) $(SITE_DEFS) +CPP_OPTS = $(DEFS) $(CPPFLAGS) \ + -I. \ + -I.. \ + -I$(top_srcdir) \ + -I$(top_srcdir)/$(WWWINC) \ + $(SITE_DEFS) CC_OPTS = $(CPP_OPTS) $(CFLAGS) LINT = @LINT@ @@ -100,9 +106,28 @@ LYCharSets.o: ../userdefs.h CHRTR= chrtrans/ -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)koi8r_uni.h \ + $(CHRTR)viscii_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1250_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)cp1253_uni.h \ + $(CHRTR)cp1255_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ @@ -111,15 +136,6 @@ TABLES= $(CHRTR)iso02_uni.h \ $(CHRTR)iso08_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \ $(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ |