about summary refs log tree commit diff stats
path: root/src/UCdomap.c
diff options
context:
space:
mode:
author Thomas E. Dickey <dickey@invisible-island.net> 1998-11-06 16:18:09 -0500
committer Thomas E. Dickey <dickey@invisible-island.net> 1998-11-06 16:18:09 -0500
commit 18024037b515bfff83e0230b35151babe6005e18 (patch)
tree b8c80055282a00883284722a3dd6bb25234e07f0 /src/UCdomap.c
parent 3d8ecbe48af249fa0d77ce4d273e32ce4b7e6e18 (diff)
download lynx-snapshots-18024037b515bfff83e0230b35151babe6005e18.tar.gz
snapshot of project "lynx", label v2-8-1dev_4
Diffstat (limited to 'src/UCdomap.c')
-rw-r--r-- src/UCdomap.c 178
1 files changed, 82 insertions, 96 deletions
diff --git a/src/UCdomap.c b/src/UCdomap.c
index 4a579f34..20d78e92 100644
--- a/src/UCdomap.c
+++ b/src/UCdomap.c
@@ -15,6 +15,7 @@
  * aeb, 950210
  */
 #include <HTUtils.h>
+#include <tcp.h>
 #include <HTMLDTD.h>
 
 #include <LYGlobalDefs.h>
@@ -23,7 +24,6 @@
 #include <UCMap.h>
 #include <UCDefs.h>
 #include <LYCharSets.h>
-#include <LYStrings.h>
 
 /*
  *  Include tables & parameters.
@@ -67,6 +67,8 @@
 #include <mnem_suni.h>
 #endif /* NOTDEFINED */
 
+#define FREE(x) if (x) {free(x); x = NULL;}
+
 /*
  *  Some of the code below, and some of the comments, are left in for
  *  historical reasons.  Not all those tables below are currently
@@ -263,7 +265,7 @@ PRIVATE int con_insert_unipair PARAMS((
 	int		fordefault));
 PRIVATE int con_insert_unipair_str PARAMS((
 	u16		unicode,
-	CONST char *	replace_str,
+	char *		replace_str,
 	int		fordefault));
 PRIVATE void con_clear_unimap PARAMS((
 	int		fordefault));
@@ -304,7 +306,7 @@ PRIVATE int UC_FindGN_byMIME PARAMS((
 	CONST char *	UC_MIMEcharset));
 PRIVATE void UCreset_allocated_LYCharSets NOPARAMS;
 PRIVATE void UCfree_allocated_LYCharSets NOPARAMS;
-PRIVATE CONST char ** UC_setup_LYCharSets_repl PARAMS((
+PRIVATE char ** UC_setup_LYCharSets_repl PARAMS((
 	int		UC_charset_in_hndl,
 	unsigned	lowest8));
 PRIVATE int UC_Register_with_LYCharSets PARAMS((
@@ -471,7 +473,8 @@ PRIVATE void UC_con_set_trans ARGS3(
   u16 *ptrans;
 
     if (!UC_valid_UC_charset(UC_charset_in_hndl)) {
-	CTRACE(tfp, "UC_con_set_trans: Invalid charset handle %d.\n",
+	if (TRACE)
+	    fprintf(stderr, "UC_con_set_trans: Invalid charset handle %d.\n",
 		    UC_charset_in_hndl);
 	return;
     }
@@ -600,12 +603,11 @@ PRIVATE int con_insert_unipair ARGS3(
 
 PRIVATE int con_insert_unipair_str ARGS3(
 	u16,		unicode,
-	CONST char *, 	replace_str,
+	char *, 	replace_str,
 	int,		fordefault)
 {
     int i, n;
-    char ***p1;
-    CONST char **p2;
+    char ***p1, **p2;
 
     if(fordefault)
 	p1 = unidefault_pagedir_str[n = unicode >> 11];
@@ -625,18 +627,15 @@ PRIVATE int con_insert_unipair_str ARGS3(
 	}
     }
 
-    n = ((unicode >> 6) & 0x1f);
-    if (!p1[n]) {
-	p1[n] = (char **)malloc(64*sizeof(char *));
-	if (!p1[n])
+    if (!(p2 = p1[n = (unicode >> 6) & 0x1f])) {
+	p2 = p1[n] = (char* *)malloc(64*sizeof(char *));
+	if (!p2)
 	    return -ENOMEM;
 
-	p2 = (CONST char **)p1[n];
 	for (i = 0; i < 64; i++) {
 	    p2[i] = NULL;	/* No replace string this character (yet) */
 	}
     }
-    p2 = (CONST char **)p1[n];
 
     p2[unicode & 0x3f] = replace_str;
 
@@ -783,7 +782,8 @@ PRIVATE int UC_con_set_unimap ARGS2(
     u16 *p;
 
     if (!UC_valid_UC_charset(UC_charset_out_hndl)) {
-	CTRACE(tfp, "UC_con_set_unimap: Invalid charset handle %d.\n",
+	if (TRACE)
+	    fprintf(stderr, "UC_con_set_unimap: Invalid charset handle %d.\n",
 		    UC_charset_out_hndl);
 	return -1;
     }
@@ -896,7 +896,7 @@ PRIVATE int conv_uni_to_pc ARGS2(
 	 *  Not a printable character.
 	 */
 	return -1;
-    } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) {
+    } else if (ucs == 0xfeff || (ucs >= 0x200a && ucs <= 0x200f)) {
 	/*
 	 *  Zero-width space.
 	 */
@@ -961,7 +961,7 @@ PRIVATE int conv_uni_to_str ARGS4(
 	 *  Not a printable character.
 	 */
 	return -1;
-    } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) {
+    } else if (ucs == 0xfeff || (ucs >= 0x200a && ucs <= 0x200f)) {
 	/*
 	 *  Zero-width space.
 	 */
@@ -1190,8 +1190,11 @@ PRIVATE int UC_MapGN ARGS2(
 	UCInfo[UChndl].GN = Gn;
 	UC_GNhandles[Gn] = UChndl;
     }
-    CTRACE(tfp, "UC_MapGN: Using %d <- %d (%s)\n",
+    if (TRACE) {
+	fprintf(stderr,
+		"UC_MapGN: Using %d <- %d (%s)\n",
 		Gn, UChndl, UCInfo[UChndl].MIMEname);
+    }
     UC_con_set_trans(UChndl,Gn,update_flag);
     return Gn;
 }
@@ -1275,7 +1278,7 @@ PUBLIC long int UCTransToUni ARGS2(
 
   ch_iu = (unsigned char)ch_in;
 #ifndef UC_NO_SHORTCUTS
-    if (charset_in == LATIN1)
+    if (charset_in == 0)
 	return ch_iu;
     if ((unsigned char)ch_in < 128 && (unsigned char)ch_in >= 32)
 	return ch_iu;
@@ -1503,34 +1506,27 @@ PUBLIC int UCGetRawUniMode_byLYhndl ARGS1(
 }
 
 /*
- *  Get Lynx internal charset handler from MIME name,
- *  return -1 if we got NULL or did not recognize value.
- *  According to RFC, MIME headers should match case-insensitively.
+ *  Currently the charset name has to match exactly -- not substring
+ *  matching as was done before (see HTMIME.c, HTML.c).
  */
 PUBLIC int UCGetLYhndl_byMIME ARGS1(
-	CONST char *,	value)
+	CONST char *,	UC_MIMEcharset)
 {
   int i;
   int LYhndl = -1;
-  char *UC_MIMEcharset = NULL;
 
-    if (!value || !(*value)) {
-	CTRACE(tfp, "UCGetLYhndl_byMIME: NULL argument instead of MIME name.\n");
+    if (!UC_MIMEcharset || !(*UC_MIMEcharset))
 	return -1;
-    }
-
-    StrAllocCopy(UC_MIMEcharset, value);
-    LYLowerCase(UC_MIMEcharset);
 
     for (i = 0;
 	 (i < MAXCHARSETS && i < LYNumCharsets &&
-	  LYchar_set_names[i]); i++) {
+	  LYchar_set_names[i] && LYhndl < 0); i++) {
 	if (LYCharSet_UC[i].MIMEname &&
 	    !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) {
-	    return i;
+	    LYhndl = i;
 	}
     }
-    {
+    if (LYhndl < 0) {
 	/*
 	 *  Not yet found, try synonyms. - FM
 	 */
@@ -1633,12 +1629,9 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1(
 	}
 	if (!strcmp(UC_MIMEcharset, "koi-8")) { /* accentsoft bugosity */
 	  return UCGetLYhndl_byMIME("koi8-r");
-	}
+  }
     }
-    /* no more synonyms if come here... */
-
-    CTRACE(tfp, "UCGetLYhndl_byMIME: unrecognized MIME name \"%s\"\n", value);
-    return -1;	/* returns -1 if no charset found by that MIME name */
+  return LYhndl;	/* returns -1 if no charset found by that MIME name */
 }
 
 /*
@@ -1661,7 +1654,7 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1(
 /*
  *  We need to remember which ones were allocated and which are static.
  */
-PRIVATE CONST char ** remember_allocated_LYCharSets[MAXCHARSETS];
+PRIVATE char ** remember_allocated_LYCharSets[MAXCHARSETS];
 
 PRIVATE void UCreset_allocated_LYCharSets NOARGS
 {
@@ -1683,17 +1676,17 @@ PRIVATE void UCfree_allocated_LYCharSets NOARGS
     }
 }
 
-PRIVATE CONST char ** UC_setup_LYCharSets_repl ARGS2(
+PRIVATE char ** UC_setup_LYCharSets_repl ARGS2(
 	int,		UC_charset_in_hndl,
 	unsigned,	lowest8)
 {
-    CONST char **ISO_Latin1 = LYCharSets[0];
-    CONST char **p;
+    char **ISO_Latin1 = LYCharSets[0];
+    char **p;
     char **prepl;
     u16 *pp;
-    CONST char **tp;
-    CONST char *s7;
-    CONST char *s8;
+    char **tp;
+    char *s7;
+    char *s8;
     size_t i;
     int j, changed;
     u16 k;
@@ -1702,7 +1695,7 @@ PRIVATE CONST char ** UC_setup_LYCharSets_repl ARGS2(
     /*
      *	Create a temporary table for reverse lookup of latin1 codes:
      */
-    tp = (CONST char **)malloc(96 * sizeof(CONST char *));
+    tp = (char **)malloc(96 * sizeof(char *));
     if (!tp)
 	return NULL;
     for (i = 0; i < 96; i++)
@@ -1756,14 +1749,12 @@ PRIVATE CONST char ** UC_setup_LYCharSets_repl ARGS2(
      *	Now allocate a new table compatible with LYCharSets[]
      *	and with the HTMLDTD for entities.
      *	We don't know yet whether we'll keep it around. */
-    prepl = (char **)malloc(HTML_dtd.number_of_entities * sizeof(char *));
-    if (!prepl) {
+    p = prepl = (char **)malloc(HTML_dtd.number_of_entities * sizeof(char *));
+    if (!p) {
 	FREE(tp);
 	FREE(ti);
-	return 0;
+	return NULL;
     }
-
-    p = (CONST char **)prepl;
     changed = 0;
     for (i = 0; i < HTML_dtd.number_of_entities; i++, p++) {
 	/*
@@ -1834,7 +1825,7 @@ PRIVATE CONST char ** UC_setup_LYCharSets_repl ARGS2(
 	FREE(prepl);
 	return NULL;
     }
-    return (CONST char **)prepl;
+    return prepl;
 }
 
 /*
@@ -1846,61 +1837,70 @@ PRIVATE int UC_Register_with_LYCharSets ARGS4(
 	CONST char *,	UC_LYNXcharset,
 	int,		lowest_eightbit)
 {
-    int i, LYhndl, found;
-    CONST char **repl;
+  int i, LYhndl, found;
+  char **repl;
 
-    LYhndl = -1;
+  LYhndl = -1;
     if (LYNumCharsets == 0) {
 	/*
 	 *  Initialize here; so whoever changes
 	 *  LYCharSets.c doesn't have to count...
 	 */
 	for (i = 0; (i < MAXCHARSETS) && LYchar_set_names[i]; i++) {
-	    LYNumCharsets = i+1;
+      LYNumCharsets = i+1;
 	}
     }
 
     /*
-     *	Search by MIME name, (LYchar_set_names may differ...)
+     *	Do different kinds of searches...
+     *	Normally the first should find the match if there is one!
      */
     for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) {
+	if (!strcmp(UC_LYNXcharset, LYchar_set_names[i])) {
+	    LYhndl = i;
+	}
+    }
+    for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) {
 	if (LYCharSet_UC[i].MIMEname &&
 	    !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) {
 	    LYhndl = i;
 	}
     }
 
-    if (LYhndl < 0) {		/* not found */
-	found = 0;
-	if (LYNumCharsets >= MAXCHARSETS) {
-	    CTRACE(tfp, "UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.",
+  if (LYhndl < 0) {		/* not found */
+    found = 0;
+    if (LYNumCharsets >= MAXCHARSETS) {
+	    if (TRACE) {
+		fprintf(stderr,
+		    "UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.",
 			UC_MIMEcharset, UC_LYNXcharset);
-	    return -1;
-	}
+	    }
+      return -1;
+    }
 	/*
 	 *  Add to LYCharSets.c lists.
 	 */
 	LYhndl = LYNumCharsets;
 	LYNumCharsets ++;
-	LYlowest_eightbit[LYhndl] = 999;
-	LYCharSets[LYhndl] = SevenBitApproximations;
+    LYlowest_eightbit[LYhndl] = 999;
+    LYCharSets[LYhndl] = SevenBitApproximations;
 	/*
 	 *  Hmm, try to be conservative here.
 	 */
 	LYchar_set_names[LYhndl] = UC_LYNXcharset;
 	LYchar_set_names[LYhndl+1] = NULL;
 	/*
-	*  Terminating NULL may be looked for by Lynx code.
-	*/
+	 *  Terminating NULL may be looked for by Lynx code.
+	 */
     } else {
 	found = 1;
     }
-    LYCharSet_UC[LYhndl].UChndl = s;
+  LYCharSet_UC[LYhndl].UChndl = s;
     /*
      *	Can we just copy the pointer?  Hope so...
      */
-    LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset;
-    LYCharSet_UC[LYhndl].enc = UCInfo[s].enc;
+  LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset;
+  LYCharSet_UC[LYhndl].enc = UCInfo[s].enc;
 
     /*
      *	@@@ We really SHOULD get more info from the table files,
@@ -1908,22 +1908,22 @@ PRIVATE int UC_Register_with_LYCharSets ARGS4(
      *	that info...  For now, let's try it without. - KW
      */
     if (lowest_eightbit < LYlowest_eightbit[LYhndl]) {
-	LYlowest_eightbit[LYhndl] = lowest_eightbit;
+    LYlowest_eightbit[LYhndl] = lowest_eightbit;
     } else if (lowest_eightbit > LYlowest_eightbit[LYhndl]) {
-	UCInfo[s].lowest_eight = LYlowest_eightbit[LYhndl];
+    UCInfo[s].lowest_eight = LYlowest_eightbit[LYhndl];
     }
 
-    if (!found && LYhndl > 0) {
-	repl = UC_setup_LYCharSets_repl(s,UCInfo[s].lowest_eight);
-	if (repl) {
-	    LYCharSets[LYhndl] = repl;
+  if (!found && LYhndl > 0) {
+    repl = UC_setup_LYCharSets_repl(s,UCInfo[s].lowest_eight);
+    if (repl) {
+      LYCharSets[LYhndl] = repl;
 	    /*
 	     *	Remember to FREE at exit.
 	     */
-	    remember_allocated_LYCharSets[LYhndl] = repl;
-	}
+      remember_allocated_LYCharSets[LYhndl]=repl;
     }
-    return LYhndl;
+  }
+  return LYhndl;
 }
 
 /*
@@ -1956,8 +1956,10 @@ PUBLIC void UC_Charset_Setup ARGS8(
 	s = found;
     } else {
 	if (UCNumCharsets >= MAXCHARSETS) {
-	    CTRACE(tfp, "UC_Charset_Setup: Too many. Ignoring %s/%s.",
-			UC_MIMEcharset, UC_LYNXcharset);
+	    if (TRACE) {
+		fprintf(stderr, "UC_Charset_Setup: Too many. Ignoring %s/%s.",
+				UC_MIMEcharset, UC_LYNXcharset);
+	    }
 	    return;
 	}
 	s = UCNumCharsets;
@@ -2064,19 +2066,3 @@ PUBLIC void UCInit NOARGS
  *  check function UCGetLYhndl_byMIME in this file.
  */
 }
-
-/*
- *  Safe variant of UCGetLYhndl_byMIME, with blind recovery from typo
- *  in user input: lynx.cfg, userdefs.h, switches from command line.
- */
-PUBLIC int safeUCGetLYhndl_byMIME ARGS1 (CONST char *, value)
-{
-    int i = UCGetLYhndl_byMIME(value);
-
-    if (i == -1) {	/* was user's typo or not yet recognized value */
-	i = LATIN1;	/* error recovery? */
-	CTRACE(tfp, "safeUCGetLYhndl_byMIME: ISO-8859-1 assumed.\n");
-    }
-
-    return(i);
-}