about summary refs log tree commit diff stats
path: root/src/chrtrans/makeuctb.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/chrtrans/makeuctb.c')
-rw-r--r--src/chrtrans/makeuctb.c564
1 files changed, 564 insertions, 0 deletions
diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c
new file mode 100644
index 00000000..aeb1a0e9
--- /dev/null
+++ b/src/chrtrans/makeuctb.c
@@ -0,0 +1,564 @@
+/*
+ * makeuctb.c, derived from conmakehash.c
+ *
+ * [ original comments: - kw ]
+ * Create arrays for initializing the kernel folded tables (using a hash
+ * table turned out to be too limiting...)  Unfortunately we can't simply
+ * preinitialize the tables at compile time since kfree() cannot accept
+ * memory not allocated by kmalloc(), and doing our own memory management
+ * just for this seems like massive overkill.
+ *
+ * Copyright (C) 1995 H. Peter Anvin
+ *
+ * This program is a part of the Linux kernel, and may be freely
+ * copied under the terms of the GNU General Public License (GPL),
+ * version 2, or at your option any later version.
+ */
+
+#ifdef NOTDEFINED
+#include <stdio.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <string.h>
+#include <ctype.h>
+#else
+#include "tcp.h"
+#undef exit			/* don't try to use LYexit() */
+#endif
+
+#ifndef TOLOWER
+#define TOLOWER(c) (isupper((unsigned char)c) ? tolower((unsigned char)c) : (c))
+#endif /* ndef TOLOWER */
+
+#include "UCkd.h"
+#include "UCDefs.h"
+
+#define MAX_FONTLEN 256	/* number of font positions the tables below have room for */
+
+/* Type used for the Unicode values stored in unitable[].
+   We don't deal with UCS4 here... -kw */
+typedef u16 unicode;
+
+/*
+ * Print the command-line synopsis to stderr and terminate the program
+ * with status EX_USAGE.  argv0 is the program name shown in the synopsis.
+ */
+PRIVATE void usage ARGS1(char *, argv0)
+{
+  /* The first and last lines have no conversions, so fputs() suffices. */
+  fputs("Usage: \n", stderr);
+  fprintf(stderr,
+	  "        %s chartable [charsetmimename] [charsetdisplayname]\n",
+	  argv0);
+  fputs("Utility to convert .tbl into .h files for Lynx compilation.\n",
+	stderr);
+
+  exit(EX_USAGE);
+}
+
+/*
+ * Parse one Unicode token of the form U+hhhh (exactly four hexadecimal
+ * digits), skipping leading blanks and tabs.
+ *
+ * On success the value is returned and *p0 is advanced just past the
+ * token.  Returns -2 if the first non-blank character is '-', and -1 for
+ * any other malformed token; *p0 is left untouched in both failure cases.
+ */
+PRIVATE int getunicode ARGS1(char **, p0)
+{
+  char *p = *p0;
+
+  while (*p == ' ' || *p == '\t')
+    p++;
+
+  if (*p == '-')
+    return -2;
+
+  /* The <ctype.h> functions require an unsigned char value (or EOF); a
+     plain char holding a byte >= 0x80 may be negative, so cast first.
+     The || chain stops at the first mismatch, so nothing beyond the
+     terminating NUL is ever examined. */
+  if (p[0] != 'U' || p[1] != '+' ||
+      !isxdigit((unsigned char) p[2]) || !isxdigit((unsigned char) p[3]) ||
+      !isxdigit((unsigned char) p[4]) || !isxdigit((unsigned char) p[5]) ||
+      isxdigit((unsigned char) p[6]))	/* a fifth digit is an error */
+    return -1;
+
+  *p0 = p + 6;
+  return (int) strtol(p + 2, NULL, 16);
+}
+
+unicode unitable[MAX_FONTLEN][255];	/* unitable[fp][k]: k-th Unicode value recorded for font position fp */
+				/* Massive overkill, but who cares? */
+int unicount[MAX_FONTLEN];	/* unicount[fp]: how many unitable[fp][] slots are in use */
+
+struct unimapdesc_str themap_str = {0, NULL};	/* Unicode -> replacement string list, filled by addpair_str() */
+
+char *tblname;			/* name of the input table, used in diagnostics */
+
+/*
+ * Record that Unicode value `un' is to be replaced by the string `str'.
+ *
+ * The pointer itself is stored (no copy is made).  If `un' already has an
+ * entry, its string is replaced.  Values above 0xfffe are silently
+ * ignored.  The table is allocated on first use with room for 2000
+ * entries; exceeding that, or failing to allocate, terminates the program
+ * with EX_DATAERR.
+ */
+PRIVATE void addpair_str ARGS2(char *, str, int, un)
+{
+  int idx;
+  struct unipair_str *slot;
+
+  if ( un > 0xfffe )
+    return;			/* out of range: ignore */
+
+  if (themap_str.entry_ct == 0)
+    {
+      /* Nothing stored yet: set up the map for replacement strings. */
+      themap_str.entries =
+	(struct unipair_str *) malloc (2000 * sizeof (struct unipair_str));
+      if (themap_str.entries == NULL)
+	{
+	  fprintf(stderr,
+		  "%s: Out of memory\n", tblname);
+	  exit(EX_DATAERR);
+	}
+    }
+  else
+    {
+      /* An existing entry for this value just gets the new string. */
+      for (idx = 0; idx < themap_str.entry_ct; idx++)
+	{
+	  if (themap_str.entries[idx].unicode == un)
+	    {
+	      themap_str.entries[idx].replace_str = str;
+	      return;
+	    }
+	}
+    }
+
+  /* Append a new entry, provided the table is not already full. */
+  if (themap_str.entry_ct >= 2000)
+    {
+      fprintf(stderr, "ERROR: Only 2000 unicode replacement strings permitted!\n");
+      exit(EX_DATAERR);
+    }
+
+  slot = &themap_str.entries[themap_str.entry_ct];
+  slot->unicode = un;
+  slot->replace_str = str;
+  themap_str.entry_ct++;
+}
+
+/*
+ * Add Unicode value `un' to the list kept for font position `fp'.
+ *
+ * Values above 0xfffe and values already present in the list are
+ * ignored.  A list may hold at most 255 values; trying to add one more
+ * terminates the program with EX_DATAERR.  `fp' is used as an array index
+ * without a range check here.
+ */
+PRIVATE void addpair ARGS2(int, fp, int, un)
+{
+  int k;
+  int used;
+
+  if ( un > 0xfffe )
+    return;			/* out of range: ignore */
+
+  used = unicount[fp];
+
+  /* Nothing to do if this value is already listed. */
+  for (k = 0; k < used; k++)
+    {
+      if (unitable[fp][k] == un)
+	return;
+    }
+
+  if (used >= 255)
+    {
+      fprintf(stderr, "ERROR: Only 255 unicodes/glyph permitted!\n");
+      exit(EX_DATAERR);
+    }
+
+  unitable[fp][used] = un;
+  unicount[fp] = used + 1;
+}
+
+char this_MIMEcharset[UC_MAXLEN_MIMECSNAME +1];	/* MIME charset name: from an 'M' line, argv[2], or the table name; lowercased in main() */
+char this_LYNXcharset[UC_MAXLEN_LYNXCSNAME +1];	/* display name: from an 'O' line, argv[3], or the MIME name; space-padded in main() */
+char id_append[UC_MAXLEN_ID_APPEND +1] = "_";	/* suffix for generated identifiers; emptied for the default map */
+int this_isDefaultMap = -1;	/* -1 until set by a 'D' line or derived from the charset name */
+int RawUni = 0;			/* numeric code taken from an 'R' line */
+int lowest_eight = 999;		/* lowest font position >= 128 that has a mapping; 999 if none */
+
+/*
+ * Convert a character table (.tbl) into C source written to stdout.
+ *
+ * argv[1] names the table file ("-" reads stdin).  The optional argv[2]
+ * and argv[3] override the MIME charset name and the display name that
+ * the table itself may supply.  The output holds the dfont_unicount and
+ * dfont_unitable arrays, the replacement string map, and a
+ * UC_CHARSET_SETUP macro.  A one-line summary goes to stderr.
+ *
+ * Exits with EX_OK on success, EX_NOINPUT if the table cannot be opened,
+ * and EX_DATAERR on malformed input or memory exhaustion.
+ */
+int main ARGS2(int, argc, char **, argv)
+{
+  FILE *ctbl;
+  char buffer[65536];
+  int fontlen;
+  int i, nuni, nent;
+  int fp0, fp1, un0, un1;
+  char *p, *p1;
+  char *tbuf, ch;
+
+  if ( argc < 2 || argc > 4 )
+    usage(argv[0]);
+
+  if ( !strcmp(argv[1],"-") )
+    {
+      ctbl = stdin;
+      tblname = "stdin";
+    }
+  else
+    {
+      ctbl = fopen(tblname = argv[1], "r");
+      if ( !ctbl )
+	{
+	  perror(tblname);
+	  exit(EX_NOINPUT);
+	}
+    }
+
+  /* For now we assume the default font is always 256 characters. */
+  fontlen = 256;
+
+  /* Initialize table */
+
+  for ( i = 0 ; i < fontlen ; i++ )
+    unicount[i] = 0;
+
+  /* Now we come to the tricky part.  Parse the input table. */
+
+  while ( fgets(buffer, sizeof(buffer), ctbl) != NULL )
+    {
+      if ( (p = strchr(buffer, '\n')) != NULL )
+	*p = '\0';
+      else if ( !feof(ctbl) )
+	/* A missing newline at end of file is not an overlong line. */
+	fprintf(stderr, "%s: Warning: line too long\n", tblname);
+
+      p = buffer;
+
+/*
+ * Syntax accepted:
+ *	<fontpos>	<unicode> <unicode> ...
+ *	<fontpos>	<unicode range> <unicode range> ...
+ *	<fontpos>	idem
+ *	<range>		idem
+ *	<range>		<unicode range>
+ *      <unicode>	:<replace>
+ *      <unicode range>	:<replace>
+ *
+ * where <range> ::= <fontpos>-<fontpos>
+ * and <unicode> ::= U+<h><h><h><h>
+ * and <h> ::= <hexadecimal digit>
+ * and <replace> any string not containing '\n' or '\0'
+ */
+
+      while (*p == ' ' || *p == '\t')
+	p++;
+      if (!*p || *p == '#')
+	continue;	/* skip comment or blank line */
+
+      /* Single-letter directives; each one consumes the whole line. */
+      switch (*p) {
+      case 'R':			/* Raw Unicode? I.e. needs some special
+				 processing.  One digit code. */
+	p++;
+	while (*p == ' ' || *p == '\t')
+	  p++;
+	RawUni = strtol(p,0,10);
+	continue;
+
+      case 'D':			/* Is this the default display font? */
+	p++;
+	while (*p == ' ' || *p == '\t')
+	  p++;
+	this_isDefaultMap = (*p == '1');
+	continue;
+      case 'M':			/* MIME name of the charset */
+	p++;
+	while (*p == ' ' || *p == '\t')
+	  p++;
+	/* NOTE(review): the field width 40 is hard-coded; it must not
+	   exceed UC_MAXLEN_MIMECSNAME - confirm against UCDefs.h. */
+	sscanf(p,"%40s",this_MIMEcharset);
+	continue;
+      case 'O':            /* Display charset name for options screen */
+	p++;
+	while (*p == ' ' || *p == '\t')
+	  p++;
+	for (i=0; *p && i<UC_MAXLEN_LYNXCSNAME; p++,i++)
+	  this_LYNXcharset[i] = *p;
+	this_LYNXcharset[i] = '\0';
+	continue;
+      }
+
+      if (*p == 'U')
+	{
+	  /* <unicode> or <unicode range>, followed by ":<replace>" */
+	  un0 = getunicode(&p);
+	  if (un0 < 0)
+	    {
+	      fprintf(stderr, "Bad input line: %s\n", buffer);
+	      exit(EX_DATAERR);
+	    }
+	  un1 = un0;
+	  while (*p == ' ' || *p == '\t')
+	    p++;
+	  if (*p == '-')
+	    {
+	      p++;
+	      while (*p == ' ' || *p == '\t')
+		p++;
+	      un1 = getunicode(&p);
+	      if (un1 < 0 || un1 < un0)
+		{
+		  fprintf(stderr,
+			  "%s: Bad Unicode range U+%x-U+%x\n",
+			  tblname, un0, un1);
+		  fprintf(stderr, "Bad input line: %s\n", buffer);
+		  exit(EX_DATAERR);
+		}
+	      while (*p == ' ' || *p == '\t')
+		p++;
+	    }
+	  if (*p != ':')
+	    {
+	      fprintf(stderr, "No ':' where expected: %s\n", buffer);
+	      continue;
+	    }
+
+	  /* Every input byte expands to at most four output bytes
+	     (backslash plus three octal digits), hence 4*len + 1. */
+	  tbuf = (char *) malloc (4*strlen(++p) + 1);
+	  if (!(p1 = tbuf))
+	    {
+	      fprintf(stderr,
+		      "%s: Out of memory\n", tblname);
+	      exit(EX_DATAERR);
+	    }
+	  for (; (ch = *p) != '\0'; p++,p1++)
+	    {
+	      if ((unsigned char)ch < 32 || ch == '\\' || ch == '\"' ||
+		  (unsigned char)ch >= 127)
+		{
+		  /* Emit as an octal escape so the result is safe inside
+		     a C string literal. */
+		  sprintf(p1,"\\%.3o",(unsigned char)ch);
+		  p1 += 3;	/* the loop increment supplies the 4th */
+		}
+	      else
+		*p1 = ch;
+	    }
+	  *p1 = '\0';
+	  /* All values of the range share the same string pointer. */
+	  for (i=un0; i<=un1; i++)
+	    addpair_str(tbuf,i);
+	  continue;
+	}
+
+      fp0 = strtol(p, &p1, 0);
+      if (p1 == p)
+	{
+	  fprintf(stderr, "Bad input line: %s\n", buffer);
+	  exit(EX_DATAERR);
+	}
+      p = p1;
+
+      while (*p == ' ' || *p == '\t')
+	p++;
+      if (*p == '-')
+	{
+	  p++;
+	  fp1 = strtol(p, &p1, 0);
+	  if (p1 == p)
+	    {
+	      fprintf(stderr, "Bad input line: %s\n", buffer);
+	      exit(EX_DATAERR);
+	    }
+	  p = p1;
+	}
+      else
+	fp1 = 0;		/* 0 doubles as "no range given" */
+
+      if ( fp0 < 0 || fp0 >= fontlen )
+	{
+	    fprintf(stderr,
+		    "%s: Glyph number (0x%x) larger than font length\n",
+		    tblname, fp0);
+	    exit(EX_DATAERR);
+	}
+      if ( fp1 && (fp1 < fp0 || fp1 >= fontlen) )
+	{
+	    fprintf(stderr,
+		    "%s: Bad end of range (0x%x)\n",
+		    tblname, fp1);
+	    exit(EX_DATAERR);
+	}
+
+      if (fp1)
+	{
+	  /* we have a range; expect the word "idem" or a Unicode range of the
+	     same length */
+	  while (*p == ' ' || *p == '\t')
+	    p++;
+	  if (!strncmp(p, "idem", 4))
+	    {
+	      for (i=fp0; i<=fp1; i++)
+		addpair(i,i);
+	      p += 4;
+	    }
+	  else
+	    {
+	      un0 = getunicode(&p);
+	      while (*p == ' ' || *p == '\t')
+		p++;
+	      if (*p != '-')
+		{
+		  fprintf(stderr,
+"%s: Corresponding to a range of font positions, there should be a Unicode range\n",
+			  tblname);
+		  exit(EX_DATAERR);
+		}
+	      p++;
+	      un1 = getunicode(&p);
+	      if (un0 < 0 || un1 < 0)
+		{
+		  fprintf(stderr,
+"%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n",
+			  tblname, fp0, fp1);
+		  exit(EX_DATAERR);
+		}
+	      if (un1 - un0 != fp1 - fp0)
+		{
+		  fprintf(stderr,
+"%s: Unicode range U+%x-U+%x not of the same length as font position range 0x%x-0x%x\n",
+			  tblname, un0, un1, fp0, fp1);
+		  exit(EX_DATAERR);
+		}
+	      for(i=fp0; i<=fp1; i++)
+		addpair(i,un0-fp0+i);
+	    }
+	}
+      else
+	{
+	    /* no range; expect a list of unicode values or unicode ranges
+	       for a single font position, or the word "idem" */
+
+	    while (*p == ' ' || *p == '\t')
+		p++;
+	    if (!strncmp(p, "idem", 4))
+	    {
+		addpair(fp0,fp0);
+		p += 4;
+	    }
+	    while ( (un0 = getunicode(&p)) >= 0 ) {
+		addpair(fp0, un0);
+		while (*p == ' ' || *p == '\t')
+		    p++;
+		if (*p == '-') {
+		    p++;
+		    un1 = getunicode(&p);
+		    /* also catches a failed parse, which is negative */
+		    if (un1 < un0)
+		    {
+			fprintf(stderr,
+				"%s: Bad Unicode range 0x%x-0x%x\n",
+				tblname, un0, un1);
+			exit(EX_DATAERR);
+		    }
+		    for(un0++; un0 <= un1; un0++)
+			addpair(fp0, un0);
+		}
+	    }
+	}
+      while (*p == ' ' || *p == '\t')
+	p++;
+      if (*p && *p != '#')
+	fprintf(stderr, "%s: trailing junk (%s) ignored\n", tblname, p);
+    }
+
+  /* Okay, we hit EOF, now output the tables */
+
+  fclose(ctbl);
+
+
+  /* Compute total size of Unicode list */
+  nuni = 0;
+  for ( i = 0 ; i < fontlen ; i++ )
+    nuni += unicount[i];
+
+  /* Settle the charset names: a command line argument wins over what the
+     table said; the table name is the last resort.  strncpy() does not
+     terminate a truncated copy, so terminate explicitly. */
+  if (argc >=3 )
+    {
+      strncpy(this_MIMEcharset,argv[2],UC_MAXLEN_MIMECSNAME);
+      this_MIMEcharset[UC_MAXLEN_MIMECSNAME] = '\0';
+    }
+  else if (! *this_MIMEcharset)
+    {
+      strncpy(this_MIMEcharset,tblname,UC_MAXLEN_MIMECSNAME);
+      this_MIMEcharset[UC_MAXLEN_MIMECSNAME] = '\0';
+      if ((p = strchr(this_MIMEcharset,'.')) != 0)
+	*p = '\0';
+    }
+  for (p=this_MIMEcharset; *p; p++)
+      *p = TOLOWER(*p);
+  if (argc >=4 )
+    {
+      strncpy(this_LYNXcharset,argv[3],UC_MAXLEN_LYNXCSNAME);
+      this_LYNXcharset[UC_MAXLEN_LYNXCSNAME] = '\0';
+    }
+  else if (! *this_LYNXcharset)
+    {
+      strncpy(this_LYNXcharset,this_MIMEcharset,UC_MAXLEN_LYNXCSNAME);
+      this_LYNXcharset[UC_MAXLEN_LYNXCSNAME] = '\0';
+    }
+  /* Pad the display name with blanks up to UC_LEN_LYNXCSNAME. */
+  if ((i=strlen(this_LYNXcharset)) < UC_LEN_LYNXCSNAME) {
+    for (;i<UC_LEN_LYNXCSNAME;i++)
+      this_LYNXcharset[i] = ' ';
+    this_LYNXcharset[i] = '\0';
+  }
+
+  if (this_isDefaultMap == -1)
+    this_isDefaultMap = !strncmp(this_MIMEcharset,"iso-8859-1",10);
+  fprintf(stderr,"makeuctb: %s: %stranslation map",
+	  this_MIMEcharset, (this_isDefaultMap ? "default " : ""));
+  if (this_isDefaultMap == 1)
+    *id_append = '\0';
+  else
+    {
+      /* Build "_" + charset name with everything that cannot appear in
+	 a C identifier replaced by '_'.  The terminator belongs inside
+	 this branch: for the default map `i' still holds the length of
+	 the display name and must not be used as an index here. */
+      for (i=0,p=this_MIMEcharset; *p && (i < UC_MAXLEN_ID_APPEND-1); p++,i++)
+	id_append[i+1] = isalnum((unsigned char)*p) ? *p : '_';
+      id_append[i+1] = '\0';
+    }
+  fprintf(stderr," (%s).\n", id_append);
+
+
+  printf("\
+/*\n\
+ * uni_hash.tbl\n\
+ *\n\
+ * Do not edit this file; it was automatically generated by\n\
+ *\n\
+ * %s %s\n\
+ *\n\
+ */\n\
+\n\
+static u8 dfont_unicount%s[%d] = \n\
+{\n\t", argv[0], argv[1], id_append, fontlen);
+
+  for ( i = 0 ; i < fontlen ; i++ )
+    {
+      /* Remember the first upper-half position that has any mapping. */
+      if (i >= 128 && unicount[i] > 0 && i < lowest_eight)
+        lowest_eight = i;
+      printf("%3d", unicount[i]);
+      if ( i == fontlen-1 )
+        printf("\n};\n");
+      else if ( i % 8 == 7 )
+        printf(",\n\t");
+      else
+        printf(", ");
+    }
+
+  if (nuni)
+    printf("\nstatic u16 dfont_unitable%s[%d] = \n{\n\t", id_append, nuni);
+  else
+    printf("\nstatic u16 dfont_unitable%s[1]; /* dummy */\n", id_append);
+
+  /* Flatten unitable[][] in font position order. */
+  fp0 = 0;
+  nent = 0;
+  for ( i = 0 ; i < nuni ; i++ )
+    {
+       while ( nent >= unicount[fp0] )
+         {
+            fp0++;
+            nent = 0;
+         }
+       printf("0x%04x", unitable[fp0][nent++]);
+       if ( i == nuni-1 )
+         printf("\n};\n");
+       else if ( i % 8 == 7 )
+         printf(",\n\t");
+       else
+         printf(", ");
+    }
+
+  if (themap_str.entry_ct)
+    printf("\n\
+static struct unipair_str repl_map%s[%d] = \n\
+{\n\t", id_append, themap_str.entry_ct);
+  else
+    printf("\n\
+/* static struct unipair_str repl_map%s[]; */\n", id_append);
+
+  for ( i = 0 ; i < themap_str.entry_ct ; i++ )
+    {
+      printf("{0x%x,\"%s\"}", themap_str.entries[i].unicode,
+                              themap_str.entries[i].replace_str);
+      if ( i == themap_str.entry_ct-1 )
+        printf("\n};\n");
+      else if ( i % 4 == 3 )
+        printf(",\n\t");
+      else
+        printf(", ");
+    }
+  if (themap_str.entry_ct)
+    printf("\n\
+static struct unimapdesc_str dfont_replacedesc%s = {%d,repl_map%s};\n",
+id_append, themap_str.entry_ct, id_append);
+  else
+    printf("\n\
+static struct unimapdesc_str dfont_replacedesc%s = {0,NULL};\n",id_append);
+
+  printf("#define UC_CHARSET_SETUP%s UC_Charset_Setup(\
+\"%s\",\\\n\"%s\",\\\n\
+dfont_unicount%s,dfont_unitable%s,%i,\\\n\
+dfont_replacedesc%s,%i,%i)\n",
+id_append, this_MIMEcharset, this_LYNXcharset,
+id_append, id_append, nuni, id_append, lowest_eight, RawUni);
+
+  exit(EX_OK);
+}