/* * UCmap.c * - K&R compatibility macros not (yet?) done - * * Derived from code in the Linux kernel console driver. * The GNU Public Licence therefore applies, see * the file COPYING in the about_lynx directory * which should come with every Lynx distribution. * * [ original comment: -kw ] * * Mapping from internal code (such as Latin-1 or Unicode or IBM PC code) * to font positions. * * aeb, 950210 */ #include "chrtrans/UCkd.h" #include "tcp.h" #include "HTMLDTD.h" #include "HTAAUtil.h" /* defines FREE */ #include "HTAtom.h" /* for use by UC_setup_LYCharSets_repl() */ #include "LYGlobalDefs.h" #include "LYCharSets.h" #include "UCdomap.h" #include "UCMap.h" #include "UCDefs.h" /* Include hash tables & parameters */ #include "chrtrans/def7_uni.h" #include "chrtrans/iso01_uni.h" #include "chrtrans/iso02_uni.h" #include "chrtrans/iso03_uni.h" #include "chrtrans/iso04_uni.h" #include "chrtrans/iso05_uni.h" #include "chrtrans/iso07_uni.h" #include "chrtrans/iso09_uni.h" #include "chrtrans/iso10_uni.h" #include "chrtrans/koi8r_uni.h" #include "chrtrans/cp437_uni.h" #include "chrtrans/cp850_uni.h" #include "chrtrans/cp852_uni.h" #include "chrtrans/cp1250_uni.h" #include "chrtrans/cp1252_uni.h" #include "chrtrans/utf8_uni.h" #include "chrtrans/rfc_suni.h" #include "chrtrans/mnemonic_suni.h" /* #include "chrtrans/mnem_suni.h" */ /* Some of the code below, and some of the comments, left in for historical reasons. Not all those tables below are currently really needed (and what with all those hardwired codepoints), but let's keep them around for now, they may come in handy if we decide to make more extended use of the mechanisms (including e.g. for chars < 127...) - kw */ static u16 translations[][256] = { /* 8-bit Latin-1 mapped to Unicode -- trivial mapping */ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff }, /* VT100 graphics mapped to Unicode */ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x00a0, 0x25c6, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0xf800, 0xf801, 0x2500, 0xf803, 0xf804, 0x251c, 0x2524, 0x2534, 0x252c, 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff }, /* IBM Codepage 437 mapped to Unicode */ { 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c, 0x25ba, 0x25c4, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2302, 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 }, /* User mapping -- default to codes for direct font mapping */ { 0xf000, 0xf001, 0xf002, 0xf003, 0xf004, 0xf005, 0xf006, 0xf007, 0xf008, 0xf009, 0xf00a, 0xf00b, 0xf00c, 0xf00d, 0xf00e, 0xf00f, 0xf010, 0xf011, 0xf012, 0xf013, 0xf014, 0xf015, 0xf016, 0xf017, 0xf018, 0xf019, 0xf01a, 0xf01b, 0xf01c, 0xf01d, 0xf01e, 0xf01f, 0xf020, 0xf021, 0xf022, 0xf023, 0xf024, 0xf025, 0xf026, 0xf027, 0xf028, 0xf029, 0xf02a, 0xf02b, 0xf02c, 0xf02d, 0xf02e, 0xf02f, 0xf030, 0xf031, 0xf032, 0xf033, 0xf034, 0xf035, 0xf036, 0xf037, 0xf038, 0xf039, 0xf03a, 0xf03b, 0xf03c, 0xf03d, 0xf03e, 0xf03f, 0xf040, 0xf041, 0xf042, 0xf043, 0xf044, 0xf045, 0xf046, 0xf047, 0xf048, 0xf049, 0xf04a, 0xf04b, 0xf04c, 0xf04d, 0xf04e, 0xf04f, 0xf050, 0xf051, 0xf052, 0xf053, 0xf054, 0xf055, 0xf056, 0xf057, 0xf058, 0xf059, 0xf05a, 0xf05b, 0xf05c, 0xf05d, 0xf05e, 0xf05f, 0xf060, 0xf061, 0xf062, 0xf063, 0xf064, 0xf065, 0xf066, 0xf067, 0xf068, 0xf069, 0xf06a, 0xf06b, 0xf06c, 0xf06d, 0xf06e, 0xf06f, 0xf070, 0xf071, 0xf072, 0xf073, 0xf074, 0xf075, 0xf076, 0xf077, 0xf078, 0xf079, 0xf07a, 0xf07b, 0xf07c, 0xf07d, 0xf07e, 0xf07f, 0xf080, 0xf081, 0xf082, 0xf083, 0xf084, 0xf085, 0xf086, 0xf087, 0xf088, 0xf089, 0xf08a, 0xf08b, 0xf08c, 0xf08d, 0xf08e, 0xf08f, 0xf090, 0xf091, 0xf092, 0xf093, 0xf094, 0xf095, 0xf096, 0xf097, 0xf098, 0xf099, 0xf09a, 0xf09b, 0xf09c, 0xf09d, 0xf09e, 0xf09f, 0xf0a0, 0xf0a1, 0xf0a2, 0xf0a3, 0xf0a4, 0xf0a5, 0xf0a6, 0xf0a7, 0xf0a8, 0xf0a9, 0xf0aa, 0xf0ab, 0xf0ac, 0xf0ad, 0xf0ae, 0xf0af, 0xf0b0, 0xf0b1, 0xf0b2, 0xf0b3, 0xf0b4, 0xf0b5, 0xf0b6, 0xf0b7, 0xf0b8, 0xf0b9, 0xf0ba, 0xf0bb, 0xf0bc, 0xf0bd, 0xf0be, 0xf0bf, 0xf0c0, 0xf0c1, 0xf0c2, 0xf0c3, 0xf0c4, 0xf0c5, 0xf0c6, 0xf0c7, 0xf0c8, 0xf0c9, 0xf0ca, 0xf0cb, 0xf0cc, 0xf0cd, 0xf0ce, 0xf0cf, 0xf0d0, 0xf0d1, 0xf0d2, 0xf0d3, 0xf0d4, 0xf0d5, 0xf0d6, 0xf0d7, 0xf0d8, 0xf0d9, 0xf0da, 0xf0db, 0xf0dc, 0xf0dd, 0xf0de, 0xf0df, 0xf0e0, 0xf0e1, 0xf0e2, 0xf0e3, 0xf0e4, 0xf0e5, 0xf0e6, 0xf0e7, 0xf0e8, 0xf0e9, 0xf0ea, 0xf0eb, 0xf0ec, 0xf0ed, 0xf0ee, 0xf0ef, 0xf0f0, 0xf0f1, 0xf0f2, 0xf0f3, 0xf0f4, 0xf0f5, 0xf0f6, 0xf0f7, 0xf0f8, 0xf0f9, 0xf0fa, 0xf0fb, 0xf0fc, 0xf0fd, 0xf0fe, 0xf0ff } }; static u16 *UC_translate = NULL; /* The standard kernel character-to-font mappings are not invertible -- this is just a best effort. */ #define MAX_GLYPH 512 /* Max possible glyph value */ static unsigned char * inv_translate = NULL; static unsigned char inv_norm_transl[MAX_GLYPH]; static unsigned char * inverse_translations[4] = { NULL, NULL, NULL, NULL }; PRIVATE void con_clear_unimap NOPARAMS; PRIVATE void set_inverse_transl ARGS1(int, i) { int j, glyph; u16 *p = translations[i]; unsigned char *q = inverse_translations[i]; if (!q) { /* slightly messy to avoid calling kmalloc too early */ q = inverse_translations[i] = ((i == LAT1_MAP) ? inv_norm_transl : (unsigned char *) malloc(MAX_GLYPH)); if (!q) return; } for (j=0; j= 0 && glyph < MAX_GLYPH && q[glyph] < 32) { /* prefer '-' above SHY etc. */ q[glyph] = j; } } } u16 *set_translate ARGS1(int, m) { if (!inverse_translations[m]) set_inverse_transl(m); inv_translate = inverse_translations[m]; return translations[m]; } #ifdef NOTDEFINED /* * Inverse translation is impossible for several reasons: * 1. The font<->character maps are not 1-1. * 2. The text may have been written while a different translation map * was active, or using Unicode. * Still, it is now possible to a certain extent to cut and paste non-ASCII. */ unsigned char inverse_translate(int glyph) { if ( glyph < 0 || glyph >= MAX_GLYPH ) return 0; else return ((inv_translate && inv_translate[glyph]) ? inv_translate[glyph] : (unsigned char)(glyph & 0xff)); } /* * Load customizable translation table * arg points to a 256 byte translation table. * * The "old" variants are for translation directly to font (using the * 0xf000-0xf0ff "transparent" Unicodes) whereas the "new" variants set * Unicodes explictly. */ int con_set_trans_old(unsigned char * arg) { int i; u16 *p = translations[USER_MAP]; #if(0) i = verify_area(VERIFY_READ, (void *)arg, E_TABSZ); if (i) return i; #endif for (i=0; i= 0 && UC_charset_hndl < UCNumCharsets); } PRIVATE void UC_con_set_trans ARGS3(int, UC_charset_in_hndl, int, Gn, int, update_flag) { int i, j; u16 *p; u16 *ptrans; if (!UC_valid_UC_charset(UC_charset_in_hndl)) { if (TRACE) fprintf(stderr,"UC_con_set_trans: Invalid charset handle %i.\n", UC_charset_in_hndl); return; } ptrans = translations[Gn]; p = UCInfo[UC_charset_in_hndl].unitable; #if(0) if (p == UC_current_unitable) { /* test whether pointers are equal */ return; /* nothing to be done */ } /* The font is always 256 characters - so far. */ con_clear_unimap(); #endif for ( i = 0 ; i < 256 ; i++ ) { if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) { ptrans[i] = *p; for ( ; j ; j-- ) p++; } else ptrans[i] = 0xfffd; } if (update_flag) set_inverse_transl(Gn); /* Update inverse translation for this one */ } /* * Unicode -> current font conversion * * A font has at most 512 chars, usually 256. * But one font position may represent several Unicode chars. * A hashtable is somewhat of a pain to deal with, so use a * "paged table" instead. Simulation has shown the memory cost of * this 3-level paged table scheme to be comparable to a hash table. */ int hashtable_contents_valid = 0; /* Use ASCII-only mode for bootup*/ int hashtable_str_contents_valid = 0; static u16 **uni_pagedir[32] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; static char* **uni_pagedir_str[32] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; u16 * UC_current_unitable = NULL; struct unimapdesc_str *UC_current_unitable_str = NULL; PRIVATE int con_insert_unipair ARGS2(u16, unicode, u16, fontpos) { int i, n; u16 **p1, *p2; if ( !(p1 = uni_pagedir[n = unicode >> 11]) ) { p1 = uni_pagedir[n] = (u16* *) malloc(32*sizeof(u16 *)); if ( !p1 ) return -ENOMEM; for ( i = 0 ; i < 32 ; i++ ) p1[i] = NULL; } if ( !(p2 = p1[n = (unicode >> 6) & 0x1f]) ) { p2 = p1[n] = (u16 *) malloc(64*sizeof(u16)); if ( !p2 ) return -ENOMEM; for ( i = 0 ; i < 64 ; i++ ) p2[i] = 0xffff; /* No glyph for this character (yet) */ } p2[unicode & 0x3f] = fontpos; return 0; } PRIVATE int con_insert_unipair_str ARGS2(u16, unicode, char *, replace_str) { int i, n; char ***p1, **p2; if ( !(p1 = uni_pagedir_str[n = unicode >> 11]) ) { p1 = uni_pagedir_str[n] = (char** *) malloc(32*sizeof(char **)); if ( !p1 ) return -ENOMEM; for ( i = 0 ; i < 32 ; i++ ) p1[i] = NULL; } if ( !(p2 = p1[n = (unicode >> 6) & 0x1f]) ) { p2 = p1[n] = (char* *) malloc(64*sizeof(char *)); if ( !p2 ) return -ENOMEM; for ( i = 0 ; i < 64 ; i++ ) p2[i] = NULL; /* No replace string this character (yet) */ } p2[unicode & 0x3f] = replace_str; return 0; } /* ui arg was a leftover, deleted -kw */ PRIVATE void con_clear_unimap NOARGS { int i, j; u16 **p1; for ( i = 0 ; i < 32 ; i++ ) { if ( (p1 = uni_pagedir[i]) != NULL ) { for ( j = 0 ; j < 32 ; j++ ) { if ( p1[j] ) free(p1[j]); } free(p1); } uni_pagedir[i] = NULL; } hashtable_contents_valid = 1; } PRIVATE void con_clear_unimap_str NOARGS { int i, j; char ***p1; for ( i = 0 ; i < 32 ; i++ ) { if ( (p1 = uni_pagedir_str[i]) != NULL ) { for ( j = 0 ; j < 32 ; j++ ) { if ( p1[j] ) free(p1[j]); } free(p1); } uni_pagedir_str[i] = NULL; } hashtable_str_contents_valid = 1; /* ??? probably no use... */ } #ifdef NOTDEFINED int con_set_unimap ARGS2(u16, ct, struct unipair *, list) { int err = 0, err1, i; while( ct-- ) { if ( (err1 = con_insert_unipair(list->unicode, list->fontpos)) != 0 ) err = err1; list++; } for ( i = 0 ; i <= 3 ; i++ ) set_inverse_transl(i); /* Update all inverse translations */ return err; } #endif /* NOTDEFINED */ /* Loads the unimap for the hardware font, as defined in uni_hash.tbl. The representation used was the most compact I could come up with. This routine is executed at sys_setup time, and when the PIO_FONTRESET ioctl is called. */ PRIVATE void con_set_default_unimap NOARGS { int i, j; u16 *p; /* The default font is always 256 characters */ con_clear_unimap(); p = dfont_unitable; for ( i = 0 ; i < 256 ; i++ ) for ( j = dfont_unicount[i] ; j ; j-- ) con_insert_unipair(*(p++), i); for ( i = 0 ; i <= 3 ; i++ ) set_inverse_transl(i); /* Update all inverse translations */ UC_current_unitable = dfont_unitable; } PUBLIC int UCNumCharsets = 0; PUBLIC int UCLYhndl_HTFile_for_unspec = -1; PUBLIC int UCLYhndl_HTFile_for_unrec = -1; PUBLIC int UCLYhndl_for_unspec = -1; PUBLIC int UCLYhndl_for_unrec = -1; PRIVATE int UC_con_set_unimap ARGS2(int, UC_charset_out_hndl, int, update_flag) { int i, j; u16 *p; if (!UC_valid_UC_charset(UC_charset_out_hndl)) { if (TRACE) fprintf(stderr,"UC_con_set_unimap: Invalid charset handle %i.\n", UC_charset_out_hndl); return -1; } p = UCInfo[UC_charset_out_hndl].unitable; if (p == UC_current_unitable) { /* test whether pointers are equal */ return update_flag; /* nothing to be done */ } UC_current_unitable = p; /* The font is always 256 characters - so far. */ con_clear_unimap(); for ( i = 0 ; i < 256 ; i++ ) for ( j = UCInfo[UC_charset_out_hndl].unicount[i] ; j ; j-- ) con_insert_unipair(*(p++), i); if (update_flag) for ( i = 0 ; i <= 3 ; i++ ) set_inverse_transl(i); /* Update all inverse translations */ return 0; } PRIVATE int UC_con_set_unimap_str ARGS2(u16, ct, struct unipair_str *, list) { int err = 0, err1; while( ct-- ) { if ( (err1 = con_insert_unipair_str(list->unicode, list->replace_str)) != 0 ) err = err1; list++; } /* No inverse translations for replacement strings! */ if (!err) hashtable_str_contents_valid = 1; return err; } #if 0 /* UNUSED */ int con_get_unimap ARGS3(u16, ct, u16 *, uct, struct unipair *, list) { int i, j, k, ect; u16 **p1, *p2; ect = 0; if (hashtable_contents_valid) { for ( i = 0 ; i < 32 ; i++ ) if ( (p1 = uni_pagedir[i]) != NULL ) for ( j = 0 ; j < 32 ; j++ ) if ( (p2 = *(p1++)) != NULL ) for ( k = 0 ; k < 64 ; k++ ) { if ( *p2 < MAX_GLYPH && ect++ < ct ) { list->unicode = (u16) ((i<<11)+(j<<6)+k); list->fontpos = (u16) *p2; list++; } p2++; } } *uct = ect; return ((ect <= ct) ? 0 : -ENOMEM); } #endif int conv_uni_to_pc ARGS1(long, ucs) { int h; u16 **p1, *p2; /* Only 16-bit codes supported at this time */ if (ucs > 0xffff) ucs = 0xfffd; /* U+FFFD: REPLACEMENT CHARACTER */ else if (ucs < 0x20 || ucs >= 0xfffe) return -1; /* Not a printable character */ else if (ucs == 0xfeff || (ucs >= 0x200a && ucs <= 0x200f)) return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. The * UNI_DIRECT_MASK indicates the bit span of the region. */ else if ( (ucs & ~UNI_DIRECT_MASK) == UNI_DIRECT_BASE ) return ucs & UNI_DIRECT_MASK; if (!hashtable_contents_valid) return -3; if ( (p1 = uni_pagedir[ucs >> 11]) && (p2 = p1[(ucs >> 6) & 0x1f]) && (h = p2[ucs & 0x3f]) < MAX_GLYPH ) return h; return -4; /* not found */ } /* Note: contents of outbuf is not changes for negative return value! */ PRIVATE int conv_uni_to_str ARGS3(char*, outbuf, int, buflen, long, ucs) { char *h; char ***p1, **p2; /* Only 16-bit codes supported at this time */ if (ucs > 0xffff) ucs = 0xfffd; /* U+FFFD: REPLACEMENT CHARACTER */ /* Maybe the following two cases should be allowed here?? -kw */ else if (ucs < 0x20 || ucs >= 0xfffe) return -1; /* Not a printable character */ else if (ucs == 0xfeff || (ucs >= 0x200a && ucs <= 0x200f)) return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. The * UNI_DIRECT_MASK indicates the bit span of the region. */ /* We dont handle the following here: */ #if(0) else if ( (ucs & ~UNI_DIRECT_MASK) == UNI_DIRECT_BASE ) return ucs & UNI_DIRECT_MASK; #endif if (!hashtable_str_contents_valid) return -3; if ( (p1 = uni_pagedir_str[ucs >> 11]) && (p2 = p1[(ucs >> 6) & 0x1f]) && (h = p2[ucs & 0x3f]) ) { strncpy (outbuf,h,(size_t) (buflen-1)); return 1; /* ok ! */ } return -4; /* not found */ } PUBLIC int UCInitialized = 0; /* * [ original comment: - kw ] * This is called at sys_setup time, after memory and the console are * initialized. It must be possible to call kmalloc(..., GFP_KERNEL) * from this function, hence the call from sys_setup. */ PRIVATE void UCconsole_map_init NOARGS { con_set_default_unimap(); UCInitialized = 1; } /* OK now, finally, some stuff that is more specifically for Lynx: - kw */ PUBLIC int UCGetcharset_byMIMEname PARAMS((char * UC_MIMEcharset)); PUBLIC int UCGetcharset_byLYNXname PARAMS((char * UC_LYNXcharset)); PUBLIC int UCTransUniChar ARGS2(long, unicode, int, charset_out) { int rc; int UChndl_out; u16 * ut; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) return -12; ut = UCInfo[UChndl_out].unitable; if (ut != UC_current_unitable) { rc = UC_con_set_unimap(UChndl_out, 1); if (rc < 0) return rc; } rc = conv_uni_to_pc(unicode); if (rc == -4) rc = conv_uni_to_pc(0xfffd); return rc; } /* returns string length, or negative value for error. */ PUBLIC int UCTransUniCharStr ARGS5(char *, outbuf, int, buflen, long, unicode, int, charset_out, int, chk_single_flag) { int rc, src = 0, ignore_err; int UChndl_out; struct unimapdesc_str * repl; u16 * ut; if (buflen<2) return -13; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) return -12; if (chk_single_flag) { ut = UCInfo[UChndl_out].unitable; if (ut != UC_current_unitable) { src = UC_con_set_unimap(UChndl_out, 1); if (src < 0) return src; } src = conv_uni_to_pc(unicode); if (src >= 32) { outbuf[0] = src; outbuf[1] = '\0'; return 1; } } repl = &(UCInfo[UChndl_out].replacedesc); if (repl != UC_current_unitable_str) { con_clear_unimap_str(); ignore_err = UC_con_set_unimap_str(repl->entry_ct, repl->entries); UC_current_unitable_str = repl; } rc = conv_uni_to_str(outbuf, buflen, unicode); if (rc == -4) rc = conv_uni_to_str(outbuf, buflen, 0xfffd); if (rc >= 0) return (strlen(outbuf)); if (chk_single_flag && src == -4) { rc = conv_uni_to_pc(0xfffd); if (rc >= 32) { outbuf[0] = rc; outbuf[1] = '\0'; return 1; } else return rc; } return -4; } int UC_lastautoGN = 0; PRIVATE int UC_MapGN ARGS2(int, UChndl, int, update_flag) { int i,Gn,found,lasthndl; found = 0; Gn = -1; for (i=0; i<4 && Gn<0; i++) { if (UC_GNhandles[i] < 0) Gn = i; else if (UC_GNhandles[i] == UChndl) {Gn = i; found = 1;} } if (found) return Gn; if (Gn >= 0) { UCInfo[UChndl].GN = Gn; UC_GNhandles[Gn] = UChndl; } else { if (UC_lastautoGN == GRAF_MAP) Gn = IBMPC_MAP; else Gn = GRAF_MAP; UC_lastautoGN = Gn; lasthndl = UC_GNhandles[Gn]; UCInfo[lasthndl].GN = -1; UCInfo[UChndl].GN = Gn; UC_GNhandles[Gn] = UChndl; } UC_con_set_trans(UChndl,Gn,update_flag); return Gn; } PUBLIC int UCTransChar ARGS3(char, ch_in, int, charset_in, int, charset_out) { int unicode, Gn; int rc; int UChndl_in, UChndl_out; u16 * ut; int upd = 0; #ifndef UC_NO_SHORTCUTS if (charset_in == charset_out) return (unsigned char)ch_in; #endif if (charset_in < 0) return -11; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) return -11; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) return -12; if (!UCInfo[UChndl_in].num_uni) return -11; if ((Gn = UCInfo[UChndl_in].GN) < 0) {Gn = UC_MapGN(UChndl_in,0); upd = 1;} ut = UCInfo[UChndl_out].unitable; if (ut == UC_current_unitable) {if (upd) set_inverse_transl(Gn);} else { rc = UC_con_set_unimap(UChndl_out, 1); if (rc > 0) set_inverse_transl(Gn); else if (rc < 0) return rc; } UC_translate = set_translate(Gn); unicode = UC_translate[(unsigned char)ch_in]; rc = conv_uni_to_pc(unicode); if (rc == -4) rc = conv_uni_to_pc(0xfffd); return rc; } PUBLIC long int UCTransToUni ARGS2(char, ch_in, int, charset_in) { int unicode, Gn; unsigned char ch_iu; int UChndl_in; ch_iu = (unsigned char)ch_in; #ifndef UC_NO_SHORTCUTS if (charset_in == 0) return ch_iu; if ((unsigned char)ch_in < 128) return ch_iu; #endif if (charset_in < 0) return -11; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) return -11; if (!UCInfo[UChndl_in].num_uni) return -11; if ((Gn = UCInfo[UChndl_in].GN) < 0) {Gn = UC_MapGN(UChndl_in,1);} UC_translate = set_translate(Gn); unicode = UC_translate[(unsigned char)ch_in]; return unicode; } #if 0 /* UNUSED */ PUBLIC int UCReverseTransChar ARGS3(char, ch_out, int, charset_in, int, charset_out) { int Gn; int rc; int UChndl_in, UChndl_out; u16 * ut; #ifndef UC_NO_SHORTCUTS if (charset_in == charset_out) return ch_out; #endif if (charset_in < 0) return -11; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) return -11; if (charset_out < 0) return -12; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) return -12; if (!UCInfo[UChndl_in].num_uni) return -11; ut = UCInfo[UChndl_out].unitable; if (ut == UC_current_unitable) { if ((Gn = UCInfo[UChndl_in].GN) >= 0) { UC_translate = set_translate(Gn); rc = inv_translate[(unsigned int)ch_out]; if (rc >= 32) return rc; } else { Gn = UC_MapGN(UChndl_in,1); UC_translate = set_translate(Gn); rc = inv_translate[(unsigned int)ch_out]; if (rc >= 32) return rc; } } return UCTransChar(ch_out, charset_out, charset_in); } #endif /* returns string length, or negative value for error. */ PUBLIC int UCTransCharStr ARGS6(char *, outbuf, int, buflen, char, ch_in, int, charset_in, int, charset_out, int, chk_single_flag) { int unicode, Gn; int rc, src = 0, ignore_err; int UChndl_in, UChndl_out; struct unimapdesc_str * repl; u16 * ut; int upd = 0; if (buflen<2) return -13; #ifndef UC_NO_SHORTCUTS if (chk_single_flag && charset_in == charset_out) { outbuf[0] = ch_in; outbuf[1] = '\0'; return 1; } #endif if (charset_in < 0) return -11; if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) return -11; if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) return -12; if (!UCInfo[UChndl_in].num_uni) return -11; if ((Gn = UCInfo[UChndl_in].GN) < 0) {Gn = UC_MapGN(UChndl_in,!chk_single_flag); upd=chk_single_flag;} UC_translate = set_translate(Gn); unicode = UC_translate[(unsigned char)ch_in]; if (chk_single_flag) { ut = UCInfo[UChndl_out].unitable; if (ut == UC_current_unitable) {if (upd) set_inverse_transl(Gn);} else { src = UC_con_set_unimap(UChndl_out, 1); if (src > 0) set_inverse_transl(Gn); else if (src < 0) return src; } src = conv_uni_to_pc(unicode); if (src >= 32) { outbuf[0] = src; outbuf[1] = '\0'; return 1; } } repl = &(UCInfo[UChndl_out].replacedesc); if (repl != UC_current_unitable_str) { con_clear_unimap_str(); ignore_err = UC_con_set_unimap_str(repl->entry_ct, repl->entries); UC_current_unitable_str = repl; } rc = conv_uni_to_str(outbuf, buflen, unicode); if (rc == -4) rc = conv_uni_to_str(outbuf, buflen, 0xfffd); if (rc >= 0) return (strlen(outbuf)); if (chk_single_flag && src == -4) { rc = conv_uni_to_pc(0xfffd); if (rc >= 32) { outbuf[0] = rc; outbuf[1] = '\0'; return 1; } else return rc; } return -4; } PRIVATE int UC_FindGN_byMIME ARGS1(char *, UC_MIMEcharset) { int i; for (i=0; i<4; i++) if (!strcmp(UC_MIMEcharset,UC_GNsetMIMEnames[i])) return i; return -1; } PUBLIC int UCGetRawUniMode_byLYhndl ARGS1(int, i) { if(i < 0) return 0; return LYCharSet_UC[i].enc; } /* Currently the charset name has to match exactly -- not substring matching as was done before (see HTMIME.c, HTML.c). */ PUBLIC int UCGetLYhndl_byMIME ARGS1(CONST char *,UC_MIMEcharset) { int i; int LYhndl = -1; if (!UC_MIMEcharset || !(*UC_MIMEcharset)) return -1; for (i=0; i 0x80 in our new charset... Store any mappings found in ti[]. */ if (UCInfo[UC_charset_in_hndl].num_uni > 0) for ( i = 0 ; i < 256 ; i++ ) { if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) { if ((k = *pp) >= 160 && k < 256 && i >= lowest8) { ti[k-160] = i; } for ( ; j ; j-- ) pp++; } } { u16 ct; struct unipair_str *list; /* Determine if we have any mapping of a Unicode in the range 160-255 to a replacement string for our new charset... Store any mappings found in tp[]. */ ct = UCInfo[UC_charset_in_hndl].replacedesc.entry_ct; list = UCInfo[UC_charset_in_hndl].replacedesc.entries; while( ct-- ) { if ((k = list->unicode) >= 160 && k < 256) { tp[k-160] = list->replace_str; } list++; } } /* Now allocate a new table compatible with LYCharSets[] and with the HTMLDTD for entitied. We don't know yet whether we'll keep it around. */ p = prepl = (char **) malloc(HTML_dtd.number_of_entities * sizeof(char *)); if (!p) { FREE(tp); FREE(ti); return NULL; } changed = 0; for (i=0; i= 160 && strlen(s8) == 1) { /* We have an entity that is mapped to one valid eightbit latin1 char*/ if (ti[(unsigned char)(*s8) - 160] >= lowest8 && !(s7[0] == ti[(unsigned char)(*s8) - 160] && s7[1] == '\0')) { /* ...which in turn is mapped, by our "new method", to another valid eightbit char for this new charset: either to itself... */ if (ti[(unsigned char)(*s8) - 160] == (unsigned char)(*s8)) *p = s8; else { /* ...or another byte... */ #ifdef NOTDEFINED *p = (char *)malloc(2*sizeof(char)); if (!*p) { FREE(tp); FREE(ti); FREE(prepl); return NULL; } (*p)[0] = ti[(unsigned char)(*s8) - 160]; (*p)[1] = '\0'; #else /* use this instead... make those buggers int HTAtoms, so they will be cleaned up at exit... all for the sake of preventing memory leaks, sigh */ static char dummy[2]; /* one char dummy string */ dummy[0] = ti[(unsigned char)(*s8) - 160]; *p = HTAtom_name(HTAtom_for(dummy)); #endif /* ! NOTDEFINED */ } changed = 1; } else if (tp[(unsigned char)(*s8) - 160] && 0!=strcmp(s7,tp[(unsigned char)(*s8) - 160])) { /* ...or which is mapped, by our "new method", to a replacement string for this new charset. */ *p = tp[(unsigned char)(*s8) - 160]; changed = 1; } } } FREE(tp); FREE(ti); if (!changed) { FREE(prepl); return NULL; } return prepl; } /* "New method" meets "Old method" ... */ PRIVATE int UC_Register_with_LYCharSets ARGS4(int, s, char *, UC_MIMEcharset, char *, UC_LYNXcharset, int, lowest_eightbit) { int i, LYhndl,found; char ** repl; LYhndl = -1; if (LYNumCharsets == 0) /* initialize here; so whoever changes LYCharSets.c doesn't have to count... */ for (i=0; (i= MAX_CHARSETS) { if (TRACE) fprintf(stderr,"UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.", UC_MIMEcharset,UC_LYNXcharset); return -1; } /* add to LYCharSets.c lists */ LYhndl = LYNumCharsets; LYNumCharsets ++; LYlowest_eightbit[LYhndl] = 999; LYCharSets[LYhndl] = SevenBitApproximations; /* Hmm, try to be conservative here. */ LYchar_set_names[LYhndl] = UC_LYNXcharset; LYchar_set_names[LYhndl+1] = (char *) 0; /* Terminating NULL may be looked for by Lynx code */ } else found = 1; LYCharSet_UC[LYhndl].UChndl = s; /* Can we just copy the pointer? Hope so... */ LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset; LYCharSet_UC[LYhndl].enc = UCInfo[s].enc; /* @@@ We really SHOULD get more info from the table files, and set relevant flags in the LYCharSet_UC[] entry with that info... for now, let's try it without - kw */ if (lowest_eightbit < LYlowest_eightbit[LYhndl]) LYlowest_eightbit[LYhndl] = lowest_eightbit; else if (lowest_eightbit > LYlowest_eightbit[LYhndl]) UCInfo[s].lowest_eight = LYlowest_eightbit[LYhndl]; if (!found && LYhndl > 0) { repl = UC_setup_LYCharSets_repl(s,UCInfo[s].lowest_eight); if (repl) { LYCharSets[LYhndl] = repl; /* remember to FREE at exit */ remember_allocated_LYCharSets[LYhndl]=repl; } } return LYhndl; } /* This only sets up the structure - no initialization of the tables * is done here yet. */ void UC_Charset_Setup ARGS8(char *, UC_MIMEcharset, char *, UC_LYNXcharset, u8 *, unicount, u16 *, unitable, int, nnuni, struct unimapdesc_str, replacedesc, int, lowest_eight, int, UC_rawuni) { int s, Gn; int i, status = 0, found; /* Get (new?) slot */ found = -1; for (i=0; i= 0) s = found; else { if (UCNumCharsets >= MAX_CHARSETS) { if (TRACE) fprintf(stderr,"UC_Charset_Setup: Too many. Ignoring %s/%s.", UC_MIMEcharset,UC_LYNXcharset); return; } s = UCNumCharsets; UCInfo[s].MIMEname = UC_MIMEcharset; } UCInfo[s].LYNXname = UC_LYNXcharset; UCInfo[s].unicount = unicount; UCInfo[s].unitable = unitable; UCInfo[s].num_uni = nnuni; UCInfo[s].replacedesc = replacedesc; Gn = UC_FindGN_byMIME(UC_MIMEcharset); if (Gn >= 0) UC_GNhandles[Gn] = s; UCInfo[s].GN = Gn; if (UC_rawuni == UCT_ENC_UTF8) lowest_eight = 128; /* cheat here */ UCInfo[s].lowest_eight = lowest_eight; UCInfo[s].enc = UC_rawuni; UCInfo[s].LYhndl = UC_Register_with_LYCharSets(s, UC_MIMEcharset, UC_LYNXcharset, lowest_eight); UCInfo[s].uc_status = status; if (found < 0) UCNumCharsets++; return; } PRIVATE void UCcleanup_mem NOARGS { int i; UCfree_allocated_LYCharSets(); con_clear_unimap_str(); con_clear_unimap(); for (i=1; i<4; i++) /* first one is static! */ FREE(inverse_translations[i]); } PUBLIC void UCInit NOARGS { UCreset_allocated_LYCharSets(); atexit(UCcleanup_mem); UCconsole_map_init(); UC_CHARSET_SETUP; UC_CHARSET_SETUP_iso_8859_1; UC_CHARSET_SETUP_iso_8859_2; UC_CHARSET_SETUP_iso_8859_3; UC_CHARSET_SETUP_iso_8859_4; UC_CHARSET_SETUP_iso_8859_5; UC_CHARSET_SETUP_iso_8859_7; UC_CHARSET_SETUP_iso_8859_9; UC_CHARSET_SETUP_iso_8859_10; UC_CHARSET_SETUP_koi8_r; UC_CHARSET_SETUP_cp437; UC_CHARSET_SETUP_cp850; UC_CHARSET_SETUP_cp852; UC_CHARSET_SETUP_windows_1250; UC_CHARSET_SETUP_iso_8859_1_windows_; UC_CHARSET_SETUP_unicode_1_1_utf_8; UC_CHARSET_SETUP_mnemonic_ascii_0; UC_CHARSET_SETUP_mnemonic; /* UC_CHARSET_SETUP_mnem; */ }