diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-10 19:47:00 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-10 19:47:00 -0500 |
commit | d3f9d5478df478427c2aa5db4507ddd0a38f0eb6 (patch) | |
tree | e27eacd6bbda653dd77f11cc020b9e0a59f7f4fc /WWW/Library/Implementation/SGML.c | |
parent | 18024037b515bfff83e0230b35151babe6005e18 (diff) | |
download | lynx-snapshots-d3f9d5478df478427c2aa5db4507ddd0a38f0eb6.tar.gz |
snapshot of project "lynx", label v2-8-2dev_2
Diffstat (limited to 'WWW/Library/Implementation/SGML.c')
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 428 |
1 files changed, 182 insertions, 246 deletions
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 91c1b00d..7f6324a0 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -4,13 +4,12 @@ ** This module implements an HTStream object. To parse an ** SGML file, create this object which is a parser. The object ** is (currently) created by being passed a DTD structure, -** and a target HTStructured oject at which to throw the parsed stuff. +** and a target HTStructured object at which to throw the parsed stuff. ** -** 6 Feb 93 Binary seraches used. Intreface modified. +** 6 Feb 93 Binary searches used. Interface modified. */ #include <HTUtils.h> -#include <tcp.h> /* For FROMASCII */ /* Remove the following to disable the experimental HTML DTD parsing. Currently only used in this source file. - kw */ @@ -26,24 +25,21 @@ #include <UCDefs.h> #include <UCAux.h> -#include <ctype.h> -/*#include <stdio.h> included in HTUtils.h -- FM */ #include <HTChunk.h> #include <LYCharSets.h> +#include <LYStrings.h> #include <LYLeaks.h> #define INVALID (-1) -#define FREE(x) if (x) {free(x); x = NULL;} - PUBLIC HTCJKlang HTCJK = NOCJK; /* CJK enum value. */ PUBLIC BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */ PUBLIC BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ PUBLIC BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */ PUBLIC BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ -extern int LYlowest_eightbit[]; +/* extern int LYlowest_eightbit[]; for completeness here */ /* The State (context) of the parser ** @@ -56,7 +52,7 @@ extern int LYlowest_eightbit[]; /* Element Stack ** ------------- -** This allows us to return down the stack reselcting styles. +** This allows us to return down the stack reselecting styles. ** As we return, attribute values will be garbage in general. */ typedef struct _HTElement HTElement; @@ -74,7 +70,7 @@ struct _HTStream { CONST HTStreamClass * isa; /* inherited from HTStream */ CONST SGML_dtd *dtd; - HTStructuredClass *actions; /* target class */ + CONST HTStructuredClass *actions; /* target class */ HTStructured *target; /* target object */ HTTag *current_tag; @@ -200,7 +196,7 @@ PRIVATE void set_chartrans_handling ARGS3( context->T.trans_from_uni) { context->current_tag_charset = UCGetLYhndl_byMIME("utf-8"); } else { - context->current_tag_charset = 0; + context->current_tag_charset = LATIN1; } } @@ -282,17 +278,15 @@ PRIVATE void handle_attribute_name ARGS2( FREE(context->value[i]); #ifdef USE_COLOR_STYLE current_is_class=(!strcasecomp("class", s)); - if (TRACE) - fprintf(stderr, "SGML: found attribute %s, %d\n", s, current_is_class); + CTRACE(tfp, "SGML: found attribute %s, %d\n", s, current_is_class); #endif return; } /* if */ } /* for */ - if (TRACE) - fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n", - s, context->current_tag->name); + CTRACE(tfp, "SGML: Unknown attribute %s for tag %s\n", + s, context->current_tag->name); context->current_attribute_number = INVALID; /* Invalid */ } @@ -310,18 +304,15 @@ PRIVATE void handle_attribute_value ARGS2( if (current_is_class) { strncpy (class_string, s, TEMPSTRINGSIZE); - if (TRACE) - fprintf(stderr, "SGML: class is '%s'\n", s); + CTRACE(tfp, "SGML: class is '%s'\n", s); } else { - if (TRACE) - fprintf(stderr, "SGML: attribute value is '%s'\n", s); + CTRACE(tfp, "SGML: attribute value is '%s'\n", s); } #endif } else { - if (TRACE) - fprintf(stderr, "SGML: Attribute value %s ignored\n", s); + CTRACE(tfp, "SGML: Attribute value %s ignored\n", s); } context->current_attribute_number = INVALID; /* can't have two assignments! */ } @@ -331,9 +322,10 @@ PRIVATE void handle_attribute_value ARGS2( ** Translate some Unicodes to Lynx special codes and output them. ** Special codes - ones those output depend on parsing. ** -** Additional issue, like handling bidirectional text if nesseccery +** Additional issue, like handling bidirectional text if necessary ** may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) -** - currently they are passed to def7_uni.tbl as regular characters. +** - currently they are ignored in SGML.c and LYCharUtils.c +** but also in UCdomap.c because they are non printable... ** */ PRIVATE BOOL put_special_unicodes ARGS2( @@ -361,10 +353,7 @@ PRIVATE BOOL put_special_unicodes ARGS2( ** in the context of line wrapping. Unfortunately, if we use ** HT_EM_SPACE we override the chartrans tables for those spaces ** (e.g., emsp= double space) with a single '32' for all (but do line - ** wrapping more fancy). In the future we need HT_SPACE with a - ** transferred parameter (Unicode number) which falls back to - ** chartrans if line wrapping is not the case. - ** + ** wrapping more fancy). So we probably need HT_EN_SPACE etc... */ PUTC(HT_EM_SPACE); #ifdef NOTUSED_FOTEMODS @@ -403,11 +392,6 @@ PRIVATE BOOL put_special_unicodes ARGS2( PRIVATE char replace_buf [64]; /* buffer for replacement strings */ PRIVATE BOOL FoundEntity = FALSE; -#define IncludesLatin1Enc \ - (context->outUCLYhndl == 0 || \ - (context->outUCI && \ - (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) - PRIVATE void handle_entity ARGS2( HTStream *, context, char, term) @@ -416,9 +400,6 @@ PRIVATE void handle_entity ARGS2( long uck; CONST char *p; CONST char *s = context->string->data; -#ifdef NOTUSED_FOTEMODS - int high, low, i, diff; -#endif /* @@ -472,89 +453,32 @@ PRIVATE void handle_entity ARGS2( FoundEntity = TRUE; return; } -#ifdef NOTUSED_FOTEMODS /* - ** If the value is greater than 255 and we do not - ** have the "7-bit approximations" as our output - ** character set (in which case we did it already) - ** seek a translation for that. - FM + ** Ignore zwnj (8204) and zwj (8205), if we get to here. + ** Note that zwnj may have been handled as <WBR> + ** by the calling function. - FM */ - if ((chk = ((code > 255) && - context->outUCLYhndl != - UCGetLYhndl_byMIME("us-ascii"))) && - (uck = UCTransUniChar(code, - UCGetLYhndl_byMIME("us-ascii")))>= 32 && - uck < 127) { - /* - ** Got an ASCII character (yippey). - FM - */ - PUTC(((char)(uck & 0xff))); + if (!strcmp(s, "zwnj") || + !strcmp(s, "zwj")) { + CTRACE(tfp, "handle_entity: Ignoring '%s'.\n", s); FoundEntity = TRUE; return; - } else if ((chk && uck == -4) && - (uck = UCTransUniCharStr(replace_buf, - 60, code, - UCGetLYhndl_byMIME("us-ascii"), - 0) >= 0)) { - /* - ** Got a replacement string (yippey). - FM - */ - for (p = replace_buf; *p; p++) - PUTC(*p); - FoundEntity = TRUE; - return; - } - } - /* - ** Ignore zwnj (8204) and zwj (8205), if we get to here. - ** Note that zwnj may have been handled as <WBR> - ** by the calling function. - FM - */ - if (!strcmp(s, "zwnj") || - !strcmp(s, "zwj")) { - if (TRACE) { - fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s); - } - FoundEntity = TRUE; - return; - } - - /* - ** Ignore lrm (8206), and rln (8207), if we get to here. - FM - */ - if (!strcmp(s, "lrm") || - !strcmp(s, "rlm")) { - if (TRACE) { - fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s); } - FoundEntity = TRUE; - return; - } - - /* - ** We haven't succeeded yet, so try the old LYCharSets - ** arrays for translation strings. - FM - */ - for (low = 0, high = context->dtd->number_of_entities; - high > low; - diff < 0 ? (low = i+1) : (high = i)) { /* Binary search */ - i = (low + (high-low)/2); - diff = strcmp(entities[i], s); /* Case sensitive! */ - if (diff == 0) { /* success: found it */ - for (p = LYCharSets[context->outUCLYhndl][i]; *p; p++) { - PUTC(*p); - } + /* + ** Ignore lrm (8206), and rln (8207), if we get to here. - FM + */ + if (!strcmp(s, "lrm") || + !strcmp(s, "rlm")) { + CTRACE(tfp, "handle_entity: Ignoring '%s'.\n", s); FoundEntity = TRUE; return; } -#endif } /* ** If entity string not found, display as text. */ - if (TRACE) - fprintf(stderr, "SGML: Unknown entity '%s'\n", s); + CTRACE(tfp, "SGML: Unknown entity '%s'\n", s); PUTC('&'); for (p = s; *p; p++) { PUTC(*p); @@ -572,8 +496,7 @@ PRIVATE void handle_comment ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Comment:\n<%s>\n", s); + CTRACE(tfp, "SGML Comment:\n<%s>\n", s); if (context->csi == NULL && strncmp(s, "!--#", 4) == 0 && @@ -593,8 +516,7 @@ PRIVATE void handle_identifier ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Identifier\n<%s>\n", s); + CTRACE(tfp, "SGML Identifier\n<%s>\n", s); return; } @@ -608,8 +530,7 @@ PRIVATE void handle_doctype ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Doctype\n<%s>\n", s); + CTRACE(tfp, "SGML Doctype\n<%s>\n", s); return; } @@ -623,8 +544,7 @@ PRIVATE void handle_marked ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Marked Section:\n<%s>\n", s); + CTRACE(tfp, "SGML Marked Section:\n<%s>\n", s); return; } @@ -638,8 +558,7 @@ PRIVATE void handle_sgmlent ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Entity Declaration:\n<%s>\n", s); + CTRACE(tfp, "SGML Entity Declaration:\n<%s>\n", s); return; } @@ -653,8 +572,7 @@ PRIVATE void handle_sgmlele ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Element Declaration:\n<%s>\n", s); + CTRACE(tfp, "SGML Element Declaration:\n<%s>\n", s); return; } @@ -668,8 +586,7 @@ PRIVATE void handle_sgmlatt ARGS1( { CONST char *s = context->string->data; - if (TRACE) - fprintf(stderr, "SGML Attribute Declaration:\n<%s>\n", s); + CTRACE(tfp, "SGML Attribute Declaration:\n<%s>\n", s); return; } @@ -716,6 +633,7 @@ PRIVATE canclose_t can_close ARGS2( return ((stacked_tag->tagclass & new_tag->canclose) ? close_error : close_NO); } + PRIVATE void do_close_stacked ARGS1( HTStream *, context) { @@ -732,6 +650,7 @@ PRIVATE void do_close_stacked ARGS1( context->element_stack = stacked->next; FREE(stacked); } + PRIVATE int is_on_stack ARGS2( HTStream *, context, HTTag *, old_tag) @@ -765,8 +684,7 @@ PRIVATE void end_element ARGS2( (stackpos > 1 || (!extra_action_taken && stackpos == 0))) { canclose_check = can_close(old_tag, context->element_stack->tag); if (canclose_check != close_NO) { - if (TRACE) - fprintf(stderr, "SGML: End </%s> \t<- %s end </%s>\n", + CTRACE(tfp, "SGML: End </%s> \t<- %s end </%s>\n", context->element_stack->tag->name, canclose_check == close_valid ? "supplied," : "forced by", old_tag->name); @@ -774,8 +692,7 @@ PRIVATE void end_element ARGS2( extra_action_taken = YES; stackpos = is_on_stack(context, old_tag); } else { - if (TRACE) - fprintf(stderr, "SGML: Still open %s \t<- invalid end </%s>\n", + CTRACE(tfp, "SGML: Still open %s \t<- invalid end </%s>\n", context->element_stack->tag->name, old_tag->name); return; @@ -783,8 +700,7 @@ PRIVATE void end_element ARGS2( } if (stackpos == 0 && old_tag->contents != SGML_EMPTY) { - if (TRACE) - fprintf(stderr, "SGML: Still open %s, no open %s for </%s>\n", + CTRACE(tfp, "SGML: Still open %s, no open %s for </%s>\n", context->element_stack ? context->element_stack->tag->name : "none", old_tag->name, @@ -792,8 +708,7 @@ PRIVATE void end_element ARGS2( return; } if (stackpos > 1) { - if (TRACE) - fprintf(stderr, "SGML: Nesting <%s>...<%s> \t<- invalid end </%s>\n", + CTRACE(tfp, "SGML: Nesting <%s>...<%s> \t<- invalid end </%s>\n", old_tag->name, context->element_stack->tag->name, old_tag->name); @@ -818,42 +733,36 @@ PRIVATE void end_element ARGS2( /* ** Ignore the end tag. - FM */ - if (TRACE) { - fprintf(stderr, - "SGML: Ignoring end tag </%s> in SELECT block.\n", + CTRACE(tfp, "SGML: Ignoring end tag </%s> in SELECT block.\n", old_tag->name); - } return; } } /* ** Handle the end tag. - FM */ - if (TRACE) - fprintf(stderr, "SGML: End </%s>\n", old_tag->name); + CTRACE(tfp, "SGML: End </%s>\n", old_tag->name); if (old_tag->contents == SGML_EMPTY) { - if (TRACE) - fprintf(stderr, "SGML: Illegal end tag </%s> found.\n", - old_tag->name); + CTRACE(tfp, "SGML: Illegal end tag </%s> found.\n", + old_tag->name); return; } #ifdef WIND_DOWN_STACK - while (context->element_stack) { /* Loop is error path only */ + while (context->element_stack) /* Loop is error path only */ #else - if (context->element_stack) { /* Substitute and remove one stack element */ + if (context->element_stack) /* Substitute and remove one stack element */ #endif /* WIND_DOWN_STACK */ + { HTElement * N = context->element_stack; HTTag * t = N->tag; if (old_tag != t) { /* Mismatch: syntax error */ if (context->element_stack->next) { /* This is not the last level */ - if (TRACE) fprintf(stderr, - "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n", - old_tag->name, t->name, t->name); + CTRACE(tfp, "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n", + old_tag->name, t->name, t->name); } else { /* last level */ - if (TRACE) fprintf(stderr, - "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n", - old_tag->name, t->name, old_tag->name); + CTRACE(tfp, "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n", + old_tag->name, t->name, old_tag->name); return; /* Ignore */ } } @@ -872,9 +781,8 @@ PRIVATE void end_element ARGS2( /* Syntax error path only */ } - if (TRACE) - fprintf(stderr, "SGML: Extra end tag </%s> found and ignored.\n", - old_tag->name); + CTRACE(tfp, "SGML: Extra end tag </%s> found and ignored.\n", + old_tag->name); } @@ -901,8 +809,7 @@ PRIVATE void start_element ARGS1( direct_container))) { canclose_check = can_close(new_tag, context->element_stack->tag); if (canclose_check != close_NO) { - if (TRACE) - fprintf(stderr, "SGML: End </%s> \t<- %s start <%s>\n", + CTRACE(tfp, "SGML: End </%s> \t<- %s start <%s>\n", context->element_stack->tag->name, canclose_check == close_valid ? "supplied," : "forced by", new_tag->name); @@ -911,8 +818,7 @@ PRIVATE void start_element ARGS1( if (canclose_check == close_error) direct_container = NO; } else { - if (TRACE) - fprintf(stderr, "SGML: Still open %s \t<- invalid start <%s>\n", + CTRACE(tfp, "SGML: Still open %s \t<- invalid start <%s>\n", context->element_stack->tag->name, new_tag->name); } @@ -921,8 +827,7 @@ PRIVATE void start_element ARGS1( (context->element_stack->tag->flags & Tgf_strict) && !(valid = element_valid_within(new_tag, context->element_stack->tag, direct_container))) { - if (TRACE) - fprintf(stderr, "SGML: Still open %s \t<- ignoring start <%s>\n", + CTRACE(tfp, "SGML: Still open %s \t<- ignoring start <%s>\n", context->element_stack->tag->name, new_tag->name); return; @@ -935,8 +840,7 @@ PRIVATE void start_element ARGS1( for (; i< new_tag->number_of_attributes && !has_attributes; i++) has_attributes = context->present[i]; if (!has_attributes) { - if (TRACE) - fprintf(stderr, "SGML: Still open %s, converting invalid <%s> to </%s>\n", + CTRACE(tfp, "SGML: Still open %s, converting invalid <%s> to </%s>\n", context->element_stack->tag->name, new_tag->name, new_tag->name); @@ -951,8 +855,7 @@ PRIVATE void start_element ARGS1( new_tag, context->element_stack->tag, direct_container))) { - if (TRACE) - fprintf(stderr, "SGML: Still open %s \t<- invalid start <%s>\n", + CTRACE(tfp, "SGML: Still open %s \t<- invalid start <%s>\n", context->element_stack->tag->name, new_tag->name); } @@ -998,18 +901,14 @@ PRIVATE void start_element ARGS1( ** It is another form-related start tag, so terminate ** the current SELECT block and fall through. - FM */ - if (TRACE) - fprintf(stderr, - "SGML: Faking SELECT end tag before <%s> start tag.\n", + CTRACE(tfp, "SGML: Faking SELECT end tag before <%s> start tag.\n", new_tag->name); end_element(context, SGMLFindTag(context->dtd, "SELECT")); } else { /* ** Ignore the start tag. - FM */ - if (TRACE) - fprintf(stderr, - "SGML: Ignoring start tag <%s> in SELECT block.\n", + CTRACE(tfp, "SGML: Ignoring start tag <%s> in SELECT block.\n", new_tag->name); return; } @@ -1018,8 +917,7 @@ PRIVATE void start_element ARGS1( /* ** Handle the start tag. - FM */ - if (TRACE) - fprintf(stderr, "SGML: Start <%s>\n", new_tag->name); + CTRACE(tfp, "SGML: Start <%s>\n", new_tag->name); (*context->actions->start_element)( context->target, new_tag - context->dtd->tags, @@ -1047,7 +945,7 @@ PRIVATE void start_element ARGS1( ** ------------------------ ** ** On entry, -** dtd points to dtd structire including valid tag list +** dtd points to dtd structure including valid tag list ** string points to name of tag in question ** ** On exit, @@ -1073,7 +971,7 @@ PUBLIC HTTag * SGMLFindTag ARGS2( /* ** Unrecognized, but may be valid. - KW */ - return (HTTag *)&HTTag_unrecognized; + return &HTTag_unrecognized; } return NULL; } @@ -1199,7 +1097,7 @@ PRIVATE void SGML_character ARGS2( CONST char * EntityName; char * p; BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ - UCode_t clong, uck; /* Enough bits for UCS4 ... */ + UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ char c; char saved_char_in = '\0'; @@ -1308,7 +1206,7 @@ PRIVATE void SGML_character ARGS2( ** to Unicode, try that now. - FM */ if (context->T.trans_to_uni && - ((unsign_c >= 127) || + ((unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) || (unsign_c < 32 && unsign_c != 0 && context->T.trans_C0_to_uni))) { /* @@ -1403,7 +1301,7 @@ top0a: ** which unsign_c has been defined), and from below ** when we are recycling a character (e.g., because ** it terminated an entity but is not the standard -** semi-colon). The chararcter will already have +** semi-colon). The character will already have ** been put through the Unicode conversions. - FM */ top1: @@ -1533,11 +1431,8 @@ top1: (uck = UCTransUniChar(unsign_c, context->outUCLYhndl)) >= 32 && uck < 256) { - if (TRACE) { - fprintf(stderr, - "UCTransUniChar returned 0x%.2lX:'%c'.\n", + CTRACE(tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char)uck)); - } /* ** We got one octet from the conversions, so use it. - FM */ @@ -1554,7 +1449,7 @@ top1: 0) >= 0)) { /* ** Got a replacement string. - ** No further tests for valididy - assume that whoever + ** No further tests for validity - assume that whoever ** defined replacement strings knew what she was doing. - KW */ for (p = replace_buf; *p; p++) @@ -1565,13 +1460,19 @@ top1: } else if (context->T.output_utf8 && PUTUTF8(clong)) { ; /* do nothing more */ /* - ** If it's any other (> 160) 8-bit chararcter, and + ** If it's any other (> 160) 8-bit character, and ** we have not set HTPassEightBitRaw nor HTCJK, nor ** have the "ISO Latin 1" character set selected, ** back translate for our character set. - FM */ +#define IncludesLatin1Enc \ + (context->outUCLYhndl == LATIN1 || \ + (context->outUCI && \ + (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) + #define PASSHI8BIT (HTPassEightBitRaw || \ (context->T.do_8bitraw && !context->T.trans_from_uni)) + } else if (unsign_c > 160 && unsign_c < 256 && !(PASSHI8BIT || HTCJK != NOCJK) && !IncludesLatin1Enc) { @@ -1652,14 +1553,16 @@ top1: for (p = replace_buf; *p; p++) PUTC(*p); } else { +#endif /* NOTUSED_FOTEMODS */ /* ** Out of luck, so use the UHHH notation (ugh). - FM */ -#endif /* NOTUSED_FOTEMODS */ + /* do not print UHHH for now sprintf(replace_buf, "U%.2lX", unsign_c); for (p = replace_buf; *p; p++) { PUTC(*p); } + */ #ifdef NOTUSED_FOTEMODS } #endif /* NOTUSED_FOTEMODS */ @@ -1749,10 +1652,8 @@ top1: */ char temp[8]; - if (TRACE) { - fprintf(stderr, - "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n"); - } + CTRACE(tfp, "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n"); + if (c != ';') { sprintf(temp, "<WBR>%c", c); } else { @@ -1841,7 +1742,7 @@ top1: if ((context->isHex ? sscanf(string->data, "%lx", &code) : sscanf(string->data, "%ld", &code)) == 1) { if ((code == 1) || - (code > 129 && code < 156)) { + (code > 127 && code < 156)) { /* ** Assume these are Microsoft code points, ** inflicted on us by FrontPage. - FM @@ -1857,6 +1758,12 @@ top1: */ code = 0x263a; break; + case 128: + /* + ** EURO currency sign + */ + code = 0x20ac; + break; case 130: /* ** SINGLE LOW-9 QUOTATION MARK (sbquo) @@ -1981,10 +1888,8 @@ top1: */ char temp[8]; - if (TRACE) { - fprintf(stderr, - "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n"); - } + CTRACE(tfp, "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n"); + /* ** Include the terminator if it is not ** the standard semi-colon. - FM @@ -2073,6 +1978,7 @@ top1: */ for (p = replace_buf; *p; p++) PUTC(*p); +#endif /* NOTUSED_FOTEMODS */ /* ** Ignore 8205 (zwj), ** 8206 (lrm), and 8207 (rln), if we get to here. - FM @@ -2085,7 +1991,7 @@ top1: LYstrncpy(replace_buf, string->data, (string->size < 64 ? string->size : 63)); - fprintf(stderr, + fprintf(tfp, "SGML_character: Ignoring '%s%s'.\n", (context->isHex ? "&#x" : "&#"), replace_buf); @@ -2096,7 +2002,6 @@ top1: if (c != ';') goto top1; break; -#endif /* NOTUSED_FOTEMODS */ /* ** Show the numeric entity if we get to here ** and the value: @@ -2266,9 +2171,8 @@ top1: */ HTTag * t; if (c == '/') { - if (TRACE) - if (string->size!=0) - fprintf(stderr,"SGML: `<%s/' found!\n", string->data); + if (string->size != 0) + CTRACE(tfp,"SGML: `<%s/' found!\n", string->data); context->state = S_end; break; } @@ -2286,21 +2190,18 @@ top1: for (i = 0; i < 3; i++) /* recover */ PUTC(string->data[i]); PUTC(c); - if (TRACE) - fprintf(stderr, "SGML: Treating <%s%c as text\n", + CTRACE(tfp, "SGML: Treating <%s%c as text\n", string->data, c); string->size = 0; context->state = S_text; break; } else if (!t) { - if (TRACE) - fprintf(stderr, "SGML: *** Invalid element %s\n", + CTRACE(tfp, "SGML: *** Invalid element %s\n", string->data); context->state = (c == '>') ? S_text : S_junk_tag; break; } else if (t == context->unknown_tag) { - if (TRACE) - fprintf(stderr, "SGML: *** Unknown element %s\n", + CTRACE(tfp, "SGML: *** Unknown element %s\n", string->data); /* ** Fall through and treat like valid @@ -2655,8 +2556,7 @@ top1: if (WHITE(c)) break; /* Before attribute value */ if (c == '>') { /* End of tag */ - if (TRACE) - fprintf(stderr, "SGML: found = but no value\n"); + CTRACE(tfp, "SGML: found = but no value\n"); if (context->current_tag->name) start_element(context); context->state = S_text; @@ -2802,8 +2702,7 @@ top1: t = SGMLFindTag(dtd, string->data); } if (!t || t == context->unknown_tag) { - if (TRACE) - fprintf(stderr, "Unknown end tag </%s>\n", string->data); + CTRACE(tfp, "Unknown end tag </%s>\n", string->data); } else { BOOL tag_OK = (c == '>' || WHITE(c)); context->current_tag = t; @@ -2831,9 +2730,7 @@ top1: ** Don't treat these end tags as invalid, ** nor act on them. - FM */ - if (TRACE) - fprintf(stderr, - "SGML: `</%s%c' found! Ignoring it.\n", + CTRACE(tfp, "SGML: `</%s%c' found! Ignoring it.\n", string->data, c); string->size = 0; context->current_attribute_number = INVALID; @@ -2871,27 +2768,19 @@ top1: /* ** It is not at FORM end tag, so ignore it. - FM */ - if (TRACE) { - fprintf(stderr, - "SGML: Ignoring end tag </%s> in SELECT block.\n", + CTRACE(tfp, "SGML: Ignoring end tag </%s> in SELECT block.\n", string->data); - } } else { /* ** End the SELECT block and then ** handle the FORM end tag. - FM */ - if (TRACE) { - fprintf(stderr, - "SGML: Faking SELECT end tag before </%s> end tag.\n", + CTRACE(tfp, "SGML: Faking SELECT end tag before </%s> end tag.\n", string->data); - } end_element(context, SGMLFindTag(context->dtd, "SELECT")); - if (TRACE) { - fprintf(stderr, - "SGML: End </%s>\n", string->data); - } + CTRACE(tfp, "SGML: End </%s>\n", string->data); + (*context->actions->end_element) (context->target, (context->current_tag - context->dtd->tags), @@ -2902,9 +2791,7 @@ top1: ** Treat a P end tag like a P start tag (Ugh, ** what a hack! 8-). - FM */ - if (TRACE) - fprintf(stderr, - "SGML: `</%s%c' found! Treating as '<%s%c'.\n", + CTRACE(tfp, "SGML: `</%s%c' found! Treating as '<%s%c'.\n", string->data, c, string->data, c); { int i; @@ -2917,10 +2804,8 @@ top1: if (context->current_tag->name) start_element(context); } else { - if (TRACE) { - fprintf(stderr, - "SGML: End </%s>\n", string->data); - } + CTRACE(tfp, "SGML: End </%s>\n", string->data); + (*context->actions->end_element) (context->target, (context->current_tag - context->dtd->tags), @@ -2945,8 +2830,8 @@ top1: string->size = 0; context->current_attribute_number = INVALID; if (c != '>') { - if (TRACE && !WHITE(c)) - fprintf(stderr,"SGML: `</%s%c' found!\n", string->data, c); + if (!WHITE(c)) + CTRACE(tfp,"SGML: `</%s%c' found!\n", string->data, c); context->state = S_junk_tag; } else { context->state = S_text; @@ -3208,7 +3093,7 @@ PUBLIC HTStream* SGML_new ARGS3( context->string = HTChunkCreate(128); /* Grow by this much */ context->dtd = dtd; context->target = target; - context->actions = (HTStructuredClass*)(((HTStream*)target)->isa); + context->actions = (CONST HTStructuredClass*)(((HTStream*)target)->isa); /* Ugh: no OO */ context->unknown_tag = &HTTag_unrecognized; context->state = S_text; @@ -3265,7 +3150,7 @@ PUBLIC HTStream* SGML_new ARGS3( ** Added 24-Mar-96 by FM, based on: ** //////////////////////////////////////////////////////////////////////// -Copyright (c) 1993 Electrotechnical Laboratry (ETL) +Copyright (c) 1993 Electrotechnical Laboratory (ETL) Permission to use, copy, modify, and distribute this material for any purpose and without fee is hereby granted, provided @@ -3296,19 +3181,69 @@ PUBLIC void JISx0201TO0208_EUC ARGS4( register unsigned char *, OLO) { static char *table[] = { - "\xA1\xA3", "\xA1\xD6", "\xA1\xD7", "\xA1\xA2", "\xA1\xA6", "\xA5\xF2", - "\xA5\xA1", "\xA5\xA3", "\xA5\xA5", "\xA5\xA7", "\xA5\xA9", - "\xA5\xE3", "\xA5\xE5", "\xA5\xE7", "\xA5\xC3", "\xA1\xBC", - "\xA5\xA2", "\xA5\xA4", "\xA5\xA6", "\xA5\xA8", "\xA5\xAA", - "\xA5\xAB", "\xA5\xAD", "\xA5\xAF", "\xA5\xB1", "\xA5\xB3", - "\xA5\xB5", "\xA5\xB7", "\xA5\xB9", "\xA5\xBB", "\xA5\xBD", - "\xA5\xBF", "\xA5\xC1", "\xA5\xC4", "\xA5\xC6", "\xA5\xC8", - "\xA5\xCA", "\xA5\xCB", "\xA5\xCC", "\xA5\xCD", "\xA5\xCE", - "\xA5\xCF", "\xA5\xD2", "\xA5\xD5", "\xA5\xD8", "\xA5\xDB", - "\xA5\xDE", "\xA5\xDF", "\xA5\xE0", "\xA5\xE1", "\xA5\xE2", - "\xA5\xE4", "\xA5\xE6", "\xA5\xE8", "\xA5\xE9", "\xA5\xEA", - "\xA5\xEB", "\xA5\xEC", "\xA5\xED", "\xA5\xEF", "\xA5\xF3", - "\xA1\xAB", "\xA1\xAC" + "\241\243", /* A1,A3 */ + "\241\326", /* A1,D6 */ + "\241\327", /* A1,D7 */ + "\241\242", /* A1,A2 */ + "\241\246", /* A1,A6 */ + "\245\362", /* A5,F2 */ + "\245\241", /* A5,A1 */ + "\245\243", /* A5,A3 */ + "\245\245", /* A5,A5 */ + "\245\247", /* A5,A7 */ + "\245\251", /* A5,A9 */ + "\245\343", /* A5,E3 */ + "\245\345", /* A5,E5 */ + "\245\347", /* A5,E7 */ + "\245\303", /* A5,C3 */ + "\241\274", /* A1,BC */ + "\245\242", /* A5,A2 */ + "\245\244", /* A5,A4 */ + "\245\246", /* A5,A6 */ + "\245\250", /* A5,A8 */ + "\245\252", /* A5,AA */ + "\245\253", /* A5,AB */ + "\245\255", /* A5,AD */ + "\245\257", /* A5,AF */ + "\245\261", /* A5,B1 */ + "\245\263", /* A5,B3 */ + "\245\265", /* A5,B5 */ + "\245\267", /* A5,B7 */ + "\245\271", /* A5,B9 */ + "\245\273", /* A5,BB */ + "\245\275", /* A5,BD */ + "\245\277", /* A5,BF */ + "\245\301", /* A5,C1 */ + "\245\304", /* A5,C4 */ + "\245\306", /* A5,C6 */ + "\245\310", /* A5,C8 */ + "\245\312", /* A5,CA */ + "\245\313", /* A5,CB */ + "\245\314", /* A5,CC */ + "\245\315", /* A5,CD */ + "\245\316", /* A5,CE */ + "\245\317", /* A5,CF */ + "\245\322", /* A5,D2 */ + "\245\325", /* A5,D5 */ + "\245\330", /* A5,D8 */ + "\245\333", /* A5,DB */ + "\245\336", /* A5,DE */ + "\245\337", /* A5,DF */ + "\245\340", /* A5,E0 */ + "\245\341", /* A5,E1 */ + "\245\342", /* A5,E2 */ + "\245\344", /* A5,E4 */ + "\245\346", /* A5,E6 */ + "\245\350", /* A5,E8 */ + "\245\351", /* A5,E9 */ + "\245\352", /* A5,EA */ + "\245\353", /* A5,EB */ + "\245\354", /* A5,EC */ + "\245\355", /* A5,ED */ + "\245\357", /* A5,EF */ + "\245\363", /* A5,F3 */ + "\241\253", /* A1,AB */ + "\241\254" /* A1,AC */ }; if ((IHI == 0x8E) && (ILO >= 0xA1) && (ILO <= 0xDF)) { @@ -3377,7 +3312,7 @@ PUBLIC void JISx0201TO0208_SJIS ARGS3( { unsigned char SJCODE[2]; - JISx0201TO0208_EUC('\x8E', I, OHI, OLO); + JISx0201TO0208_EUC('\216', I, OHI, OLO); JIS_TO_SJIS1(*OHI&0x7F, *OLO&0x7F, SJCODE); *OHI = SJCODE[0]; *OLO = SJCODE[1]; @@ -3487,10 +3422,11 @@ PUBLIC unsigned char * EUC_TO_JIS ARGS4( } PUBLIC unsigned char * TO_EUC ARGS2( - unsigned char *, jis, + CONST unsigned char *, jis, unsigned char *, euc) { - register unsigned char *s, *d, c, jis_stat; + register CONST unsigned char *s; + register unsigned char *d, c, jis_stat; register int to1B, to2B; register int in_sjis = 0; @@ -3540,7 +3476,7 @@ PUBLIC unsigned char * TO_EUC ARGS2( } PUBLIC void TO_SJIS ARGS2( - unsigned char *, any, + CONST unsigned char *, any, unsigned char *, sjis) { unsigned char *euc; @@ -3558,7 +3494,7 @@ PUBLIC void TO_SJIS ARGS2( } PUBLIC void TO_JIS ARGS2( - unsigned char *, any, + CONST unsigned char *, any, unsigned char *, jis) { unsigned char *euc; |