diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 2004-05-07 01:13:29 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 2004-05-07 01:13:29 -0400 |
commit | d326f24d169154673717129098ff4554a673f178 (patch) | |
tree | 7229f817b10bcdb82f8df2b6af3a3acc792a762d /WWW/Library/Implementation/SGML.c | |
parent | 2cd8e80bfe2792ce8999a26b34384598f58e3889 (diff) | |
download | lynx-snapshots-d326f24d169154673717129098ff4554a673f178.tar.gz |
snapshot of project "lynx", label v2-8-6dev_3
Diffstat (limited to 'WWW/Library/Implementation/SGML.c')
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 3057 |
1 files changed, 1489 insertions, 1568 deletions
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index c8172f4d..0a32f58f 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -1,13 +1,13 @@ /* General SGML Parser code SGML.c -** ======================== -** -** This module implements an HTStream object. To parse an -** SGML file, create this object which is a parser. The object -** is (currently) created by being passed a DTD structure, -** and a target HTStructured object at which to throw the parsed stuff. -** -** 6 Feb 93 Binary searches used. Interface modified. -*/ + * ======================== + * + * This module implements an HTStream object. To parse an + * SGML file, create this object which is a parser. The object + * is (currently) created by being passed a DTD structure, + * and a target HTStructured object at which to throw the parsed stuff. + * + * 6 Feb 93 Binary searches used. Interface modified. + */ #include <HTUtils.h> @@ -28,7 +28,7 @@ #include <HTChunk.h> #include <LYCharSets.h> -#include <LYCharVals.h> /* S/390 -- gil -- 0635 */ +#include <LYCharVals.h> /* S/390 -- gil -- 0635 */ #include <LYGlobalDefs.h> #include <LYStrings.h> #include <LYLeaks.h> @@ -44,12 +44,12 @@ #ifdef USE_PRETTYSRC -char* entity_string; /* this is used for printing entity name. - Unconditionally added since redundant assigments don't hurt much*/ +char *entity_string; /* this is used for printing entity name. -static void fake_put_character ( - void* p GCC_UNUSED, - char c GCC_UNUSED) + Unconditionally added since redundant assigments don't hurt much */ + +static void fake_put_character(void *p GCC_UNUSED, + char c GCC_UNUSED) { } @@ -65,7 +65,6 @@ static void fake_put_character ( AS_casecomp(a,b) : \ (TOASCII(TOUPPER(*a)) - TOASCII(TOUPPER(*b)))) -#if ANSI_PREPRO /* will use partially inlined version */ #define orig_HTChunkPutUtf8Char HTChunkPutUtf8Char #undef HTChunkPutUtf8Char @@ -79,25 +78,6 @@ static void fake_put_character ( orig_HTChunkPutUtf8Char(ch,x); \ } -#if 0 -#define orig_HTChunkPutc HTChunkPutc -#undef HTChunkPutc - -#define HTChunkPutc(ch,x) \ - { \ - if (ch->size < ch->allocated) \ - ch->data[ch->size++] = x; \ - else \ - orig_HTChunkPutc(ch,x); \ - } - -#undef HTChunkTerminate - -#define HTChunkTerminate(ch) \ - HTChunkPutc(ch, (char)0) -#endif /* */ -#endif /* ANSI_PREPRO */ - #define PUTS(str) ((*context->actions->put_string)(context->target, str)) #define PUTC(ch) ((*context->actions->put_character)(context->target, ch)) #define PUTUTF8(code) (UCPutUtf8_charstring((HTStream *)context->target, \ @@ -105,34 +85,32 @@ static void fake_put_character ( #define OPT 1 - /*the following macros are used for pretty source view. */ #define IS_C(attr) (attr.type == HTMLA_CLASS) -HTCJKlang HTCJK = NOCJK; /* CJK enum value. */ -BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */ +HTCJKlang HTCJK = NOCJK; /* CJK enum value. */ +BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */ BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */ -BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ +BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ /* The State (context) of the parser -** -** This is passed with each call to make the parser reentrant -** -*/ + * + * This is passed with each call to make the parser reentrant + * + */ #define MAX_ATTRIBUTES 36 /* Max number of attributes per element */ - /* Element Stack -** ------------- -** This allows us to return down the stack reselecting styles. -** As we return, attribute values will be garbage in general. -*/ + * ------------- + * This allows us to return down the stack reselecting styles. + * As we return, attribute values will be garbage in general. + */ typedef struct _HTElement HTElement; struct _HTElement { - HTElement * next; /* Previously nested element or 0 */ - HTTag* tag; /* The tag at this level */ + HTElement *next; /* Previously nested element or 0 */ + HTTag *tag; /* The tag at this level */ }; typedef enum { @@ -182,73 +160,74 @@ typedef enum { } sgml_state; /* Internal Context Data Structure -** ------------------------------- -*/ + * ------------------------------- + */ struct _HTStream { - const HTStreamClass * isa; /* inherited from HTStream */ - - const SGML_dtd *dtd; - const HTStructuredClass *actions; /* target class */ - HTStructured *target; /* target object */ - - HTTag *current_tag; - HTTag *slashedtag; - const HTTag *unknown_tag; - BOOL inSELECT; - BOOL no_lynx_specialcodes; - int current_attribute_number; - HTChunk *string; - int leading_spaces; - int trailing_spaces; - HTElement *element_stack; - sgml_state state; + const HTStreamClass *isa; /* inherited from HTStream */ + + const SGML_dtd *dtd; + const HTStructuredClass *actions; /* target class */ + HTStructured *target; /* target object */ + + HTTag *current_tag; + HTTag *slashedtag; + const HTTag *unknown_tag; + BOOL inSELECT; + BOOL no_lynx_specialcodes; + int current_attribute_number; + HTChunk *string; + int leading_spaces; + int trailing_spaces; + HTElement *element_stack; + sgml_state state; unsigned char kanji_buf; #ifdef CALLERDATA - void * callerData; -#endif /* CALLERDATA */ + void *callerData; +#endif /* CALLERDATA */ BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */ - char * value[MAX_ATTRIBUTES]; /* NULL, or strings alloc'd with StrAllocCopy_extra() */ - - BOOL lead_exclamation; - BOOL first_dash; - BOOL end_comment; - BOOL doctype_bracket; - BOOL first_bracket; - BOOL second_bracket; - BOOL isHex; - - HTParentAnchor * node_anchor; - LYUCcharset * inUCI; /* pointer to anchor UCInfo */ - int inUCLYhndl; /* charset we are fed */ - LYUCcharset * outUCI; /* anchor UCInfo for target */ - int outUCLYhndl; /* charset for target */ - char utf_count; - UCode_t utf_char; - char utf_buf[8]; - char * utf_buf_p; - UCTransParams T; - int current_tag_charset; /* charset to pass attributes */ - - char * recover; - int recover_index; - char * include; - char * active_include; - int include_index; - char * url; - char * csi; - int csi_index; + char *value[MAX_ATTRIBUTES]; /* NULL, or strings alloc'd with StrAllocCopy_extra() */ + + BOOL lead_exclamation; + BOOL first_dash; + BOOL end_comment; + BOOL doctype_bracket; + BOOL first_bracket; + BOOL second_bracket; + BOOL isHex; + + HTParentAnchor *node_anchor; + LYUCcharset *inUCI; /* pointer to anchor UCInfo */ + int inUCLYhndl; /* charset we are fed */ + LYUCcharset *outUCI; /* anchor UCInfo for target */ + int outUCLYhndl; /* charset for target */ + char utf_count; + UCode_t utf_char; + char utf_buf[8]; + char *utf_buf_p; + UCTransParams T; + int current_tag_charset; /* charset to pass attributes */ + + char *recover; + int recover_index; + char *include; + char *active_include; + int include_index; + char *url; + char *csi; + int csi_index; #ifdef USE_PRETTYSRC - BOOL cur_attr_is_href; - BOOL cur_attr_is_name; - BOOL seen_nonwhite_in_junk_tag; + BOOL cur_attr_is_href; + BOOL cur_attr_is_name; + BOOL seen_nonwhite_in_junk_tag; #endif }; #ifndef NO_LYNX_TRACE -static char *state_name (sgml_state n) +static char *state_name(sgml_state n) { char *result = "?"; + /* *INDENT-OFF* */ switch (n) { case S_attr: result = "S_attr"; break; case S_attr_gap: result = "S_attr_gap"; break; @@ -294,6 +273,8 @@ static char *state_name (sgml_state n) case S_text: result = "S_text"; break; case S_value: result = "S_value"; break; } + /* *INDENT-ON* */ + return result; } #endif @@ -303,15 +284,15 @@ static char *state_name (sgml_state n) static HTElement pool[DEPTH]; static int depth = 0; -static HTElement* pool_alloc (void) +static HTElement *pool_alloc(void) { depth++; if (depth > DEPTH) - return (HTElement*) malloc(sizeof(HTElement)); + return (HTElement *) malloc(sizeof(HTElement)); return (pool + depth - 1); } -static void pool_free (HTElement* e) +static void pool_free(HTElement * e) { if (depth > DEPTH) FREE(e); @@ -321,12 +302,9 @@ static void pool_free (HTElement* e) #ifdef USE_PRETTYSRC -static void HTMLSRC_apply_markup ( - HTStream * context, - HTlexeme lexeme, - BOOL start) +static void HTMLSRC_apply_markup(HTStream *context, HTlexeme lexeme, BOOL start) { - HT_tagspec* ts = *( ( start ? lexeme_start : lexeme_end ) + lexeme); + HT_tagspec *ts = *((start ? lexeme_start : lexeme_end) + lexeme); while (ts) { #ifdef USE_COLOR_STYLE @@ -337,64 +315,54 @@ static void HTMLSRC_apply_markup ( force_classname = TRUE; } #endif - CTRACE((tfp,ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n",(int)lexeme)); + CTRACE((tfp, ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n", (int) lexeme)); if (ts->start) - (*context->actions->start_element)( - context->target, - ts->element, - ts->present, - (const char **)ts->value, - context->current_tag_charset, - (char **)&context->include); + (*context->actions->start_element) (context->target, + ts->element, + ts->present, + (const char **) ts->value, + context->current_tag_charset, + (char **) &context->include); else - (*context->actions->end_element)( - context->target, - ts->element, - (char **)&context->include); + (*context->actions->end_element) (context->target, + ts->element, + (char **) &context->include); ts = ts->next; } } -#if ANSI_PREPRO -# define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_##x,START) -# define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_##x,STOP) -#else -# define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_/**/x,START) -# define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_/**/x,STOP) -#endif +#define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_##x,START) +#define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_##x,STOP) #define attr_is_href context->cur_attr_is_href #define attr_is_name context->cur_attr_is_name #endif -static void set_chartrans_handling ( - HTStream * context, - HTParentAnchor * anchor, - int chndl) +static void set_chartrans_handling(HTStream *context, HTParentAnchor *anchor, + int chndl) { if (chndl < 0) { /* - ** Nothing was set for the parser in earlier stages, - ** so the HTML parser's UCLYhndl should still be its - ** default. - FM - */ + * Nothing was set for the parser in earlier stages, so the HTML + * parser's UCLYhndl should still be its default. - FM + */ chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED); if (chndl < 0) /* - ** That wasn't set either, so seek the HText default. - FM - */ + * That wasn't set either, so seek the HText default. - FM + */ chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); if (chndl < 0) /* - ** That wasn't set either, so assume the current display - ** character set. - FM - */ + * That wasn't set either, so assume the current display character + * set. - FM + */ chndl = current_char_set; /* - ** Try to set the HText and HTML stages' chartrans info - ** with the default lock level (will not be changed if - ** it was set previously with a higher lock level). - FM - */ + * Try to set the HText and HTML stages' chartrans info with the + * default lock level (will not be changed if it was set previously + * with a higher lock level). - FM + */ HTAnchor_setUCInfoStage(anchor, chndl, UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); @@ -402,27 +370,25 @@ static void set_chartrans_handling ( UCT_STAGE_STRUCTURED, UCT_SETBY_DEFAULT); /* - ** Get the chartrans info for output to the HTML parser. - FM - */ + * Get the chartrans info for output to the HTML parser. - FM + */ context->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_STRUCTURED); context->outUCLYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_STRUCTURED); } /* - ** Set the in->out transformation parameters. - FM - */ + * Set the in->out transformation parameters. - FM + */ UCSetTransParams(&context->T, context->inUCLYhndl, context->inUCI, context->outUCLYhndl, context->outUCI); /* - ** This is intended for passing the SGML parser's input - ** charset as an argument in each call to the HTML - ** parser's start tag function, but it would be better - ** to call a Lynx_HTML_parser function to set an element - ** in its HTStructured object, itself, if this were - ** needed. - FM - */ + * This is intended for passing the SGML parser's input charset as an + * argument in each call to the HTML parser's start tag function, but it + * would be better to call a Lynx_HTML_parser function to set an element in + * its HTStructured object, itself, if this were needed. - FM + */ if (HTCJK != NOCJK) { context->current_tag_charset = -1; } else if (context->T.transp) { @@ -440,25 +406,24 @@ static void set_chartrans_handling ( } } -static void change_chartrans_handling ( - HTStream * context) +static void change_chartrans_handling(HTStream *context) { int new_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_PARSER); + if (new_LYhndl != context->inUCLYhndl && new_LYhndl >= 0) { /* - * Something changed. but ignore if a META wants an unknown charset. + * Something changed. but ignore if a META wants an unknown charset. */ - LYUCcharset * new_UCI = HTAnchor_getUCInfoStage(context->node_anchor, - UCT_STAGE_PARSER); + LYUCcharset *new_UCI = HTAnchor_getUCInfoStage(context->node_anchor, + UCT_STAGE_PARSER); + if (new_UCI) { - LYUCcharset * next_UCI = HTAnchor_getUCInfoStage( - context->node_anchor, UCT_STAGE_STRUCTURED - ); - int next_LYhndl = HTAnchor_getUCLYhndl( - context->node_anchor, UCT_STAGE_STRUCTURED - ); + LYUCcharset *next_UCI = HTAnchor_getUCInfoStage(context->node_anchor, + UCT_STAGE_STRUCTURED); + int next_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_STRUCTURED); + context->inUCI = new_UCI; context->inUCLYhndl = new_LYhndl; context->outUCI = next_UCI; @@ -475,16 +440,14 @@ static int current_is_class = 0; #endif /* Handle Attribute -** ---------------- -*/ + * ---------------- + */ /* PUBLIC const char * SGML_default = ""; ?? */ -static void handle_attribute_name ( - HTStream * context, - const char * s) +static void handle_attribute_name(HTStream *context, const char *s) { - HTTag * tag = context->current_tag; - attr * attributes = tag->attributes; + HTTag *tag = context->current_tag; + attr *attributes = tag->attributes; int high, low, i, diff; #ifdef USE_PRETTYSRC @@ -494,25 +457,25 @@ static void handle_attribute_name ( } #endif /* - ** Ignore unknown tag. - KW - */ + * Ignore unknown tag. - KW + */ if (tag == context->unknown_tag) { #ifdef USE_PRETTYSRC if (psrc_view) - context->current_attribute_number = 1; /* anything !=INVALID */ + context->current_attribute_number = 1; /* anything !=INVALID */ #endif return; } /* - ** Binary search for attribute name. - */ + * Binary search for attribute name. + */ for (low = 0, high = tag->number_of_attributes; high > low; - diff < 0 ? (low = i+1) : (high = i)) { - i = (low + (high-low)/2); + diff < 0 ? (low = i + 1) : (high = i)) { + i = (low + (high - low) / 2); diff = my_casecomp(attributes[i].name, s); - if (diff == 0) { /* success: found it */ + if (diff == 0) { /* success: found it */ context->current_attribute_number = i; #ifdef USE_PRETTYSRC if (psrc_view) { @@ -521,150 +484,141 @@ static void handle_attribute_name ( } else #endif { - context->present[i] = YES; - Clear_extra(context->value[i]); + context->present[i] = YES; + Clear_extra(context->value[i]); #ifdef USE_COLOR_STYLE # ifdef USE_PRETTYSRC - current_is_class = IS_C(attributes[i]); + current_is_class = IS_C(attributes[i]); # else - current_is_class = (!strcasecomp("class", s)); + current_is_class = (!strcasecomp("class", s)); # endif - CTRACE((tfp, "SGML: found attribute %s, %d\n", s, current_is_class)); + CTRACE((tfp, "SGML: found attribute %s, %d\n", s, current_is_class)); #endif } return; - } /* if */ - - } /* for */ + } + /* if */ + } /* for */ CTRACE((tfp, "SGML: Unknown attribute %s for tag %s\n", - s, context->current_tag->name)); + s, context->current_tag->name)); context->current_attribute_number = INVALID; /* Invalid */ } - /* Handle attribute value -** ---------------------- -*/ -static void handle_attribute_value ( - HTStream * context, - const char * s) + * ---------------------- + */ +static void handle_attribute_value(HTStream *context, const char *s) { if (context->current_attribute_number != INVALID) { StrAllocCopy_extra(context->value[context->current_attribute_number], s); #ifdef USE_COLOR_STYLE - if (current_is_class) - { - strncpy (class_string, s, TEMPSTRINGSIZE); + if (current_is_class) { + strncpy(class_string, s, TEMPSTRINGSIZE); CTRACE((tfp, "SGML: class is '%s'\n", s)); - } - else - { + } else { CTRACE((tfp, "SGML: attribute value is '%s'\n", s)); } #endif } else { CTRACE((tfp, "SGML: Attribute value %s ***ignored\n", s)); } - context->current_attribute_number = INVALID; /* can't have two assignments! */ + context->current_attribute_number = INVALID; /* can't have two assignments! */ } - /* -** Translate some Unicodes to Lynx special codes and output them. -** Special codes - ones those output depend on parsing. -** -** Additional issue, like handling bidirectional text if necessary -** may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) -** - currently they are ignored in SGML.c and LYCharUtils.c -** but also in UCdomap.c because they are non printable... -** -*/ -static BOOL put_special_unicodes ( - HTStream * context, - UCode_t code) + * Translate some Unicodes to Lynx special codes and output them. + * Special codes - ones those output depend on parsing. + * + * Additional issue, like handling bidirectional text if necessary + * may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) + * - currently they are ignored in SGML.c and LYCharUtils.c + * but also in UCdomap.c because they are non printable... + * + */ +static BOOL put_special_unicodes(HTStream *context, UCode_t code) { /* (Tgf_nolyspcl) */ if (context->no_lynx_specialcodes) { /* - ** We were asked by a "DTD" flag to not generate lynx specials. - kw - */ + * We were asked by a "DTD" flag to not generate lynx specials. - kw + */ return NO; } - if (code == CH_NBSP) { /* S/390 -- gil -- 0657 */ + if (code == CH_NBSP) { /* S/390 -- gil -- 0657 */ /* - ** Use Lynx special character for nbsp. - */ + * Use Lynx special character for nbsp. + */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif - PUTC(HT_NON_BREAK_SPACE); - } else if (code == CH_SHY) { + PUTC(HT_NON_BREAK_SPACE); + } else if (code == CH_SHY) { /* - ** Use Lynx special character for shy. - */ + * Use Lynx special character for shy. + */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif - PUTC(LY_SOFT_HYPHEN); + PUTC(LY_SOFT_HYPHEN); } else if (code == 8194 || code == 8201) { /* - ** Use Lynx special character for ensp or thinsp. - ** - ** Originally, Lynx use space '32' as word delimiter and omits this - ** space at end of line if word is wrapped to the next line. There - ** are several other spaces in the Unicode repertoire and we should - ** teach Lynx to understand them, not only as regular characters but - ** in the context of line wrapping. Unfortunately, if we use - ** HT_EN_SPACE we override the chartrans tables for those spaces - ** with a single '32' for all (but do line wrapping more fancy). - ** - ** We may treat emsp as one or two ensp (below). - */ + * Use Lynx special character for ensp or thinsp. + * + * Originally, Lynx use space '32' as word delimiter and omits this + * space at end of line if word is wrapped to the next line. There are + * several other spaces in the Unicode repertoire and we should teach + * Lynx to understand them, not only as regular characters but in the + * context of line wrapping. Unfortunately, if we use HT_EN_SPACE we + * override the chartrans tables for those spaces with a single '32' + * for all (but do line wrapping more fancy). + * + * We may treat emsp as one or two ensp (below). + */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif - PUTC(HT_EN_SPACE); + PUTC(HT_EN_SPACE); } else if (code == 8195) { /* - ** Use Lynx special character for emsp. - */ + * Use Lynx special character for emsp. + */ #ifdef USE_PRETTYSRC if (!psrc_view) { #endif - /* PUTC(HT_EN_SPACE); let's stay with a single space :) */ - PUTC(HT_EN_SPACE); + /* PUTC(HT_EN_SPACE); let's stay with a single space :) */ + PUTC(HT_EN_SPACE); #ifdef USE_PRETTYSRC } #endif } else { /* - ** Return NO if nothing done. - */ + * Return NO if nothing done. + */ return NO; } /* - ** We have handled it. - */ + * We have handled it. + */ return YES; } #ifdef USE_PRETTYSRC -static void put_pretty_entity (HTStream * context, int term) +static void put_pretty_entity(HTStream *context, int term) { PSRCSTART(entity); PUTC('&'); PUTS(entity_string); if (term) - PUTC((char)term); + PUTC((char) term); PSRCSTOP(entity); } -static void put_pretty_number (HTStream * context) +static void put_pretty_number(HTStream *context) { PSRCSTART(entity); - PUTS( (context->isHex ? "&#x" : "&#") ); + PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); PUTC(';'); PSRCSTOP(entity); @@ -672,39 +626,37 @@ static void put_pretty_number (HTStream * context) #endif /* USE_PRETTYSRC */ /* Handle entity -** ------------- -** -** On entry, -** s contains the entity name zero terminated -** Bugs: -** If the entity name is unknown, the terminator is treated as -** a printable non-special character in all cases, even if it is '<' -** Bug-fix: -** Modified SGML_character() so we only come here with terminator -** as '\0' and check a FoundEntity flag. -- Foteos Macrides -** -** Modified more (for use with Lynx character translation code): -*/ -static char replace_buf [64]; /* buffer for replacement strings */ + * ------------- + * + * On entry, + * s contains the entity name zero terminated + * Bugs: + * If the entity name is unknown, the terminator is treated as + * a printable non-special character in all cases, even if it is '<' + * Bug-fix: + * Modified SGML_character() so we only come here with terminator + * as '\0' and check a FoundEntity flag. -- Foteos Macrides + * + * Modified more (for use with Lynx character translation code): + */ +static char replace_buf[64]; /* buffer for replacement strings */ static BOOL FoundEntity = FALSE; -static void handle_entity ( - HTStream * context, - char term) +static void handle_entity(HTStream *context, char term) { UCode_t code; long uck = -1; const char *s = context->string->data; /* - ** Handle all entities normally. - FM - */ + * Handle all entities normally. - FM + */ FoundEntity = FALSE; if ((code = HTMLGetEntityUCValue(s)) != 0) { /* - ** We got a Unicode value for the entity name. - ** Check for special Unicodes. - FM - */ + * We got a Unicode value for the entity name. Check for special + * Unicodes. - FM + */ if (put_special_unicodes(context, code)) { #ifdef USE_PRETTYSRC if (psrc_view) { @@ -715,8 +667,8 @@ static void handle_entity ( return; } /* - ** Seek a translation from the chartrans tables. - */ + * Seek a translation from the chartrans tables. + */ if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 && /* =============== work in ASCII below here =============== S/390 -- gil -- 0672 */ uck < 256 && @@ -727,15 +679,15 @@ static void handle_entity ( put_pretty_entity(context, term); } else #endif - PUTC(FROMASCII((char)uck)); + PUTC(FROMASCII((char) uck)); FoundEntity = TRUE; return; } else if ((uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32)) && - /* - ** Not found; look for replacement string. - */ + /* + * Not found; look for replacement string. + */ (uck = UCTransUniCharStr(replace_buf, 60, code, context->outUCLYhndl, 0) >= 0)) { #ifdef USE_PRETTYSRC @@ -743,22 +695,24 @@ static void handle_entity ( put_pretty_entity(context, term); } else #endif - PUTS(replace_buf); + PUTS(replace_buf); FoundEntity = TRUE; return; } /* - ** If we're displaying UTF-8, try that now. - FM - */ + * If we're displaying UTF-8, try that now. - FM + */ #ifndef USE_PRETTYSRC if (context->T.output_utf8 && PUTUTF8(code)) { FoundEntity = TRUE; return; } #else - if (context->T.output_utf8 && (psrc_view ? - (UCPutUtf8_charstring((HTStream *)context->target, - (putc_func_t*)(fake_put_character), code)): PUTUTF8(code) ) ) { + if (context->T.output_utf8 && (psrc_view + ? (UCPutUtf8_charstring((HTStream *) context->target, + (putc_func_t *) (fake_put_character), + code)) + : PUTUTF8(code))) { if (psrc_view) { put_pretty_entity(context, term); @@ -769,8 +723,8 @@ static void handle_entity ( } #endif /* - ** If it's safe ASCII, use it. - FM - */ + * If it's safe ASCII, use it. - FM + */ if (code >= 32 && code < 127) { #ifdef USE_PRETTYSRC if (psrc_view) { @@ -778,16 +732,15 @@ static void handle_entity ( } else #endif - PUTC(FROMASCII((char)code)); + PUTC(FROMASCII((char) code)); FoundEntity = TRUE; return; } /* =============== work in ASCII above here =============== S/390 -- gil -- 0682 */ /* - ** Ignore zwnj (8204) and zwj (8205), if we get to here. - ** Note that zwnj may have been handled as <WBR> - ** by the calling function. - FM - */ + * Ignore zwnj (8204) and zwj (8205), if we get to here. Note that + * zwnj may have been handled as <WBR> by the calling function. - FM + */ if (!strcmp(s, "zwnj") || !strcmp(s, "zwj")) { CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); @@ -800,8 +753,8 @@ static void handle_entity ( return; } /* - ** Ignore lrm (8206), and rln (8207), if we get to here. - FM - */ + * Ignore lrm (8206), and rln (8207), if we get to here. - FM + */ if (!strcmp(s, "lrm") || !strcmp(s, "rlm")) { CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); @@ -816,13 +769,13 @@ static void handle_entity ( } /* - ** If entity string not found, display as text. - */ + * If entity string not found, display as text. + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif - CTRACE((tfp, "SGML: Unknown entity '%s' %ld %ld\n", s, (long)code, uck)); /* S/390 -- gil -- 0695 */ + CTRACE((tfp, "SGML: Unknown entity '%s' %ld %ld\n", s, (long) code, uck)); /* S/390 -- gil -- 0695 */ PUTC('&'); PUTS(s); if (term != '\0') @@ -833,12 +786,10 @@ static void handle_entity ( #endif } - /* Handle comment -** -------------- -*/ -static void handle_comment ( - HTStream * context) + * -------------- + */ +static void handle_comment(HTStream *context) { const char *s = context->string->data; @@ -846,8 +797,8 @@ static void handle_comment ( if (context->csi == NULL && strncmp(s, "!--#", 4) == 0 && - LYCheckForCSI(context->node_anchor, (char **)&context->url) == TRUE) { - LYDoCSI(context->url, s, (char **)&context->csi); + LYCheckForCSI(context->node_anchor, (char **) &context->url) == TRUE) { + LYDoCSI(context->url, s, (char **) &context->csi); } else { LYCommentHacks(context->node_anchor, context->string->data); } @@ -855,12 +806,10 @@ static void handle_comment ( return; } - /* Handle identifier -** ----------------- -*/ -static void handle_identifier ( - HTStream * context) + * ----------------- + */ +static void handle_identifier(HTStream *context) { const char *s = context->string->data; @@ -869,12 +818,10 @@ static void handle_identifier ( return; } - /* Handle doctype -** -------------- -*/ -static void handle_doctype ( - HTStream * context) + * -------------- + */ +static void handle_doctype(HTStream *context) { const char *s = context->string->data; @@ -883,16 +830,13 @@ static void handle_doctype ( return; } -static void SGML_write ( - HTStream * me, - const char * s, - int l); +static void SGML_write(HTStream *me, const char *s, + int l); /* Handle marked -** ------------- -*/ -static void handle_marked ( - HTStream * context) + * ------------- + */ +static void handle_marked(HTStream *context) { const char *s = context->string->data; @@ -906,20 +850,18 @@ static void handle_marked ( charset once it is in include - kw */ } else if (!strncmp(context->string->data, "![CDATA[", 8)) { - (*context->actions->_write)(context->target, - context->string->data + 8, - context->string->size - 11); + (*context->actions->_write) (context->target, + context->string->data + 8, + context->string->size - 11); } return; } - /* Handle sgmlent -** -------------- -*/ -static void handle_sgmlent ( - HTStream * context) + * -------------- + */ +static void handle_sgmlent(HTStream *context) { const char *s = context->string->data; @@ -928,12 +870,10 @@ static void handle_sgmlent ( return; } - /* Handle sgmlent -** -------------- -*/ -static void handle_sgmlele ( - HTStream * context) + * -------------- + */ +static void handle_sgmlele(HTStream *context) { const char *s = context->string->data; @@ -942,12 +882,10 @@ static void handle_sgmlele ( return; } - /* Handle sgmlatt -** -------------- -*/ -static void handle_sgmlatt ( - HTStream * context) + * -------------- + */ +static void handle_sgmlatt(HTStream *context) { const char *s = context->string->data; @@ -957,17 +895,16 @@ static void handle_sgmlatt ( } /* - * Convenience macros - tags (elements) are identified sometimes - * by an int or enum value ('TAGNUM'), sometimes - * by a pointer to HTTag ('TAGP'). - kw + * Convenience macros - tags (elements) are identified sometimes by an int or + * enum value ('TAGNUM'), sometimes by a pointer to HTTag ('TAGP'). - kw */ #define TAGNUM_OF_TAGP(t) (t - context->dtd->tags) #define TAGP_OF_TAGNUM(e) (context->dtd->tags + e) /* - * The following implement special knowledge about OBJECT. - * As long as HTML_OBJECT is the only tag for which an alternative - * variant exist, they can be simple macros. - kw + * The following implement special knowledge about OBJECT. As long as + * HTML_OBJECT is the only tag for which an alternative variant exist, they can + * be simple macros. - kw */ /* does 'TAGNUM' e have an alternative (variant) parsing mode? */ #define HAS_ALT_TAGNUM(e) (e == HTML_OBJECT) @@ -985,36 +922,31 @@ static void handle_sgmlatt ( #define ALT_TAGP(t) ALT_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) #define NORMAL_TAGP(t) NORMAL_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) - #ifdef EXTENDED_HTMLDTD -static BOOL element_valid_within ( - HTTag * new_tag, - HTTag * stacked_tag, - BOOL direct) +static BOOL element_valid_within(HTTag * new_tag, HTTag * stacked_tag, BOOL direct) { TagClass usecontains, usecontained; + if (!stacked_tag || !new_tag) return YES; usecontains = (direct ? stacked_tag->contains : stacked_tag->icontains); usecontained = (direct ? new_tag->contained : new_tag->icontained); if (new_tag == stacked_tag) return (BOOL) ((Tgc_same & usecontains) && - (Tgc_same & usecontained)); + (Tgc_same & usecontained)); else return (BOOL) ((new_tag->tagclass & usecontains) && - (stacked_tag->tagclass & usecontained)); + (stacked_tag->tagclass & usecontained)); } typedef enum { - close_NO = 0, + close_NO = 0, close_error = 1, close_valid = 2 } canclose_t; -static canclose_t can_close ( - HTTag * new_tag, - HTTag * stacked_tag) +static canclose_t can_close(HTTag * new_tag, HTTag * stacked_tag) { if (!stacked_tag) return close_NO; @@ -1027,11 +959,11 @@ static canclose_t can_close ( close_error : close_NO); } -static void do_close_stacked ( - HTStream * context) +static void do_close_stacked(HTStream *context) { - HTElement * stacked = context->element_stack; + HTElement *stacked = context->element_stack; HTMLElement e; + if (!stacked) return; /* stack was empty */ if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { @@ -1039,24 +971,22 @@ static void do_close_stacked ( } e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(stacked->tag)); #ifdef USE_PRETTYSRC - if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif - (*context->actions->end_element)( - context->target, - e, - (char **)&context->include); + (*context->actions->end_element) (context->target, + e, + (char **) &context->include); context->element_stack = stacked->next; pool_free(stacked); context->no_lynx_specialcodes = context->element_stack ? (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; } -static int is_on_stack ( - HTStream * context, - HTTag * old_tag) +static int is_on_stack(HTStream *context, HTTag * old_tag) { - HTElement * stacked = context->element_stack; + HTElement *stacked = context->element_stack; int i = 1; + for (; stacked; stacked = stacked->next, i++) { if (stacked->tag == old_tag || stacked->tag == ALT_TAGP(old_tag)) @@ -1067,11 +997,9 @@ static int is_on_stack ( #endif /* EXTENDED_HTMLDTD */ /* End element -** ----------- -*/ -static void end_element ( - HTStream * context, - HTTag * old_tag) + * ----------- + */ +static void end_element(HTStream *context, HTTag * old_tag) { #ifdef EXTENDED_HTMLDTD @@ -1091,9 +1019,11 @@ static void end_element ( canclose_check = can_close(old_tag, context->element_stack->tag); if (canclose_check != close_NO) { CTRACE((tfp, "SGML: End </%s> \t<- %s end </%s>\n", - context->element_stack->tag->name, - canclose_check == close_valid ? "supplied," : "***forced by", - old_tag->name)); + context->element_stack->tag->name, + ((canclose_check == close_valid) + ? "supplied," + : "***forced by"), + old_tag->name)); do_close_stacked(context); extra_action_taken = YES; stackpos = is_on_stack(context, old_tag); @@ -1102,17 +1032,18 @@ static void end_element ( if (stackpos == 0 && old_tag->contents != SGML_EMPTY) { CTRACE((tfp, "SGML: Still open %s, ***no open %s for </%s>\n", - context->element_stack ? - context->element_stack->tag->name : "none", - old_tag->name, - old_tag->name)); + context->element_stack ? + context->element_stack->tag->name : "none", + old_tag->name, + old_tag->name)); return; } if (stackpos > 1) { - CTRACE((tfp, "SGML: Nesting <%s>...<%s> \t<- ***invalid end </%s>\n", - old_tag->name, - context->element_stack->tag->name, - old_tag->name)); + CTRACE((tfp, + "SGML: Nesting <%s>...<%s> \t<- ***invalid end </%s>\n", + old_tag->name, + context->element_stack->tag->name, + old_tag->name)); return; } } @@ -1121,83 +1052,83 @@ static void end_element ( #endif /* EXTENDED_HTMLDTD */ /* - ** If we are in a SELECT block, ignore anything - ** but a SELECT end tag. - FM - */ + * If we are in a SELECT block, ignore anything but a SELECT end tag. - FM + */ if (context->inSELECT) { if (!strcasecomp(old_tag->name, "SELECT")) { /* - ** Turn off the inSELECT flag and fall through. - FM - */ + * Turn off the inSELECT flag and fall through. - FM + */ context->inSELECT = FALSE; } else { /* - ** Ignore the end tag. - FM - */ + * Ignore the end tag. - FM + */ CTRACE((tfp, "SGML: ***Ignoring end tag </%s> in SELECT block.\n", - old_tag->name)); + old_tag->name)); return; } } /* - ** Handle the end tag. - FM - */ + * Handle the end tag. - FM + */ CTRACE((tfp, "SGML: End </%s>\n", old_tag->name)); if (old_tag->contents == SGML_EMPTY) { CTRACE((tfp, "SGML: ***Illegal end tag </%s> found.\n", - old_tag->name)); + old_tag->name)); return; } #ifdef WIND_DOWN_STACK - while (context->element_stack) /* Loop is error path only */ + while (context->element_stack) /* Loop is error path only */ #else - if (context->element_stack) /* Substitute and remove one stack element */ + if (context->element_stack) /* Substitute and remove one stack element */ #endif /* WIND_DOWN_STACK */ { int status = HT_OK; HTMLElement e; - HTElement * N = context->element_stack; - HTTag * t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag; - - if (old_tag != t) { /* Mismatch: syntax error */ - if (context->element_stack->next) { /* This is not the last level */ - CTRACE((tfp, "SGML: Found </%s> when expecting </%s>. </%s> ***assumed.\n", - old_tag->name, t->name, t->name)); - } else { /* last level */ - CTRACE((tfp, "SGML: Found </%s> when expecting </%s>. </%s> ***Ignored.\n", - old_tag->name, t->name, old_tag->name)); - return; /* Ignore */ + HTElement *N = context->element_stack; + HTTag *t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag; + + if (old_tag != t) { /* Mismatch: syntax error */ + if (context->element_stack->next) { /* This is not the last level */ + CTRACE((tfp, + "SGML: Found </%s> when expecting </%s>. </%s> ***assumed.\n", + old_tag->name, t->name, t->name)); + } else { /* last level */ + CTRACE((tfp, + "SGML: Found </%s> when expecting </%s>. </%s> ***Ignored.\n", + old_tag->name, t->name, old_tag->name)); + return; /* Ignore */ } } e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)); CTRACE2(TRACE_SGML, (tfp, "tagnum(%p) = %d\n", t, e)); #ifdef USE_PRETTYSRC - if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif - status = (*context->actions->end_element)(context->target, - e, (char **)&context->include); + status = (*context->actions->end_element) (context->target, + e, (char **) &context->include); if (status == HT_PARSER_REOPEN_ELT) { CTRACE((tfp, "SGML: Restart <%s>\n", t->name)); - (*context->actions->start_element)( - context->target, - e, - NULL, - NULL, - context->current_tag_charset, - (char **)&context->include); + (*context->actions->start_element) (context->target, + e, + NULL, + NULL, + context->current_tag_charset, + (char **) &context->include); } else if (status == HT_PARSER_OTHER_CONTENT) { CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name)); context->element_stack->tag = ALT_TAGP_OF_TAGNUM(e); } else { - context->element_stack = N->next; /* Remove from stack */ + context->element_stack = N->next; /* Remove from stack */ pool_free(N); } context->no_lynx_specialcodes = context->element_stack ? (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; #ifdef WIND_DOWN_STACK if (old_tag == t) - return; /* Correct sequence */ + return; /* Correct sequence */ #else return; #endif /* WIND_DOWN_STACK */ @@ -1206,17 +1137,15 @@ static void end_element ( } CTRACE((tfp, "SGML: Extra end tag </%s> found and ignored.\n", - old_tag->name)); + old_tag->name)); } - /* Start a element */ -static void start_element ( - HTStream * context) +static void start_element(HTStream *context) { int status; - HTTag * new_tag = context->current_tag; + HTTag *new_tag = context->current_tag; HTMLElement e = TAGNUM_OF_TAGP(new_tag); BOOL ok = FALSE; @@ -1237,17 +1166,20 @@ static void start_element ( canclose_check = can_close(new_tag, context->element_stack->tag); if (canclose_check != close_NO) { CTRACE((tfp, "SGML: End </%s> \t<- %s start <%s>\n", - context->element_stack->tag->name, - canclose_check == close_valid ? "supplied," : "***forced by", - new_tag->name)); + context->element_stack->tag->name, + ((canclose_check == close_valid) + ? "supplied," + : "***forced by"), + new_tag->name)); do_close_stacked(context); extra_action_taken = YES; - if (canclose_check == close_error) + if (canclose_check == close_error) direct_container = NO; } else { - CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n", - context->element_stack->tag->name, - new_tag->name)); + CTRACE((tfp, + "SGML: Still open %s \t<- ***invalid start <%s>\n", + context->element_stack->tag->name, + new_tag->name)); } } if (context->element_stack && !valid && @@ -1255,36 +1187,39 @@ static void start_element ( !(valid = element_valid_within(new_tag, context->element_stack->tag, direct_container))) { CTRACE((tfp, "SGML: Still open %s \t<- ***ignoring start <%s>\n", - context->element_stack->tag->name, - new_tag->name)); + context->element_stack->tag->name, + new_tag->name)); return; } - if (context->element_stack && !extra_action_taken && - canclose_check == close_NO && !valid && (new_tag->flags & Tgf_mafse)) { + if (context->element_stack && + !extra_action_taken && + (canclose_check == close_NO) && + !valid && (new_tag->flags & Tgf_mafse)) { BOOL has_attributes = NO; int i = 0; - for (; i< new_tag->number_of_attributes && !has_attributes; i++) + + for (; i < new_tag->number_of_attributes && !has_attributes; i++) has_attributes = context->present[i]; if (!has_attributes) { - CTRACE((tfp, "SGML: Still open %s, ***converting invalid <%s> to </%s>\n", - context->element_stack->tag->name, - new_tag->name, - new_tag->name)); + CTRACE((tfp, + "SGML: Still open %s, ***converting invalid <%s> to </%s>\n", + context->element_stack->tag->name, + new_tag->name, + new_tag->name)); end_element(context, new_tag); return; } } if (context->element_stack && - canclose_check == close_error && !(valid = - element_valid_within( - new_tag, - context->element_stack->tag, - direct_container))) { + (canclose_check == close_error) && + !(valid = element_valid_within(new_tag, + context->element_stack->tag, + direct_container))) { CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n", - context->element_stack->tag->name, - new_tag->name)); + context->element_stack->tag->name, + new_tag->name)); } } /* Fall through to the non-extended code - kw */ @@ -1292,75 +1227,80 @@ static void start_element ( #endif /* EXTENDED_HTMLDTD */ /* - ** If we are not in a SELECT block, check if this is - ** a SELECT start tag. Otherwise (i.e., we are in a - ** SELECT block) accept only OPTION as valid, terminate - ** the SELECT block if it is any other form-related - ** element, and otherwise ignore it. - FM - */ + * If we are not in a SELECT block, check if this is a SELECT start tag. + * Otherwise (i.e., we are in a SELECT block) accept only OPTION as valid, + * terminate the SELECT block if it is any other form-related element, and + * otherwise ignore it. - FM + */ if (!context->inSELECT) { /* - ** We are not in a SELECT block, so check if this starts one. - FM - ** (frequent case!) - */ + * We are not in a SELECT block, so check if this starts one. - FM + * (frequent case!) + */ /* my_casecomp() - optimized by the first character */ if (!my_casecomp(new_tag->name, "SELECT")) { /* - ** Set the inSELECT flag and fall through. - FM - */ + * Set the inSELECT flag and fall through. - FM + */ context->inSELECT = TRUE; } } else { /* - ** We are in a SELECT block. - FM - */ + * We are in a SELECT block. - FM + */ if (strcasecomp(new_tag->name, "OPTION")) { /* - ** Ugh, it is not an OPTION. - FM - */ + * Ugh, it is not an OPTION. - FM + */ switch (e) { - case HTML_INPUT: case HTML_TEXTAREA: case HTML_SELECT: - case HTML_BUTTON: case HTML_FIELDSET: case HTML_LABEL: - case HTML_LEGEND: case HTML_FORM: - ok = TRUE; - break; - default: - break; + case HTML_INPUT: + case HTML_TEXTAREA: + case HTML_SELECT: + case HTML_BUTTON: + case HTML_FIELDSET: + case HTML_LABEL: + case HTML_LEGEND: + case HTML_FORM: + ok = TRUE; + break; + default: + break; } - if (ok) - { + if (ok) { /* - ** It is another form-related start tag, so terminate - ** the current SELECT block and fall through. - FM - */ - CTRACE((tfp, "SGML: ***Faking SELECT end tag before <%s> start tag.\n", - new_tag->name)); + * It is another form-related start tag, so terminate the + * current SELECT block and fall through. - FM + */ + CTRACE((tfp, + "SGML: ***Faking SELECT end tag before <%s> start tag.\n", + new_tag->name)); end_element(context, SGMLFindTag(context->dtd, "SELECT")); } else { /* - ** Ignore the start tag. - FM - */ - CTRACE((tfp, "SGML: ***Ignoring start tag <%s> in SELECT block.\n", - new_tag->name)); + * Ignore the start tag. - FM + */ + CTRACE((tfp, + "SGML: ***Ignoring start tag <%s> in SELECT block.\n", + new_tag->name)); return; } } } /* - ** Handle the start tag. - FM - */ + * Handle the start tag. - FM + */ CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name)); - status = (*context->actions->start_element)( - context->target, - TAGNUM_OF_TAGP(new_tag), - context->present, - (const char**) context->value, /* coerce type for think c */ - context->current_tag_charset, - (char **)&context->include); + status = (*context->actions->start_element) (context->target, + TAGNUM_OF_TAGP(new_tag), + context->present, + (const char **) context->value, /* coerce type for think c */ + context->current_tag_charset, + (char **) &context->include); if (status == HT_PARSER_OTHER_CONTENT) new_tag = ALT_TAGP(new_tag); /* this is only returned for OBJECT */ - if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ - HTElement * N = pool_alloc(); + if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ + HTElement *N = pool_alloc(); + if (N == NULL) outofmem(__FILE__, "start_element"); N->next = context->element_stack; @@ -1368,76 +1308,73 @@ static void start_element ( context->element_stack = N; context->no_lynx_specialcodes = (new_tag->flags & Tgf_nolyspcl); - } else if (e == HTML_META ) { + } else if (e == HTML_META) { /* - ** Check for result of META tag. - KW & FM - */ + * Check for result of META tag. - KW & FM + */ change_chartrans_handling(context); } } - /* Find Tag in DTD tag list -** ------------------------ -** -** On entry, -** dtd points to dtd structure including valid tag list -** string points to name of tag in question -** -** On exit, -** returns: -** NULL tag not found -** else address of tag structure in dtd -*/ -HTTag * SGMLFindTag ( - const SGML_dtd* dtd, - const char * s) + * ------------------------ + * + * On entry, + * dtd points to dtd structure including valid tag list + * string points to name of tag in question + * + * On exit, + * returns: + * NULL tag not found + * else address of tag structure in dtd + */ +HTTag *SGMLFindTag(const SGML_dtd * dtd, + const char *s) { int high, low, i, diff; - static HTTag* last[64] = {NULL}; /*optimize using the previous results*/ - HTTag** res = last + (UCH(*s) % 64); /*pointer arithmetic*/ + static HTTag *last[64] = + {NULL}; /*optimize using the previous results */ + HTTag **res = last + (UCH(*s) % 64); /*pointer arithmetic */ if (*res && !strcasecomp((*res)->name, s)) return *res; - for (low = 0, high=dtd->number_of_tags; - high > low; - diff < 0 ? (low = i+1) : (high = i)) { /* Binary search */ - i = (low + (high-low)/2); + for (low = 0, high = dtd->number_of_tags; + high > low; + diff < 0 ? (low = i + 1) : (high = i)) { /* Binary search */ + i = (low + (high - low) / 2); /* my_casecomp() - optimized by the first character, NOT_ASCII ok */ diff = my_casecomp(dtd->tags[i].name, s); /* Case insensitive */ - if (diff == 0) { /* success: found it */ + if (diff == 0) { /* success: found it */ *res = &dtd->tags[i]; return *res; } } if (IsNmStart(*s)) { /* - ** Unrecognized, but may be valid. - KW - */ + * Unrecognized, but may be valid. - KW + */ return &HTTag_unrecognized; } return NULL; } /*________________________________________________________________________ -** Public Methods -*/ - + * Public Methods + */ /* Could check that we are back to bottom of stack! @@ */ /* Do check! - FM */ /* */ -static void SGML_free ( - HTStream * context) +static void SGML_free(HTStream *context) { int i; - HTElement * cur; - HTTag * t; + HTElement *cur; + HTTag *t; /* - ** Free the buffers. - FM - */ + * Free the buffers. - FM + */ FREE(context->recover); FREE(context->url); FREE(context->csi); @@ -1445,30 +1382,30 @@ static void SGML_free ( FREE(context->active_include); /* - ** Wind down stack if any elements are open. - FM - */ + * Wind down stack if any elements are open. - FM + */ while (context->element_stack) { cur = context->element_stack; t = cur->tag; context->element_stack = cur->next; /* Remove from stack */ pool_free(cur); #ifdef USE_PRETTYSRC - if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ + if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ #endif - (*context->actions->end_element)(context->target, - NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), - (char **)&context->include); + (*context->actions->end_element) (context->target, + NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), + (char **) &context->include); FREE(context->include); } /* - ** Finish off the target. - FM - */ - (*context->actions->_free)(context->target); + * Finish off the target. - FM + */ + (*context->actions->_free) (context->target); /* - ** Free the strings and context structure. - FM - */ + * Free the strings and context structure. - FM + */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); @@ -1479,21 +1416,19 @@ static void SGML_free ( #endif } -static void SGML_abort ( - HTStream * context, - HTError e) +static void SGML_abort(HTStream *context, HTError e) { int i; - HTElement * cur; + HTElement *cur; /* - ** Abort the target. - FM - */ - (*context->actions->_abort)(context->target, e); + * Abort the target. - FM + */ + (*context->actions->_abort) (context->target, e); /* - ** Free the buffers. - FM - */ + * Free the buffers. - FM + */ FREE(context->recover); FREE(context->include); FREE(context->active_include); @@ -1501,8 +1436,8 @@ static void SGML_abort ( FREE(context->csi); /* - ** Free stack memory if any elements were left open. - KW - */ + * Free stack memory if any elements were left open. - KW + */ while (context->element_stack) { cur = context->element_stack; context->element_stack = cur->next; /* Remove from stack */ @@ -1510,8 +1445,8 @@ static void SGML_abort ( } /* - ** Free the strings and context structure. - FM - */ + * Free the strings and context structure. - FM + */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); @@ -1522,68 +1457,62 @@ static void SGML_abort ( #endif } - /* Read and write user callback handle -** ----------------------------------- -** -** The callbacks from the SGML parser have an SGML context parameter. -** These calls allow the caller to associate his own context with a -** particular SGML context. -*/ + * ----------------------------------- + * + * The callbacks from the SGML parser have an SGML context parameter. + * These calls allow the caller to associate his own context with a + * particular SGML context. + */ #ifdef CALLERDATA -void* SGML_callerData ( - HTStream * context) +void *SGML_callerData(HTStream *context) { return context->callerData; } -void SGML_setCallerData ( - HTStream * context, - void* data) +void SGML_setCallerData(HTStream *context, void *data) { context->callerData = data; } #endif /* CALLERDATA */ -static void SGML_character ( - HTStream * context, - char c_in) +static void SGML_character(HTStream *context, char c_in) { - const SGML_dtd *dtd = context->dtd; - HTChunk *string = context->string; - const char * EntityName; - HTTag * testtag = NULL; - BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ - UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ + const SGML_dtd *dtd = context->dtd; + HTChunk *string = context->string; + const char *EntityName; + HTTag *testtag = NULL; + BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ + UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ int testlast; + #ifdef CJK_EX unsigned char c; + #else char c; #endif char saved_char_in = '\0'; /* - ** Now some fun with the preprocessor. - ** Use copies for c and unsign_c == clong, so that - ** we can revert back to the unchanged c_in. - KW - */ + * Now some fun with the preprocessor. Use copies for c and unsign_c == + * clong, so that we can revert back to the unchanged c_in. - KW + */ #define unsign_c clong c = c_in; - clong = UCH(c); /* a.k.a. unsign_c */ + clong = UCH(c); /* a.k.a. unsign_c */ if (context->T.decode_utf8) { /* - ** Combine UTF-8 into Unicode. - ** Incomplete characters silently ignored. - ** From Linux kernel's console.c. - KW - */ - if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */ + * Combine UTF-8 into Unicode. Incomplete characters silently ignored. + * From Linux kernel's console.c. - KW + */ + if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */ /* - ** We have an octet from a multibyte character. - FM - */ + * We have an octet from a multibyte character. - FM + */ if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) { context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f); context->utf_count--; @@ -1591,26 +1520,25 @@ static void SGML_character ( (context->utf_buf_p)++; if (context->utf_count == 0) { /* - ** We have all of the bytes, so terminate - ** the buffer and set 'clong' to the UCode_t - ** value. - FM - */ + * We have all of the bytes, so terminate the buffer and + * set 'clong' to the UCode_t value. - FM + */ *(context->utf_buf_p) = '\0'; clong = context->utf_char; if (clong < 256) { - c = ((char)(clong & 0xff)); + c = ((char) (clong & 0xff)); } goto top1; } else { /* - ** Wait for more. - KW - */ + * Wait for more. - KW + */ return; } } else { /* - ** Start handling a new multibyte character. - FM - */ + * Start handling a new multibyte character. - FM + */ context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = c; (context->utf_buf_p)++; @@ -1631,70 +1559,68 @@ static void SGML_character ( context->utf_char = (c & 0x01); } else { /* - ** Garbage. - KW - */ + * Garbage. - KW + */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } /* - ** Wait for more. - KW - */ + * Wait for more. - KW + */ return; } } else { /* - ** Got an ASCII char. - KW - */ + * Got an ASCII char. - KW + */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; - /* goto top; */ + /* goto top; */ } - } /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */ - + } + /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */ #ifdef NOTDEFINED /* - ** If we have a koi8-r input and do not have - ** koi8-r as the output, save the raw input - ** in saved_char_in before we potentially - ** convert it to Unicode. - FM - */ + * If we have a koi8-r input and do not have koi8-r as the output, save the + * raw input in saved_char_in before we potentially convert it to Unicode. + * - FM + */ if (context->T.strip_raw_char_in) saved_char_in = c; #endif /* NOTDEFINED */ /* - ** If we want the raw input converted - ** to Unicode, try that now. - FM - */ + * If we want the raw input converted to Unicode, try that now. - FM + */ if (context->T.trans_to_uni && - ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */ + ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */ (unsign_c < ' ' && unsign_c != 0 && context->T.trans_C0_to_uni))) { /* - ** Convert the octet to Unicode. - FM - */ + * Convert the octet to Unicode. - FM + */ clong = UCTransToUni(c, context->inUCLYhndl); if (clong > 0) { saved_char_in = c; if (clong < 256) { - c = FROMASCII((char)clong); + c = FROMASCII((char) clong); } } goto top1; - } else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */ + } else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */ context->T.trans_C0_to_uni) { /* - ** This else if may be too ugly to keep. - KW - */ + * This else if may be too ugly to keep. - KW + */ if (context->T.trans_from_uni && (((clong = UCTransToUni(c, context->inUCLYhndl)) >= ' ') || (context->T.transp && (clong = UCTransToUni(c, context->inUCLYhndl)) > 0))) { saved_char_in = c; if (clong < 256) { - c = FROMASCII((char)clong); + c = FROMASCII((char) clong); } goto top1; } else { @@ -1723,80 +1649,78 @@ static void SGML_character ( StrAllocCat(context->recover, replace_buf + 1); } goto top0a; - } /* Next line end of ugly stuff for C0. - KW */ - } else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */ + } /* Next line end of ugly stuff for C0. - KW */ + } else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */ goto top0a; } /* - ** At this point we have either unsign_c a.k.a. clong in - ** Unicode (and c in latin1 if clong is in the latin1 range), - ** or unsign_c and c will have to be passed raw. - KW - */ + * At this point we have either unsign_c a.k.a. clong in Unicode (and c in + * latin1 if clong is in the latin1 range), or unsign_c and c will have to + * be passed raw. - KW + */ /* -** We jump up to here from below if we have -** stuff in the recover, insert, or csi buffers -** to process. We zero saved_char_in, in effect -** as a flag that the octet in not that of the -** actual call to this function. This may be OK -** for now, for the stuff this function adds to -** its recover buffer, but it might not be for -** stuff other functions added to the insert or -** csi buffer, so bear that in mind. - FM -** Stuff from the recover buffer is now handled -** as UTF-8 if we can expect that's what it is, -** and in that case we don't come back up here. - kw -*/ -top: + * We jump up to here from below if we have + * stuff in the recover, insert, or csi buffers + * to process. We zero saved_char_in, in effect + * as a flag that the octet in not that of the + * actual call to this function. This may be OK + * for now, for the stuff this function adds to + * its recover buffer, but it might not be for + * stuff other functions added to the insert or + * csi buffer, so bear that in mind. - FM + * Stuff from the recover buffer is now handled + * as UTF-8 if we can expect that's what it is, + * and in that case we don't come back up here. - kw + */ + top: saved_char_in = '\0'; /* -** We jump to here from above when we don't have -** UTF-8 input, haven't converted to Unicode, and -** want clong set to the input octet (unsigned) -** without zeroing its saved_char_in copy (which -** is signed). - FM -*/ -top0a: + * We jump to here from above when we don't have + * UTF-8 input, haven't converted to Unicode, and + * want clong set to the input octet (unsigned) + * without zeroing its saved_char_in copy (which + * is signed). - FM + */ + top0a: *(context->utf_buf) = '\0'; clong = UCH(c); /* -** We jump to here from above if we have converted -** the input, or a multibyte sequence across calls, -** to a Unicode value and loaded it into clong (to -** which unsign_c has been defined), and from below -** when we are recycling a character (e.g., because -** it terminated an entity but is not the standard -** semi-colon). The character will already have -** been put through the Unicode conversions. - FM -*/ -top1: + * We jump to here from above if we have converted + * the input, or a multibyte sequence across calls, + * to a Unicode value and loaded it into clong (to + * which unsign_c has been defined), and from below + * when we are recycling a character (e.g., because + * it terminated an entity but is not the standard + * semi-colon). The character will already have + * been put through the Unicode conversions. - FM + */ + top1: /* - ** Ignore low ISO 646 7-bit control characters - ** if HTCJK is not set. - FM - */ + * Ignore low ISO 646 7-bit control characters if HTCJK is not set. - FM + */ /* - ** Works for both ASCII and EBCDIC. -- gil - */ /* S/390 -- gil -- 0811 */ + * Works for both ASCII and EBCDIC. -- gil + *//* S/390 -- gil -- 0811 */ if (TOASCII(unsign_c) < 32 && c != '\t' && c != '\n' && c != '\r' && HTCJK == NOCJK) goto after_switch; /* - ** Ignore 127 if we don't have HTPassHighCtrlRaw - ** or HTCJK set. - FM - */ + * Ignore 127 if we don't have HTPassHighCtrlRaw or HTCJK set. - FM + */ #define PASSHICTRL (context->T.transp || \ unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) - if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ + if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ !(PASSHICTRL || HTCJK != NOCJK)) goto after_switch; /* - ** Ignore 8-bit control characters 128 - 159 if - ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM - */ - if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ + * Ignore 8-bit control characters 128 - 159 if neither HTPassHighCtrlRaw + * nor HTCJK is set. - FM + */ + if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ !(PASSHICTRL || HTCJK != NOCJK)) goto after_switch; @@ -1804,42 +1728,40 @@ top1: * JIS X0201 Kana is single byte. To prevent to fail SGML parsing * we have to care them here. -- TH */ - if ((HTCJK==JAPANESE) && (context->state==S_in_kanji) && + if ((HTCJK == JAPANESE) && (context->state == S_in_kanji) && !IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c))) { #ifdef CONV_JISX0201KANA_JISX0208KANA if (IS_SJIS_X0201KANA(context->kanji_buf)) { unsigned char sjis_hi, sjis_lo; + JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo); PUTC(sjis_hi); PUTC(sjis_lo); - } - else + } else #endif PUTC(context->kanji_buf); context->state = S_text; } /* - ** Handle character based on context->state. - */ + * Handle character based on context->state. + */ CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n", - state_name(context->state), - string->size, - NonNull(string->data), - UCH(c))); - switch(context->state) { + state_name(context->state), + string->size, + NonNull(string->data), + UCH(c))); + switch (context->state) { case S_in_kanji: /* - ** Note that if we don't have a CJK input, then this - ** is not the second byte of a CJK di-byte, and we're - ** trashing the input. That's why 8-bit characters - ** followed by, for example, '<' can cause the tag to - ** be treated as text, not markup. We could try to deal - ** with it by holding each first byte and then checking - ** byte pairs, but that doesn't seem worth the overhead - ** (see below). - FM - */ + * Note that if we don't have a CJK input, then this is not the second + * byte of a CJK di-byte, and we're trashing the input. That's why + * 8-bit characters followed by, for example, '<' can cause the tag to + * be treated as text, not markup. We could try to deal with it by + * holding each first byte and then checking byte pairs, but that + * doesn't seem worth the overhead (see below). - FM + */ context->state = S_text; PUTC(context->kanji_buf); PUTC(c); @@ -1847,36 +1769,34 @@ top1: case S_tagname_slash: /* - * We had something link "<name/" so far, set state to S_text - * but keep context->slashedtag as as a flag; except if we get - * '>' directly after the "<name/", and really have a tag for - * that name in context->slashedtag, in which case keep state as - * is and let code below deal with it. - kw + * We had something link "<name/" so far, set state to S_text but keep + * context->slashedtag as as a flag; except if we get '>' directly + * after the "<name/", and really have a tag for that name in + * context->slashedtag, in which case keep state as is and let code + * below deal with it. - kw */ if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) { context->state = S_text; - } /* fall through in any case! */ - + } + /* fall through in any case! */ case S_text: - if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */ + if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */ /* - ** Setting up for Kanji multibyte handling (based on - ** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). - ** Note that if the input is not in fact CJK, the - ** next byte also will be mishandled, as explained - ** above. Toggle raw mode off in such cases, or - ** select the "7 bit approximations" display - ** character set, which is largely equivalent - ** to having raw mode off with CJK. - FM - */ + * Setting up for Kanji multibyte handling (based on Takuya ASADA's + * (asada@three-a.co.jp) CJK Lynx). Note that if the input is not + * in fact CJK, the next byte also will be mishandled, as explained + * above. Toggle raw mode off in such cases, or select the "7 bit + * approximations" display character set, which is largely + * equivalent to having raw mode off with CJK. - FM + */ context->state = S_in_kanji; context->kanji_buf = c; break; - } else if (HTCJK != NOCJK && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ + } else if (HTCJK != NOCJK && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ /* - ** Setting up for CJK escape sequence handling (based on - ** Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). - FM - */ + * Setting up for CJK escape sequence handling (based on Takuya + * ASADA's (asada@three-a.co.jp) CJK Lynx). - FM + */ context->state = S_esc; PUTC(c); break; @@ -1884,17 +1804,17 @@ top1: if (c == '&' || c == '<') { #ifdef USE_PRETTYSRC - if (psrc_view) { /*there is nothing useful in the element_stack*/ + if (psrc_view) { /*there is nothing useful in the element_stack */ testtag = context->current_tag; } else #endif { testtag = context->element_stack ? - context->element_stack->tag : NULL; + context->element_stack->tag : NULL; } } - if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ + if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ (!testtag || (testtag->contents == SGML_MIXED || testtag->contents == SGML_ELEMENT || @@ -1904,19 +1824,19 @@ top1: #endif testtag->contents == SGML_RCDATA))) { /* - ** Setting up for possible entity, without the leading '&'. - FM - */ + * Setting up for possible entity, without the leading '&'. - FM + */ string->size = 0; context->state = S_ero; - } else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */ + } else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */ /* - ** Setting up for possible tag. - FM - */ + * Setting up for possible tag. - FM + */ string->size = 0; if (testtag && testtag->contents == SGML_PCDATA) { context->state = S_pcdata; } else if (testtag && (testtag->contents == SGML_LITTERAL - || testtag->contents == SGML_CDATA)) { + || testtag->contents == SGML_CDATA)) { context->state = S_litteral; } else if (testtag && (testtag->contents == SGML_SCRIPT)) { context->state = S_script; @@ -1929,11 +1849,11 @@ top1: (c == '>' && context->state == S_tagname_slash)) && TOASCII(unsign_c) < 127) { /* - ** We got either the second slash of a pending "<NAME/blah blah/" - ** shortref construct, or the '>' of a mere "<NAME/>". In both - ** cases generate a "</NAME>" end tag in the recover buffer for - ** reparsing unless NAME is really an empty element. - kw - */ + * We got either the second slash of a pending "<NAME/blah blah/" + * shortref construct, or the '>' of a mere "<NAME/>". In both + * cases generate a "</NAME>" end tag in the recover buffer for + * reparsing unless NAME is really an empty element. - kw + */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); @@ -1941,8 +1861,8 @@ top1: PSRCSTOP(abracket); } else #endif - if (context->slashedtag != context->unknown_tag && - !ReallyEmptyTag(context->slashedtag)) { + if (context->slashedtag != context->unknown_tag && + !ReallyEmptyTag(context->slashedtag)) { if (context->recover == NULL) { StrAllocCopy(context->recover, "</"); context->recover_index = 0; @@ -1957,12 +1877,12 @@ top1: } else if (context->element_stack && (context->element_stack->tag->flags & Tgf_frecyc)) { /* - * The element stack says we are within the contents of an - * element that the next stage (HTML.c) may want to feed - * us back again (via the *include string). So try to output - * text in UTF-8 if possible, using the same logic as for - * attribute values (which should be in line with what - * context->current_tag_charset indicates). - kw + * The element stack says we are within the contents of an element + * that the next stage (HTML.c) may want to feed us back again (via + * the *include string). So try to output text in UTF-8 if + * possible, using the same logic as for attribute values (which + * should be in line with what context->current_tag_charset + * indicates). - kw */ if (context->T.decode_utf8 && *context->utf_buf) { @@ -1975,9 +1895,9 @@ top1: if (LYIsASCII(clong)) { PUTC(c); } else if (clong == 0xfffd && saved_char_in && - HTPassEightBitRaw && - UCH(saved_char_in) >= - LYlowest_eightbit[context->outUCLYhndl]) { + HTPassEightBitRaw && + UCH(saved_char_in) >= + LYlowest_eightbit[context->outUCLYhndl]) { PUTUTF8((0xf000 | UCH(saved_char_in))); } else { PUTUTF8(clong); @@ -1989,74 +1909,75 @@ top1: } #define PASS8859SPECL context->T.pass_160_173_raw - /* - ** Convert 160 (nbsp) to Lynx special character if - ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM - */ - } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ + /* + * Convert 160 (nbsp) to Lynx special character if neither + * HTPassHighCtrlRaw nor HTCJK is set. - FM + */ + } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(HT_NON_BREAK_SPACE); - /* - ** Convert 173 (shy) to Lynx special character if - ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM - */ - } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ + /* + * Convert 173 (shy) to Lynx special character if neither + * HTPassHighCtrlRaw nor HTCJK is set. - FM + */ + } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(LY_SOFT_HYPHEN); - /* - ** Handle the case in which we think we have a character - ** which doesn't need further processing (e.g., a koi8-r - ** input for a koi8-r output). - FM - */ + /* + * Handle the case in which we think we have a character which + * doesn't need further processing (e.g., a koi8-r input for a + * koi8-r output). - FM + */ } else if (context->T.use_raw_char_in && saved_char_in) { /* - ** Only if the original character is still in saved_char_in, - ** otherwise we may be iterating from a goto top. - KW - */ + * Only if the original character is still in saved_char_in, + * otherwise we may be iterating from a goto top. - KW + */ PUTC(saved_char_in); saved_char_in = '\0'; /****************************************************************** - * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET + * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ - } else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */ + } else if ((chk = (BOOL) (context->T.trans_from_uni && + TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */ (uck = UCTransUniChar(unsign_c, context->outUCLYhndl)) >= ' ' && uck < 256) { CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", - uck, FROMASCII((char)uck))); + uck, FROMASCII((char) uck))); /* - ** We got one octet from the conversions, so use it. - FM - */ - PUTC(FROMASCII((char)uck)); + * We got one octet from the conversions, so use it. - FM + */ + PUTC(FROMASCII((char) uck)); } else if ((chk && - (uck == -4 || - (context->T.repl_translated_C0 && - uck > 0 && uck < 32))) && - /* - ** Not found; look for replacement string. - KW - */ + (uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck < 32))) && + /* + * Not found; look for replacement string. - KW + */ (uck = UCTransUniCharStr(replace_buf, 60, clong, context->outUCLYhndl, 0) >= 0)) { /* - ** Got a replacement string. - ** No further tests for validity - assume that whoever - ** defined replacement strings knew what she was doing. - KW - */ + * Got a replacement string. No further tests for validity - + * assume that whoever defined replacement strings knew what she + * was doing. - KW + */ PUTS(replace_buf); - /* - ** If we're displaying UTF-8, try that now. - FM - */ + /* + * If we're displaying UTF-8, try that now. - FM + */ } else if (context->T.output_utf8 && PUTUTF8(clong)) { - ; /* do nothing more */ - /* - ** If it's any other (> 160) 8-bit character, and - ** we have not set HTPassEightBitRaw nor HTCJK, nor - ** have the "ISO Latin 1" character set selected, - ** back translate for our character set. - FM - */ + ; /* do nothing more */ + /* + * If it's any other (> 160) 8-bit character, and we have not set + * HTPassEightBitRaw nor HTCJK, nor have the "ISO Latin 1" + * character set selected, back translate for our character set. - + * FM + */ #define IncludesLatin1Enc \ (context->outUCLYhndl == LATIN1 || \ (context->outUCI && \ @@ -2073,18 +1994,19 @@ top1: #endif string->size = 0; - EntityName = HTMLGetEntityName((int)(unsign_c - 160)); + EntityName = HTMLGetEntityName((int) (unsign_c - 160)); HTChunkPuts(string, EntityName); HTChunkTerminate(string); #ifdef USE_PRETTYSRC - /* we need to disable it temporary*/ + /* we need to disable it temporary */ if (psrc_view) { - psrc_view_backup =1; psrc_view =0; + psrc_view_backup = 1; + psrc_view = 0; } #endif handle_entity(context, '\0'); #ifdef USE_PRETTYSRC - /* we need to disable it temporary*/ + /* we need to disable it temporary */ if (psrc_view_backup) psrc_view = TRUE; #endif @@ -2092,72 +2014,71 @@ top1: string->size = 0; if (!FoundEntity) PUTC(';'); - /* - ** If we get to here and have an ASCII char, - ** pass the character. - KW - */ - } else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */ + /* + * If we get to here and have an ASCII char, pass the character. - + * KW + */ + } else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */ PUTC(c); - /* - ** If we get to here, and should have translated, - ** translation has failed so far. - KW - ** - ** We should have sent UTF-8 output to the parser - ** already, but what the heck, try again. - FM - */ + /* + * If we get to here, and should have translated, translation has + * failed so far. - KW + * + * We should have sent UTF-8 output to the parser already, but what + * the heck, try again. - FM + */ } else if (context->T.output_utf8 && *context->utf_buf) { PUTS(context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; #ifdef NOTDEFINED - /* - ** Check for a strippable koi8-r 8-bit character. - FM - */ + /* + * Check for a strippable koi8-r 8-bit character. - FM + */ } else if (context->T.strip_raw_char_in && saved_char_in && (UCH(saved_char_in) >= 0xc0) && (UCH(saved_char_in) < 255)) { /* - ** KOI8 special: strip high bit, gives (somewhat) readable - ** ASCII or KOI7 - it was constructed that way! - KW - */ - PUTC(((char)(saved_char_in & 0x7f))); + * KOI8 special: strip high bit, gives (somewhat) readable ASCII + * or KOI7 - it was constructed that way! - KW + */ + PUTC(((char) (saved_char_in & 0x7f))); saved_char_in = '\0'; #endif /* NOTDEFINED */ - /* - ** If we don't actually want the character, - ** make it safe and output that now. - FM - */ - } else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */ - LYlowest_eightbit[context->outUCLYhndl] || + /* + * If we don't actually want the character, make it safe and output + * that now. - FM + */ + } else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */ + LYlowest_eightbit[context->outUCLYhndl] || (context->T.trans_from_uni && !HTPassEightBitRaw)) { - /* - ** If we get to here, pass the character. - FM - */ + /* + * If we get to here, pass the character. - FM + */ } else { PUTC(c); } break; - /* - ** Found '<' in SGML_PCDATA content; treat this mode nearly like - ** S_litteral, but recognize '<!' and '<?' to filter out comments - ** and processing instructions. - kw - */ + /* + * Found '<' in SGML_PCDATA content; treat this mode nearly like + * S_litteral, but recognize '<!' and '<?' to filter out comments and + * processing instructions. - kw + */ case S_pcdata: - if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ - if (c == '!') { /* <! */ + if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ + if (c == '!') { /* <! */ /* - ** Terminate and set up for possible comment, - ** identifier, declaration, or marked section - ** as under S_tag. - kw - */ + * Terminate and set up for possible comment, identifier, + * declaration, or marked section as under S_tag. - kw + */ context->state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; - } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */ + } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */ CTRACE((tfp, "SGML: Found PI in PCDATA, junking it until '>'\n")); #ifdef USE_PRETTYSRC @@ -2165,7 +2086,7 @@ top1: PSRCSTART(abracket); PUTS("<?"); PSRCSTOP(abracket); - context->seen_nonwhite_in_junk_tag = TRUE; /* show all */ + context->seen_nonwhite_in_junk_tag = TRUE; /* show all */ } #endif context->state = S_junk_pi; @@ -2174,19 +2095,18 @@ top1: } goto case_S_litteral; - /* - ** Found '<' in SGML_SCRIPT content; treat this mode nearly like - ** S_litteral, but recognize '<!' to allow the content to be treated - ** as a comment by lynx. - */ + /* + * Found '<' in SGML_SCRIPT content; treat this mode nearly like + * S_litteral, but recognize '<!' to allow the content to be treated as + * a comment by lynx. + */ case S_script: - if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ - if (c == '!') { /* <! */ + if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ + if (c == '!') { /* <! */ /* - ** Terminate and set up for possible comment, - ** identifier, declaration, or marked section - ** as under S_tag. - kw - */ + * Terminate and set up for possible comment, identifier, + * declaration, or marked section as under S_tag. - kw + */ context->state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; @@ -2197,15 +2117,15 @@ top1: } goto case_S_litteral; - /* - ** In litteral mode, waits only for specific end tag (for - ** compatibility with old servers, and for Lynx). - FM - */ - case_S_litteral: - case S_litteral: /*PSRC:this case not understood completely by HV, not done*/ + /* + * In litteral mode, waits only for specific end tag (for compatibility + * with old servers, and for Lynx). - FM + */ + case_S_litteral: + case S_litteral: /*PSRC:this case not understood completely by HV, not done */ HTChunkPutc(string, c); #ifdef USE_PRETTYSRC - if (psrc_view) { /*there is nothing useful in the element_stack*/ + if (psrc_view) { /*there is nothing useful in the element_stack */ testtag = context->current_tag; } else #endif @@ -2220,20 +2140,20 @@ top1: /* * Normally when we get the closing ">", - * testtag contains something like "TITLE" - * string contains something like "/title>" + * testtag contains something like "TITLE" + * string contains something like "/title>" * so we decrement by 2 to compare the final character of each. */ testlast = string->size - 2 - context->trailing_spaces - context->leading_spaces; if (TOUPPER(c) != ((testlast < 0) - ? '/' - : testtag->name[testlast])) { + ? '/' + : testtag->name[testlast])) { int i; /* - ** If complete match, end litteral. - */ + * If complete match, end litteral. + */ if ((c == '>') && testlast >= 0 && !testtag->name[testlast]) { #ifdef USE_PRETTYSRC @@ -2242,7 +2162,7 @@ top1: PUTS("</"); PSRCSTOP(abracket); PSRCSTART(tag); - strcpy(string->data,context->current_tag->name); + strcpy(string->data, context->current_tag->name); if (tagname_transform != 1) { if (tagname_transform == 0) LYLowerCase(string->data); @@ -2292,67 +2212,67 @@ top1: if (((testtag->contents != SGML_LITTERAL && (testtag->flags & Tgf_strict)) || (context->state == S_pcdata && - (testtag->flags & (Tgf_strict|Tgf_endO)))) && + (testtag->flags & (Tgf_strict | Tgf_endO)))) && (testlast > -1 && (c == '>' || testlast > 0 || IsNmStart(c)))) { context->state = S_end; string->size--; - for (i = 0; i < string->size; i++) /* remove '/' */ - string->data[i] = string->data[i+1]; + for (i = 0; i < string->size; i++) /* remove '/' */ + string->data[i] = string->data[i + 1]; if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c)) break; string->size--; goto top1; } if (context->state == S_pcdata && - (testtag->flags & (Tgf_strict|Tgf_endO)) && + (testtag->flags & (Tgf_strict | Tgf_endO)) && (testlast < 0 && IsNmStart(c))) { context->state = S_tag; break; } /* - ** If Mismatch: recover string literally. - */ + * If Mismatch: recover string literally. + */ PUTC('<'); - for (i = 0; i < string->size-1; i++) /* recover, except last c */ - PUTC(string->data[i]); + for (i = 0; i < string->size - 1; i++) /* recover, except last c */ + PUTC(string->data[i]); string->size = 0; context->state = S_text; goto top1; /* to recover last c */ } break; - /* - ** Character reference (numeric entity) or named entity. - */ + /* + * Character reference (numeric entity) or named entity. + */ case S_ero: if (c == '#') { /* - ** Setting up for possible numeric entity. - */ - context->state = S_cro; /* &# is Char Ref Open */ + * Setting up for possible numeric entity. + */ + context->state = S_cro; /* &# is Char Ref Open */ break; } - context->state = S_entity; /* Fall through! */ + context->state = S_entity; /* Fall through! */ - /* - ** Handle possible named entity. - */ + /* + * Handle possible named entity. + */ case S_entity: if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ - isalnum(UCH(c)) : isalpha(UCH(c)))) { + isalnum(UCH(c)) : isalpha(UCH(c)))) { /* Should probably use IsNmStart/IsNmChar above (is that right?), but the world is not ready for that - there's  : (note colon!) and stuff around. */ /* - ** Accept valid ASCII character. - FM - */ + * Accept valid ASCII character. - FM + */ HTChunkPutc(string, c); } else if (string->size == 0) { /* - ** It was an ampersand that's just text, so output - ** the ampersand and recycle this character. - FM - */ + * It was an ampersand that's just text, so output the ampersand + * and recycle this character. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); @@ -2366,8 +2286,8 @@ top1: goto top1; } else { /* - ** Terminate entity name and try to handle it. - FM - */ + * Terminate entity name and try to handle it. - FM + */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC entity_string = string->data; @@ -2376,14 +2296,15 @@ top1: /* CTRACE((tfp, "%s: %d: %s\n", __FILE__, __LINE__, string->data)); */ if (!strcmp(string->data, "zwnj") && (!context->element_stack || - (context->element_stack->tag && + (context->element_stack->tag && context->element_stack->tag->contents == SGML_MIXED))) { /* - ** Handle zwnj (8204) as <WBR>. - FM - */ + * Handle zwnj (8204) as <WBR>. - FM + */ char temp[8]; - CTRACE((tfp, "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n")); + CTRACE((tfp, + "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n")); if (c != ';') { sprintf(temp, "<WBR>%c", c); @@ -2405,11 +2326,10 @@ top1: string->size = 0; context->state = S_text; /* - ** Don't eat the terminator if we didn't find the - ** entity name and therefore sent the raw string - ** via handle_entity(), or if the terminator is - ** not the "standard" semi-colon for HTML. - FM - */ + * Don't eat the terminator if we didn't find the entity name and + * therefore sent the raw string via handle_entity(), or if the + * terminator is not the "standard" semi-colon for HTML. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view && FoundEntity && c == ';') { PSRCSTART(entity); @@ -2422,25 +2342,25 @@ top1: } break; - /* - ** Check for a numeric entity. - */ + /* + * Check for a numeric entity. + */ case S_cro: - if (TOASCII(unsign_c) < 127 && TOLOWER(UCH(c)) == 'x') { /* S/390 -- gil -- 1060 */ + if (TOASCII(unsign_c) < 127 && TOLOWER(UCH(c)) == 'x') { /* S/390 -- gil -- 1060 */ context->isHex = TRUE; context->state = S_incro; } else if (TOASCII(unsign_c) < 127 && isdigit(UCH(c))) { /* - ** Accept only valid ASCII digits. - FM - */ + * Accept only valid ASCII digits. - FM + */ HTChunkPutc(string, c); /* accumulate a character NUMBER */ context->isHex = FALSE; context->state = S_incro; } else if (string->size == 0) { /* - ** No 'x' or digit following the "&#" so recover - ** them and recycle the character. - FM - */ + * No 'x' or digit following the "&#" so recover them and recycle + * the character. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); @@ -2456,24 +2376,24 @@ top1: } break; - /* - ** Handle a numeric entity. - */ + /* + * Handle a numeric entity. + */ case S_incro: - /* S/390 -- gil -- 1075 */ /* CTRACE((tfp, "%s: %d: numeric %d %d\n", - __FILE__, __LINE__, unsign_c, c)); */ +/* S/390 -- gil -- 1075 *//* CTRACE((tfp, "%s: %d: numeric %d %d\n", + __FILE__, __LINE__, unsign_c, c)); */ if ((TOASCII(unsign_c) < 127) && (context->isHex ? isxdigit(UCH(c)) : - isdigit(UCH(c)))) { + isdigit(UCH(c)))) { /* - ** Accept only valid hex or ASCII digits. - FM - */ + * Accept only valid hex or ASCII digits. - FM + */ HTChunkPutc(string, c); /* accumulate a character NUMBER */ } else if (string->size == 0) { /* - ** No hex digit following the "&#x" so recover - ** them and recycle the character. - FM - */ + * No hex digit following the "&#x" so recover them and recycle the + * character. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); @@ -2488,179 +2408,178 @@ top1: goto top1; } else { /* - ** Terminate the numeric entity and try to handle it. - FM - */ + * Terminate the numeric entity and try to handle it. - FM + */ UCode_t code; int i; + HTChunkTerminate(string); #ifdef USE_PRETTYSRC entity_string = string->data; #endif if ((context->isHex ? sscanf(string->data, "%lx", &code) : - sscanf(string->data, "%ld", &code)) == 1) { + sscanf(string->data, "%ld", &code)) == 1) { /* =============== work in ASCII below here =============== S/390 -- gil -- 1092 */ if ((code == 1) || (code > 127 && code < 156)) { /* - ** Assume these are Microsoft code points, - ** inflicted on us by FrontPage. - FM - ** - ** MS FrontPage uses syntax like ™ in 128-159 range - ** and doesn't follow Unicode standards for this area. - ** Windows-1252 codepoints are assumed here. - */ + * Assume these are Microsoft code points, inflicted on us + * by FrontPage. - FM + * + * MS FrontPage uses syntax like ™ in 128-159 range + * and doesn't follow Unicode standards for this area. + * Windows-1252 codepoints are assumed here. + */ switch (code) { - case 1: - /* - ** WHITE SMILING FACE - */ - code = 0x263a; - break; - case 128: - /* - ** EURO currency sign - */ - code = 0x20ac; - break; - case 130: - /* - ** SINGLE LOW-9 QUOTATION MARK (sbquo) - */ - code = 0x201a; - break; - case 132: - /* - ** DOUBLE LOW-9 QUOTATION MARK (bdquo) - */ - code = 0x201e; - break; - case 133: - /* - ** HORIZONTAL ELLIPSIS (hellip) - */ - code = 0x2026; - break; - case 134: - /* - ** DAGGER (dagger) - */ - code = 0x2020; - break; - case 135: - /* - ** DOUBLE DAGGER (Dagger) - */ - code = 0x2021; - break; - case 137: - /* - ** PER MILLE SIGN (permil) - */ - code = 0x2030; - break; - case 139: - /* - ** SINGLE LEFT-POINTING ANGLE QUOTATION MARK - ** (lsaquo) - */ - code = 0x2039; - break; - case 145: - /* - ** LEFT SINGLE QUOTATION MARK (lsquo) - */ - code = 0x2018; - break; - case 146: - /* - ** RIGHT SINGLE QUOTATION MARK (rsquo) - */ - code = 0x2019; - break; - case 147: - /* - ** LEFT DOUBLE QUOTATION MARK (ldquo) - */ - code = 0x201c; - break; - case 148: - /* - ** RIGHT DOUBLE QUOTATION MARK (rdquo) - */ - code = 0x201d; - break; - case 149: - /* - ** BULLET (bull) - */ - code = 0x2022; - break; - case 150: - /* - ** EN DASH (ndash) - */ - code = 0x2013; - break; - case 151: - /* - ** EM DASH (mdash) - */ - code = 0x2014; - break; - case 152: - /* - ** SMALL TILDE (tilde) - */ - code = 0x02dc; - break; - case 153: - /* - ** TRADE MARK SIGN (trade) - */ - code = 0x2122; - break; - case 155: - /* - ** SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - ** (rsaquo) - */ - code = 0x203a; - break; - default: - /* - ** Do not attempt a conversion - ** to valid Unicode values. - */ - break; + case 1: + /* + * WHITE SMILING FACE + */ + code = 0x263a; + break; + case 128: + /* + * EURO currency sign + */ + code = 0x20ac; + break; + case 130: + /* + * SINGLE LOW-9 QUOTATION MARK (sbquo) + */ + code = 0x201a; + break; + case 132: + /* + * DOUBLE LOW-9 QUOTATION MARK (bdquo) + */ + code = 0x201e; + break; + case 133: + /* + * HORIZONTAL ELLIPSIS (hellip) + */ + code = 0x2026; + break; + case 134: + /* + * DAGGER (dagger) + */ + code = 0x2020; + break; + case 135: + /* + * DOUBLE DAGGER (Dagger) + */ + code = 0x2021; + break; + case 137: + /* + * PER MILLE SIGN (permil) + */ + code = 0x2030; + break; + case 139: + /* + * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo) + */ + code = 0x2039; + break; + case 145: + /* + * LEFT SINGLE QUOTATION MARK (lsquo) + */ + code = 0x2018; + break; + case 146: + /* + * RIGHT SINGLE QUOTATION MARK (rsquo) + */ + code = 0x2019; + break; + case 147: + /* + * LEFT DOUBLE QUOTATION MARK (ldquo) + */ + code = 0x201c; + break; + case 148: + /* + * RIGHT DOUBLE QUOTATION MARK (rdquo) + */ + code = 0x201d; + break; + case 149: + /* + * BULLET (bull) + */ + code = 0x2022; + break; + case 150: + /* + * EN DASH (ndash) + */ + code = 0x2013; + break; + case 151: + /* + * EM DASH (mdash) + */ + code = 0x2014; + break; + case 152: + /* + * SMALL TILDE (tilde) + */ + code = 0x02dc; + break; + case 153: + /* + * TRADE MARK SIGN (trade) + */ + code = 0x2122; + break; + case 155: + /* + * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo) + */ + code = 0x203a; + break; + default: + /* + * Do not attempt a conversion to valid Unicode values. + */ + break; } } /* - ** Check for special values. - FM - */ + * Check for special values. - FM + */ if ((code == 8204) && (!context->element_stack || - (context->element_stack->tag && + (context->element_stack->tag && context->element_stack->tag->contents == SGML_MIXED))) { /* - ** Handle zwnj (8204) as <WBR>. - FM - */ + * Handle zwnj (8204) as <WBR>. - FM + */ char temp[8]; - CTRACE((tfp, "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n")); + CTRACE((tfp, + "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n")); /* - ** Include the terminator if it is not - ** the standard semi-colon. - FM - */ + * Include the terminator if it is not the standard + * semi-colon. - FM + */ if (c != ';') { sprintf(temp, "<WBR>%c", c); } else { sprintf(temp, "<WBR>"); } /* - ** Add the replacement string to the - ** recover buffer for processing. - FM - */ + * Add the replacement string to the recover buffer for + * processing. - FM + */ if (context->recover == NULL) { StrAllocCopy(context->recover, temp); context->recover_index = 0; @@ -2673,13 +2592,13 @@ top1: break; } else if (put_special_unicodes(context, code)) { /* - ** We handled the value as a special character, - ** so recycle the terminator or break. - FM - */ + * We handled the value as a special character, so recycle + * the terminator or break. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(entity); - PUTS( (context->isHex ? "&#x" : "&#") ); + PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); if (c == ';') PUTC(';'); @@ -2694,8 +2613,8 @@ top1: break; } /* - ** Seek a translation from the chartrans tables. - */ + * Seek a translation from the chartrans tables. + */ if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 && uck < 256 && @@ -2704,7 +2623,7 @@ top1: #ifdef USE_PRETTYSRC if (!psrc_view) { #endif - PUTC(FROMASCII((char)uck)); + PUTC(FROMASCII((char) uck)); #ifdef USE_PRETTYSRC } else { put_pretty_number(context); @@ -2713,9 +2632,9 @@ top1: } else if ((uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32)) && - /* - ** Not found; look for replacement string. - */ + /* + * Not found; look for replacement string. + */ (uck = UCTransUniCharStr(replace_buf, 60, code, context->outUCLYhndl, 0) >= 0)) { @@ -2724,16 +2643,16 @@ top1: put_pretty_number(context); } else #endif - PUTS(replace_buf); - /* - ** If we're displaying UTF-8, try that now. - FM - */ + PUTS(replace_buf); + /* + * If we're displaying UTF-8, try that now. - FM + */ } else if (context->T.output_utf8 && PUTUTF8(code)) { - ; /* do nothing more */ - /* - ** Ignore 8205 (zwj), - ** 8206 (lrm), and 8207 (rln), if we get to here. - FM - */ + ; /* do nothing more */ + /* + * Ignore 8205 (zwj), 8206 (lrm), and 8207 (rln), if we get + * to here. - FM + */ } else if (code == 8205 || code == 8206 || code == 8207) { @@ -2750,7 +2669,7 @@ top1: #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(badseq); - PUTS( (context->isHex ? "&#x" : "&#") ); + PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); if (c == ';') PUTC(';'); @@ -2763,71 +2682,69 @@ top1: if (c != ';') goto top1; break; - /* - ** Show the numeric entity if we get to here - ** and the value: - ** (1) Is greater than 255 (but use ASCII characters - ** for spaces or dashes). - ** (2) Is less than 32, and not valid or we don't - ** have HTCJK set. - ** (3) Is 127 and we don't have HTPassHighCtrlRaw or - ** HTCJK set. - ** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum - ** set. - ** - FM - */ + /* + * Show the numeric entity if we get to here and the value: + * (1) Is greater than 255 (but use ASCII characters for + * spaces or dashes). + * (2) Is less than 32, and not valid or we don't have + * HTCJK set. + * (3) Is 127 and we don't have HTPassHighCtrlRaw or HTCJK + * set. + * (4) Is 128 - 159 and we don't have HTPassHighCtrlNum + * set. + * - FM + */ } else if ((code > 255) || - (code < ' ' && /* S/390 -- gil -- 1140 */ + (code < ' ' && /* S/390 -- gil -- 1140 */ code != '\t' && code != '\n' && code != '\r' && HTCJK == NOCJK) || (TOASCII(code) == 127 && !(HTPassHighCtrlRaw || HTCJK != NOCJK)) || (TOASCII(code) > 127 && code < 160 && !HTPassHighCtrlNum)) { - /* - ** Unhandled or illegal value. Recover the - ** "&#" or "&#x" and digit(s), and recycle - ** the terminator. - FM - */ + /* + * Unhandled or illegal value. Recover the "&#" or "&#x" + * and digit(s), and recycle the terminator. - FM + */ #ifdef USE_PRETTYSRC - if (psrc_view) { - PSRCSTART(badseq); - } + if (psrc_view) { + PSRCSTART(badseq); + } #endif - if (context->isHex) { - PUTS("&#x"); - context->isHex = FALSE; - } else { - PUTS("&#"); - } - string->size--; - for (i = 0; i < string->size; i++) /* recover */ - PUTC(string->data[i]); + if (context->isHex) { + PUTS("&#x"); + context->isHex = FALSE; + } else { + PUTS("&#"); + } + string->size--; + for (i = 0; i < string->size; i++) /* recover */ + PUTC(string->data[i]); #ifdef USE_PRETTYSRC - if (psrc_view) { - PSRCSTOP(badseq); - } + if (psrc_view) { + PSRCSTOP(badseq); + } #endif - string->size = 0; - context->isHex = FALSE; - context->state = S_text; - goto top1; - } else if (TOASCII(code) < 161 || /* S/390 -- gil -- 1162 */ + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + goto top1; + } else if (TOASCII(code) < 161 || /* S/390 -- gil -- 1162 */ HTPassEightBitNum || IncludesLatin1Enc) { /* - ** No conversion needed. - FM - */ + * No conversion needed. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_number(context); } else #endif - PUTC(FROMASCII((char)code)); + PUTC(FROMASCII((char) code)); } else { /* - ** Handle as named entity. - FM - */ + * Handle as named entity. - FM + */ code -= 160; EntityName = HTMLGetEntityName(code); if (EntityName && EntityName[0] != '\0') { @@ -2836,17 +2753,17 @@ top1: HTChunkTerminate(string); handle_entity(context, '\0'); /* - ** Add a semi-colon if something went wrong - ** and handle_entity() sent the string. - FM - */ + * Add a semi-colon if something went wrong and + * handle_entity() sent the string. - FM + */ if (!FoundEntity) { PUTC(';'); } } else { /* - ** Our conversion failed, so recover the "&#" - ** and digit(s), and recycle the terminator. - FM - */ + * Our conversion failed, so recover the "&#" and + * digit(s), and recycle the terminator. - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); @@ -2871,25 +2788,24 @@ top1: } } /* - ** If we get to here, we succeeded. Hoorah!!! - FM - */ + * If we get to here, we succeeded. Hoorah!!! - FM + */ string->size = 0; context->isHex = FALSE; context->state = S_text; /* - ** Don't eat the terminator if it's not - ** the "standard" semi-colon for HTML. - FM - */ + * Don't eat the terminator if it's not the "standard" + * semi-colon for HTML. - FM + */ if (c != ';') { goto top1; } } else { /* - ** Not an entity, and don't know why not, so add - ** the terminator to the string, output the "&#" - ** or "&#x", and process the string via the recover - ** element. - FM - */ + * Not an entity, and don't know why not, so add the terminator + * to the string, output the "&#" or "&#x", and process the + * string via the recover element. - FM + */ string->size--; HTChunkPutc(string, c); HTChunkTerminate(string); @@ -2921,21 +2837,21 @@ top1: } break; - /* - ** Tag - */ - case S_tag: /* new tag */ + /* + * Tag + */ + case S_tag: /* new tag */ if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1179 */ - IsNmChar(c) : IsNmStart(c))) { + IsNmChar(c) : IsNmStart(c))) { /* - ** Add valid ASCII character. - FM - */ + * Add valid ASCII character. - FM + */ HTChunkPutc(string, c); - } else if (c == '!' && !string->size) { /* <! */ + } else if (c == '!' && !string->size) { /* <! */ /* - ** Terminate and set up for possible comment, - ** identifier, declaration, or marked section. - FM - */ + * Terminate and set up for possible comment, identifier, + * declaration, or marked section. - FM + */ context->state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; @@ -2943,16 +2859,16 @@ top1: HTChunkPutc(string, c); break; } else if (!string->size && - (TOASCII(unsign_c) <= 160 && /* S/390 -- gil -- 1196 */ + (TOASCII(unsign_c) <= 160 && /* S/390 -- gil -- 1196 */ (c != '/' && c != '?' && c != '_' && c != ':'))) { /* - ** '<' must be followed by an ASCII letter to be a valid - ** start tag. Here it isn't, nor do we have a '/' for an - ** end tag, nor one of some other characters with a - ** special meaning for SGML or which are likely to be legal - ** Name Start characters in XML or some other extension. - ** So recover the '<' and following character as data. - FM & KW - */ + * '<' must be followed by an ASCII letter to be a valid start tag. + * Here it isn't, nor do we have a '/' for an end tag, nor one of + * some other characters with a special meaning for SGML or which + * are likely to be legal Name Start characters in XML or some + * other extension. So recover the '<' and following character as + * data. - FM & KW + */ context->state = S_text; #ifdef USE_PRETTYSRC if (psrc_view) @@ -2964,19 +2880,20 @@ top1: PSRCSTOP(badseq); #endif goto top1; - } else { /* End of tag name */ + } else { /* End of tag name */ /* - ** Try to handle tag. - FM - */ - HTTag * t; + * Try to handle tag. - FM + */ + HTTag *t; + if (c == '/') { if (string->size == 0) { context->state = S_end; break; } - CTRACE((tfp,"SGML: `<%.*s/' found!\n", string->size, string->data)); + CTRACE((tfp, "SGML: `<%.*s/' found!\n", string->size, string->data)); } - HTChunkTerminate(string) ; + HTChunkTerminate(string); t = SGMLFindTag(dtd, string->data); if (t == context->unknown_tag && @@ -2984,32 +2901,32 @@ top1: string->size == 4 && 0 == strcasecomp(string->data, "URL")) || (string->size > 4 && 0 == strncasecomp(string->data, "URL:", 4)))) { /* - ** Treat <URL: as text rather than a junk tag, - ** so we display it and the URL (Lynxism 8-). - FM - */ + * Treat <URL: as text rather than a junk tag, so we display + * it and the URL (Lynxism 8-). - FM + */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif PUTC('<'); - PUTS(string->data); /* recover */ + PUTS(string->data); /* recover */ PUTC(c); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif CTRACE((tfp, "SGML: Treating <%s%c as text\n", - string->data, c)); + string->data, c)); string->size = 0; context->state = S_text; break; } if (c == '/' && t) { /* - * Element name was ended by '/'. Remember the tag that - * ended thusly, we'll interpret this as either an indication - * of an empty element (if '>' follows directly) or do - * some SGMLshortref-ish treatment. - kw + * Element name was ended by '/'. Remember the tag that ended + * thusly, we'll interpret this as either an indication of an + * empty element (if '>' follows directly) or do some + * SGMLshortref-ish treatment. - kw */ context->slashedtag = t; } @@ -3021,14 +2938,14 @@ top1: PSRCSTART(abracket); PUTS("<?"); PSRCSTOP(abracket); - context->seen_nonwhite_in_junk_tag = TRUE; /*show all*/ + context->seen_nonwhite_in_junk_tag = TRUE; /*show all */ } #endif context->state = S_junk_pi; break; } CTRACE((tfp, "SGML: *** Invalid element %s\n", - string->data)); + string->data)); #ifdef USE_PRETTYSRC if (psrc_view) { @@ -3043,7 +2960,7 @@ top1: LYUpperCase(string->data); } PUTS(string->data); - if (c == '>' ) { + if (c == '>') { PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); @@ -3057,11 +2974,11 @@ top1: break; } else if (t == context->unknown_tag) { CTRACE((tfp, "SGML: *** Unknown element %s\n", - string->data)); + string->data)); /* - ** Fall through and treat like valid - ** tag for attribute parsing. - KW - */ + * Fall through and treat like valid tag for attribute parsing. + * - KW + */ } context->current_tag = t; @@ -3087,14 +3004,14 @@ top1: else PSRCSTOP(badtag); } - if (!psrc_view) /*don't waste time */ + if (!psrc_view) /*don't waste time */ #endif { - /* - ** Clear out attributes. - */ - memset( (void*)context->present, 0 , sizeof(BOOL)* - context->current_tag->number_of_attributes); + /* + * Clear out attributes. + */ + memset((void *) context->present, 0, sizeof(BOOL) * + context->current_tag->number_of_attributes); } string->size = 0; @@ -3131,8 +3048,8 @@ top1: case S_exclamation: if (context->lead_exclamation && c == '-') { /* - ** Set up for possible comment. - FM - */ + * Set up for possible comment. - FM + */ context->lead_exclamation = FALSE; context->first_dash = TRUE; HTChunkPutc(string, c); @@ -3140,8 +3057,8 @@ top1: } if (context->lead_exclamation && c == '[') { /* - ** Set up for possible marked section. - FM - */ + * Set up for possible marked section. - FM + */ context->lead_exclamation = FALSE; context->first_bracket = TRUE; context->second_bracket = FALSE; @@ -3151,8 +3068,8 @@ top1: } if (context->first_dash && c == '-') { /* - ** Set up to handle comment. - FM - */ + * Set up to handle comment. - FM + */ context->lead_exclamation = FALSE; context->first_dash = FALSE; context->end_comment = FALSE; @@ -3164,8 +3081,8 @@ top1: context->first_dash = FALSE; if (c == '>') { /* - ** Try to handle identifier. - FM - */ + * Try to handle identifier. - FM + */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { @@ -3176,7 +3093,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_identifier(context); + handle_identifier(context); string->size = 0; context->state = S_text; break; @@ -3185,8 +3102,8 @@ top1: if (string->size == 8 && !strncasecomp(string->data, "!DOCTYPE", 8)) { /* - ** Set up for DOCTYPE declaration. - FM - */ + * Set up for DOCTYPE declaration. - FM + */ HTChunkPutc(string, c); context->doctype_bracket = FALSE; context->state = S_doctype; @@ -3195,8 +3112,8 @@ top1: if (string->size == 7 && !strncasecomp(string->data, "!ENTITY", 7)) { /* - ** Set up for ENTITY declaration. - FM - */ + * Set up for ENTITY declaration. - FM + */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; @@ -3206,8 +3123,8 @@ top1: if (string->size == 8 && !strncasecomp(string->data, "!ELEMENT", 8)) { /* - ** Set up for ELEMENT declaration. - FM - */ + * Set up for ELEMENT declaration. - FM + */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; @@ -3217,8 +3134,8 @@ top1: if (string->size == 8 && !strncasecomp(string->data, "!ATTLIST", 8)) { /* - ** Set up for ATTLIST declaration. - FM - */ + * Set up for ATTLIST declaration. - FM + */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; @@ -3232,8 +3149,8 @@ top1: case S_comment: /* Expecting comment. - FM */ if (historical_comments) { /* - ** Any '>' terminates. - FM - */ + * Any '>' terminates. - FM + */ if (c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC @@ -3245,7 +3162,7 @@ top1: PSRCSTOP(comm); } else #endif - handle_comment(context); + handle_comment(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; @@ -3266,16 +3183,16 @@ top1: context->end_comment = TRUE; else if (!minimal_comments) /* - ** Validly treat '--' pairs as successive comments - ** (for minimal, any "--WHITE>" terminates). - FM - */ + * Validly treat '--' pairs as successive comments (for + * minimal, any "--WHITE>" terminates). - FM + */ context->end_comment = FALSE; break; } if (context->end_comment && c == '>') { /* - ** Terminate and handle the comment. - FM - */ + * Terminate and handle the comment. - FM + */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { @@ -3297,9 +3214,9 @@ top1: if (context->end_comment && !isspace(UCH(c))) context->end_comment = FALSE; - S_comment_put_c: + S_comment_put_c: if (context->T.decode_utf8 && - *context->utf_buf) { + *context->utf_buf) { HTChunkPuts(string, context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; @@ -3345,7 +3262,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_doctype(context); + handle_doctype(context); string->size = 0; context->state = S_text; break; @@ -3377,7 +3294,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_marked(context); + handle_marked(context); string->size = 0; context->state = S_text; break; @@ -3411,7 +3328,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_sgmlent(context); + handle_sgmlent(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; @@ -3448,7 +3365,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_sgmlele(context); + handle_sgmlele(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; @@ -3485,7 +3402,7 @@ top1: PSRCSTOP(sgmlspecial); } else #endif - handle_sgmlatt(context); + handle_sgmlatt(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; @@ -3503,8 +3420,8 @@ top1: #ifdef USE_PRETTYSRC if (!psrc_view) #endif - if (context->current_tag->name) - start_element(context); + if (context->current_tag->name) + start_element(context); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); @@ -3516,10 +3433,10 @@ top1: break; } HTChunkPutc(string, c); - context->state = S_attr; /* Get attribute */ + context->state = S_attr; /* Get attribute */ break; - /* accumulating value */ + /* accumulating value */ case S_attr: if (WHITE(c) || (c == '>') || (c == '=')) { /* End of word */ HTChunkTerminate(string); @@ -3527,13 +3444,13 @@ top1: #ifdef USE_PRETTYSRC if (!psrc_view) { #endif - string->size = 0; - if (c == '>') { /* End of tag */ - if (context->current_tag->name) - start_element(context); - context->state = S_text; - break; - } + string->size = 0; + if (c == '>') { /* End of tag */ + if (context->current_tag->name) + start_element(context); + context->state = S_text; + break; + } #ifdef USE_PRETTYSRC } else { PUTC(' '); @@ -3548,7 +3465,8 @@ top1: LYUpperCase(string->data); } PUTS(string->data); - if (c == '=' ) PUTC('='); + if (c == '=') + PUTC('='); if (c == '=' || c == '>') { if (context->current_attribute_number == INVALID) PSRCSTOP(badattr); @@ -3565,7 +3483,7 @@ top1: string->size = 0; } #endif - context->state = (c == '=' ? S_equals: S_attr_gap); + context->state = (c == '=' ? S_equals : S_attr_gap); } else { HTChunkPutc(string, c); } @@ -3606,7 +3524,7 @@ top1: break; } HTChunkPutc(string, c); - context->state = S_attr; /* Get next attribute */ + context->state = S_attr; /* Get next attribute */ break; case S_equals: /* After attr = */ @@ -3651,49 +3569,50 @@ top1: PSRCSTART(attrval); #endif context->state = S_value; - /* no break! fall through to S_value and process current `c` */ + /* no break! fall through to S_value and process current `c` */ case S_value: - if (WHITE(c) || (c == '>')) { /* End of word */ - HTChunkTerminate(string) ; + if (WHITE(c) || (c == '>')) { /* End of word */ + HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { - /*PSRCSTART(attrval);*/ + /*PSRCSTART(attrval); */ if (attr_is_name) { HTStartAnchor(context->target, string->data, NULL); - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); } else if (attr_is_href) { PSRCSTART(href); - HTStartAnchor(context->target,NULL,string->data); + HTStartAnchor(context->target, NULL, string->data); } PUTS_TR(string->data); if (attr_is_href) { - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); PSRCSTOP(href); } PSRCSTOP(attrval); } else #endif -#ifdef CJK_EX /* Quick hack. - JH7AYN */ - { char jis_buf[512]; +#ifdef CJK_EX /* Quick hack. - JH7AYN */ + { + char jis_buf[512]; + if (string->data[0] == '$') { - if (string->data[1] == 'B' || string->data[1] == '@') { - jis_buf[0] = '\033'; - strcpy(jis_buf + 1, string->data); - TO_EUC((const unsigned char *)jis_buf, (unsigned char *)string->data); - } + if (string->data[1] == 'B' || string->data[1] == '@') { + jis_buf[0] = '\033'; + strcpy(jis_buf + 1, string->data); + TO_EUC((const unsigned char *) jis_buf, + (unsigned char *) string->data); + } } } #endif handle_attribute_value(context, string->data); string->size = 0; - if (c == '>') { /* End of tag */ + if (c == '>') { /* End of tag */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); @@ -3705,8 +3624,8 @@ top1: start_element(context); context->state = S_text; break; - } - else context->state = S_tag_gap; + } else + context->state = S_tag_gap; } else if (context->T.decode_utf8 && *context->utf_buf) { HTChunkPuts(string, context->utf_buf); @@ -3733,40 +3652,38 @@ top1: case S_squoted: /* Quoted attribute value */ if (c == '\'') { /* End of attribute value */ - HTChunkTerminate(string) ; + HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { - /*PSRCSTART(attrval);*/ + /*PSRCSTART(attrval); */ if (attr_is_name) { - HTStartAnchor(context->target,string->data, NULL); - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + HTStartAnchor(context->target, string->data, NULL); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); } else if (attr_is_href) { PSRCSTART(href); - HTStartAnchor(context->target,NULL,string->data); + HTStartAnchor(context->target, NULL, string->data); } PUTS_TR(string->data); if (attr_is_href) { - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); PSRCSTOP(href); } PUTC('\''); PSRCSTOP(attrval); } else #endif - handle_attribute_value(context, string->data); + handle_attribute_value(context, string->data); string->size = 0; context->state = S_tag_gap; - } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1213 */ + } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1213 */ /* - ** Setting up for possible single quotes in CJK escape - ** sequences. - Takuya ASADA (asada@three-a.co.jp) - */ + * Setting up for possible single quotes in CJK escape sequences. + * - Takuya ASADA (asada@three-a.co.jp) + */ context->state = S_esc_sq; HTChunkPutc(string, c); } else if (context->T.decode_utf8 && @@ -3797,26 +3714,24 @@ top1: if (c == '"' || /* Valid end of attribute value */ (soft_dquotes && /* If emulating old Netscape bug, treat '>' */ c == '>')) { /* as a co-terminator of dquoted and tag */ - HTChunkTerminate(string) ; + HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { - /*PSRCSTART(attrval);*/ + /*PSRCSTART(attrval); */ if (attr_is_name) { - HTStartAnchor(context->target,string->data, NULL); - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + HTStartAnchor(context->target, string->data, NULL); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); } else if (attr_is_href) { PSRCSTART(href); - HTStartAnchor(context->target,NULL,string->data); + HTStartAnchor(context->target, NULL, string->data); } PUTS_TR(string->data); if (attr_is_href) { - (*context->actions->end_element)( - context->target, - HTML_A, - (char **)&context->include); + (*context->actions->end_element) (context->target, + HTML_A, + (char **) &context->include); PSRCSTOP(href); } PUTC(c); @@ -3824,16 +3739,16 @@ top1: } else #endif - handle_attribute_value(context, string->data); + handle_attribute_value(context, string->data); string->size = 0; context->state = S_tag_gap; if (c == '>') /* We emulated the Netscape bug, so we go */ goto top1; /* back and treat it as the tag terminator */ - } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1230 */ + } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1230 */ /* - ** Setting up for possible double quotes in CJK escape - ** sequences. - Takuya ASADA (asada@three-a.co.jp) - */ + * Setting up for possible double quotes in CJK escape sequences. + * - Takuya ASADA (asada@three-a.co.jp) + */ context->state = S_esc_dq; HTChunkPutc(string, c); } else if (context->T.decode_utf8 && @@ -3860,12 +3775,13 @@ top1: } break; - case S_end: /* </ */ + case S_end: /* </ */ if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1247 */ - IsNmChar(c) : IsNmStart(c))) { + IsNmChar(c) : IsNmStart(c))) { HTChunkPutc(string, c); - } else { /* End of end tag name */ - HTTag * t = 0; + } else { /* End of end tag name */ + HTTag *t = 0; + #ifdef USE_PRETTYSRC BOOL psrc_tagname_processed = FALSE; #endif @@ -3900,15 +3816,16 @@ top1: PUTC('>'); PSRCSTOP(abracket); } - psrc_tagname_processed=TRUE; + psrc_tagname_processed = TRUE; } } else if (psrc_view) { #endif } else { BOOL tag_OK = (BOOL) (c == '>' || WHITE(c)); + #if OPT HTMLElement e = TAGNUM_OF_TAGP(t); - int branch = 2; /* it can be 0,1,2*/ + int branch = 2; /* it can be 0,1,2 */ #endif context->current_tag = t; if (HAS_ALT_TAGNUM(TAGNUM_OF_TAGP(t)) && @@ -3918,19 +3835,36 @@ top1: #if OPT if (tag_OK #ifdef EXTENDED_HTMLDTD - && Old_DTD + && Old_DTD #endif - ) { - switch (e) { - case HTML_DD: case HTML_DT: case HTML_LI: case HTML_LH : - case HTML_TD: case HTML_TH: case HTML_TR: case HTML_THEAD: - case HTML_TFOOT : case HTML_TBODY : case HTML_COLGROUP: + ) { + switch (e) { + case HTML_DD: + case HTML_DT: + case HTML_LI: + case HTML_LH: + case HTML_TD: + case HTML_TH: + case HTML_TR: + case HTML_THEAD: + case HTML_TFOOT: + case HTML_TBODY: + case HTML_COLGROUP: branch = 0; break; - case HTML_A: case HTML_B: case HTML_BLINK: case HTML_CITE: - case HTML_EM: case HTML_FONT: case HTML_FORM: case HTML_I: - case HTML_P: case HTML_STRONG: case HTML_TT: case HTML_U: + case HTML_A: + case HTML_B: + case HTML_BLINK: + case HTML_CITE: + case HTML_EM: + case HTML_FONT: + case HTML_FORM: + case HTML_I: + case HTML_P: + case HTML_STRONG: + case HTML_TT: + case HTML_U: branch = 1; break; default: @@ -3941,36 +3875,36 @@ top1: #ifdef EXTENDED_HTMLDTD /* - ** Just handle ALL end tags normally :-) - kw - */ + * Just handle ALL end tags normally :-) - kw + */ if (!Old_DTD) { - end_element( context, context->current_tag); + end_element(context, context->current_tag); } else #endif /* EXTENDED_HTMLDTD */ - if (tag_OK && + if (tag_OK && #if OPT - (branch == 0) + (branch == 0) #else - (!strcasecomp(string->data, "DD") || - !strcasecomp(string->data, "DT") || - !strcasecomp(string->data, "LI") || - !strcasecomp(string->data, "LH") || - !strcasecomp(string->data, "TD") || - !strcasecomp(string->data, "TH") || - !strcasecomp(string->data, "TR") || - !strcasecomp(string->data, "THEAD") || - !strcasecomp(string->data, "TFOOT") || - !strcasecomp(string->data, "TBODY") || - !strcasecomp(string->data, "COLGROUP")) + (!strcasecomp(string->data, "DD") || + !strcasecomp(string->data, "DT") || + !strcasecomp(string->data, "LI") || + !strcasecomp(string->data, "LH") || + !strcasecomp(string->data, "TD") || + !strcasecomp(string->data, "TH") || + !strcasecomp(string->data, "TR") || + !strcasecomp(string->data, "THEAD") || + !strcasecomp(string->data, "TFOOT") || + !strcasecomp(string->data, "TBODY") || + !strcasecomp(string->data, "COLGROUP")) #endif - ) { + ) { /* - ** Don't treat these end tags as invalid, - ** nor act on them. - FM - */ + * Don't treat these end tags as invalid, nor act on them. + * - FM + */ CTRACE((tfp, "SGML: `</%s%c' found! Ignoring it.\n", - string->data, c)); + string->data, c)); string->size = 0; context->current_attribute_number = INVALID; if (c != '>') { @@ -3997,52 +3931,56 @@ top1: !strcasecomp(string->data, "TT") || !strcasecomp(string->data, "U")) #endif - ) { + ) { /* - ** Handle end tags for container elements declared - ** as SGML_EMPTY to prevent "expected tag substitution" - ** but still processed via HTML_end_element() in HTML.c - ** with checks there to avoid throwing the HTML.c stack - ** out of whack (Ugh, what a hack! 8-). - FM - */ + * Handle end tags for container elements declared as + * SGML_EMPTY to prevent "expected tag substitution" but + * still processed via HTML_end_element() in HTML.c with + * checks there to avoid throwing the HTML.c stack out of + * whack (Ugh, what a hack! 8-). - FM + */ if (context->inSELECT) { /* - ** We are in a SELECT block. - FM - */ + * We are in a SELECT block. - FM + */ if (strcasecomp(string->data, "FORM")) { /* - ** It is not at FORM end tag, so ignore it. - FM - */ - CTRACE((tfp, "SGML: ***Ignoring end tag </%s> in SELECT block.\n", - string->data)); + * It is not at FORM end tag, so ignore it. - FM + */ + CTRACE((tfp, + "SGML: ***Ignoring end tag </%s> in SELECT block.\n", + string->data)); } else { /* - ** End the SELECT block and then - ** handle the FORM end tag. - FM - */ - CTRACE((tfp, "SGML: ***Faking SELECT end tag before </%s> end tag.\n", - string->data)); + * End the SELECT block and then handle the FORM + * end tag. - FM + */ + CTRACE((tfp, + "SGML: ***Faking SELECT end tag before </%s> end tag.\n", + string->data)); end_element(context, SGMLFindTag(context->dtd, "SELECT")); CTRACE((tfp, "SGML: End </%s>\n", string->data)); #ifdef USE_PRETTYSRC - if (!psrc_view) /* Don't actually call if viewing psrc - kw */ + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ #endif - (*context->actions->end_element) - (context->target, - TAGNUM_OF_TAGP(context->current_tag), - (char **)&context->include); + (*context->actions->end_element) + (context->target, + TAGNUM_OF_TAGP(context->current_tag), + (char **) &context->include); } } else if (!strcasecomp(string->data, "P")) { /* - ** Treat a P end tag like a P start tag (Ugh, - ** what a hack! 8-). - FM - */ - CTRACE((tfp, "SGML: `</%s%c' found! Treating as '<%s%c'.\n", - string->data, c, string->data, c)); + * Treat a P end tag like a P start tag (Ugh, what a + * hack! 8-). - FM + */ + CTRACE((tfp, + "SGML: `</%s%c' found! Treating as '<%s%c'.\n", + string->data, c, string->data, c)); { int i; + for (i = 0; i < context->current_tag->number_of_attributes; i++) { @@ -4055,12 +3993,12 @@ top1: CTRACE((tfp, "SGML: End </%s>\n", string->data)); #ifdef USE_PRETTYSRC - if (!psrc_view) /* Don't actually call if viewing psrc - kw */ + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ #endif - (*context->actions->end_element) - (context->target, - TAGNUM_OF_TAGP(context->current_tag), - (char **)&context->include); + (*context->actions->end_element) + (context->target, + TAGNUM_OF_TAGP(context->current_tag), + (char **) &context->include); } string->size = 0; context->current_attribute_number = INVALID; @@ -4073,9 +4011,9 @@ top1: break; } else { /* - ** Handle all other end tags normally. - FM - */ - end_element( context, context->current_tag); + * Handle all other end tags normally. - FM + */ + end_element(context, context->current_tag); } } @@ -4093,7 +4031,7 @@ top1: } PUTS(string->data); PSRCSTOP(tag); - if ( c != '>' ) { + if (c != '>') { PSRCSTART(badtag); PUTC(c); } else { @@ -4108,7 +4046,7 @@ top1: context->current_attribute_number = INVALID; if (c != '>') { if (!WHITE(c)) - CTRACE((tfp,"SGML: `</%s%c' found!\n", string->data, c)); + CTRACE((tfp, "SGML: `</%s%c' found!\n", string->data, c)); context->state = S_junk_tag; } else { context->current_tag = NULL; @@ -4117,7 +4055,6 @@ top1: } break; - case S_esc: /* Expecting '$'or '(' following CJK ESC. */ if (c == '$') { context->state = S_dollar; @@ -4129,7 +4066,7 @@ top1: PUTC(c); break; - case S_dollar: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + case S_dollar: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ if (c == '@' || c == 'B' || c == 'A') { context->state = S_nonascii_text; } else if (c == '(') { @@ -4138,7 +4075,7 @@ top1: PUTC(c); break; - case S_dollar_paren: /* Expecting 'C' after CJK "ESC$(". */ + case S_dollar_paren: /* Expecting 'C' after CJK "ESC$(". */ if (c == 'C') { context->state = S_nonascii_text; } else { @@ -4147,7 +4084,7 @@ top1: PUTC(c); break; - case S_paren: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ + case S_paren: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ if (c == 'B' || c == 'J' || c == 'T') { context->state = S_text; } else if (c == 'I') { @@ -4158,8 +4095,8 @@ top1: PUTC(c); break; - case S_nonascii_text: /* Expecting CJK ESC after non-ASCII text. */ - if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1264 */ + case S_nonascii_text: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1264 */ context->state = S_esc; } PUTC(c); @@ -4167,7 +4104,7 @@ top1: context->state = S_text; break; - case S_esc_sq: /* Expecting '$'or '(' following CJK ESC. */ + case S_esc_sq: /* Expecting '$'or '(' following CJK ESC. */ if (c == '$') { context->state = S_dollar_sq; } else if (c == '(') { @@ -4178,7 +4115,7 @@ top1: HTChunkPutc(string, c); break; - case S_dollar_sq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + case S_dollar_sq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ if (c == '@' || c == 'B' || c == 'A') { context->state = S_nonascii_text_sq; } else if (c == '(') { @@ -4187,7 +4124,7 @@ top1: HTChunkPutc(string, c); break; - case S_dollar_paren_sq: /* Expecting 'C' after CJK "ESC$(". */ + case S_dollar_paren_sq: /* Expecting 'C' after CJK "ESC$(". */ if (c == 'C') { context->state = S_nonascii_text_sq; } else { @@ -4196,7 +4133,7 @@ top1: HTChunkPutc(string, c); break; - case S_paren_sq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ + case S_paren_sq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ if (c == 'B' || c == 'J' || c == 'T') { context->state = S_squoted; } else if (c == 'I') { @@ -4207,8 +4144,8 @@ top1: HTChunkPutc(string, c); break; - case S_nonascii_text_sq: /* Expecting CJK ESC after non-ASCII text. */ - if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1281 */ + case S_nonascii_text_sq: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1281 */ context->state = S_esc_sq; } HTChunkPutc(string, c); @@ -4225,7 +4162,7 @@ top1: HTChunkPutc(string, c); break; - case S_dollar_dq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + case S_dollar_dq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ if (c == '@' || c == 'B' || c == 'A') { context->state = S_nonascii_text_dq; } else if (c == '(') { @@ -4234,7 +4171,7 @@ top1: HTChunkPutc(string, c); break; - case S_dollar_paren_dq: /* Expecting 'C' after CJK "ESC$(". */ + case S_dollar_paren_dq: /* Expecting 'C' after CJK "ESC$(". */ if (c == 'C') { context->state = S_nonascii_text_dq; } else { @@ -4243,7 +4180,7 @@ top1: HTChunkPutc(string, c); break; - case S_paren_dq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ + case S_paren_dq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ if (c == 'B' || c == 'J' || c == 'T') { context->state = S_dquoted; } else if (c == 'I') { @@ -4254,8 +4191,8 @@ top1: HTChunkPutc(string, c); break; - case S_nonascii_text_dq: /* Expecting CJK ESC after non-ASCII text. */ - if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1298 */ + case S_nonascii_text_dq: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1298 */ context->state = S_esc_dq; } HTChunkPutc(string, c); @@ -4280,7 +4217,7 @@ top1: } #ifdef USE_PRETTYSRC else if (psrc_view) { - /*pack spaces until first non-space is seen*/ + /*pack spaces until first non-space is seen */ if (!context->seen_nonwhite_in_junk_tag) { if (!WHITE(c)) { context->seen_nonwhite_in_junk_tag = TRUE; @@ -4291,19 +4228,19 @@ top1: } #endif - } /* switch on context->state */ + } /* switch on context->state */ CTRACE2(TRACE_SGML, (tfp, "SGML after %s|%.*s|%c|\n", - state_name(context->state), - string->size, - NonNull(string->data), - UCH(c))); + state_name(context->state), + string->size, + NonNull(string->data), + UCH(c))); -after_switch: + after_switch: /* - ** Check whether an external function has added - ** anything to the include buffer. If so, move the - ** new stuff to the beginning of active_include. - kw - */ + * Check whether an external function has added anything to the include + * buffer. If so, move the new stuff to the beginning of active_include. + * - kw + */ if (context->include != NULL) { if (context->include[0] == '\0') { FREE(context->include); @@ -4320,8 +4257,8 @@ after_switch: } /* - ** Check whether we've added anything to the recover buffer. - FM - */ + * Check whether we've added anything to the recover buffer. - FM + */ if (context->recover != NULL) { if (context->recover[context->recover_index] == '\0') { FREE(context->recover); @@ -4334,10 +4271,9 @@ after_switch: } /* - ** Check whether an external function had added - ** anything to the include buffer; it should now be - ** in active_include. - FM / kw - */ + * Check whether an external function had added anything to the include + * buffer; it should now be in active_include. - FM / kw + */ if (context->active_include != NULL) { if (context->active_include[context->include_index] == '\0') { FREE(context->active_include); @@ -4346,25 +4282,25 @@ after_switch: if (context->current_tag_charset == UTF8_handle || context->T.trans_from_uni) { /* - * If it looks like we would have fed UTF-8 to the - * next processing stage, assume that whatever we were - * fed back is in UTF-8 form, too. This won't be always - * true for all uses of the include buffer, but it's a - * start. - kw + * If it looks like we would have fed UTF-8 to the next + * processing stage, assume that whatever we were fed back is + * in UTF-8 form, too. This won't be always true for all uses + * of the include buffer, but it's a start. - kw */ char *puni = context->active_include + context->include_index; + c = *puni; clong = UCGetUniFromUtf8String(&puni); if (clong < 256 && clong >= 0) { - c = ((char)(clong & 0xff)); + c = ((char) (clong & 0xff)); } saved_char_in = '\0'; context->include_index = puni - context->active_include + 1; goto top1; } else { /* - * Otherwise assume no UTF-8 - do charset-naive processing - * and hope for the best. - kw + * Otherwise assume no UTF-8 - do charset-naive processing and + * hope for the best. - kw */ c = context->active_include[context->include_index]; context->include_index++; @@ -4374,9 +4310,9 @@ after_switch: } /* - ** Check whether an external function has added - ** anything to the csi buffer. - FM - */ + * Check whether an external function has added anything to the csi buffer. + * - FM + */ if (context->csi != NULL) { if (context->csi[context->csi_index] == '\0') { FREE(context->csi); @@ -4387,26 +4323,22 @@ after_switch: goto top; } } -} /* SGML_character */ - +} /* SGML_character */ -static void SGML_string ( - HTStream * context, - const char* str) +static void SGML_string(HTStream *context, const char *str) { const char *p; + for (p = str; *p; p++) SGML_character(context, *p); } - -static void SGML_write ( - HTStream * context, - const char* str, - int l) +static void SGML_write(HTStream *context, const char *str, + int l) { const char *p; - const char *e = str+l; + const char *e = str + l; + for (p = str; p < e; p++) SGML_character(context, *p); } @@ -4415,34 +4347,34 @@ static void SGML_write ( */ /* Structured Object Class -** ----------------------- -*/ + * ----------------------- + */ const HTStreamClass SGMLParser = { - "SGMLParser", - SGML_free, - SGML_abort, - SGML_character, - SGML_string, - SGML_write, + "SGMLParser", + SGML_free, + SGML_abort, + SGML_character, + SGML_string, + SGML_write, }; /* Create SGML Engine -** ------------------ -** -** On entry, -** dtd represents the DTD, along with -** actions is the sink for the data as a set of routines. -** -*/ + * ------------------ + * + * On entry, + * dtd represents the DTD, along with + * actions is the sink for the data as a set of routines. + * + */ -HTStream* SGML_new ( - const SGML_dtd * dtd, - HTParentAnchor * anchor, - HTStructured * target) +HTStream *SGML_new(const SGML_dtd * dtd, + HTParentAnchor *anchor, + HTStructured * target) { int i; - HTStream* context = (HTStream *) malloc(sizeof(*context)); + HTStream *context = (HTStream *) malloc(sizeof(*context)); + if (!context) outofmem(__FILE__, "SGML_begin"); @@ -4452,17 +4384,17 @@ HTStream* SGML_new ( context->trailing_spaces = 0; context->dtd = dtd; context->target = target; - context->actions = (const HTStructuredClass*)(((HTStream*)target)->isa); - /* Ugh: no OO */ + context->actions = (const HTStructuredClass *) (((HTStream *) target)->isa); + /* Ugh: no OO */ context->unknown_tag = &HTTag_unrecognized; context->current_tag = context->slashedtag = NULL; context->state = S_text; context->kanji_buf = '\0'; - context->element_stack = 0; /* empty */ + context->element_stack = 0; /* empty */ context->inSELECT = FALSE; - context->no_lynx_specialcodes = NO; /* special codes normally generated */ + context->no_lynx_specialcodes = NO; /* special codes normally generated */ #ifdef CALLERDATA - context->callerData = (void*) callerData; + context->callerData = (void *) callerData; #endif /* CALLERDATA */ for (i = 0; i < MAX_ATTRIBUTES; i++) context->value[i] = 0; @@ -4475,7 +4407,7 @@ HTStream* SGML_new ( context->second_bracket = FALSE; context->isHex = FALSE; - context->node_anchor = anchor; /* Could be NULL? */ + context->node_anchor = anchor; /* Could be NULL? */ context->utf_count = 0; context->utf_char = 0; context->utf_buf[0] = context->utf_buf[6] = '\0'; @@ -4491,15 +4423,15 @@ HTStream* SGML_new ( context->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); } -#ifdef CAN_SWITCH_DISPLAY_CHARSET /* Allow a switch to a more suitable display charset */ +#ifdef CAN_SWITCH_DISPLAY_CHARSET /* Allow a switch to a more suitable display charset */ else if (anchor->UCStages && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl >= 0 - && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl != current_char_set ) { + && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl != current_char_set) { int o = anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl; - anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl = -1; /* Force reset */ + anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl = -1; /* Force reset */ HTAnchor_resetUCInfoStage(anchor, o, UCT_STAGE_PARSER, - /* Preserve change this: */ + /* Preserve change this: */ anchor->UCStages->s[UCT_STAGE_PARSER].lock); } #endif @@ -4534,11 +4466,11 @@ HTStream* SGML_new ( } /* Asian character conversion functions -** ==================================== -** -** Added 24-Mar-96 by FM, based on: -** -//////////////////////////////////////////////////////////////////////// + * ==================================== + * + * Added 24-Mar-96 by FM, based on: + * + //////////////////////////////////////////////////////////////////////// Copyright (c) 1993 Electrotechnical Laboratory (ETL) Permission to use, copy, modify, and distribute this material @@ -4563,76 +4495,76 @@ History: int TREAT_SJIS = 1; -void JISx0201TO0208_EUC ( - register unsigned char IHI, - register unsigned char ILO, - register unsigned char * OHI, - register unsigned char * OLO) +void JISx0201TO0208_EUC(register unsigned char IHI, + register unsigned char ILO, + register unsigned char *OHI, + register unsigned char *OLO) { - static char *table[] = { - "\241\243", /* A1,A3 */ - "\241\326", /* A1,D6 */ - "\241\327", /* A1,D7 */ - "\241\242", /* A1,A2 */ - "\241\246", /* A1,A6 */ - "\245\362", /* A5,F2 */ - "\245\241", /* A5,A1 */ - "\245\243", /* A5,A3 */ - "\245\245", /* A5,A5 */ - "\245\247", /* A5,A7 */ - "\245\251", /* A5,A9 */ - "\245\343", /* A5,E3 */ - "\245\345", /* A5,E5 */ - "\245\347", /* A5,E7 */ - "\245\303", /* A5,C3 */ - "\241\274", /* A1,BC */ - "\245\242", /* A5,A2 */ - "\245\244", /* A5,A4 */ - "\245\246", /* A5,A6 */ - "\245\250", /* A5,A8 */ - "\245\252", /* A5,AA */ - "\245\253", /* A5,AB */ - "\245\255", /* A5,AD */ - "\245\257", /* A5,AF */ - "\245\261", /* A5,B1 */ - "\245\263", /* A5,B3 */ - "\245\265", /* A5,B5 */ - "\245\267", /* A5,B7 */ - "\245\271", /* A5,B9 */ - "\245\273", /* A5,BB */ - "\245\275", /* A5,BD */ - "\245\277", /* A5,BF */ - "\245\301", /* A5,C1 */ - "\245\304", /* A5,C4 */ - "\245\306", /* A5,C6 */ - "\245\310", /* A5,C8 */ - "\245\312", /* A5,CA */ - "\245\313", /* A5,CB */ - "\245\314", /* A5,CC */ - "\245\315", /* A5,CD */ - "\245\316", /* A5,CE */ - "\245\317", /* A5,CF */ - "\245\322", /* A5,D2 */ - "\245\325", /* A5,D5 */ - "\245\330", /* A5,D8 */ - "\245\333", /* A5,DB */ - "\245\336", /* A5,DE */ - "\245\337", /* A5,DF */ - "\245\340", /* A5,E0 */ - "\245\341", /* A5,E1 */ - "\245\342", /* A5,E2 */ - "\245\344", /* A5,E4 */ - "\245\346", /* A5,E6 */ - "\245\350", /* A5,E8 */ - "\245\351", /* A5,E9 */ - "\245\352", /* A5,EA */ - "\245\353", /* A5,EB */ - "\245\354", /* A5,EC */ - "\245\355", /* A5,ED */ - "\245\357", /* A5,EF */ - "\245\363", /* A5,F3 */ - "\241\253", /* A1,AB */ - "\241\254" /* A1,AC */ + static char *table[] = + { + "\241\243", /* A1,A3 */ + "\241\326", /* A1,D6 */ + "\241\327", /* A1,D7 */ + "\241\242", /* A1,A2 */ + "\241\246", /* A1,A6 */ + "\245\362", /* A5,F2 */ + "\245\241", /* A5,A1 */ + "\245\243", /* A5,A3 */ + "\245\245", /* A5,A5 */ + "\245\247", /* A5,A7 */ + "\245\251", /* A5,A9 */ + "\245\343", /* A5,E3 */ + "\245\345", /* A5,E5 */ + "\245\347", /* A5,E7 */ + "\245\303", /* A5,C3 */ + "\241\274", /* A1,BC */ + "\245\242", /* A5,A2 */ + "\245\244", /* A5,A4 */ + "\245\246", /* A5,A6 */ + "\245\250", /* A5,A8 */ + "\245\252", /* A5,AA */ + "\245\253", /* A5,AB */ + "\245\255", /* A5,AD */ + "\245\257", /* A5,AF */ + "\245\261", /* A5,B1 */ + "\245\263", /* A5,B3 */ + "\245\265", /* A5,B5 */ + "\245\267", /* A5,B7 */ + "\245\271", /* A5,B9 */ + "\245\273", /* A5,BB */ + "\245\275", /* A5,BD */ + "\245\277", /* A5,BF */ + "\245\301", /* A5,C1 */ + "\245\304", /* A5,C4 */ + "\245\306", /* A5,C6 */ + "\245\310", /* A5,C8 */ + "\245\312", /* A5,CA */ + "\245\313", /* A5,CB */ + "\245\314", /* A5,CC */ + "\245\315", /* A5,CD */ + "\245\316", /* A5,CE */ + "\245\317", /* A5,CF */ + "\245\322", /* A5,D2 */ + "\245\325", /* A5,D5 */ + "\245\330", /* A5,D8 */ + "\245\333", /* A5,DB */ + "\245\336", /* A5,DE */ + "\245\337", /* A5,DF */ + "\245\340", /* A5,E0 */ + "\245\341", /* A5,E1 */ + "\245\342", /* A5,E2 */ + "\245\344", /* A5,E4 */ + "\245\346", /* A5,E6 */ + "\245\350", /* A5,E8 */ + "\245\351", /* A5,E9 */ + "\245\352", /* A5,EA */ + "\245\353", /* A5,EB */ + "\245\354", /* A5,EC */ + "\245\355", /* A5,ED */ + "\245\357", /* A5,EF */ + "\245\363", /* A5,F3 */ + "\241\253", /* A1,AB */ + "\241\254" /* A1,AC */ }; if ((IHI == 0x8E) && (ILO >= 0xA1) && (ILO <= 0xDF)) { @@ -4644,7 +4576,7 @@ void JISx0201TO0208_EUC ( } } -static int IS_SJIS_STR (const unsigned char * str) +static int IS_SJIS_STR(const unsigned char *str) { const unsigned char *s; unsigned char ch; @@ -4659,10 +4591,9 @@ static int IS_SJIS_STR (const unsigned char * str) return 0; } -unsigned char * SJIS_TO_JIS1 ( - register unsigned char HI, - register unsigned char LO, - register unsigned char * JCODE) +unsigned char *SJIS_TO_JIS1(register unsigned char HI, + register unsigned char LO, + register unsigned char *JCODE) { HI -= UCH((HI <= 0x9F) ? 0x71 : 0xB1); HI = UCH((HI << 1) + 1); @@ -4679,10 +4610,9 @@ unsigned char * SJIS_TO_JIS1 ( return JCODE; } -unsigned char * JIS_TO_SJIS1 ( - register unsigned char HI, - register unsigned char LO, - register unsigned char * SJCODE) +unsigned char *JIS_TO_SJIS1(register unsigned char HI, + register unsigned char LO, + register unsigned char *SJCODE) { if (HI & 1) LO += UCH(0x1F); @@ -4699,10 +4629,9 @@ unsigned char * JIS_TO_SJIS1 ( return SJCODE; } -unsigned char * EUC_TO_SJIS1 ( - unsigned char HI, - unsigned char LO, - register unsigned char * SJCODE) +unsigned char *EUC_TO_SJIS1(unsigned char HI, + unsigned char LO, + register unsigned char *SJCODE) { if (HI == 0x8E) JISx0201TO0208_EUC(HI, LO, &HI, &LO); @@ -4710,10 +4639,9 @@ unsigned char * EUC_TO_SJIS1 ( return SJCODE; } -void JISx0201TO0208_SJIS ( - register unsigned char I, - register unsigned char * OHI, - register unsigned char * OLO) +void JISx0201TO0208_SJIS(register unsigned char I, + register unsigned char *OHI, + register unsigned char *OLO) { unsigned char SJCODE[2]; @@ -4723,10 +4651,9 @@ void JISx0201TO0208_SJIS ( *OLO = SJCODE[1]; } -unsigned char * SJIS_TO_EUC1 ( - unsigned char HI, - unsigned char LO, - unsigned char * data) +unsigned char *SJIS_TO_EUC1(unsigned char HI, + unsigned char LO, + unsigned char *data) { SJIS_TO_JIS1(HI, LO, data); data[0] |= 0x80; @@ -4734,9 +4661,8 @@ unsigned char * SJIS_TO_EUC1 ( return data; } -unsigned char * SJIS_TO_EUC ( - unsigned char * src, - unsigned char * dst) +unsigned char *SJIS_TO_EUC(unsigned char *src, + unsigned char *dst) { register unsigned char hi, lo, *sp, *dp; register int in_sjis = 0; @@ -4757,9 +4683,8 @@ unsigned char * SJIS_TO_EUC ( return dst; } -unsigned char * EUC_TO_SJIS ( - unsigned char * src, - unsigned char * dst) +unsigned char *EUC_TO_SJIS(unsigned char *src, + unsigned char *dst) { register unsigned char *sp, *dp; @@ -4782,11 +4707,10 @@ unsigned char * EUC_TO_SJIS ( #define Strcpy(a,b) (strcpy((char*)a,(const char*)b),&a[strlen((const char*)a)]) -unsigned char *EUC_TO_JIS ( - unsigned char * src, - unsigned char * dst, - const char * toK, - const char * toA) +unsigned char *EUC_TO_JIS(unsigned char *src, + unsigned char *dst, + const char *toK, + const char *toA) { register unsigned char kana_mode = 0; register unsigned char cch; @@ -4833,9 +4757,8 @@ unsigned char *EUC_TO_JIS ( int repair_JIS = 0; -static const unsigned char *repairJIStoEUC ( - const unsigned char * src, - unsigned char ** dstp) +static const unsigned char *repairJIStoEUC(const unsigned char *src, + unsigned char **dstp) { const unsigned char *s; unsigned char *d, ch1, ch2; @@ -4858,9 +4781,8 @@ static const unsigned char *repairJIStoEUC ( return 0; } -unsigned char *TO_EUC ( - const unsigned char * jis, - unsigned char * euc) +unsigned char *TO_EUC(const unsigned char *jis, + unsigned char *euc) { register const unsigned char *s; register unsigned char c, jis_stat; @@ -4890,6 +4812,7 @@ unsigned char *TO_EUC ( if (c == to2B && jis_stat == 0 && repair_JIS) { if (*s == 'B' || *s == '@') { const unsigned char *ts; + if ((ts = repairJIStoEUC(s + 1, &d)) != NULL) { s = ts; continue; @@ -4948,7 +4871,7 @@ unsigned char *TO_EUC ( #define non94(ch) ((ch) <= 0x20 || (ch) == 0x7F) -static int is_EUC_JP (unsigned char * euc) +static int is_EUC_JP(unsigned char *euc) { unsigned char *cp; int ch1, ch2; @@ -4970,41 +4893,39 @@ static int is_EUC_JP (unsigned char * euc) return 1; } -void TO_SJIS ( - const unsigned char * any, - unsigned char * sjis) +void TO_SJIS(const unsigned char *arg, + unsigned char *sjis) { unsigned char *euc; - euc = malloc(strlen((const char *) any) + 1); + euc = malloc(strlen((const char *) arg) + 1); #ifdef CJK_EX if (!euc) outofmem(__FILE__, "TO_SJIS"); #endif - TO_EUC(any, euc); + TO_EUC(arg, euc); if (is_EUC_JP(euc)) EUC_TO_SJIS(euc, sjis); else - strcpy((char *) sjis, (const char *) any); + strcpy((char *) sjis, (const char *) arg); free(euc); } -void TO_JIS ( - const unsigned char * any, - unsigned char * jis) +void TO_JIS(const unsigned char *arg, + unsigned char *jis) { unsigned char *euc; - if (any[0] == 0) { + if (arg[0] == 0) { jis[0] = 0; return; } - euc = malloc(strlen((const char *) any) + 1); + euc = malloc(strlen((const char *) arg) + 1); #ifdef CJK_EX if (!euc) outofmem(__FILE__, "TO_JIS"); #endif - TO_EUC(any, euc); + TO_EUC(arg, euc); is_EUC_JP(euc); EUC_TO_JIS(euc, jis, TO_KANJI, TO_ASCII); |