/* * $LynxId: SGML.c,v 1.117 2008/09/21 18:34:39 tom Exp $ * * General SGML Parser code SGML.c * ======================== * * This module implements an HTStream object. To parse an * SGML file, create this object which is a parser. The object * is (currently) created by being passed a DTD structure, * and a target HTStructured object at which to throw the parsed stuff. * * 6 Feb 93 Binary searches used. Interface modified. */ #include #include #include #include #include /* FIXME: this doesn't belong in SGML.c */ #include #include #include #include #include #include #include /* S/390 -- gil -- 0635 */ #include #include #include #include #ifdef USE_COLOR_STYLE # include #endif #ifdef USE_PRETTYSRC # include #endif #define INVALID (-1) static int sgml_offset; #ifdef USE_PRETTYSRC static char *entity_string; /* this is used for printing entity name. Unconditionally added since redundant assigments don't hurt much */ static void fake_put_character(void *p GCC_UNUSED, char c GCC_UNUSED) { } #define START TRUE #define STOP FALSE #define PUTS_TR(x) psrc_convert_string = TRUE; PUTS(x) #endif /* my_casecomp() - optimized by the first character, NOT_ASCII ok */ #define my_casecomp(a,b) ((TOUPPER(*a) == TOUPPER(*b)) ? \ AS_casecomp(a,b) : \ (TOASCII(TOUPPER(*a)) - TOASCII(TOUPPER(*b)))) /* will use partially inlined version */ #define orig_HTChunkPutUtf8Char HTChunkPutUtf8Char #undef HTChunkPutUtf8Char /* ...used for comments and attributes value like href... */ #define HTChunkPutUtf8Char(ch,x) \ { \ if ((TOASCII(x) < 128) && (ch->size < ch->allocated)) \ ch->data[ch->size++] = (char)x; \ else \ orig_HTChunkPutUtf8Char(ch,x); \ } #define PUTS(str) ((*context->actions->put_string)(context->target, str)) #define PUTC(ch) ((*context->actions->put_character)(context->target, ch)) #define PUTUTF8(code) (UCPutUtf8_charstring((HTStream *)context->target, \ (putc_func_t*)(context->actions->put_character), code)) /*the following macros are used for pretty source view. 
*/ #define IS_C(attr) (attr.type == HTMLA_CLASS) HTCJKlang HTCJK = NOCJK; /* CJK enum value. */ BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */ BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */ BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ /* The State (context) of the parser * * This is passed with each call to make the parser reentrant * */ #define MAX_ATTRIBUTES 36 /* Max number of attributes per element */ /* Element Stack * ------------- * This allows us to return down the stack reselecting styles. * As we return, attribute values will be garbage in general. */ typedef struct _HTElement HTElement; struct _HTElement { HTElement *next; /* Previously nested element or 0 */ HTTag *tag; /* The tag at this level */ }; typedef enum { S_text = 0 ,S_attr ,S_attr_gap ,S_comment ,S_cro ,S_doctype ,S_dollar ,S_dollar_dq ,S_dollar_paren ,S_dollar_paren_dq ,S_dollar_paren_sq ,S_dollar_sq ,S_dquoted ,S_end ,S_entity ,S_equals ,S_ero ,S_esc ,S_esc_dq ,S_esc_sq ,S_exclamation ,S_in_kanji ,S_incro ,S_junk_tag ,S_litteral ,S_marked ,S_nonascii_text ,S_nonascii_text_dq ,S_nonascii_text_sq ,S_paren ,S_paren_dq ,S_paren_sq ,S_pcdata ,S_pi ,S_script ,S_sgmlatt ,S_sgmlele ,S_sgmlent ,S_squoted ,S_tag ,S_tag_gap ,S_tagname_slash ,S_value } sgml_state; /* Internal Context Data Structure * ------------------------------- */ struct _HTStream { const HTStreamClass *isa; /* inherited from HTStream */ const SGML_dtd *dtd; const HTStructuredClass *actions; /* target class */ HTStructured *target; /* target object */ HTTag *current_tag; HTTag *slashedtag; const HTTag *unknown_tag; BOOL extended_html; /* xhtml */ BOOL strict_xml; /* xml */ BOOL inSELECT; BOOL no_lynx_specialcodes; int current_attribute_number; HTChunk *string; int leading_spaces; int trailing_spaces; HTElement *element_stack; sgml_state state; unsigned char kanji_buf; #ifdef CALLERDATA void *callerData; #endif /* CALLERDATA */ 
BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */ char *value[MAX_ATTRIBUTES]; /* NULL, or strings alloc'd with StrAllocCopy_extra() */ BOOL lead_exclamation; BOOL first_dash; BOOL end_comment; BOOL doctype_bracket; BOOL first_bracket; BOOL second_bracket; BOOL isHex; HTParentAnchor *node_anchor; LYUCcharset *inUCI; /* pointer to anchor UCInfo */ int inUCLYhndl; /* charset we are fed */ LYUCcharset *outUCI; /* anchor UCInfo for target */ int outUCLYhndl; /* charset for target */ char utf_count; UCode_t utf_char; char utf_buf[8]; char *utf_buf_p; UCTransParams T; int current_tag_charset; /* charset to pass attributes */ char *recover; int recover_index; char *include; char *active_include; int include_index; char *url; char *csi; int csi_index; #ifdef USE_PRETTYSRC BOOL cur_attr_is_href; BOOL cur_attr_is_name; #endif }; #ifdef NO_LYNX_TRACE #define state_name(n) "state" #else static const char *state_name(sgml_state n) { const char *result = "?"; /* *INDENT-OFF* */ switch (n) { case S_attr: result = "S_attr"; break; case S_attr_gap: result = "S_attr_gap"; break; case S_comment: result = "S_comment"; break; case S_cro: result = "S_cro"; break; case S_doctype: result = "S_doctype"; break; case S_dollar: result = "S_dollar"; break; case S_dollar_dq: result = "S_dollar_dq"; break; case S_dollar_paren: result = "S_dollar_paren"; break; case S_dollar_paren_dq: result = "S_dollar_paren_dq"; break; case S_dollar_paren_sq: result = "S_dollar_paren_sq"; break; case S_dollar_sq: result = "S_dollar_sq"; break; case S_dquoted: result = "S_dquoted"; break; case S_end: result = "S_end"; break; case S_entity: result = "S_entity"; break; case S_equals: result = "S_equals"; break; case S_ero: result = "S_ero"; break; case S_esc: result = "S_esc"; break; case S_esc_dq: result = "S_esc_dq"; break; case S_esc_sq: result = "S_esc_sq"; break; case S_exclamation: result = "S_exclamation"; break; case S_in_kanji: result = "S_in_kanji"; break; case S_incro: result = "S_incro"; 
break; case S_pi: result = "S_pi"; break; case S_junk_tag: result = "S_junk_tag"; break; case S_litteral: result = "S_litteral"; break; case S_marked: result = "S_marked"; break; case S_nonascii_text: result = "S_nonascii_text"; break; case S_nonascii_text_dq: result = "S_nonascii_text_dq"; break; case S_nonascii_text_sq: result = "S_nonascii_text_sq"; break; case S_paren: result = "S_paren"; break; case S_paren_dq: result = "S_paren_dq"; break; case S_paren_sq: result = "S_paren_sq"; break; case S_pcdata: result = "S_pcdata"; break; case S_script: result = "S_script"; break; case S_sgmlatt: result = "S_sgmlatt"; break; case S_sgmlele: result = "S_sgmlele"; break; case S_sgmlent: result = "S_sgmlent"; break; case S_squoted: result = "S_squoted"; break; case S_tag: result = "S_tag"; break; case S_tag_gap: result = "S_tag_gap"; break; case S_tagname_slash: result = "S_tagname_slash"; break; case S_text: result = "S_text"; break; case S_value: result = "S_value"; break; } /* *INDENT-ON* */ return result; } #endif /* storage for Element Stack */ #define DEPTH 10 static HTElement pool[DEPTH]; static int depth = 0; static HTElement *pool_alloc(void) { depth++; if (depth > DEPTH) return (HTElement *) malloc(sizeof(HTElement)); return (pool + depth - 1); } static void pool_free(HTElement * e) { if (depth > DEPTH) FREE(e); depth--; return; } #ifdef USE_PRETTYSRC static void HTMLSRC_apply_markup(HTStream *context, HTlexeme lexeme, BOOL start) { HT_tagspec *ts = *((start ? lexeme_start : lexeme_end) + lexeme); while (ts) { #ifdef USE_COLOR_STYLE if (ts->start) { current_tag_style = ts->style; force_current_tag_style = TRUE; forced_classname = ts->class_name; force_classname = TRUE; } #endif CTRACE((tfp, ts->start ? 
"SRCSTART %d\n" : "SRCSTOP %d\n", (int) lexeme)); if (ts->start) (*context->actions->start_element) (context->target, ts->element, ts->present, (const char **) ts->value, context->current_tag_charset, &context->include); else (*context->actions->end_element) (context->target, ts->element, &context->include); ts = ts->next; } } #define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_##x,START) #define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_##x,STOP) #define attr_is_href context->cur_attr_is_href #define attr_is_name context->cur_attr_is_name #endif static void set_chartrans_handling(HTStream *context, HTParentAnchor *anchor, int chndl) { if (chndl < 0) { /* * Nothing was set for the parser in earlier stages, so the HTML * parser's UCLYhndl should still be its default. - FM */ chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED); if (chndl < 0) /* * That wasn't set either, so seek the HText default. - FM */ chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); if (chndl < 0) /* * That wasn't set either, so assume the current display character * set. - FM */ chndl = current_char_set; /* * Try to set the HText and HTML stages' chartrans info with the * default lock level (will not be changed if it was set previously * with a higher lock level). - FM */ HTAnchor_setUCInfoStage(anchor, chndl, UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); HTAnchor_setUCInfoStage(anchor, chndl, UCT_STAGE_STRUCTURED, UCT_SETBY_DEFAULT); /* * Get the chartrans info for output to the HTML parser. - FM */ context->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_STRUCTURED); context->outUCLYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_STRUCTURED); } /* * Set the in->out transformation parameters. 
- FM */ UCSetTransParams(&context->T, context->inUCLYhndl, context->inUCI, context->outUCLYhndl, context->outUCI); /* * This is intended for passing the SGML parser's input charset as an * argument in each call to the HTML parser's start tag function, but it * would be better to call a Lynx_HTML_parser function to set an element in * its HTStructured object, itself, if this were needed. - FM */ #ifndef EXP_JAPANESEUTF8_SUPPORT if (IS_CJK_TTY) { context->current_tag_charset = -1; } else #endif if (context->T.transp) { context->current_tag_charset = context->inUCLYhndl; } else if (context->T.decode_utf8) { context->current_tag_charset = context->inUCLYhndl; } else if (context->T.do_8bitraw || context->T.use_raw_char_in) { context->current_tag_charset = context->inUCLYhndl; } else if (context->T.output_utf8 || context->T.trans_from_uni) { context->current_tag_charset = UCGetLYhndl_byMIME("utf-8"); } else { context->current_tag_charset = LATIN1; } } static void change_chartrans_handling(HTStream *context) { int new_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_PARSER); if (new_LYhndl != context->inUCLYhndl && new_LYhndl >= 0) { /* * Something changed. but ignore if a META wants an unknown charset. */ LYUCcharset *new_UCI = HTAnchor_getUCInfoStage(context->node_anchor, UCT_STAGE_PARSER); if (new_UCI) { LYUCcharset *next_UCI = HTAnchor_getUCInfoStage(context->node_anchor, UCT_STAGE_STRUCTURED); int next_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_STRUCTURED); context->inUCI = new_UCI; context->inUCLYhndl = new_LYhndl; context->outUCI = next_UCI; context->outUCLYhndl = next_LYhndl; set_chartrans_handling(context, context->node_anchor, next_LYhndl); } } } #ifdef USE_COLOR_STYLE #include static int current_is_class = 0; #endif /* Handle Attribute * ---------------- */ /* PUBLIC const char * SGML_default = ""; ?? 
*/ static void handle_attribute_name(HTStream *context, const char *s) { HTTag *tag = context->current_tag; const attr *attributes = tag->attributes; int high, low, i, diff; #ifdef USE_PRETTYSRC if (psrc_view) { attr_is_href = FALSE; attr_is_name = FALSE; } #endif /* * Ignore unknown tag. - KW */ if (tag == context->unknown_tag) { #ifdef USE_PRETTYSRC if (psrc_view) context->current_attribute_number = 1; /* anything !=INVALID */ #endif return; } /* * Binary search for attribute name. */ for (low = 0, high = tag->number_of_attributes; high > low; diff < 0 ? (low = i + 1) : (high = i)) { i = (low + (high - low) / 2); diff = my_casecomp(attributes[i].name, s); if (diff == 0) { /* success: found it */ context->current_attribute_number = i; #ifdef USE_PRETTYSRC if (psrc_view) { attr_is_name = (BOOL) (attributes[i].type == HTMLA_ANAME); attr_is_href = (BOOL) (attributes[i].type == HTMLA_HREF); } else #endif { context->present[i] = YES; Clear_extra(context->value[i]); #ifdef USE_COLOR_STYLE # ifdef USE_PRETTYSRC current_is_class = IS_C(attributes[i]); # else current_is_class = (!strcasecomp("class", s)); # endif CTRACE((tfp, "SGML: found attribute %s, %d\n", s, current_is_class)); #endif } return; } /* if */ } /* for */ CTRACE((tfp, "SGML: Unknown attribute %s for tag %s\n", s, NonNull(context->current_tag->name))); context->current_attribute_number = INVALID; /* Invalid */ } /* Handle attribute value * ---------------------- */ static void handle_attribute_value(HTStream *context, const char *s) { if (context->current_attribute_number != INVALID) { StrAllocCopy_extra(context->value[context->current_attribute_number], s); #ifdef USE_COLOR_STYLE if (current_is_class) { strncpy(class_string, s, TEMPSTRINGSIZE); CTRACE((tfp, "SGML: class is '%s'\n", s)); } else { CTRACE((tfp, "SGML: attribute value is '%s'\n", s)); } #endif } else { CTRACE((tfp, "SGML: Attribute value %s ***ignored\n", s)); } context->current_attribute_number = INVALID; /* can't have two assignments! 
*/ } /* * Translate some Unicodes to Lynx special codes and output them. * Special codes - ones those output depend on parsing. * * Additional issue, like handling bidirectional text if necessary * may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) * - currently they are ignored in SGML.c and LYCharUtils.c * but also in UCdomap.c because they are non printable... * */ static BOOL put_special_unicodes(HTStream *context, UCode_t code) { /* (Tgf_nolyspcl) */ if (context->no_lynx_specialcodes) { /* * We were asked by a "DTD" flag to not generate lynx specials. - kw */ return NO; } if (code == CH_NBSP) { /* S/390 -- gil -- 0657 */ /* * Use Lynx special character for nbsp. */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif PUTC(HT_NON_BREAK_SPACE); } else if (code == CH_SHY) { /* * Use Lynx special character for shy. */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif PUTC(LY_SOFT_HYPHEN); } else if (code == 8194 || code == 8201) { /* * Use Lynx special character for ensp or thinsp. * * Originally, Lynx use space '32' as word delimiter and omits this * space at end of line if word is wrapped to the next line. There are * several other spaces in the Unicode repertoire and we should teach * Lynx to understand them, not only as regular characters but in the * context of line wrapping. Unfortunately, if we use HT_EN_SPACE we * override the chartrans tables for those spaces with a single '32' * for all (but do line wrapping more fancy). * * We may treat emsp as one or two ensp (below). */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif PUTC(HT_EN_SPACE); } else if (code == 8195) { /* * Use Lynx special character for emsp. */ #ifdef USE_PRETTYSRC if (!psrc_view) { #endif /* PUTC(HT_EN_SPACE); let's stay with a single space :) */ PUTC(HT_EN_SPACE); #ifdef USE_PRETTYSRC } #endif } else { /* * Return NO if nothing done. */ return NO; } /* * We have handled it. 
*/ return YES; } #ifdef USE_PRETTYSRC static void put_pretty_entity(HTStream *context, int term) { PSRCSTART(entity); PUTC('&'); PUTS(entity_string); if (term) PUTC((char) term); PSRCSTOP(entity); } static void put_pretty_number(HTStream *context) { PSRCSTART(entity); PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); PUTC(';'); PSRCSTOP(entity); } #endif /* USE_PRETTYSRC */ /* Handle entity * ------------- * * On entry, * s contains the entity name zero terminated * Bugs: * If the entity name is unknown, the terminator is treated as * a printable non-special character in all cases, even if it is '<' * Bug-fix: * Modified SGML_character() so we only come here with terminator * as '\0' and check a FoundEntity flag. -- Foteos Macrides * * Modified more (for use with Lynx character translation code): */ static char replace_buf[64]; /* buffer for replacement strings */ static BOOL FoundEntity = FALSE; static void handle_entity(HTStream *context, char term) { UCode_t code; long uck = -1; const char *s = context->string->data; /* * Handle all entities normally. - FM */ FoundEntity = FALSE; if ((code = HTMLGetEntityUCValue(s)) != 0) { /* * We got a Unicode value for the entity name. Check for special * Unicodes. - FM */ if (put_special_unicodes(context, code)) { #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } #endif FoundEntity = TRUE; return; } /* * Seek a translation from the chartrans tables. */ if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 && /* =============== work in ASCII below here =============== S/390 -- gil -- 0672 */ uck < 256 && (uck < 127 || uck >= LYlowest_eightbit[context->outUCLYhndl])) { #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } else #endif PUTC(FROMASCII((char) uck)); FoundEntity = TRUE; return; } else if ((uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32)) && /* * Not found; look for replacement string. 
*/ (uck = UCTransUniCharStr(replace_buf, 60, code, context->outUCLYhndl, 0) >= 0)) { #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } else #endif PUTS(replace_buf); FoundEntity = TRUE; return; } /* * If we're displaying UTF-8, try that now. - FM */ #ifndef USE_PRETTYSRC if (context->T.output_utf8 && PUTUTF8(code)) { FoundEntity = TRUE; return; } #else if (context->T.output_utf8 && (psrc_view ? (UCPutUtf8_charstring((HTStream *) context->target, (putc_func_t *) (fake_put_character), code)) : PUTUTF8(code))) { if (psrc_view) { put_pretty_entity(context, term); } FoundEntity = TRUE; return; } #endif /* * If it's safe ASCII, use it. - FM */ if (code >= 32 && code < 127) { #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } else #endif PUTC(FROMASCII((char) code)); FoundEntity = TRUE; return; } /* =============== work in ASCII above here =============== S/390 -- gil -- 0682 */ /* * Ignore zwnj (8204) and zwj (8205), if we get to here. Note that * zwnj may have been handled as by the calling function. - FM */ if (!strcmp(s, "zwnj") || !strcmp(s, "zwj")) { CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } #endif FoundEntity = TRUE; return; } /* * Ignore lrm (8206), and rln (8207), if we get to here. - FM */ if (!strcmp(s, "lrm") || !strcmp(s, "rlm")) { CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_entity(context, term); } #endif FoundEntity = TRUE; return; } } /* * If entity string not found, display as text. 
*/ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif CTRACE((tfp, "SGML: Unknown entity '%s' %" PRI_UCode_t " %ld\n", s, code, uck)); /* S/390 -- gil -- 0695 */ PUTC('&'); PUTS(s); if (term != '\0') PUTC(term); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif } /* Handle comment * -------------- */ static void handle_comment(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Comment:\n<%s>\n", s)); if (context->csi == NULL && strncmp(s, "!--#", 4) == 0 && LYCheckForCSI(context->node_anchor, &context->url) == TRUE) { LYDoCSI(context->url, s, &context->csi); } else { LYCommentHacks(context->node_anchor, context->string->data); } return; } /* Handle identifier * ----------------- */ static void handle_identifier(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Identifier:\n<%s>\n", s)); return; } /* Handle doctype * -------------- */ static void handle_doctype(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Doctype:\n<%s>\n", s)); if (strstr(s, "DTD XHTML ") != 0) { CTRACE((tfp, "...processing extended HTML\n")); context->extended_html = TRUE; } return; } /* Handle marked * ------------- */ static void handle_marked(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Marked Section:\n<%s>\n", s)); if (!strncmp(context->string->data, "![INCLUDE[", 10)) { context->string->data[context->string->size - 3] = '\0'; StrAllocCat(context->include, context->string->data + 10); /* @@@ This needs to take charset into account! 
@@@ the wrong assumptions will be made about the data's charset once it is in include - kw */ } else if (!strncmp(context->string->data, "![CDATA[", 8)) { (*context->actions->put_block) (context->target, context->string->data + 8, context->string->size - 11); } return; } /* Handle processing instruction * ----------------------------- */ static void handle_processing_instruction(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Processing instruction:\n<%s>\n", s)); if (!strncmp(s, "?xml ", 5)) { int flag = context->T.decode_utf8; context->strict_xml = TRUE; /* * Switch to UTF-8 if the encoding is explicitly "utf-8". */ if (!flag) { char *t = strstr(s, "encoding="); if (t != 0) { t += 9; if (*t == '"') ++t; flag = !strncmp(t, "utf-8", 5); } if (flag) { CTRACE((tfp, "...Use UTF-8 for XML\n")); context->T.decode_utf8 = TRUE; } } } return; } /* Handle sgmlent * -------------- */ static void handle_sgmlent(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Entity Declaration:\n<%s>\n", s)); return; } /* Handle sgmlent * -------------- */ static void handle_sgmlele(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Element Declaration:\n<%s>\n", s)); return; } /* Handle sgmlatt * -------------- */ static void handle_sgmlatt(HTStream *context) { const char *s = context->string->data; CTRACE((tfp, "SGML Attribute Declaration:\n<%s>\n", s)); return; } /* * Convenience macros - tags (elements) are identified sometimes by an int or * enum value ('TAGNUM'), sometimes by a pointer to HTTag ('TAGP'). - kw */ #define TAGNUM_OF_TAGP(t) (HTMLElement) (t - context->dtd->tags) #define TAGP_OF_TAGNUM(e) (context->dtd->tags + e) /* * The following implement special knowledge about OBJECT. As long as * HTML_OBJECT is the only tag for which an alternative variant exist, they can * be simple macros. - kw */ /* does 'TAGNUM' e have an alternative (variant) parsing mode? 
*/ #define HAS_ALT_TAGNUM(e) (e == HTML_OBJECT) /* return 'TAGNUM' of the alternative mode for 'TAGNUM' e, if any. */ #define ALT_TAGNUM(e) ((e == HTML_OBJECT) ? HTML_ALT_OBJECT : e) /* return 'TAGNUM' of the normal mode for 'TAGNUM' e which may be alt. */ #define NORMAL_TAGNUM(e) (((int)(e) >= HTML_ELEMENTS) ? HTML_OBJECT : (HTMLElement)e) /* More convenience stuff. - kw */ #define ALT_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(ALT_TAGNUM(e)) #define NORMAL_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(NORMAL_TAGNUM(e)) #define ALT_TAGP(t) ALT_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) #define NORMAL_TAGP(t) NORMAL_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) static BOOL element_valid_within(HTTag * new_tag, HTTag * stacked_tag, BOOL direct) { TagClass usecontains, usecontained; if (!stacked_tag || !new_tag) return YES; usecontains = (direct ? stacked_tag->contains : stacked_tag->icontains); usecontained = (direct ? new_tag->contained : new_tag->icontained); if (new_tag == stacked_tag) return (BOOL) ((Tgc_same & usecontains) && (Tgc_same & usecontained)); else return (BOOL) ((new_tag->tagclass & usecontains) && (stacked_tag->tagclass & usecontained)); } typedef enum { close_NO = 0, close_error = 1, close_valid = 2 } canclose_t; static canclose_t can_close(HTTag * new_tag, HTTag * stacked_tag) { if (!stacked_tag) return close_NO; if (stacked_tag->flags & Tgf_endO) return close_valid; else if (new_tag == stacked_tag) return ((Tgc_same & new_tag->canclose) ? close_error : close_NO); else return ((stacked_tag->tagclass & new_tag->canclose) ? 
close_error : close_NO); } static void do_close_stacked(HTStream *context) { HTElement *stacked = context->element_stack; HTMLElement e; if (!stacked) return; /* stack was empty */ if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { context->inSELECT = FALSE; } e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(stacked->tag)); #ifdef USE_PRETTYSRC if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif (*context->actions->end_element) (context->target, e, &context->include); context->element_stack = stacked->next; pool_free(stacked); context->no_lynx_specialcodes = context->element_stack ? (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; } static int is_on_stack(HTStream *context, HTTag * old_tag) { HTElement *stacked = context->element_stack; int i = 1; for (; stacked; stacked = stacked->next, i++) { if (stacked->tag == old_tag || stacked->tag == ALT_TAGP(old_tag)) return i; } return 0; } /* End element * ----------- */ static void end_element(HTStream *context, HTTag * old_tag) { BOOL extra_action_taken = NO; canclose_t canclose_check = close_valid; int stackpos = is_on_stack(context, old_tag); if (!Old_DTD) { while (canclose_check != close_NO && context->element_stack && (stackpos > 1 || (!extra_action_taken && stackpos == 0))) { if (stackpos == 0 && (old_tag->flags & Tgf_startO) && element_valid_within(old_tag, context->element_stack->tag, YES)) { CTRACE((tfp, "SGML: ignored\n", old_tag->name)); return; } canclose_check = can_close(old_tag, context->element_stack->tag); if (canclose_check != close_NO) { CTRACE((tfp, "SGML: End \t<- %s end \n", context->element_stack->tag->name, ((canclose_check == close_valid) ? "supplied," : "***forced by"), old_tag->name)); do_close_stacked(context); extra_action_taken = YES; stackpos = is_on_stack(context, old_tag); } } if (stackpos == 0 && old_tag->contents != SGML_EMPTY) { CTRACE((tfp, "SGML: Still open %s, ***no open %s for \n", context->element_stack ? 
context->element_stack->tag->name : "none", old_tag->name, old_tag->name)); return; } if (stackpos > 1) { CTRACE((tfp, "SGML: Nesting <%s>...<%s> \t<- ***invalid end \n", old_tag->name, context->element_stack->tag->name, old_tag->name)); return; } } /* Now let the non-extended code deal with the rest. - kw */ /* * If we are in a SELECT block, ignore anything but a SELECT end tag. - FM */ if (context->inSELECT) { if (!strcasecomp(old_tag->name, "SELECT")) { /* * Turn off the inSELECT flag and fall through. - FM */ context->inSELECT = FALSE; } else { /* * Ignore the end tag. - FM */ CTRACE((tfp, "SGML: ***Ignoring end tag in SELECT block.\n", old_tag->name)); return; } } /* * Handle the end tag. - FM */ CTRACE((tfp, "SGML: End \n", old_tag->name)); if (old_tag->contents == SGML_EMPTY) { CTRACE((tfp, "SGML: ***Illegal end tag found.\n", old_tag->name)); return; } #ifdef WIND_DOWN_STACK while (context->element_stack) /* Loop is error path only */ #else if (context->element_stack) /* Substitute and remove one stack element */ #endif /* WIND_DOWN_STACK */ { int status = HT_OK; HTMLElement e; HTElement *N = context->element_stack; HTTag *t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag; if (old_tag != t) { /* Mismatch: syntax error */ if (context->element_stack->next) { /* This is not the last level */ CTRACE((tfp, "SGML: Found when expecting . ***assumed.\n", old_tag->name, t->name, t->name)); } else { /* last level */ CTRACE((tfp, "SGML: Found when expecting . 
***Ignored.\n", old_tag->name, t->name, old_tag->name)); return; /* Ignore */ } } e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)); CTRACE2(TRACE_SGML, (tfp, "tagnum(%p) = %d\n", (void *) t, (int) e)); #ifdef USE_PRETTYSRC if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif status = (*context->actions->end_element) (context->target, e, &context->include); if (status == HT_PARSER_REOPEN_ELT) { CTRACE((tfp, "SGML: Restart <%s>\n", t->name)); (*context->actions->start_element) (context->target, e, NULL, NULL, context->current_tag_charset, &context->include); } else if (status == HT_PARSER_OTHER_CONTENT) { CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name)); context->element_stack->tag = ALT_TAGP_OF_TAGNUM(e); } else { context->element_stack = N->next; /* Remove from stack */ pool_free(N); } context->no_lynx_specialcodes = context->element_stack ? (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; #ifdef WIND_DOWN_STACK if (old_tag == t) return; /* Correct sequence */ #else return; #endif /* WIND_DOWN_STACK */ /* Syntax error path only */ } CTRACE((tfp, "SGML: Extra end tag found and ignored.\n", old_tag->name)); } /* Start a element */ static void start_element(HTStream *context) { int status; HTTag *new_tag = context->current_tag; HTMLElement e = TAGNUM_OF_TAGP(new_tag); BOOL ok = FALSE; BOOL valid = YES; BOOL direct_container = YES; BOOL extra_action_taken = NO; canclose_t canclose_check = close_valid; if (!Old_DTD) { while (context->element_stack && (canclose_check == close_valid || (canclose_check == close_error && new_tag == context->element_stack->tag)) && !(valid = element_valid_within(new_tag, context->element_stack->tag, direct_container))) { canclose_check = can_close(new_tag, context->element_stack->tag); if (canclose_check != close_NO) { CTRACE((tfp, "SGML: End \t<- %s start <%s>\n", context->element_stack->tag->name, ((canclose_check == close_valid) ? 
"supplied," : "***forced by"), new_tag->name)); do_close_stacked(context); extra_action_taken = YES; if (canclose_check == close_error) direct_container = NO; } else { CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n", context->element_stack->tag->name, new_tag->name)); } } if (context->element_stack && !valid && (context->element_stack->tag->flags & Tgf_strict) && !(valid = element_valid_within(new_tag, context->element_stack->tag, direct_container))) { CTRACE((tfp, "SGML: Still open %s \t<- ***ignoring start <%s>\n", context->element_stack->tag->name, new_tag->name)); return; } if (context->element_stack && !extra_action_taken && (canclose_check == close_NO) && !valid && (new_tag->flags & Tgf_mafse)) { BOOL has_attributes = NO; int i = 0; for (; i < new_tag->number_of_attributes && !has_attributes; i++) has_attributes = context->present[i]; if (!has_attributes) { CTRACE((tfp, "SGML: Still open %s, ***converting invalid <%s> to \n", context->element_stack->tag->name, new_tag->name, new_tag->name)); end_element(context, new_tag); return; } } if (context->element_stack && (canclose_check == close_error) && !(valid = element_valid_within(new_tag, context->element_stack->tag, direct_container))) { CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n", context->element_stack->tag->name, new_tag->name)); } } /* Fall through to the non-extended code - kw */ /* * If we are not in a SELECT block, check if this is a SELECT start tag. * Otherwise (i.e., we are in a SELECT block) accept only OPTION as valid, * terminate the SELECT block if it is any other form-related element, and * otherwise ignore it. - FM */ if (!context->inSELECT) { /* * We are not in a SELECT block, so check if this starts one. - FM * (frequent case!) */ /* my_casecomp() - optimized by the first character */ if (!my_casecomp(new_tag->name, "SELECT")) { /* * Set the inSELECT flag and fall through. - FM */ context->inSELECT = TRUE; } } else { /* * We are in a SELECT block. 
- FM */ if (strcasecomp(new_tag->name, "OPTION")) { /* * Ugh, it is not an OPTION. - FM */ switch (e) { case HTML_INPUT: case HTML_TEXTAREA: case HTML_SELECT: case HTML_BUTTON: case HTML_FIELDSET: case HTML_LABEL: case HTML_LEGEND: case HTML_FORM: ok = TRUE; break; default: break; } if (ok) { /* * It is another form-related start tag, so terminate the * current SELECT block and fall through. - FM */ CTRACE((tfp, "SGML: ***Faking SELECT end tag before <%s> start tag.\n", new_tag->name)); end_element(context, SGMLFindTag(context->dtd, "SELECT")); } else { /* * Ignore the start tag. - FM */ CTRACE((tfp, "SGML: ***Ignoring start tag <%s> in SELECT block.\n", new_tag->name)); return; } } } /* * Handle the start tag. - FM */ CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name)); status = (*context->actions->start_element) (context->target, TAGNUM_OF_TAGP(new_tag), context->present, (const char **) context->value, /* coerce type for think c */ context->current_tag_charset, &context->include); if (status == HT_PARSER_OTHER_CONTENT) new_tag = ALT_TAGP(new_tag); /* this is only returned for OBJECT */ if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ HTElement *N = pool_alloc(); if (N == NULL) outofmem(__FILE__, "start_element"); N->next = context->element_stack; N->tag = new_tag; context->element_stack = N; context->no_lynx_specialcodes = (new_tag->flags & Tgf_nolyspcl); } else if (e == HTML_META) { /* * Check for result of META tag. 
- KW & FM */ change_chartrans_handling(context); } } /* Find Tag in DTD tag list * ------------------------ * * On entry, * dtd points to dtd structure including valid tag list * string points to name of tag in question * * On exit, * returns: * NULL tag not found * else address of tag structure in dtd */ HTTag *SGMLFindTag(const SGML_dtd * dtd, const char *s) { int high, low, i, diff; static HTTag *last[64] = {NULL}; /*optimize using the previous results */ HTTag **res = last + (UCH(*s) % 64); /*pointer arithmetic */ if (*res) { if ((*res)->name == NULL) return NULL; if (!strcasecomp((*res)->name, s)) return *res; } for (low = 0, high = dtd->number_of_tags; high > low; diff < 0 ? (low = i + 1) : (high = i)) { /* Binary search */ i = (low + (high - low) / 2); /* my_casecomp() - optimized by the first character, NOT_ASCII ok */ diff = my_casecomp(dtd->tags[i].name, s); /* Case insensitive */ if (diff == 0) { /* success: found it */ *res = &dtd->tags[i]; return *res; } } if (IsNmStart(*s)) { /* * Unrecognized, but may be valid. - KW */ return &HTTag_unrecognized; } return NULL; } /*________________________________________________________________________ * Public Methods */ /* Could check that we are back to bottom of stack! @@ */ /* Do check! - FM */ /* */ static void SGML_free(HTStream *context) { int i; HTElement *cur; HTTag *t; /* * Free the buffers. - FM */ FREE(context->recover); FREE(context->url); FREE(context->csi); FREE(context->include); FREE(context->active_include); /* * Wind down stack if any elements are open. - FM */ while (context->element_stack) { cur = context->element_stack; t = cur->tag; context->element_stack = cur->next; /* Remove from stack */ pool_free(cur); #ifdef USE_PRETTYSRC if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ #endif (*context->actions->end_element) (context->target, NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), &context->include); FREE(context->include); } /* * Finish off the target. 
- FM */ (*context->actions->_free) (context->target); /* * Free the strings and context structure. - FM */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); FREE(context); #ifdef USE_PRETTYSRC sgml_in_psrc_was_initialized = FALSE; #endif } static void SGML_abort(HTStream *context, HTError e) { int i; HTElement *cur; /* * Abort the target. - FM */ (*context->actions->_abort) (context->target, e); /* * Free the buffers. - FM */ FREE(context->recover); FREE(context->include); FREE(context->active_include); FREE(context->url); FREE(context->csi); /* * Free stack memory if any elements were left open. - KW */ while (context->element_stack) { cur = context->element_stack; context->element_stack = cur->next; /* Remove from stack */ pool_free(cur); } /* * Free the strings and context structure. - FM */ HTChunkFree(context->string); for (i = 0; i < MAX_ATTRIBUTES; i++) FREE_extra(context->value[i]); FREE(context); #ifdef USE_PRETTYSRC sgml_in_psrc_was_initialized = FALSE; #endif } /* Read and write user callback handle * ----------------------------------- * * The callbacks from the SGML parser have an SGML context parameter. * These calls allow the caller to associate his own context with a * particular SGML context. 
*/

#ifdef CALLERDATA
/*
 * Return the caller's handle stored in this parser context.
 */
void *SGML_callerData(HTStream *context)
{
    return context->callerData;
}

/*
 * Store the caller's handle in this parser context.
 */
void SGML_setCallerData(HTStream *context, void *data)
{
    context->callerData = data;
}
#endif /* CALLERDATA */

#ifdef USE_PRETTYSRC
/*
 * Change the case of the tag name held in string, in place.
 *
 * Nothing is done when the context is in strict_xml mode or when
 * tagname_transform is 1.  Otherwise the name is lowercased when
 * tagname_transform is 0 and uppercased for any other value.
 */
static void transform_tag(HTStream *context, HTChunk *string)
{
    if (context->strict_xml || tagname_transform == 1)
        return;

    if (tagname_transform == 0)
        LYLowerCase(string->data);
    else
        LYUpperCase(string->data);
}
#endif /* USE_PRETTYSRC */

/*
 * Decide whether tag is to be ignored when empty.
 *
 * TRUE is returned only if all of the following hold:
 *   - LYPreparsedSource is off and LYxhtml_parsing is on,
 *   - the tag has a name,
 *   - the tag does not carry the Tgf_mafse flag,
 *   - its contents are not SGML_EMPTY,
 *   - its class is not Tgc_Plike,
 *   - its class is Tgc_SELECTlike, or both its contains and icontains
 *     members are non-zero.
 *
 * The decision is written to the trace log before returning.
 */
static BOOL ignore_when_empty(HTTag * tag)
{
    BOOL verdict = FALSE;

    if (!LYPreparsedSource && LYxhtml_parsing && tag->name != 0) {
        if (!(tag->flags & Tgf_mafse)
            && tag->contents != SGML_EMPTY
            && tag->tagclass != Tgc_Plike) {
            if (tag->tagclass == Tgc_SELECTlike
                || (tag->contains && tag->icontains)) {
                verdict = TRUE;
            }
        }
    }

    CTRACE((tfp, "SGML Do%s ignore_when_empty:%s\n",
            verdict ? "" : " not",
            NonNull(tag->name)));
    return verdict;
}

/*
 * Drop the tag currently being collected: after tracing its name, point
 * context->current_tag at a zero-filled static placeholder and empty the
 * context's string chunk.
 */
static void discard_empty(HTStream *context)
{
    static HTTag empty_tag;

    CTRACE((tfp, "SGML discarding empty %s\n",
            NonNull(context->current_tag->name)));
    CTRACE_FLUSH(tfp);

    /* The placeholder is cleared again on every call. */
    memset(&empty_tag, 0, sizeof empty_tag);
    context->current_tag = &empty_tag;
    context->string->size = 0;

    /* do not call end_element() if start_element() was not called */
}

#ifdef USE_PRETTYSRC
/*
 * When psrc_view is set, emit the attribute value held in string:
 *   - if attr_is_name, an anchor is started with the value as its first
 *     string argument and ended at once;
 *   - else if attr_is_href, the href markup is started and an anchor is
 *     started with the value as its second string argument;
 *   - the value itself is written with PUTS_TR;
 *   - if attr_is_href, the anchor is ended and the href markup stopped;
 *   - end_ch, when non-zero, is written after the value;
 *   - the attrval markup is stopped.
 *
 * Returns the value psrc_view had on entry; nothing is emitted when it is
 * zero.
 */
static BOOL end_if_prettysrc(HTStream *context, HTChunk *string, int end_ch)
{
    BOOL viewing = psrc_view;

    if (!viewing)
        return viewing;

    if (attr_is_name) {
        HTStartAnchor(context->target, string->data, NULL);
        (*context->actions->end_element) (context->target,
                                          HTML_A,
                                          &context->include);
    } else if (attr_is_href) {
        PSRCSTART(href);
        HTStartAnchor(context->target, NULL, string->data);
    }

    PUTS_TR(string->data);

    if (attr_is_href) {
        (*context->actions->end_element) (context->target,
                                          HTML_A,
                                          &context->include);
        PSRCSTOP(href);
    }

    if (end_ch)
        PUTC(end_ch);
    PSRCSTOP(attrval);

    return viewing;
}
#endif

static void SGML_character(HTStream *context, char c_in)
{
    const SGML_dtd *dtd = context->dtd;
    HTChunk *string = context->string;
    const char *EntityName;
    HTTag
*testtag = NULL; BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ int testlast; #ifdef CJK_EX unsigned char c; #else char c; #endif char saved_char_in = '\0'; ++sgml_offset; /* * Now some fun with the preprocessor. Use copies for c and unsign_c == * clong, so that we can revert back to the unchanged c_in. - KW */ #define unsign_c clong c = c_in; clong = UCH(c); /* a.k.a. unsign_c */ if (context->T.decode_utf8) { /* * Combine UTF-8 into Unicode. Incomplete characters silently ignored. * From Linux kernel's console.c. - KW */ if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */ /* * We have an octet from a multibyte character. - FM */ if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) { context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f); context->utf_count--; *(context->utf_buf_p) = c; (context->utf_buf_p)++; if (context->utf_count == 0) { /* * We have all of the bytes, so terminate the buffer and * set 'clong' to the UCode_t value. - FM */ *(context->utf_buf_p) = '\0'; clong = context->utf_char; if (clong < 256) { c = ((char) (clong & 0xff)); } goto top1; } else { /* * Wait for more. - KW */ return; } } else { /* * Start handling a new multibyte character. - FM */ context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = c; (context->utf_buf_p)++; if ((c & 0xe0) == 0xc0) { context->utf_count = 1; context->utf_char = (c & 0x1f); } else if ((c & 0xf0) == 0xe0) { context->utf_count = 2; context->utf_char = (c & 0x0f); } else if ((c & 0xf8) == 0xf0) { context->utf_count = 3; context->utf_char = (c & 0x07); } else if ((c & 0xfc) == 0xf8) { context->utf_count = 4; context->utf_char = (c & 0x03); } else if ((c & 0xfe) == 0xfc) { context->utf_count = 5; context->utf_char = (c & 0x01); } else { /* * Garbage. - KW */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } /* * Wait for more. 
- KW */ return; } } else { /* * Got an ASCII char. - KW */ context->utf_count = 0; context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; /* goto top; */ } } /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */ #ifdef NOTDEFINED /* * If we have a koi8-r input and do not have koi8-r as the output, save the * raw input in saved_char_in before we potentially convert it to Unicode. * - FM */ if (context->T.strip_raw_char_in) saved_char_in = c; #endif /* NOTDEFINED */ /* * If we want the raw input converted to Unicode, try that now. - FM */ if (context->T.trans_to_uni && #ifdef EXP_JAPANESEUTF8_SUPPORT ((strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "euc-jp") == 0) || (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0))) { if (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0) { if (context->utf_count == 0) { if (IS_SJIS_HI1((unsigned char) c) || IS_SJIS_HI2((unsigned char) c)) { context->utf_buf[0] = c; context->utf_count = 1; clong = -11; } } else { if (IS_SJIS_LO((unsigned char) c)) { context->utf_buf[1] = c; clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl); } context->utf_count = 0; } } else { if (context->utf_count == 0) { if (IS_EUC_HI((unsigned char) c)) { context->utf_buf[0] = c; context->utf_count = 1; clong = -11; } } else { if (IS_EUC_LOX((unsigned char) c)) { context->utf_buf[1] = c; clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl); } context->utf_count = 0; } } goto top1; } else if (context->T.trans_to_uni && #endif ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */ (unsign_c < ' ' && unsign_c != 0 && context->T.trans_C0_to_uni))) { /* * Convert the octet to Unicode. 
- FM */ clong = UCTransToUni(c, context->inUCLYhndl); if (clong > 0) { saved_char_in = c; if (clong < 256) { c = FROMASCII((char) clong); } } goto top1; } else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */ context->T.trans_C0_to_uni) { /* * This else if may be too ugly to keep. - KW */ if (context->T.trans_from_uni && (((clong = UCTransToUni(c, context->inUCLYhndl)) >= ' ') || (context->T.transp && (clong = UCTransToUni(c, context->inUCLYhndl)) > 0))) { saved_char_in = c; if (clong < 256) { c = FROMASCII((char) clong); } goto top1; } else { uck = -1; if (context->T.transp) { uck = UCTransCharStr(replace_buf, 60, c, context->inUCLYhndl, context->inUCLYhndl, NO); } if (!context->T.transp || uck < 0) { uck = UCTransCharStr(replace_buf, 60, c, context->inUCLYhndl, context->outUCLYhndl, YES); } if (uck == 0) { return; } else if (uck < 0) { goto top0a; } c = replace_buf[0]; if (c && replace_buf[1]) { if (context->state == S_text) { PUTS(replace_buf); return; } StrAllocCat(context->recover, replace_buf + 1); } goto top0a; } /* Next line end of ugly stuff for C0. - KW */ } else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */ goto top0a; } /* * At this point we have either unsign_c a.k.a. clong in Unicode (and c in * latin1 if clong is in the latin1 range), or unsign_c and c will have to * be passed raw. - KW */ /* * We jump up to here from below if we have * stuff in the recover, insert, or csi buffers * to process. We zero saved_char_in, in effect * as a flag that the octet is not that of the * actual call to this function. This may be OK * for now, for the stuff this function adds to * its recover buffer, but it might not be for * stuff other functions added to the insert or * csi buffer, so bear that in mind. - FM * Stuff from the recover buffer is now handled * as UTF-8 if we can expect that's what it is, * and in that case we don't come back up here. 
- kw */ top: saved_char_in = '\0'; /* * We jump to here from above when we don't have * UTF-8 input, haven't converted to Unicode, and * want clong set to the input octet (unsigned) * without zeroing its saved_char_in copy (which * is signed). - FM */ top0a: *(context->utf_buf) = '\0'; clong = UCH(c); /* * We jump to here from above if we have converted * the input, or a multibyte sequence across calls, * to a Unicode value and loaded it into clong (to * which unsign_c has been defined), and from below * when we are recycling a character (e.g., because * it terminated an entity but is not the standard * semi-colon). The character will already have * been put through the Unicode conversions. - FM */ top1: /* * Ignore low ISO 646 7-bit control characters if HTCJK is not set. - FM */ /* * Works for both ASCII and EBCDIC. -- gil * S/390 -- gil -- 0811 */ if (TOASCII(unsign_c) < 32 && c != '\t' && c != '\n' && c != '\r' && !IS_CJK_TTY) goto after_switch; /* * Ignore 127 if we don't have HTPassHighCtrlRaw or HTCJK set. - FM */ #define PASSHICTRL (context->T.transp || \ unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ !(PASSHICTRL || IS_CJK_TTY)) goto after_switch; /* * Ignore 8-bit control characters 128 - 159 if neither HTPassHighCtrlRaw * nor HTCJK is set. - FM */ if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ !(PASSHICTRL || IS_CJK_TTY)) goto after_switch; /* Almost all CJK characters are double byte but only Japanese * JIS X0201 Kana is single byte. To prevent to fail SGML parsing * we have to take care of them here. 
-- TH */ if ((HTCJK == JAPANESE) && (context->state == S_in_kanji) && !IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c)) #ifdef EXP_JAPANESEUTF8_SUPPORT && !context->T.decode_utf8 #endif ) { #ifdef CONV_JISX0201KANA_JISX0208KANA if (IS_SJIS_X0201KANA(context->kanji_buf)) { unsigned char sjis_hi, sjis_lo; JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo); PUTC(sjis_hi); PUTC(sjis_lo); } else #endif PUTC(context->kanji_buf); context->state = S_text; } /* * Handle character based on context->state. */ CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n", state_name(context->state), string->size, NonNull(string->data), UCH(c))); switch (context->state) { case S_in_kanji: /* * Note that if we don't have a CJK input, then this is not the second * byte of a CJK di-byte, and we're trashing the input. That's why * 8-bit characters followed by, for example, '<' can cause the tag to * be treated as text, not markup. We could try to deal with it by * holding each first byte and then checking byte pairs, but that * doesn't seem worth the overhead (see below). - FM */ context->state = S_text; PUTC(context->kanji_buf); PUTC(c); break; case S_tagname_slash: /* * We had something link "slashedtag as a flag; except if we get '>' directly * after the "slashedtag, in which case keep state as is and let code * below deal with it. - kw */ if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) { context->state = S_text; } /* fall through in any case! */ case S_text: if (IS_CJK_TTY && ((TOASCII(c) & 0200) != 0) #ifdef EXP_JAPANESEUTF8_SUPPORT && !context->T.decode_utf8 #endif ) { /* S/390 -- gil -- 0864 */ /* * Setting up for Kanji multibyte handling (based on Takuya ASADA's * (asada@three-a.co.jp) CJK Lynx). Note that if the input is not * in fact CJK, the next byte also will be mishandled, as explained * above. Toggle raw mode off in such cases, or select the "7 bit * approximations" display character set, which is largely * equivalent to having raw mode off with CJK. 
- FM */ context->state = S_in_kanji; context->kanji_buf = c; break; } else if (IS_CJK_TTY && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ /* * Setting up for CJK escape sequence handling (based on Takuya * ASADA's (asada@three-a.co.jp) CJK Lynx). - FM */ context->state = S_esc; PUTC(c); break; } if (c == '&' || c == '<') { #ifdef USE_PRETTYSRC if (psrc_view) { /*there is nothing useful in the element_stack */ testtag = context->current_tag; } else #endif { testtag = context->element_stack ? context->element_stack->tag : NULL; } } if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ (!testtag || (testtag->contents == SGML_MIXED || testtag->contents == SGML_ELEMENT || testtag->contents == SGML_PCDATA || #ifdef USE_PRETTYSRC testtag->contents == SGML_EMPTY || #endif testtag->contents == SGML_RCDATA))) { /* * Setting up for possible entity, without the leading '&'. - FM */ string->size = 0; context->state = S_ero; } else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */ /* * Setting up for possible tag. - FM */ string->size = 0; if (testtag && testtag->contents == SGML_PCDATA) { context->state = S_pcdata; } else if (testtag && (testtag->contents == SGML_LITTERAL || testtag->contents == SGML_CDATA)) { context->state = S_litteral; } else if (testtag && (testtag->contents == SGML_SCRIPT)) { context->state = S_script; } else { context->state = S_tag; } context->slashedtag = NULL; } else if (context->slashedtag && context->slashedtag->name && (c == '/' || (c == '>' && context->state == S_tagname_slash)) && TOASCII(unsign_c) < 127) { /* * We got either the second slash of a pending "' of a mere "". In both * cases generate a "" end tag in the recover buffer for * reparsing unless NAME is really an empty element. 
- kw */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC(c); PSRCSTOP(abracket); } else #endif if (context->slashedtag != context->unknown_tag && !ReallyEmptyTag(context->slashedtag)) { if (context->recover == NULL) { StrAllocCopy(context->recover, "recover_index = 0; } else { StrAllocCat(context->recover, "recover, context->slashedtag->name); StrAllocCat(context->recover, ">"); } context->slashedtag = NULL; } else if (context->element_stack && (context->element_stack->tag->flags & Tgf_frecyc)) { /* * The element stack says we are within the contents of an element * that the next stage (HTML.c) may want to feed us back again (via * the *include string). So try to output text in UTF-8 if * possible, using the same logic as for attribute values (which * should be in line with what context->current_tag_charset * indicates). - kw */ if (context->T.decode_utf8 && *context->utf_buf) { PUTS(context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } else if (!IS_CJK_TTY && (context->T.output_utf8 || context->T.trans_from_uni)) { if (LYIsASCII(clong)) { PUTC(c); } else if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && UCH(saved_char_in) >= LYlowest_eightbit[context->outUCLYhndl]) { PUTUTF8((0xf000 | UCH(saved_char_in))); } else { PUTUTF8(clong); } } else if (saved_char_in && context->T.use_raw_char_in) { PUTC(saved_char_in); } else { PUTC(c); } #define PASS8859SPECL context->T.pass_160_173_raw /* * Convert 160 (nbsp) to Lynx special character if neither * HTPassHighCtrlRaw nor HTCJK is set. - FM */ } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || IS_CJK_TTY)) { PUTC(HT_NON_BREAK_SPACE); /* * Convert 173 (shy) to Lynx special character if neither * HTPassHighCtrlRaw nor HTCJK is set. 
- FM */ } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || IS_CJK_TTY)) { PUTC(LY_SOFT_HYPHEN); /* * Handle the case in which we think we have a character which * doesn't need further processing (e.g., a koi8-r input for a * koi8-r output). - FM */ } else if (context->T.use_raw_char_in && saved_char_in) { /* * Only if the original character is still in saved_char_in, * otherwise we may be iterating from a goto top. - KW */ PUTC(saved_char_in); saved_char_in = '\0'; /****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ } else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */ (uck = UCTransUniChar(unsign_c, context->outUCLYhndl)) >= ' ' && uck < 256) { CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char) uck))); /* * We got one octet from the conversions, so use it. - FM */ PUTC(FROMASCII((char) uck)); } else if ((chk && (uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32))) && /* * Not found; look for replacement string. - KW */ (uck = UCTransUniCharStr(replace_buf, 60, clong, context->outUCLYhndl, 0) >= 0)) { /* * Got a replacement string. No further tests for validity - * assume that whoever defined replacement strings knew what she * was doing. - KW */ PUTS(replace_buf); /* * If we're displaying UTF-8, try that now. - FM */ } else if (context->T.output_utf8 && PUTUTF8(clong)) { ; /* do nothing more */ /* * If it's any other (> 160) 8-bit character, and we have not set * HTPassEightBitRaw nor HTCJK, nor have the "ISO Latin 1" * character set selected, back translate for our character set. 
- * FM */ #define IncludesLatin1Enc \ (context->outUCLYhndl == LATIN1 || \ (context->outUCI && \ (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) #define PASSHI8BIT (HTPassEightBitRaw || \ (context->T.do_8bitraw && !context->T.trans_from_uni)) } else if (unsign_c > 160 && unsign_c < 256 && !(PASSHI8BIT || IS_CJK_TTY) && !IncludesLatin1Enc) { #ifdef USE_PRETTYSRC int psrc_view_backup = 0; #endif string->size = 0; EntityName = HTMLGetEntityName((int) (unsign_c - 160)); HTChunkPuts(string, EntityName); HTChunkTerminate(string); #ifdef USE_PRETTYSRC /* we need to disable it temporarily */ if (psrc_view) { psrc_view_backup = 1; psrc_view = 0; } #endif handle_entity(context, '\0'); #ifdef USE_PRETTYSRC /* we need to disable it temporarily */ if (psrc_view_backup) psrc_view = TRUE; #endif string->size = 0; if (!FoundEntity) PUTC(';'); /* * If we get to here and have an ASCII char, pass the character. - * KW */ } else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */ PUTC(c); /* * If we get to here, and should have translated, translation has * failed so far. - KW * * We should have sent UTF-8 output to the parser already, but what * the heck, try again. - FM */ } else if (context->T.output_utf8 && *context->utf_buf) { PUTS(context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; #ifdef NOTDEFINED /* * Check for a strippable koi8-r 8-bit character. - FM */ } else if (context->T.strip_raw_char_in && saved_char_in && (UCH(saved_char_in) >= 0xc0) && (UCH(saved_char_in) < 255)) { /* * KOI8 special: strip high bit, gives (somewhat) readable ASCII * or KOI7 - it was constructed that way! - KW */ PUTC(((char) (saved_char_in & 0x7f))); saved_char_in = '\0'; #endif /* NOTDEFINED */ /* * If we don't actually want the character, make it safe and output * that now. 
- FM */ } else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */ LYlowest_eightbit[context->outUCLYhndl] || (context->T.trans_from_uni && !HTPassEightBitRaw)) { /* * If we get to here, pass the character. - FM */ } else { PUTC(c); } break; /* * Found '<' in SGML_PCDATA content; treat this mode nearly like * S_litteral, but recognize 'size && TOASCII(unsign_c) < 127) { /* first after '<' */ if (c == '!') { /* state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; } else if (c == '?') { /* ' - kw */ CTRACE((tfp, "SGML: Found PI in PCDATA, junking it until '>'\n")); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTS("state = S_pi; break; } } goto case_S_litteral; /* * Found '<' in SGML_SCRIPT content; treat this mode nearly like * S_litteral, but recognize 'size && TOASCII(unsign_c) < 127) { /* first after '<' */ if (c == '!') { /* state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; } } goto case_S_litteral; /* * In litteral mode, waits only for specific end tag (for compatibility * with old servers, and for Lynx). - FM */ case_S_litteral: case S_litteral: /*PSRC:this case not understood completely by HV, not done */ HTChunkPutc(string, c); #ifdef USE_PRETTYSRC if (psrc_view) { /* there is nothing useful in the element_stack */ testtag = context->current_tag; } else #endif testtag = (context->element_stack ? context->element_stack->tag : NULL); if (testtag == NULL || testtag->name == NULL) { string->size--; context->state = S_text; goto top1; } /* * Normally when we get the closing ">", * testtag contains something like "TITLE" * string contains something like "/title>" * so we decrement by 2 to compare the final character of each. */ testlast = string->size - 2 - context->trailing_spaces - context->leading_spaces; if (TOUPPER(c) != ((testlast < 0) ? 
'/' : testtag->name[testlast])) { int i; /* * If complete match, end litteral. */ if ((c == '>') && testlast >= 0 && !testtag->name[testlast]) { #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTS("data, context->current_tag->name); transform_tag(context, string); PUTS(string->data); PSRCSTOP(tag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); context->current_tag = NULL; } else #endif end_element(context, context->element_stack->tag); string->size = 0; context->current_attribute_number = INVALID; context->state = S_text; context->leading_spaces = 0; context->trailing_spaces = 0; break; } /* * Allow whitespace between the "<" or ">" and the keyword, for * error-recovery. */ if (isspace(UCH(c))) { if (testlast == -1) { context->leading_spaces += 1; CTRACE2(TRACE_SGML, (tfp, "leading spaces: %d\n", context->leading_spaces)); break; } else if (testlast > 0) { context->trailing_spaces += 1; CTRACE2(TRACE_SGML, (tfp, "trailing spaces: %d\n", context->trailing_spaces)); break; } } /* * Mismatch - recover. */ context->leading_spaces = 0; context->trailing_spaces = 0; if (((testtag->contents != SGML_LITTERAL && (testtag->flags & Tgf_strict)) || (context->state == S_pcdata && (testtag->flags & (Tgf_strict | Tgf_endO)))) && (testlast > -1 && (c == '>' || testlast > 0 || IsNmStart(c)))) { context->state = S_end; string->size--; for (i = 0; i < string->size; i++) /* remove '/' */ string->data[i] = string->data[i + 1]; if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c)) break; string->size--; goto top1; } if (context->state == S_pcdata && (testtag->flags & (Tgf_strict | Tgf_endO)) && (testlast < 0 && IsNmStart(c))) { context->state = S_tag; break; } /* * If Mismatch: recover string literally. */ PUTC('<'); for (i = 0; i < string->size - 1; i++) /* recover, except last c */ PUTC(string->data[i]); string->size = 0; context->state = S_text; goto top1; /* to recover last c */ } break; /* * Character reference (numeric entity) or named entity. 
*/ case S_ero: if (c == '#') { /* * Setting up for possible numeric entity. */ context->state = S_cro; /* &# is Char Ref Open */ break; } context->state = S_entity; /* Fall through! */ /* * Handle possible named entity. */ case S_entity: if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ isalnum(UCH(c)) : isalpha(UCH(c)))) { /* Should probably use IsNmStart/IsNmChar above (is that right?), but the world is not ready for that - there's  : (note colon!) and stuff around. */ /* * Accept valid ASCII character. - FM */ HTChunkPutc(string, c); } else if (string->size == 0) { /* * It was an ampersand that's just text, so output the ampersand * and recycle this character. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif PUTC('&'); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif context->state = S_text; goto top1; } else { /* * Terminate entity name and try to handle it. - FM */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC entity_string = string->data; #endif /* S/390 -- gil -- 1039 */ /* CTRACE((tfp, "%s: %d: %s\n", __FILE__, __LINE__, string->data)); */ if (!strcmp(string->data, "zwnj") && (!context->element_stack || (context->element_stack->tag && context->element_stack->tag->contents == SGML_MIXED))) { /* * Handle zwnj (8204) as . - FM */ char temp[8]; CTRACE((tfp, "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n")); if (c != ';') { sprintf(temp, "%c", c); } else { sprintf(temp, ""); } if (context->recover == NULL) { StrAllocCopy(context->recover, temp); context->recover_index = 0; } else { StrAllocCat(context->recover, temp); } string->size = 0; context->state = S_text; break; } else { handle_entity(context, '\0'); } string->size = 0; context->state = S_text; /* * Don't eat the terminator if we didn't find the entity name and * therefore sent the raw string via handle_entity(), or if the * terminator is not the "standard" semi-colon for HTML. 
- FM */ #ifdef USE_PRETTYSRC if (psrc_view && FoundEntity && c == ';') { PSRCSTART(entity); PUTC(c); PSRCSTOP(entity); } #endif if (!FoundEntity || c != ';') goto top1; } break; /* * Check for a numeric entity. */ case S_cro: if (TOASCII(unsign_c) < 127 && TOLOWER(UCH(c)) == 'x') { /* S/390 -- gil -- 1060 */ context->isHex = TRUE; context->state = S_incro; } else if (TOASCII(unsign_c) < 127 && isdigit(UCH(c))) { /* * Accept only valid ASCII digits. - FM */ HTChunkPutc(string, c); /* accumulate a character NUMBER */ context->isHex = FALSE; context->state = S_incro; } else if (string->size == 0) { /* * No 'x' or digit following the "&#" so recover them and recycle * the character. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif PUTC('&'); PUTC('#'); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif context->state = S_text; goto top1; } break; /* * Handle a numeric entity. */ case S_incro: /* S/390 -- gil -- 1075 */ if ((TOASCII(unsign_c) < 127) && (context->isHex ? isxdigit(UCH(c)) : isdigit(UCH(c)))) { /* * Accept only valid hex or ASCII digits. - FM */ HTChunkPutc(string, c); /* accumulate a character NUMBER */ } else if (string->size == 0) { /* * No hex digit following the "&#x" so recover them and recycle the * character. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif PUTS("&#x"); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif context->isHex = FALSE; context->state = S_text; goto top1; } else { /* * Terminate the numeric entity and try to handle it. - FM */ unsigned long code; int i; HTChunkTerminate(string); #ifdef USE_PRETTYSRC entity_string = string->data; #endif if ((context->isHex ? sscanf(string->data, "%lx", &code) : sscanf(string->data, "%lu", &code)) == 1) { /* =============== work in ASCII below here =============== S/390 -- gil -- 1092 */ if ((code == 1) || (code > 127 && code < 156)) { /* * Assume these are Microsoft code points, inflicted on us * by FrontPage. 
- FM * * MS FrontPage uses syntax like ™ in 128-159 range * and doesn't follow Unicode standards for this area. * Windows-1252 codepoints are assumed here. */ switch (code) { case 1: /* * WHITE SMILING FACE */ code = 0x263a; break; case 128: /* * EURO currency sign */ code = 0x20ac; break; case 130: /* * SINGLE LOW-9 QUOTATION MARK (sbquo) */ code = 0x201a; break; case 132: /* * DOUBLE LOW-9 QUOTATION MARK (bdquo) */ code = 0x201e; break; case 133: /* * HORIZONTAL ELLIPSIS (hellip) */ code = 0x2026; break; case 134: /* * DAGGER (dagger) */ code = 0x2020; break; case 135: /* * DOUBLE DAGGER (Dagger) */ code = 0x2021; break; case 137: /* * PER MILLE SIGN (permil) */ code = 0x2030; break; case 139: /* * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo) */ code = 0x2039; break; case 145: /* * LEFT SINGLE QUOTATION MARK (lsquo) */ code = 0x2018; break; case 146: /* * RIGHT SINGLE QUOTATION MARK (rsquo) */ code = 0x2019; break; case 147: /* * LEFT DOUBLE QUOTATION MARK (ldquo) */ code = 0x201c; break; case 148: /* * RIGHT DOUBLE QUOTATION MARK (rdquo) */ code = 0x201d; break; case 149: /* * BULLET (bull) */ code = 0x2022; break; case 150: /* * EN DASH (ndash) */ code = 0x2013; break; case 151: /* * EM DASH (mdash) */ code = 0x2014; break; case 152: /* * SMALL TILDE (tilde) */ code = 0x02dc; break; case 153: /* * TRADE MARK SIGN (trade) */ code = 0x2122; break; case 155: /* * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo) */ code = 0x203a; break; default: /* * Do not attempt a conversion to valid Unicode values. */ break; } } /* * Check for special values. - FM */ if ((code == 8204) && (!context->element_stack || (context->element_stack->tag && context->element_stack->tag->contents == SGML_MIXED))) { /* * Handle zwnj (8204) as . - FM */ char temp[8]; CTRACE((tfp, "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n")); /* * Include the terminator if it is not the standard * semi-colon. 
- FM */ if (c != ';') { sprintf(temp, "%c", c); } else { sprintf(temp, ""); } /* * Add the replacement string to the recover buffer for * processing. - FM */ if (context->recover == NULL) { StrAllocCopy(context->recover, temp); context->recover_index = 0; } else { StrAllocCat(context->recover, temp); } string->size = 0; context->isHex = FALSE; context->state = S_text; break; } else if (put_special_unicodes(context, code)) { /* * We handled the value as a special character, so recycle * the terminator or break. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(entity); PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); if (c == ';') PUTC(';'); PSRCSTOP(entity); } #endif string->size = 0; context->isHex = FALSE; context->state = S_text; if (c != ';') goto top1; break; } /* * Seek a translation from the chartrans tables. */ if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 && uck < 256 && (uck < 127 || uck >= LYlowest_eightbit[context->outUCLYhndl])) { #ifdef USE_PRETTYSRC if (!psrc_view) { #endif PUTC(FROMASCII((char) uck)); #ifdef USE_PRETTYSRC } else { put_pretty_number(context); } #endif } else if ((uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32)) && /* * Not found; look for replacement string. */ (uck = UCTransUniCharStr(replace_buf, 60, code, context->outUCLYhndl, 0) >= 0)) { #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_number(context); } else #endif PUTS(replace_buf); /* * If we're displaying UTF-8, try that now. - FM */ } else if (context->T.output_utf8 && PUTUTF8(code)) { ; /* do nothing more */ /* * Ignore 8205 (zwj), 8206 (lrm), and 8207 (rln), if we get * to here. - FM */ } else if (code == 8205 || code == 8206 || code == 8207) { if (TRACE) { string->size--; LYstrncpy(replace_buf, string->data, (string->size < 64 ? string->size : 63)); fprintf(tfp, "SGML_character: Ignoring '%s%s'.\n", (context->isHex ? 
"&#x" : "&#"), replace_buf); } #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(badseq); PUTS((context->isHex ? "&#x" : "&#")); PUTS(entity_string); if (c == ';') PUTC(';'); PSRCSTOP(badseq); } #endif string->size = 0; context->isHex = FALSE; context->state = S_text; if (c != ';') goto top1; break; /* * Show the numeric entity if we get to here and the value: * (1) Is greater than 255 (but use ASCII characters for * spaces or dashes). * (2) Is less than 32, and not valid or we don't have * HTCJK set. * (3) Is 127 and we don't have HTPassHighCtrlRaw or HTCJK * set. * (4) Is 128 - 159 and we don't have HTPassHighCtrlNum * set. * - FM */ } else if ((code > 255) || (code < ' ' && /* S/390 -- gil -- 1140 */ code != '\t' && code != '\n' && code != '\r' && !IS_CJK_TTY) || (TOASCII(code) == 127 && !(HTPassHighCtrlRaw || IS_CJK_TTY)) || (TOASCII(code) > 127 && code < 160 && !HTPassHighCtrlNum)) { /* * Unhandled or illegal value. Recover the "&#" or "&#x" * and digit(s), and recycle the terminator. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(badseq); } #endif if (context->isHex) { PUTS("&#x"); context->isHex = FALSE; } else { PUTS("&#"); } string->size--; for (i = 0; i < string->size; i++) /* recover */ PUTC(string->data[i]); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTOP(badseq); } #endif string->size = 0; context->isHex = FALSE; context->state = S_text; goto top1; } else if (TOASCII(code) < 161 || /* S/390 -- gil -- 1162 */ HTPassEightBitNum || IncludesLatin1Enc) { /* * No conversion needed. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) { put_pretty_number(context); } else #endif PUTC(FROMASCII((char) code)); } else { /* * Handle as named entity. - FM */ code -= 160; EntityName = HTMLGetEntityName(code); if (EntityName && EntityName[0] != '\0') { string->size = 0; HTChunkPuts(string, EntityName); HTChunkTerminate(string); handle_entity(context, '\0'); /* * Add a semi-colon if something went wrong and * handle_entity() sent the string. 
- FM */ if (!FoundEntity) { PUTC(';'); } } else { /* * Our conversion failed, so recover the "&#" and * digit(s), and recycle the terminator. - FM */ #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif if (context->isHex) { PUTS("&#x"); context->isHex = FALSE; } else { PUTS("&#"); } string->size--; for (i = 0; i < string->size; i++) /* recover */ PUTC(string->data[i]); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif string->size = 0; context->isHex = FALSE; context->state = S_text; goto top1; } } /* * If we get to here, we succeeded. Hoorah!!! - FM */ string->size = 0; context->isHex = FALSE; context->state = S_text; /* * Don't eat the terminator if it's not the "standard" * semi-colon for HTML. - FM */ if (c != ';') { goto top1; } } else { /* * Not an entity, and don't know why not, so add the terminator * to the string, output the "&#" or "&#x", and process the * string via the recover element. - FM */ string->size--; HTChunkPutc(string, c); HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif if (context->isHex) { PUTS("&#x"); context->isHex = FALSE; } else { PUTS("&#"); } #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif if (context->recover == NULL) { StrAllocCopy(context->recover, string->data); context->recover_index = 0; } else { StrAllocCat(context->recover, string->data); } string->size = 0; context->isHex = FALSE; context->state = S_text; break; } } break; /* * Tag */ case S_tag: /* new tag */ if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1179 */ IsNmChar(c) : IsNmStart(c))) { /* * Add valid ASCII character. - FM */ HTChunkPutc(string, c); } else if (c == '!' && !string->size) { /* state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; } else if (!string->size && (TOASCII(unsign_c) <= 160 && /* S/390 -- gil -- 1196 */ (c != '/' && c != '?' 
&& c != '_' && c != ':'))) { /* * '<' must be followed by an ASCII letter to be a valid start tag. * Here it isn't, nor do we have a '/' for an end tag, nor one of * some other characters with a special meaning for SGML or which * are likely to be legal Name Start characters in XML or some * other extension. So recover the '<' and following character as * data. - FM & KW */ context->state = S_text; #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(badseq); #endif PUTC('<'); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif goto top1; } else { /* End of tag name */ /* * Try to handle tag. - FM */ HTTag *t; if (c == '/') { if (string->size == 0) { context->state = S_end; break; } CTRACE((tfp, "SGML: `<%.*s/' found!\n", string->size, string->data)); } HTChunkTerminate(string); t = SGMLFindTag(dtd, string->data); if (t == context->unknown_tag && ((c == ':' && string->size == 4 && 0 == strcasecomp(string->data, "URL")) || (string->size > 4 && 0 == strncasecomp(string->data, "URL:", 4)))) { /* * Treat data); /* recover */ PUTC(c); #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTOP(badseq); #endif CTRACE((tfp, "SGML: Treating <%s%c as text\n", string->data, c)); string->size = 0; context->state = S_text; break; } if (c == '/' && t) { /* * Element name was ended by '/'. Remember the tag that ended * thusly, we'll interpret this as either an indication of an * empty element (if '>' follows directly) or do some * SGMLshortref-ish treatment. - kw */ context->slashedtag = t; } if (!t) { if (c == '?' 
&& string->size <= 1) { CTRACE((tfp, "SGML: Found PI, looking for '>'\n")); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTS("size = 0; context->state = S_pi; HTChunkPutc(string, c); break; } CTRACE((tfp, "SGML: *** Invalid element %s\n", string->data)); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC('<'); PSRCSTOP(abracket); PSRCSTART(badtag); transform_tag(context, string); PUTS(string->data); if (c == '>') { PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); } else { PUTC(c); } } #endif context->state = (c == '>') ? S_text : S_junk_tag; break; } else if (t == context->unknown_tag) { CTRACE((tfp, "SGML: *** Unknown element %s\n", string->data)); /* * Fall through and treat like valid tag for attribute parsing. * - KW */ } context->current_tag = t; #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC('<'); PSRCSTOP(abracket); if (t != context->unknown_tag) PSRCSTART(tag); else PSRCSTART(badtag); transform_tag(context, string); PUTS(string->data); if (t != context->unknown_tag) PSRCSTOP(tag); else PSRCSTOP(badtag); } if (!psrc_view) /*don't waste time */ #endif { /* * Clear out attributes. */ memset((void *) context->present, 0, sizeof(BOOL) * context->current_tag->number_of_attributes); } string->size = 0; context->current_attribute_number = INVALID; #ifdef USE_PRETTYSRC if (psrc_view) { if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { if (c != '<') { PSRCSTART(abracket); PUTC(c); PSRCSTOP(abracket); context->state = (c == '>') ? S_text : S_tagname_slash; } else { context->state = S_tag; } } else { if (!WHITE(c)) PUTC(c); context->state = S_tag_gap; } } else #endif if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { if (context->current_tag->name) start_element(context); context->state = (c == '>') ? S_text : (c == '<') ? 
S_tag : S_tagname_slash; } else { context->state = S_tag_gap; } } break; case S_exclamation: if (context->lead_exclamation && c == '-') { /* * Set up for possible comment. - FM */ context->lead_exclamation = FALSE; context->first_dash = TRUE; HTChunkPutc(string, c); break; } if (context->lead_exclamation && c == '[') { /* * Set up for possible marked section. - FM */ context->lead_exclamation = FALSE; context->first_bracket = TRUE; context->second_bracket = FALSE; HTChunkPutc(string, c); context->state = S_marked; break; } if (context->first_dash && c == '-') { /* * Set up to handle comment. - FM */ context->lead_exclamation = FALSE; context->first_dash = FALSE; context->end_comment = FALSE; HTChunkPutc(string, c); context->state = S_comment; break; } context->lead_exclamation = FALSE; context->first_dash = FALSE; if (c == '>') { /* * Try to handle identifier. - FM */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_identifier(context); string->size = 0; context->state = S_text; break; } if (WHITE(c)) { if (string->size == 8 && !strncasecomp(string->data, "!DOCTYPE", 8)) { /* * Set up for DOCTYPE declaration. - FM */ HTChunkPutc(string, c); context->doctype_bracket = FALSE; context->state = S_doctype; break; } if (string->size == 7 && !strncasecomp(string->data, "!ENTITY", 7)) { /* * Set up for ENTITY declaration. - FM */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; context->state = S_sgmlent; break; } if (string->size == 8 && !strncasecomp(string->data, "!ELEMENT", 8)) { /* * Set up for ELEMENT declaration. - FM */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; context->state = S_sgmlele; break; } if (string->size == 8 && !strncasecomp(string->data, "!ATTLIST", 8)) { /* * Set up for ATTLIST declaration. 
- FM */ HTChunkPutc(string, c); context->first_dash = FALSE; context->end_comment = TRUE; context->state = S_sgmlatt; break; } } HTChunkPutc(string, c); break; case S_comment: /* Expecting comment. - FM */ if (historical_comments) { /* * Any '>' terminates. - FM */ if (c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(comm); PUTC('<'); PUTS_TR(string->data); PUTC('>'); PSRCSTOP(comm); } else #endif handle_comment(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; context->state = S_text; break; } goto S_comment_put_c; } if (!context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = TRUE; break; } if (context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = FALSE; if (!context->end_comment) context->end_comment = TRUE; else if (!minimal_comments) /* * Validly treat '--' pairs as successive comments (for * minimal, any "--WHITE>" terminates). - FM */ context->end_comment = FALSE; break; } if (context->end_comment && c == '>') { /* * Terminate and handle the comment. 
- FM */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(comm); PUTC('<'); PUTS_TR(string->data); PUTC('>'); PSRCSTOP(comm); } else #endif handle_comment(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; context->state = S_text; break; } context->first_dash = FALSE; if (context->end_comment && !isspace(UCH(c))) context->end_comment = FALSE; S_comment_put_c: if (context->T.decode_utf8 && *context->utf_buf) { HTChunkPuts(string, context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } else if (!IS_CJK_TTY && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && UCH(saved_char_in) >= LYlowest_eightbit[context->outUCLYhndl]) { HTChunkPutUtf8Char(string, (0xf000 | UCH(saved_char_in))); } else { HTChunkPutUtf8Char(string, clong); } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); } else { HTChunkPutc(string, c); } break; case S_doctype: /* Expecting DOCTYPE. - FM */ if (context->doctype_bracket) { HTChunkPutc(string, c); if (c == ']') context->doctype_bracket = FALSE; break; } if (c == '[' && WHITE(string->data[string->size - 1])) { HTChunkPutc(string, c); context->doctype_bracket = TRUE; break; } if (c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_doctype(context); string->size = 0; context->state = S_text; break; } HTChunkPutc(string, c); break; case S_marked: /* Expecting marked section. 
- FM */ if (context->first_bracket && c == '[') { HTChunkPutc(string, c); context->first_bracket = FALSE; context->second_bracket = TRUE; break; } if (context->second_bracket && c == ']' && string->data[string->size - 1] == ']') { HTChunkPutc(string, c); context->second_bracket = FALSE; break; } if (!context->second_bracket && c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_marked(context); string->size = 0; context->state = S_text; break; } HTChunkPutc(string, c); break; case S_sgmlent: /* Expecting ENTITY. - FM */ if (!context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = TRUE; break; } if (context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = FALSE; if (!context->end_comment) context->end_comment = TRUE; else context->end_comment = FALSE; break; } if (context->end_comment && c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_sgmlent(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; context->state = S_text; break; } context->first_dash = FALSE; HTChunkPutc(string, c); break; case S_sgmlele: /* Expecting ELEMENT. 
- FM */ if (!context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = TRUE; break; } if (context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = FALSE; if (!context->end_comment) context->end_comment = TRUE; else context->end_comment = FALSE; break; } if (context->end_comment && c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_sgmlele(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; context->state = S_text; break; } context->first_dash = FALSE; HTChunkPutc(string, c); break; case S_sgmlatt: /* Expecting ATTLIST. - FM */ if (!context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = TRUE; break; } if (context->first_dash && c == '-') { HTChunkPutc(string, c); context->first_dash = FALSE; if (!context->end_comment) context->end_comment = TRUE; else context->end_comment = FALSE; break; } if (context->end_comment && c == '>') { HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(sgmlspecial); PUTC('<'); PUTS(string->data); PUTC('>'); PSRCSTOP(sgmlspecial); } else #endif handle_sgmlatt(context); string->size = 0; context->end_comment = FALSE; context->first_dash = FALSE; context->state = S_text; break; } context->first_dash = FALSE; HTChunkPutc(string, c); break; case S_tag_gap: /* Expecting attribute or '>' */ if (WHITE(c)) { /* PUTC(c); - no, done as special case */ break; /* Gap between attributes */ } if (c == '>') { /* End of tag */ #ifdef USE_PRETTYSRC if (!psrc_view) #endif if (context->current_tag->name) start_element(context); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); } #endif context->state = S_text; break; } HTChunkPutc(string, c); context->state = S_attr; /* Get attribute */ break; /* accumulating value */ case S_attr: if (WHITE(c) || (c == '>') 
|| (c == '=')) { /* End of word */ if ((c == '>') && (string->size == 1) && (string->data[0] == '/')) { if (context->extended_html && ignore_when_empty(context->current_tag)) { discard_empty(context); } } else { HTChunkTerminate(string); handle_attribute_name(context, string->data); } #ifdef USE_PRETTYSRC if (!psrc_view) { #endif string->size = 0; if (c == '>') { /* End of tag */ if (context->current_tag->name) start_element(context); context->state = S_text; break; } #ifdef USE_PRETTYSRC } else { PUTC(' '); if (context->current_attribute_number == INVALID) PSRCSTART(badattr); else PSRCSTART(attrib); if (attrname_transform != 1) { if (attrname_transform == 0) LYLowerCase(string->data); else LYUpperCase(string->data); } PUTS(string->data); if (c == '=' || WHITE(c)) PUTC(c); if (c == '=' || c == '>' || WHITE(c)) { if (context->current_attribute_number == INVALID) PSRCSTOP(badattr); else PSRCSTOP(attrib); } if (c == '>') { PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); context->state = S_text; break; } string->size = 0; } #endif context->state = (c == '=' ? 
S_equals : S_attr_gap); } else { HTChunkPutc(string, c); } break; case S_attr_gap: /* Expecting attribute or '=' or '>' */ if (WHITE(c)) { PUTC(c); break; /* Gap after attribute */ } if (c == '>') { /* End of tag */ #ifdef USE_PRETTYSRC if (psrc_view) { if (context->current_attribute_number == INVALID) { PSRCSTOP(badattr); } else { PSRCSTOP(attrib); } PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); } else #endif if (context->current_tag->name) start_element(context); context->state = S_text; break; } else if (c == '=') { #ifdef USE_PRETTYSRC if (psrc_view) { PUTC('='); if (context->current_attribute_number == INVALID) { PSRCSTOP(badattr); } else { PSRCSTOP(attrib); } } #endif context->state = S_equals; break; } HTChunkPutc(string, c); context->state = S_attr; /* Get next attribute */ break; case S_equals: /* After attr = */ if (WHITE(c)) { PUTC(c); break; /* Before attribute value */ } if (c == '>') { /* End of tag */ CTRACE((tfp, "SGML: found = but no value\n")); #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); } else #endif if (context->current_tag->name) start_element(context); context->state = S_text; break; } else if (c == '\'') { #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(attrval); PUTC(c); } #endif context->state = S_squoted; break; } else if (c == '"') { #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(attrval); PUTC(c); } #endif context->state = S_dquoted; break; } #ifdef USE_PRETTYSRC if (psrc_view) PSRCSTART(attrval); #endif context->state = S_value; /* no break! fall through to S_value and process current `c` */ case S_value: if (WHITE(c) || (c == '>')) { /* End of word */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (!end_if_prettysrc(context, string, 0)) #endif { #ifdef CJK_EX /* Quick hack. 
- JH7AYN */ if (IS_CJK_TTY) { if (string->data[0] == '$') { if (string->data[1] == 'B' || string->data[1] == '@') { char *jis_buf = 0; HTSprintf0(&jis_buf, "\033%s", string->data); TO_EUC((const unsigned char *) jis_buf, (unsigned char *) string->data); FREE(jis_buf); } } } #endif handle_attribute_value(context, string->data); } string->size = 0; if (c == '>') { /* End of tag */ #ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); } else #endif if (context->current_tag->name) start_element(context); context->state = S_text; break; } else context->state = S_tag_gap; } else if (context->T.decode_utf8 && *context->utf_buf) { HTChunkPuts(string, context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } else if (!IS_CJK_TTY && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && UCH(saved_char_in) >= LYlowest_eightbit[context->outUCLYhndl]) { HTChunkPutUtf8Char(string, (0xf000 | UCH(saved_char_in))); } else { HTChunkPutUtf8Char(string, clong); } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); } else { HTChunkPutc(string, c); } break; case S_squoted: /* Quoted attribute value */ if (c == '\'') { /* End of attribute value */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (!end_if_prettysrc(context, string, '\'')) #endif handle_attribute_value(context, string->data); string->size = 0; context->state = S_tag_gap; } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1213 */ /* * Setting up for possible single quotes in CJK escape sequences. 
* - Takuya ASADA (asada@three-a.co.jp) */ context->state = S_esc_sq; HTChunkPutc(string, c); } else if (context->T.decode_utf8 && *context->utf_buf) { HTChunkPuts(string, context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } else if (!IS_CJK_TTY && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && UCH(saved_char_in) >= LYlowest_eightbit[context->outUCLYhndl]) { HTChunkPutUtf8Char(string, (0xf000 | UCH(saved_char_in))); } else { HTChunkPutUtf8Char(string, clong); } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); } else { HTChunkPutc(string, c); } break; case S_dquoted: /* Quoted attribute value */ if (c == '"' || /* Valid end of attribute value */ (soft_dquotes && /* If emulating old Netscape bug, treat '>' */ c == '>')) { /* as a co-terminator of dquoted and tag */ HTChunkTerminate(string); #ifdef USE_PRETTYSRC if (!end_if_prettysrc(context, string, c)) #endif handle_attribute_value(context, string->data); string->size = 0; context->state = S_tag_gap; if (c == '>') /* We emulaan class="nt">a>, <a href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a>) </font></dt><dd> <dl> <dt><font face="helvetica, arial"><a href="ranger.gui.displayable.html#DisplayableContainer">DisplayableContainer</a> </font></dt></dl> </dd> </dl> </dd> <dt><font face="helvetica, arial"><a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a>(<a href="ranger.shared.html#Awareness">ranger.shared.Awareness</a>) </font></dt><dd> <dl> <dt><font face="helvetica, arial"><a href="ranger.gui.displayable.html#Displayable">Displayable</a>(<a href="ranger.shared.html#EnvironmentAware">ranger.shared.EnvironmentAware</a>, <a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a>, <a 
href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a>) </font></dt><dd> <dl> <dt><font face="helvetica, arial"><a href="ranger.gui.displayable.html#DisplayableContainer">DisplayableContainer</a> </font></dt></dl> </dd> </dl> </dd> </dl> <p> <table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> <tr bgcolor="#ffc8d8"> <td colspan=3 valign=bottom>&nbsp;<br> <font color="#000000" face="helvetica, arial"><a name="Displayable">class <strong>Displayable</strong></a>(<a href="ranger.shared.html#EnvironmentAware">ranger.shared.EnvironmentAware</a>, <a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a>, <a href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a>)</font></td></tr> <tr bgcolor="#ffc8d8"><td rowspan=2><tt>&nbsp;&nbsp;&nbsp;</tt></td> <td colspan=2><tt>Displayables&nbsp;are&nbsp;objects&nbsp;which&nbsp;are&nbsp;displayed&nbsp;on&nbsp;the&nbsp;screen.<br> &nbsp;<br> This&nbsp;is&nbsp;just&nbsp;the&nbsp;abstract&nbsp;class,&nbsp;defining&nbsp;basic&nbsp;operations<br> such&nbsp;as&nbsp;resizing,&nbsp;printing,&nbsp;changing&nbsp;colors.<br> Subclasses&nbsp;of&nbsp;displayable&nbsp;can&nbsp;extend&nbsp;these&nbsp;methods:<br> &nbsp;<br> <a href="#Displayable-draw">draw</a>()&nbsp;--&nbsp;draw&nbsp;the&nbsp;object.&nbsp;Is&nbsp;only&nbsp;called&nbsp;if&nbsp;visible.<br> <a href="#Displayable-poke">poke</a>()&nbsp;--&nbsp;is&nbsp;called&nbsp;just&nbsp;before&nbsp;<a href="#Displayable-draw">draw</a>(),&nbsp;even&nbsp;if&nbsp;not&nbsp;visible.<br> <a href="#Displayable-finalize">finalize</a>()&nbsp;--&nbsp;called&nbsp;after&nbsp;all&nbsp;objects&nbsp;finished&nbsp;drawing.<br> <a href="#Displayable-click">click</a>(event)&nbsp;--&nbsp;called&nbsp;with&nbsp;a&nbsp;MouseEvent.&nbsp;This&nbsp;is&nbsp;called&nbsp;on&nbsp;all<br> 
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;visible&nbsp;objects&nbsp;under&nbsp;the&nbsp;mouse,&nbsp;until&nbsp;one&nbsp;returns&nbsp;True.<br> <a href="#Displayable-press">press</a>(key)&nbsp;--&nbsp;called&nbsp;after&nbsp;a&nbsp;key&nbsp;press&nbsp;on&nbsp;focused&nbsp;objects.<br> <a href="#Displayable-destroy">destroy</a>()&nbsp;--&nbsp;called&nbsp;before&nbsp;destroying&nbsp;the&nbsp;displayable&nbsp;object<br> &nbsp;<br> Additionally,&nbsp;there&nbsp;are&nbsp;these&nbsp;methods:<br> &nbsp;<br> <a href="#Displayable-__contains__">__contains__</a>(item)&nbsp;--&nbsp;is&nbsp;the&nbsp;item&nbsp;(y,&nbsp;x)&nbsp;inside&nbsp;the&nbsp;widget?<br> &nbsp;<br> These&nbsp;attributes&nbsp;are&nbsp;set:<br> &nbsp;<br> Modifiable:<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;focused&nbsp;--&nbsp;Focused&nbsp;objects&nbsp;receive&nbsp;<a href="#Displayable-press">press</a>()&nbsp;calls.<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;visible&nbsp;--&nbsp;Visible&nbsp;objects&nbsp;receive&nbsp;<a href="#Displayable-draw">draw</a>()&nbsp;and&nbsp;<a href="#Displayable-finalize">finalize</a>()&nbsp;calls<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;need_redraw&nbsp;--&nbsp;Should&nbsp;the&nbsp;widget&nbsp;be&nbsp;redrawn?&nbsp;This&nbsp;variable&nbsp;may<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;be&nbsp;set&nbsp;at&nbsp;various&nbsp;places&nbsp;in&nbsp;the&nbsp;script&nbsp;and&nbsp;should&nbsp;eventually&nbsp;be<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;handled&nbsp;(and&nbsp;unset)&nbsp;in&nbsp;the&nbsp;<a href="#Displayable-draw">draw</a>()&nbsp;method.<br> &nbsp;<br> Read-Only:&nbsp;(i.e.&nbsp;recommended&nbsp;not&nbsp;to&nbsp;change&nbsp;manually)<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;win&nbsp;--&nbsp;the&nbsp;own&nbsp;curses&nbsp;window&nbsp;object<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;parent&nbsp;--&nbsp;the&nbsp;parent&nbsp;(<a href="#DisplayableContainer">DisplayableContainer</a>)&nbsp;object&nbsp;or&nbsp;None<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;x,&nbsp;y,&nbsp;wid,&nbsp;hei&nbsp;--&nbsp;absolute&nbsp;coordinates&nbsp;and&nbsp;boundaries<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;settings,&nbsp;fm,&nbsp;env&nbsp;--&nbsp;inherited&nbsp;shared&nbsp;variables<br>&nbsp;</tt></td></tr> <tr><td>&nbsp;</td> <td width="100%"><dl><dt>Method resolution order:</dt> <dd><a href="ranger.gui.displayable.html#Displayable">Displayable</a></dd> <dd><a href="ranger.shared.html#EnvironmentAware">ranger.shared.EnvironmentAware</a></dd> <dd><a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a></dd> <dd><a href="ranger.shared.html#Awareness">ranger.shared.Awareness</a></dd> <dd><a href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a></dd> <dd><a href="ranger.shared.settings.html#SettingsAware"&g