/* * $LynxId: HTML.c,v 1.155 2012/02/10 18:36:39 tom Exp $ * * Structured stream to Rich hypertext converter * ============================================ * * This generates a hypertext object. It converts from the * structured stream interface of HTML events into the style- * oriented interface of the HText.h interface. This module is * only used in clients and should not be linked into servers. * * Override this module if making a new GUI browser. * * Being Overidden * */ #define HTSTREAM_INTERNAL 1 #include #define Lynx_HTML_Handler #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VMS #include #endif /* VMS */ #ifdef USE_PRETTYSRC #include #endif #ifdef USE_COLOR_STYLE #include #include #include #include #undef SELECTED_STYLES #define pHText_changeStyle(X,Y,Z) {} #if OMIT_SCN_KEEPING # define HCODE_TO_STACK_OFF(x) /*(CSHASHSIZE+1)*/ 88888 /*special value. */ #else # define HCODE_TO_STACK_OFF(x) x /*pass computed value */ #endif #endif /* USE_COLOR_STYLE */ #ifdef USE_SOURCE_CACHE #include #endif #include #include #include #include #define STACKLEVEL(me) ((me->stack + MAX_NESTING - 1) - me->sp) #define DFT_TEXTAREA_COLS 60 #define DFT_TEXTAREA_ROWS 4 #define MAX_TEXTAREA_COLS LYcolLimit #define MAX_TEXTAREA_ROWS (3 * LYlines) #define LimitValue(name, value) \ if (name > value) { \ CTRACE((tfp, "Limited " #name " to %d, was %d\n", \ value, name)); \ name = value; \ } struct _HTStream { const HTStreamClass *isa; #ifdef USE_SOURCE_CACHE HTParentAnchor *anchor; FILE *fp; char *filename; HTChunk *chunk; HTChunk *last_chunk; /* the last chunk in a chain! */ const HTStreamClass *actions; HTStream *target; int status; #else /* .... 
*/ #endif }; static HTStyleSheet *styleSheet = NULL; /* Application-wide */ /* Module-wide style cache */ static HTStyle *styles[HTML_ELEMENTS + LYNX_HTML_EXTRA_ELEMENTS]; /* adding 24 nested list styles */ /* and 3 header alignment styles */ /* and 3 div alignment styles */ static HTStyle *default_style = NULL; const char *LYToolbarName = "LynxPseudoToolbar"; /* used to turn off a style if the HTML author forgot to static int i_prior_style = -1; */ /* * Private function.... */ static int HTML_end_element(HTStructured * me, int element_number, char **include); static int HTML_start_element(HTStructured * me, int element_number, const BOOL *present, STRING2PTR value, int tag_charset, char **include); /* * If we have verbose_img set, display labels for images. */ #define VERBOSE_IMG(value,src_type,string) \ ((verbose_img) ? (newtitle = MakeNewTitle(value,src_type)): string) static char *MakeNewTitle(STRING2PTR value, int src_type); static char *MakeNewImageValue(STRING2PTR value); static char *MakeNewMapValue(STRING2PTR value, const char *mapstr); /* Set an internal flag that the next call to a stack-affecting method * is only internal and the stack manipulation should be skipped. - kw */ #define SET_SKIP_STACK(el_num) if (HTML_dtd.tags[el_num].contents != SGML_EMPTY) \ { me->skip_stack++; } void strtolower(char *i) { if (!i) return; while (*i) { *i = (char) TOLOWER(*i); i++; } } /* Flattening the style structure * ------------------------------ * * On the NeXT, and on any read-only browser, it is simpler for the text to * have a sequence of styles, rather than a nested tree of styles. In this * case we have to flatten the structure as it arrives from SGML tags into a * sequence of styles. */ /* * If style really needs to be set, call this. 
*/ void actually_set_style(HTStructured * me) { if (!me->text) { /* First time through */ LYGetChartransInfo(me); UCSetTransParams(&me->T, me->UCLYhndl, me->UCI, HTAnchor_getUCLYhndl(me->node_anchor, UCT_STAGE_HTEXT), HTAnchor_getUCInfoStage(me->node_anchor, UCT_STAGE_HTEXT)); me->text = HText_new2(me->node_anchor, me->target); HText_beginAppend(me->text); HText_setStyle(me->text, me->new_style); me->in_word = NO; LYCheckForContentBase(me); } else { HText_setStyle(me->text, me->new_style); } me->old_style = me->new_style; me->style_change = NO; } /* * If you THINK you need to change style, call this. */ static void change_paragraph_style(HTStructured * me, HTStyle *style) { if (me->new_style != style) { me->style_change = YES; me->new_style = style; } me->in_word = NO; } /* * Return true if we should write a message (to LYNXMESSAGES, or the trace * file) telling about some bad HTML that we've found. */ BOOL LYBadHTML(HTStructured * me) { BOOL code = FALSE; switch ((enumBadHtml) cfg_bad_html) { case BAD_HTML_IGNORE: break; case BAD_HTML_TRACE: code = TRUE; break; case BAD_HTML_MESSAGE: code = TRUE; break; case BAD_HTML_WARN: /* * If we're already tracing, do not add a warning. */ if (!TRACE && !me->inBadHTML) { HTUserMsg(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; } code = TRACE; break; } return code; } /* * Handle the formatted message. */ void LYShowBadHTML(const char *message) { switch ((enumBadHtml) cfg_bad_html) { case BAD_HTML_IGNORE: break; case BAD_HTML_TRACE: CTRACE((tfp, "%s", message)); break; case BAD_HTML_MESSAGE: CTRACE((tfp, "%s", message)); LYstore_message(message); break; case BAD_HTML_WARN: CTRACE((tfp, "%s", message)); break; } } /*_________________________________________________________________________ * * A C T I O N R O U T I N E S */ /* FIXME: this should be amended to do the substitution only when not in a * multibyte stream. 
 */
#ifdef EXP_JAPANESE_SPACES
#define FIX_JAPANESE_SPACES \
 (HTCJK == CHINESE || HTCJK == JAPANESE || HTCJK == TAIPEI)
/* don't replace '\n' with ' ' if Chinese or Japanese - HN */
#else
#define FIX_JAPANESE_SPACES 0
#endif

/*	Character handling
 *	------------------
 *
 * Accept one character of element content and route it according to the
 * element on top of the stack (me->sp[0]):
 *
 *  1. When LYMapsOnly is set, everything outside an OBJECT is dropped.
 *  2. Line ends are normalized: CR becomes LF, and the LF of a CR-LF pair
 *     is swallowed (me->lastraw remembers the previous raw character).
 *  3. For elements whose content is collected in an HTChunk (TITLE, STYLE,
 *     SCRIPT, OBJECT, TEXTAREA, SELECT/OPTION, MATH) the character goes to
 *     the chunk and the function returns.  COMMENT content is discarded.
 *  4. Otherwise the character is appended to me->text, either literally
 *     (preformatted/listing styles) or with white space collapsing
 *     (free-format text).
 *  5. Finally the HText "last character" is updated so that later white
 *     space collapsing sees newline and tab as a plain space.
 *
 * Parameters:
 *	me	the structured-stream object being written to.
 *	c	the character; it is reduced with UCH() before use.
 */
void HTML_put_character(HTStructured * me, int c)
{
    unsigned uc = UCH(c);

    /*
     * Ignore all non-MAP content when just scanning a document for MAPs. - FM
     */
    if (LYMapsOnly && me->sp[0].tag_number != HTML_OBJECT)
	return;

    c = (int) uc;

    /*
     * Do EOL conversion if needed. - FM
     *
     * Convert EOL styles:
     *   macintosh:  cr    --> lf
     *   ascii:      cr-lf --> lf
     *   unix:       lf    --> lf
     */
    if ((me->lastraw == '\r') && c == '\n') {
	/* The preceding CR was already emitted as LF; drop this LF. */
	me->lastraw = -1;
	return;
    }
    me->lastraw = c;
    if (c == '\r') {
	c = '\n';
	uc = UCH(c);
    }
    /*
     * Note: c is not assigned again below, so from here on it is never
     * '\r'; the remaining tests against '\r' are purely defensive.
     */

    /*
     * Handle SGML_LITTERAL tags that have HTChunk elements. - FM
     */
    switch (me->sp[0].tag_number) {

    case HTML_COMMENT:
	return;			/* Do Nothing */

    case HTML_TITLE:
	if (c == LY_SOFT_HYPHEN)
	    return;
	if (c != '\n' && c != '\t' && c != '\r') {
	    HTChunkPutc(&me->title, uc);
	} else if (FIX_JAPANESE_SPACES) {
	    /* CJK: a tab becomes a space, a line end is dropped entirely. */
	    if (c == '\t') {
		HTChunkPutc(&me->title, ' ');
	    } else {
		return;
	    }
	} else {
	    /* Any white space in a title is stored as a single space. */
	    HTChunkPutc(&me->title, ' ');
	}
	return;

    case HTML_STYLE:
	HTChunkPutc(&me->style_block, uc);
	return;

    case HTML_SCRIPT:
	HTChunkPutc(&me->script, uc);
	return;

    case HTML_OBJECT:
	HTChunkPutc(&me->object, uc);
	return;

    case HTML_TEXTAREA:
	HTChunkPutc(&me->textarea, uc);
	return;

    case HTML_SELECT:
    case HTML_OPTION:
	HTChunkPutc(&me->option, uc);
	return;

    case HTML_MATH:
	HTChunkPutc(&me->math, uc);
	return;

    default:
	if (me->inSELECT) {
	    /*
	     * If we are within a SELECT not caught by the cases above -
	     * HTML_SELECT or HTML_OPTION may not be the last element pushed on
	     * the style stack if there were invalid markup tags within a
	     * SELECT element.  For error recovery, treat text as part of the
	     * OPTION text, it is probably meant to show up as user-visible
	     * text.  Having A as an open element while in SELECT is really
	     * sick, don't make anchor text part of the option text in that
	     * case since the option text will probably just be discarded.  -
	     * kw
	     */
	    if (me->sp[0].tag_number == HTML_A)
		break;
	    HTChunkPutc(&me->option, uc);
	    return;
	}
	break;
    }				/* end first switch */

    /*
     * Handle all other tag content. - FM
     */
    switch (me->sp[0].tag_number) {

    case HTML_PRE:		/* Formatted text */
	/*
	 * We guarantee that the style is up-to-date in begin_litteral.  But we
	 * still want to strip \r's.
	 *
	 * A newline is also suppressed when it directly follows a LABEL
	 * outside a paragraph, or when it is the first character seen in
	 * this PRE (me->inPRE not yet set).
	 */
	if (c != '\r' &&
	    !(c == '\n' && me->inLABEL && !me->inP) &&
	    !(c == '\n' && !me->inPRE)) {
	    me->inP = TRUE;
	    me->inLABEL = FALSE;
	    HText_appendCharacter(me->text, c);
	}
	me->inPRE = TRUE;
	break;

    case HTML_LISTING:		/* Literal text */
    case HTML_XMP:
    case HTML_PLAINTEXT:
	/*
	 * We guarantee that the style is up-to-date in begin_litteral.  But we
	 * still want to strip \r's.
	 */
	if (c != '\r') {
	    me->inP = TRUE;
	    me->inLABEL = FALSE;
	    HText_appendCharacter(me->text, c);
	}
	break;

    default:
	/*
	 * Free format text.
	 *
	 * The top element is not itself a literal element, but the style in
	 * effect may still be a preformatted or listing/example style, in
	 * which case the same rules as above are applied.
	 */
	if (me->sp->style->id == ST_Preformatted) {
	    if (c != '\r' &&
		!(c == '\n' && me->inLABEL && !me->inP) &&
		!(c == '\n' && !me->inPRE)) {
		me->inP = TRUE;
		me->inLABEL = FALSE;
		HText_appendCharacter(me->text, c);
	    }
	    me->inPRE = TRUE;

	} else if (me->sp->style->id == ST_Listing ||
		   me->sp->style->id == ST_Example) {
	    if (c != '\r') {
		me->inP = TRUE;
		me->inLABEL = FALSE;
		HText_appendCharacter(me->text, c);
	    }

	} else {
	    /*
	     * Genuinely free-format: leading newline/space after a pending
	     * style change is dropped, and runs of white space collapse to
	     * one space.
	     */
	    if (me->style_change) {
		if ((c == '\n') || (c == ' '))
		    return;	/* Ignore it */
		UPDATE_STYLE;
	    }
	    if (c == '\n') {
		if (!FIX_JAPANESE_SPACES) {
		    if (me->in_word) {
			if (HText_getLastChar(me->text) != ' ') {
			    me->inP = TRUE;
			    me->inLABEL = FALSE;
			    HText_appendCharacter(me->text, ' ');
			}
			me->in_word = NO;
		    }
		}

	    } else if (c == ' ' || c == '\t') {
		if (HText_getLastChar(me->text) != ' ') {
		    me->inP = TRUE;
		    me->inLABEL = FALSE;
		    HText_appendCharacter(me->text, ' ');
		}

	    } else if (c == '\r') {
		/* ignore */
	    } else {
		me->inP = TRUE;
		me->inLABEL = FALSE;
		HText_appendCharacter(me->text, c);
		me->in_word = YES;
	    }
	}
    }				/* end second switch */

    if (c == '\n' || c == '\t') {
	HText_setLastChar(me->text, ' ');	/* set it to a generic separator */

	/*
	 * \r's are ignored.  In order to keep collapsing spaces correctly we
	 * must default back to the previous separator if there was one.
	 */
    } else if (c == '\r' && HText_getLastChar(me->text) == ' ') {
	HText_setLastChar(me->text, ' ');	/* set it to a generic separator */
    } else {
	HText_setLastChar(me->text, c);
    }
}

/*	String handling
 *	---------------
 *
 *	This is written separately from put_character because the loop can
 *	in some cases be promoted to a higher function call level for speed.
 */
/*
 * Append the NUL-terminated string `s' as content of the element on top of
 * the stack.
 *
 * - A NULL string is ignored, as is any string outside an OBJECT while
 *   LYMapsOnly is set.
 * - With USE_PRETTYSRC, if psrc_convert_string is set the string is first
 *   copied and passed through TRANSLATE_AND_UNESCAPE_ENTITIES; the copy is
 *   freed and the flag cleared before returning.
 * - Chunk-collecting elements (TITLE, STYLE, SCRIPT, OBJECT, TEXTAREA,
 *   SELECT/OPTION, MATH) receive the string unchanged; COMMENT content is
 *   dropped; PRE/LISTING/XMP/PLAINTEXT text goes straight to the HText.
 * - Everything else is either appended as-is (style not free-format) or
 *   appended character by character with white space collapsing.
 */
void HTML_put_string(HTStructured * me, const char *s)
{
#ifdef USE_PRETTYSRC
    char *translated_string = NULL;
#endif

    if (s == NULL || (LYMapsOnly && me->sp[0].tag_number != HTML_OBJECT))
	return;
#ifdef USE_PRETTYSRC
    if (psrc_convert_string) {
	StrAllocCopy(translated_string, s);
	TRANSLATE_AND_UNESCAPE_ENTITIES(&translated_string, TRUE, FALSE);
	/* From here on `s' refers to the translated copy. */
	s = (const char *) translated_string;
    }
#endif

    switch (me->sp[0].tag_number) {

    case HTML_COMMENT:
	break;			/* Do Nothing */

    case HTML_TITLE:
	HTChunkPuts(&me->title, s);
	break;

    case HTML_STYLE:
	HTChunkPuts(&me->style_block, s);
	break;

    case HTML_SCRIPT:
	HTChunkPuts(&me->script, s);
	break;

    case HTML_PRE:		/* Formatted text */
    case HTML_LISTING:		/* Literal text */
    case HTML_XMP:
    case HTML_PLAINTEXT:
	/*
	 * We guarantee that the style is up-to-date in begin_litteral
	 */
	HText_appendText(me->text, s);
	break;

    case HTML_OBJECT:
	HTChunkPuts(&me->object, s);
	break;

    case HTML_TEXTAREA:
	HTChunkPuts(&me->textarea, s);
	break;

    case HTML_SELECT:
    case HTML_OPTION:
	HTChunkPuts(&me->option, s);
	break;

    case HTML_MATH:
	HTChunkPuts(&me->math, s);
	break;

    default:			/* Free format text? */
	if (!me->sp->style->freeFormat) {
	    /*
	     * If we are within a preformatted text style not caught by the
	     * cases above (HTML_PRE or similar may not be the last element
	     * pushed on the style stack).  - kw
	     */
#ifdef USE_PRETTYSRC
	    if (psrc_view) {
		/*
		 * We do this so that a raw '\r' in the string will not be
		 * interpreted as an internal request to break a line - passing
		 * '\r' to HText_appendText is treated by it as a request to
		 * insert a blank line - VH
		 */
		for (; *s; ++s)
		    HTML_put_character(me, *s);
	    } else
#endif
		HText_appendText(me->text, s);
	    break;
	} else {
	    const char *p = s;
	    char c;

	    if (me->style_change) {
		for (; *p && ((*p == '\n') || (*p == '\r') ||
			      (*p == ' ') || (*p == '\t')); p++) ;	/* Ignore leaders */
		/* Nothing but white space: leave the style change pending. */
		if (!*p)
		    break;
		UPDATE_STYLE;
	    }
	    for (; *p; p++) {
		if (*p == 13 && p[1] != 10) {
		    /*
		     * Treat any '\r' which is not followed by '\n' as '\n', to
		     * account for macintosh lineend in ALT attributes etc.  -
		     * kw
		     */
		    c = '\n';
		} else {
		    c = *p;
		}
		if (me->style_change) {
		    if ((c == '\n') || (c == ' ') || (c == '\t'))
			continue;	/* Ignore it */
		    UPDATE_STYLE;
		}
		if (c == '\n') {
		    if (!FIX_JAPANESE_SPACES) {
			if (me->in_word) {
			    if (HText_getLastChar(me->text) != ' ')
				HText_appendCharacter(me->text, ' ');
			    me->in_word = NO;
			}
		    }

		} else if (c == ' ' || c == '\t') {
		    if (HText_getLastChar(me->text) != ' ')
			HText_appendCharacter(me->text, ' ');

		} else if (c == '\r') {
		    /* ignore (this is the '\r' of a "\r\n" pair) */
		} else {
		    HText_appendCharacter(me->text, c);
		    me->in_word = YES;
		}

		/* set the Last Character */
		if (c == '\n' || c == '\t') {
		    /* set it to a generic separator */
		    HText_setLastChar(me->text, ' ');
		} else if (c == '\r' &&
			   HText_getLastChar(me->text) == ' ') {
		    /*
		     * \r's are ignored.  In order to keep collapsing spaces
		     * correctly, we must default back to the previous
		     * separator, if there was one.  So we set LastChar to a
		     * generic separator.
		     */
		    HText_setLastChar(me->text, ' ');
		} else {
		    HText_setLastChar(me->text, c);
		}
	    }			/* for */
	}
    }				/* end switch */
#ifdef USE_PRETTYSRC
    if (psrc_convert_string) {
	psrc_convert_string = FALSE;
	FREE(translated_string);
    }
#endif
}

/*	Buffer write
 *	------------
 *
 * Feed the `l' bytes starting at `s' to HTML_put_character(), one at a
 * time.  Nothing is written outside an OBJECT while LYMapsOnly is set.
 */
void HTML_write(HTStructured * me, const char *s, int l)
{
    const char *p;
    const char *e = s + l;	/* one past the last byte to write */

    if (LYMapsOnly && me->sp[0].tag_number != HTML_OBJECT)
	return;

    for (p = s; p < e; p++)
	HTML_put_character(me, *p);
}

/*
 *  "Internal links" are hyperlinks whose source and destination are
 *  within the same document, and for which the destination is given
 *  as a URL Reference with an empty URL, but possibly with a non-empty
 *  #fragment.  (This terminology re URL-Reference vs. URL follows the
 *  Fielding URL syntax and semantics drafts).
 *  Differences:
 *  (1) The document's base (in whatever way it is given) is not used for
 *	resolving internal link references.
 *  (2) Activating an internal link should not result in a new retrieval
 *	of a copy of the document.
 *  (3) Internal links are the only way to refer with a hyperlink to a document
 *	(or a location in it) which is only known as the result of a POST
 *	request (doesn't have a URL from which the document can be retrieved
 *	with GET), and can only be used from within that document.
 *
 * *If DONT_TRACK_INTERNAL_LINKS is not defined, we keep track of whether a
 *  link destination was given as an internal link.  This information is
 *  recorded in the type of the link between anchor objects, and is available
 *  to the HText object and the mainloop from there.  URL References to
 *  internal destinations are still resolved into an absolute form before
 *  being passed on, but using the current stream's retrieval address instead
 *  of the base URL.
 *  Examples:  (replace [...] to have a valid absolute URL)
 *  In document retrieved from [...]/mypath/mydoc.htm w/ base [...]/otherpath/
 *  a. HREF="[...]/mypath/mydoc.htm"	  -> [...]/mypath/mydoc.htm
 *  b. HREF="[...]/mypath/mydoc.htm#frag" -> [...]/mypath/mydoc.htm#frag
 *  c. HREF="mydoc.htm"			  -> [...]/otherpath/mydoc.htm
 *  d. HREF="mydoc.htm#frag"		  -> [...]/otherpath/mydoc.htm#frag
 *  e. HREF=""		      -> [...]/mypath/mydoc.htm	     (marked internal)
 *  f. HREF="#frag"	      -> [...]/mypath/mydoc.htm#frag (marked internal)
 *
 * *If DONT_TRACK_INTERNAL_LINKS is defined, URL-less URL-References are
 *  resolved differently from URL-References with a non-empty URL (using the
 *  current stream's retrieval address instead of the base), but we make no
 *  further distinction.  Resolution is then as in the examples above, except
 *  that there is no "(marked internal)".
 *
 * *Note that this doesn't apply to form ACTIONs (always resolved using base,
 *  never marked internal).  Also other references encountered or generated
 *  are not marked internal, whether they have a URL or not, if in a given
 *  context an internal link makes no sense (e.g., IMG SRC=).
 */

/* A flag is used to keep track of whether an "URL reference" encountered
   had a real "URL" or not.  In the latter case, it will be marked as
   "internal".  The flag is set before we start messing around with the
   string (resolution of relative URLs etc.).  This variable only used
   locally here, don't confuse with LYinternal_flag which is for
   overriding non-caching similar to LYoverride_no_cache. - kw */
#define CHECK_FOR_INTERN(flag,s) \
	  flag = (BOOLEAN) ((s && (*s=='#' || *s=='\0')) ? TRUE : FALSE)

/* Last argument to pass to HTAnchor_findChildAndLink() calls,
   just an abbreviation. - kw */
/* NOTE: the expansion tests whatever identifier named `intern_flag' is in
   scope at the point of use, so it is only meaningful where that name is a
   boolean value rather than a pointer to one. */
#define INTERN_LT (HTLinkType *)(intern_flag ? 
HTInternalLink : NULL) #ifdef USE_COLOR_STYLE static char *Style_className = 0; static char *Style_className_end = 0; static size_t Style_className_len = 0; static int hcode; #ifdef LY_FIND_LEAKS static void free_Style_className(void) { FREE(Style_className); } #endif static void addClassName(const char *prefix, const char *actual, size_t length) { size_t offset = strlen(prefix); size_t have = (unsigned) (Style_className_end - Style_className); size_t need = (offset + length + 1); if ((have + need) >= Style_className_len) { Style_className_len += 1024 + 2 * (have + need); if (Style_className == 0) { Style_className = typeMallocn(char, Style_className_len); } else { Style_className = typeRealloc(char, Style_className, Style_className_len); } if (Style_className == NULL) outofmem(__FILE__, "addClassName"); assert(Style_className != NULL); Style_className_end = Style_className + have; } if (offset) strcpy(Style_className_end, prefix); if (length) memcpy(Style_className_end + offset, actual, length); Style_className_end[offset + length] = '\0'; strtolower(Style_className_end); Style_className_end += (offset + length); } #else #define addClassName(prefix, actual, length) /* nothing */ #endif #ifdef USE_PRETTYSRC static void HTMLSRC_apply_markup(HTStructured * context, HTlexeme lexeme, int start, int tag_charset) { HT_tagspec *ts = *((start ? lexeme_start : lexeme_end) + lexeme); while (ts) { #ifdef USE_COLOR_STYLE if (ts->start) { current_tag_style = ts->style; force_current_tag_style = TRUE; forced_classname = ts->class_name; force_classname = TRUE; } #endif CTRACE((tfp, ts->start ? 
"SRCSTART %d\n" : "SRCSTOP %d\n", (int) lexeme)); if (ts->start) HTML_start_element(context, (int) ts->element, ts->present, (STRING2PTR) ts->value, tag_charset, NULL); else HTML_end_element(context, (int) ts->element, NULL); ts = ts->next; } } # define START TRUE # define STOP FALSE # define PSRCSTART(x) HTMLSRC_apply_markup(me,HTL_##x,START,tag_charset) # define PSRCSTOP(x) HTMLSRC_apply_markup(me,HTL_##x,STOP,tag_charset) # define PUTC(x) HTML_put_character(me,x) # define PUTS(x) HTML_put_string(me,x) #endif /* USE_PRETTYSRC */ static void LYStartArea(HTStructured * obj, const char *href, const char *alt, const char *title, int tag_charset) { BOOL new_present[HTML_AREA_ATTRIBUTES]; const char *new_value[HTML_AREA_ATTRIBUTES]; int i; for (i = 0; i < HTML_AREA_ATTRIBUTES; i++) new_present[i] = NO; if (alt) { new_present[HTML_AREA_ALT] = YES; new_value[HTML_AREA_ALT] = (const char *) alt; } if (non_empty(title)) { new_present[HTML_AREA_TITLE] = YES; new_value[HTML_AREA_TITLE] = (const char *) title; } if (href) { new_present[HTML_AREA_HREF] = YES; new_value[HTML_AREA_HREF] = (const char *) href; } (*obj->isa->start_element) (obj, HTML_AREA, new_present, new_value, tag_charset, 0); } static void LYHandleFIG(HTStructured * me, const BOOL *present, STRING2PTR value, int isobject, int imagemap, const char *id, const char *src, int convert, int start, BOOL *intern_flag GCC_UNUSED) { if (start == TRUE) { me->inFIG = TRUE; if (me->inA) { SET_SKIP_STACK(HTML_A); HTML_end_element(me, HTML_A, NULL); } if (!isobject) { LYEnsureDoubleSpace(me); LYResetParagraphAlignment(me); me->inFIGwithP = TRUE; } else { me->inFIGwithP = FALSE; HTML_put_character(me, ' '); /* space char may be ignored */ } if (non_empty(id)) { if (present && convert) { CHECK_ID(HTML_FIG_ID); } else LYHandleID(me, id); } me->in_word = NO; me->inP = FALSE; if (clickable_images && non_empty(src)) { char *href = NULL; StrAllocCopy(href, src); CHECK_FOR_INTERN(*intern_flag, href); LYLegitimizeHREF(me, &href, 
TRUE, TRUE); if (*href) { me->CurrentA = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ NULL, /* Tag */ href, /* Addresss */ INTERN_LT); /* Type */ HText_beginAnchor(me->text, me->inUnderline, me->CurrentA); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_START_CHAR); HTML_put_string(me, (isobject ? (imagemap ? "(IMAGE)" : "(OBJECT)") : "[FIGURE]")); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_END_CHAR); HText_endAnchor(me->text, 0); HTML_put_character(me, '-'); HTML_put_character(me, ' '); /* space char may be ignored */ me->in_word = NO; } FREE(href); } } else { /* handle end tag */ if (me->inFIGwithP) { LYEnsureDoubleSpace(me); } else { HTML_put_character(me, ' '); /* space char may be ignored */ } LYResetParagraphAlignment(me); me->inFIGwithP = FALSE; me->inFIG = FALSE; change_paragraph_style(me, me->sp->style); /* Often won't really change */ if (me->List_Nesting_Level >= 0) { UPDATE_STYLE; HText_NegateLineOne(me->text); } } } static void clear_objectdata(HTStructured * me) { if (me) { HTChunkClear(&me->object); me->object_started = FALSE; me->object_declare = FALSE; me->object_shapes = FALSE; me->object_ismap = FALSE; FREE(me->object_usemap); FREE(me->object_id); FREE(me->object_title); FREE(me->object_data); FREE(me->object_type); FREE(me->object_classid); FREE(me->object_codebase); FREE(me->object_codetype); FREE(me->object_name); } } #define HTParseALL(pp,pconst) \ { char* free_me = *pp; \ *pp = HTParse(*pp, pconst, PARSE_ALL); \ FREE(free_me); \ } /* Start Element * ------------- */ static int HTML_start_element(HTStructured * me, int element_number, const BOOL *present, STRING2PTR value, int tag_charset, char **include) { char *alt_string = NULL; char *id_string = NULL; char *newtitle = NULL; char **pdoctitle = NULL; char *href = NULL; char *map_href = NULL; char *title = NULL; char *I_value = NULL; char *I_name = NULL; char *temp = NULL; const char *Base = NULL; int dest_char_set = -1; HTParentAnchor 
*dest = NULL; /* An anchor's destination */ BOOL dest_ismap = FALSE; /* Is dest an image map script? */ HTChildAnchor *ID_A = NULL; /* HTML_foo_ID anchor */ int url_type = 0, i = 0; char *cp = NULL; HTMLElement ElementNumber = (HTMLElement) element_number; BOOL intern_flag = FALSE; short stbl_align = HT_ALIGN_NONE; int status = HT_OK; #ifdef USE_COLOR_STYLE char *class_name; int class_used = 0; #endif #ifdef USE_PRETTYSRC if (psrc_view && !sgml_in_psrc_was_initialized) { if (!psrc_nested_call) { HTTag *tag = &HTML_dtd.tags[element_number]; char buf[200]; const char *p; if (psrc_first_tag) { psrc_first_tag = FALSE; /* perform the special actions on the begining of the document. It's assumed that all lynx modules start generating html from tag (ie not a text) so we are able to trap this moment and initialize. */ psrc_nested_call = TRUE; HTML_start_element(me, HTML_BODY, NULL, NULL, tag_charset, NULL); HTML_start_element(me, HTML_PRE, NULL, NULL, tag_charset, NULL); PSRCSTART(entire); psrc_nested_call = FALSE; } psrc_nested_call = TRUE; /*write markup for tags and exit */ PSRCSTART(abracket); PUTC('<'); PSRCSTOP(abracket); PSRCSTART(tag); if (tagname_transform != 0) PUTS(tag->name); else { LYStrNCpy(buf, tag->name, sizeof(buf) - 1); LYLowerCase(buf); PUTS(buf); } if (present) { for (i = 0; i < tag->number_of_attributes; i++) if (present[i]) { PUTC(' '); PSRCSTART(attrib); if (attrname_transform != 0) PUTS(tag->attributes[i].name); else { LYStrNCpy(buf, tag->attributes[i].name, sizeof(buf) - 1); LYLowerCase(buf); PUTS(buf); } if (value[i]) { char q = '"'; /*0 in dquotes, 1 - in quotes, 2 mixed */ char kind = (char) (!strchr(value[i], '"') ? 0 : !strchr(value[i], '\'') ? q = '\'', 1 : 2); PUTC('='); PSRCSTOP(attrib); PSRCSTART(attrval); PUTC(q); /*is it special ? 
*/ if (tag->attributes[i].type == HTMLA_ANAME) { HTStartAnchor(me, value[i], NULL); HTML_end_element(me, HTML_A, NULL); } else if (tag->attributes[i].type == HTMLA_HREF) { PSRCSTART(href); HTStartAnchor(me, NULL, value[i]); } if (kind != 2) PUTS(value[i]); else for (p = value[i]; *p; p++) if (*p != '"') PUTC(*p); else PUTS("""); /*is it special ? */ if (tag->attributes[i].type == HTMLA_HREF) { HTML_end_element(me, HTML_A, NULL); PSRCSTOP(href); } PUTC(q); PSRCSTOP(attrval); } /* if value */ } /* if present[i] */ } /* if present */ PSRCSTOP(tag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); psrc_nested_call = FALSE; return HT_OK; } /*if (!psrc_nested_call) */ /*fall through */ } #endif /* USE_PRETTYSRC */ if (LYMapsOnly) { if (!(ElementNumber == HTML_MAP || ElementNumber == HTML_AREA || ElementNumber == HTML_BASE || ElementNumber == HTML_OBJECT || ElementNumber == HTML_A)) { return HT_OK; } } else if (!me->text) { UPDATE_STYLE; } { /* me->tag_charset is charset for attribute values. */ int j = ((tag_charset < 0) ? me->UCLYhndl : tag_charset); if ((me->tag_charset != j) || (j < 0 /* for trace entry */ )) { CTRACE((tfp, "me->tag_charset: %d -> %d", me->tag_charset, j)); CTRACE((tfp, " (me->UCLYhndl: %d, tag_charset: %d)\n", me->UCLYhndl, tag_charset)); me->tag_charset = j; } } /* this should be done differently */ #if defined(USE_COLOR_STYLE) addClassName(";", HTML_dtd.tags[element_number].name, (size_t) HTML_dtd.tags[element_number].name_len); class_name = (force_classname ? forced_classname : class_string); force_classname = FALSE; if (force_current_tag_style == FALSE) { current_tag_style = (class_name[0] ? 
-1 : cached_tag_styles[element_number]); } else { force_current_tag_style = FALSE; } CTRACE2(TRACE_STYLE, (tfp, "CSS.elt:<%s>\n", HTML_dtd.tags[element_number].name)); if (current_tag_style == -1) { /* Append class_name */ hcode = hash_code_lowercase_on_fly(HTML_dtd.tags[element_number].name); if (class_name[0]) { int ohcode = hcode; hcode = hash_code_aggregate_char('.', hcode); hcode = hash_code_aggregate_lower_str(class_name, hcode); if (!hashStyles[hcode].name) { /* None such -> classless version */ hcode = ohcode; CTRACE2(TRACE_STYLE, (tfp, "STYLE.start_element: <%s> (class <%s> not configured), hcode=%d.\n", HTML_dtd.tags[element_number].name, class_name, hcode)); } else { addClassName(".", class_name, strlen(class_name)); CTRACE2(TRACE_STYLE, (tfp, "STYLE.start_element: <%s>.<%s>, hcode=%d.\n", HTML_dtd.tags[element_number].name, class_name, hcode)); class_used = 1; } } class_string[0] = '\0'; } else { /* (current_tag_style!=-1) */ if (class_name[0]) { addClassName(".", class_name, strlen(class_name)); class_string[0] = '\0'; } hcode = current_tag_style; CTRACE2(TRACE_STYLE, (tfp, "STYLE.start_element: <%s>, hcode=%d.\n", HTML_dtd.tags[element_number].name, hcode)); current_tag_style = -1; } #if !OMIT_SCN_KEEPING /* Can be done in other cases too... */ if (!class_used && ElementNumber == HTML_INPUT) { /* For some other too? 
*/ const char *type = ""; int ohcode = hcode; if (present && present[HTML_INPUT_TYPE] && value[HTML_INPUT_TYPE]) type = value[HTML_INPUT_TYPE]; hcode = hash_code_aggregate_lower_str(".type.", hcode); hcode = hash_code_aggregate_lower_str(type, hcode); if (!hashStyles[hcode].name) { /* None such -> classless version */ hcode = ohcode; CTRACE2(TRACE_STYLE, (tfp, "STYLE.start_element: type <%s> not configured.\n", type)); } else { addClassName(".type.", type, strlen(type)); CTRACE2(TRACE_STYLE, (tfp, "STYLE.start_element: <%s>.type.<%s>, hcode=%d.\n", HTML_dtd.tags[element_number].name, type, hcode)); } } #endif /* !OMIT_SCN_KEEPING */ HText_characterStyle(me->text, hcode, STACK_ON); #endif /* USE_COLOR_STYLE */ /* * Handle the start tag. - FM */ switch (ElementNumber) { case HTML_HTML: break; case HTML_HEAD: break; case HTML_BASE: if (present && present[HTML_BASE_HREF] && !local_host_only && non_empty(value[HTML_BASE_HREF])) { char *base = NULL; const char *related = NULL; StrAllocCopy(base, value[HTML_BASE_HREF]); CTRACE((tfp, "*HTML_BASE: initial href=`%s'\n", NonNull(base))); if (!(url_type = LYLegitimizeHREF(me, &base, TRUE, TRUE))) { CTRACE((tfp, "HTML: BASE '%s' is not an absolute URL.\n", NonNull(base))); if (me->inBadBASE == FALSE) HTAlert(BASE_NOT_ABSOLUTE); me->inBadBASE = TRUE; } if (url_type == LYNXIMGMAP_URL_TYPE) { /* * These have a non-standard form, basically strip the prefix * or the code below would insert a nonsense host into the * pseudo URL. These should never occur where they would be * used for resolution of relative URLs anyway. We can also * strip the #map part. - kw */ temp = base; base = HTParse(base + 11, "", PARSE_ALL_WITHOUT_ANCHOR); FREE(temp); } /* * Get parent's address for defaulted fields. */ related = me->node_anchor->address; /* * Create the access field. */ temp = HTParse(base, related, PARSE_ACCESS + PARSE_PUNCTUATION); StrAllocCopy(me->base_href, temp); FREE(temp); /* * Create the host[:port] field. 
*/ temp = HTParse(base, "", PARSE_HOST + PARSE_PUNCTUATION); if (!StrNCmp(temp, "//", 2)) { StrAllocCat(me->base_href, temp); if (!strcmp(me->base_href, "file://")) { StrAllocCat(me->base_href, "localhost"); } } else { if (isFILE_URL(me->base_href)) { StrAllocCat(me->base_href, "//localhost"); } else if (strcmp(me->base_href, STR_NEWS_URL)) { FREE(temp); StrAllocCat(me->base_href, (temp = HTParse(related, "", PARSE_HOST + PARSE_PUNCTUATION))); } } FREE(temp); /* * Create the path field. */ temp = HTParse(base, "", PARSE_PATH + PARSE_PUNCTUATION); if (*temp != '\0') { char *p = strchr(temp, '?'); if (p) *p = '\0'; p = strrchr(temp, '/'); if (p) *(p + 1) = '\0'; /* strip after the last slash */ StrAllocCat(me->base_href, temp); } else if (!strcmp(me->base_href, STR_NEWS_URL)) { StrAllocCat(me->base_href, "*"); } else if (isNEWS_URL(me->base_href) || isNNTP_URL(me->base_href) || isSNEWS_URL(me->base_href)) { StrAllocCat(me->base_href, "/*"); } else { StrAllocCat(me->base_href, "/"); } FREE(temp); FREE(base); me->inBASE = TRUE; me->node_anchor->inBASE = TRUE; StrAllocCopy(me->node_anchor->content_base, me->base_href); /* me->base_href is a valid URL */ CTRACE((tfp, "*HTML_BASE: final href=`%s'\n", me->base_href)); } break; case HTML_META: if (present) LYHandleMETA(me, present, value, include); break; case HTML_TITLE: HTChunkClear(&me->title); break; case HTML_LINK: intern_flag = FALSE; if (present && present[HTML_LINK_HREF]) { CHECK_FOR_INTERN(intern_flag, value[HTML_LINK_HREF]); /* * Prepare to do housekeeping on the reference. - FM */ if (isEmpty(value[HTML_LINK_HREF])) { Base = (me->inBASE) ? me->base_href : me->node_anchor->address; StrAllocCopy(href, Base); } else { StrAllocCopy(href, value[HTML_LINK_HREF]); (void) LYLegitimizeHREF(me, &href, TRUE, TRUE); Base = (me->inBASE && *href != '\0' && *href != '#') ? me->base_href : me->node_anchor->address; HTParseALL(&href, Base); } /* * Handle links with a REV attribute. - FM * Handle REV="made" or REV="owner". 
- LM & FM * Handle REL="author" -TD */ if (present && ((present[HTML_LINK_REV] && value[HTML_LINK_REV] && (!strcasecomp("made", value[HTML_LINK_REV]) || !strcasecomp("owner", value[HTML_LINK_REV]))) || (present[HTML_LINK_REL] && value[HTML_LINK_REL] && (!strcasecomp("author", value[HTML_LINK_REL]))))) { /* * Load the owner element. - FM */ HTAnchor_setOwner(me->node_anchor, href); CTRACE((tfp, "HTML: DOC OWNER '%s' found\n", href)); FREE(href); /* * Load the RevTitle element if a TITLE attribute and value * are present. - FM */ if (present && present[HTML_LINK_TITLE] && value[HTML_LINK_TITLE] && *value[HTML_LINK_TITLE] != '\0') { StrAllocCopy(title, value[HTML_LINK_TITLE]); TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); LYTrimHead(title); LYTrimTail(title); if (*title != '\0') HTAnchor_setRevTitle(me->node_anchor, title); FREE(title); } break; } /* * Handle REL links. - FM */ if (present && present[HTML_LINK_REL] && value[HTML_LINK_REL]) { /* * Ignore style sheets, for now. - FM * * lss and css have different syntax - lynx shouldn't try to * parse them now (it tries to parse them as lss, so it exits * with error message on the 1st non-empty line) - VH */ #ifndef USE_COLOR_STYLE if (!strcasecomp(value[HTML_LINK_REL], "StyleSheet") || !strcasecomp(value[HTML_LINK_REL], "Style")) { CTRACE2(TRACE_STYLE, (tfp, "HTML: StyleSheet link found.\n")); CTRACE2(TRACE_STYLE, (tfp, " StyleSheets not yet implemented.\n")); FREE(href); break; } #endif /* ! USE_COLOR_STYLE */ /* * Ignore anything not registered in the 28-Mar-95 IETF HTML * 3.0 draft and W3C HTML 3.2 draft, or not appropriate for * Lynx banner links in the expired Maloney and Quin relrev * draft. We'll make this more efficient when the situation * stabilizes, and for now, we'll treat "Banner" as another * toolbar element. 
- FM */ if (!strcasecomp(value[HTML_LINK_REL], "Home") || !strcasecomp(value[HTML_LINK_REL], "ToC") || !strcasecomp(value[HTML_LINK_REL], "Contents") || !strcasecomp(value[HTML_LINK_REL], "Index") || !strcasecomp(value[HTML_LINK_REL], "Glossary") || !strcasecomp(value[HTML_LINK_REL], "Copyright") || !strcasecomp(value[HTML_LINK_REL], "Help") || !strcasecomp(value[HTML_LINK_REL], "Search") || !strcasecomp(value[HTML_LINK_REL], "Bookmark") || !strcasecomp(value[HTML_LINK_REL], "Banner") || !strcasecomp(value[HTML_LINK_REL], "Top") || !strcasecomp(value[HTML_LINK_REL], "Origin") || !strcasecomp(value[HTML_LINK_REL], "Navigator") || !strcasecomp(value[HTML_LINK_REL], "Disclaimer") || !strcasecomp(value[HTML_LINK_REL], "Author") || !strcasecomp(value[HTML_LINK_REL], "Editor") || !strcasecomp(value[HTML_LINK_REL], "Publisher") || !strcasecomp(value[HTML_LINK_REL], "Trademark") || !strcasecomp(value[HTML_LINK_REL], "Hotlist") || !strcasecomp(value[HTML_LINK_REL], "Begin") || !strcasecomp(value[HTML_LINK_REL], "First") || !strcasecomp(value[HTML_LINK_REL], "End") || !strcasecomp(value[HTML_LINK_REL], "Last") || !strcasecomp(value[HTML_LINK_REL], "Documentation") || !strcasecomp(value[HTML_LINK_REL], "Biblioentry") || !strcasecomp(value[HTML_LINK_REL], "Bibliography") || !strcasecomp(value[HTML_LINK_REL], "Start") || !strcasecomp(value[HTML_LINK_REL], "Appendix")) { StrAllocCopy(title, value[HTML_LINK_REL]); pdoctitle = &title; /* for setting HTAnchor's title */ } else if (!strcasecomp(value[HTML_LINK_REL], "Up") || !strcasecomp(value[HTML_LINK_REL], "Next") || !strcasecomp(value[HTML_LINK_REL], "Previous") || !strcasecomp(value[HTML_LINK_REL], "Prev") || !strcasecomp(value[HTML_LINK_REL], "Child") || !strcasecomp(value[HTML_LINK_REL], "Sibling") || !strcasecomp(value[HTML_LINK_REL], "Parent") || !strcasecomp(value[HTML_LINK_REL], "Meta") || !strcasecomp(value[HTML_LINK_REL], "URC") || !strcasecomp(value[HTML_LINK_REL], "Pointer") || !strcasecomp(value[HTML_LINK_REL], 
"Translation") || !strcasecomp(value[HTML_LINK_REL], "Definition") || !strcasecomp(value[HTML_LINK_REL], "Alternate") || !strcasecomp(value[HTML_LINK_REL], "Section") || !strcasecomp(value[HTML_LINK_REL], "Subsection") || !strcasecomp(value[HTML_LINK_REL], "Chapter")) { StrAllocCopy(title, value[HTML_LINK_REL]); /* not setting target HTAnchor's title, for these links of highly relative character. Instead, try to remember the REL attribute as a property of the link (but not the destination), in the (otherwise underused) link type in a special format; the LIST page generation code may later use it. - kw */ if (!intern_flag) { StrAllocCopy(temp, "RelTitle: "); StrAllocCat(temp, value[HTML_LINK_REL]); } #ifndef DISABLE_BIBP } else if (!strcasecomp(value[HTML_LINK_REL], "citehost")) { /* Citehost determination for bibp links. - RDC */ HTAnchor_setCitehost(me->node_anchor, href); CTRACE((tfp, "HTML: citehost '%s' found\n", href)); FREE(href); break; #endif } else { CTRACE((tfp, "HTML: LINK with REL=\"%s\" ignored.\n", value[HTML_LINK_REL])); FREE(href); break; } } } else if (present && present[HTML_LINK_REL] && value[HTML_LINK_REL]) { /* * If no HREF was specified, handle special REL links with * self-designated HREFs. - FM */ if (!strcasecomp(value[HTML_LINK_REL], "Home")) { StrAllocCopy(href, LynxHome); } else if (!strcasecomp(value[HTML_LINK_REL], "Help")) { StrAllocCopy(href, helpfile); } else if (!strcasecomp(value[HTML_LINK_REL], "Index")) { StrAllocCopy(href, indexfile); } else { CTRACE((tfp, "HTML: LINK with REL=\"%s\" and no HREF ignored.\n", value[HTML_LINK_REL])); break; } StrAllocCopy(title, value[HTML_LINK_REL]); pdoctitle = &title; } if (href) { /* * Create a title (link name) from the TITLE value, if present, or * default to the REL value that was loaded into title. 
- FM */ if (present && present[HTML_LINK_TITLE] && non_empty(value[HTML_LINK_TITLE])) { StrAllocCopy(title, value[HTML_LINK_TITLE]); TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); LYTrimHead(title); LYTrimTail(title); pdoctitle = &title; FREE(temp); /* forget about recording RelTitle - kw */ } if (isEmpty(title)) { FREE(href); FREE(title); break; } if (me->inA) { /* * Ugh! The LINK tag, which is a HEAD element, is in an * Anchor, which is BODY element. All we can do is close the * Anchor and cross our fingers. - FM */ SET_SKIP_STACK(HTML_A); HTML_end_element(me, HTML_A, include); } /* * Create anchors for the links that simulate a toolbar. - FM */ me->CurrentA = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ NULL, /* Tag */ href, /* Addresss */ (temp ? (HTLinkType *) HTAtom_for(temp) : INTERN_LT)); /* Type */ FREE(temp); if ((dest = HTAnchor_parent(HTAnchor_followLink(me->CurrentA) )) != NULL) { if (pdoctitle && !HTAnchor_title(dest)) HTAnchor_setTitle(dest, *pdoctitle); /* Don't allow CHARSET attribute to change *this* document's charset assumption. - kw */ if (dest == me->node_anchor) dest = NULL; if (present[HTML_LINK_CHARSET] && non_empty(value[HTML_LINK_CHARSET])) { dest_char_set = UCGetLYhndl_byMIME(value[HTML_LINK_CHARSET]); if (dest_char_set < 0) dest_char_set = UCLYhndl_for_unrec; } if (dest && dest_char_set >= 0) HTAnchor_setUCInfoStage(dest, dest_char_set, UCT_STAGE_PARSER, UCT_SETBY_LINK); } UPDATE_STYLE; if (!HText_hasToolbar(me->text) && (ID_A = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ LYToolbarName, /* Tag */ NULL, /* Addresss */ (HTLinkType *) 0))) { /* Type */ HText_appendCharacter(me->text, '#'); HText_setLastChar(me->text, ' '); /* absorb white space */ HText_beginAnchor(me->text, me->inUnderline, ID_A); HText_endAnchor(me->text, 0); HText_setToolbar(me->text); } else { /* * Add collapsible space to separate link from previous * generated links. 
- kw */ HTML_put_character(me, ' '); } HText_beginAnchor(me->text, me->inUnderline, me->CurrentA); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_START_CHAR); #ifdef USE_COLOR_STYLE if (present && present[HTML_LINK_CLASS] && non_empty(value[HTML_LINK_CLASS])) { char *tmp = 0; HTSprintf0(&tmp, "link.%s.%s", value[HTML_LINK_CLASS], title); CTRACE2(TRACE_STYLE, (tfp, "STYLE.link: using style <%s>\n", tmp)); HText_characterStyle(me->text, hash_code(tmp), STACK_ON); HTML_put_string(me, title); HTML_put_string(me, " ("); HTML_put_string(me, value[HTML_LINK_CLASS]); HTML_put_string(me, ")"); HText_characterStyle(me->text, hash_code(tmp), STACK_OFF); FREE(tmp); } else #endif HTML_put_string(me, title); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_END_CHAR); HText_endAnchor(me->text, 0); } FREE(href); FREE(title); break; case HTML_ISINDEX: if (((present)) && ((present[HTML_ISINDEX_HREF] && value[HTML_ISINDEX_HREF]) || (present[HTML_ISINDEX_ACTION] && value[HTML_ISINDEX_ACTION]))) { /* * Lynx was supporting ACTION, which never made it into the HTML * 2.0 specs. HTML 3.0 uses HREF, so we'll use that too, but allow * use of ACTION as an alternate until people have fully switched * over. - FM */ if (present[HTML_ISINDEX_HREF] && value[HTML_ISINDEX_HREF]) StrAllocCopy(href, value[HTML_ISINDEX_HREF]); else StrAllocCopy(href, value[HTML_ISINDEX_ACTION]); LYLegitimizeHREF(me, &href, TRUE, TRUE); Base = (me->inBASE && *href != '\0' && *href != '#') ? me->base_href : me->node_anchor->address; HTParseALL(&href, Base); HTAnchor_setIndex(me->node_anchor, href); FREE(href); } else { Base = (me->inBASE) ? me->base_href : me->node_anchor->address; HTAnchor_setIndex(me->node_anchor, Base); } /* * Support HTML 3.0 PROMPT attribute. 
- FM */ if (present && present[HTML_ISINDEX_PROMPT] && non_empty(value[HTML_ISINDEX_PROMPT])) { StrAllocCopy(temp, value[HTML_ISINDEX_PROMPT]); TRANSLATE_AND_UNESCAPE_ENTITIES(&temp, TRUE, FALSE); LYTrimHead(temp); LYTrimTail(temp); if (*temp != '\0') { StrAllocCat(temp, " "); HTAnchor_setPrompt(me->node_anchor, temp); } else { HTAnchor_setPrompt(me->node_anchor, ENTER_DATABASE_QUERY); } FREE(temp); } else { HTAnchor_setPrompt(me->node_anchor, ENTER_DATABASE_QUERY); } break; case HTML_NEXTID: break; case HTML_STYLE: /* * We're getting it as Literal text, which, for now, we'll just ignore. * - FM */ HTChunkClear(&me->style_block); break; case HTML_SCRIPT: /* * We're getting it as Literal text, which, for now, we'll just ignore. * - FM */ HTChunkClear(&me->script); break; case HTML_BODY: CHECK_ID(HTML_BODY_ID); if (HText_hasToolbar(me->text)) HText_appendParagraph(me->text); break; case HTML_FRAMESET: break; case HTML_FRAME: if (present && present[HTML_FRAME_NAME] && non_empty(value[HTML_FRAME_NAME])) { StrAllocCopy(id_string, value[HTML_FRAME_NAME]); TRANSLATE_AND_UNESCAPE_ENTITIES(&id_string, TRUE, FALSE); LYTrimHead(id_string); LYTrimTail(id_string); } if (present && present[HTML_FRAME_SRC] && non_empty(value[HTML_FRAME_SRC])) { StrAllocCopy(href, value[HTML_FRAME_SRC]); LYLegitimizeHREF(me, &href, TRUE, TRUE); if (me->inA) { SET_SKIP_STACK(HTML_A); HTML_end_element(me, HTML_A, include); } me->CurrentA = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ NULL, /* Tag */ href, /* Addresss */ (HTLinkType *) 0); /* Type */ CAN_JUSTIFY_PUSH(FALSE); LYEnsureSingleSpace(me); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); HTML_put_string(me, "FRAME:"); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); HTML_put_character(me, ' '); me->in_word = NO; CHECK_ID(HTML_FRAME_ID); HText_beginAnchor(me->text, me->inUnderline, me->CurrentA); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, 
LY_BOLD_START_CHAR); HTML_put_string(me, (id_string ? id_string : href)); FREE(href); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_END_CHAR); HText_endAnchor(me->text, 0); LYEnsureSingleSpace(me); CAN_JUSTIFY_POP; } else { CHECK_ID(HTML_FRAME_ID); } FREE(id_string); break; case HTML_NOFRAMES: LYEnsureDoubleSpace(me); LYResetParagraphAlignment(me); break; case HTML_IFRAME: if (present && present[HTML_IFRAME_NAME] && non_empty(value[HTML_IFRAME_NAME])) { StrAllocCopy(id_string, value[HTML_IFRAME_NAME]); TRANSLATE_AND_UNESCAPE_ENTITIES(&id_string, TRUE, FALSE); LYTrimHead(id_string); LYTrimTail(id_string); } if (present && present[HTML_IFRAME_SRC] && non_empty(value[HTML_IFRAME_SRC])) { StrAllocCopy(href, value[HTML_IFRAME_SRC]); LYLegitimizeHREF(me, &href, TRUE, TRUE); if (me->inA) HTML_end_element(me, HTML_A, include); me->CurrentA = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ NULL, /* Tag */ href, /* Addresss */ (HTLinkType *) 0); /* Type */ LYEnsureDoubleSpace(me); CAN_JUSTIFY_PUSH_F LYResetParagraphAlignment(me); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); HTML_put_string(me, "IFRAME:"); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); HTML_put_character(me, ' '); me->in_word = NO; CHECK_ID(HTML_IFRAME_ID); HText_beginAnchor(me->text, me->inUnderline, me->CurrentA); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_START_CHAR); HTML_put_string(me, (id_string ? 
id_string : href)); FREE(href); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_END_CHAR); HText_endAnchor(me->text, 0); LYEnsureSingleSpace(me); CAN_JUSTIFY_POP; } else { CHECK_ID(HTML_IFRAME_ID); } FREE(id_string); break; case HTML_BANNER: case HTML_MARQUEE: change_paragraph_style(me, styles[HTML_BANNER]); UPDATE_STYLE; if (me->sp->tag_number == (int) ElementNumber) LYEnsureDoubleSpace(me); /* * Treat this as a toolbar if we don't have one yet, and we are in the * first half of the first page. - FM */ if ((!HText_hasToolbar(me->text) && HText_getLines(me->text) < (display_lines / 2)) && (ID_A = HTAnchor_findChildAndLink(me->node_anchor, /* Parent */ LYToolbarName, /* Tag */ NULL, /* Addresss */ (HTLinkType *) 0))) { /* Type */ HText_beginAnchor(me->text, me->inUnderline, ID_A); HText_endAnchor(me->text, 0); HText_setToolbar(me->text); } CHECK_ID(HTML_GEN_ID); break; case HTML_CENTER: case HTML_DIV: if (me->Division_Level < (MAX_NESTING - 1)) { me->Division_Level++; } else { CTRACE((tfp, "HTML: ****** Maximum nesting of %d divisions exceeded!\n", MAX_NESTING)); } if (me->inP) LYEnsureSingleSpace(me); /* always at least break line - kw */ if (ElementNumber == HTML_CENTER) { me->DivisionAlignments[me->Division_Level] = HT_CENTER; change_paragraph_style(me, styles[HTML_DCENTER]); UPDATE_STYLE; me->current_default_alignment = styles[HTML_DCENTER]->alignment; } else if (me->List_Nesting_Level >= 0 && !(present && present[HTML_DIV_ALIGN] && value[HTML_DIV_ALIGN] && (!strcasecomp(value[HTML_DIV_ALIGN], "center") || !strcasecomp(value[HTML_DIV_ALIGN], "right")))) { if (present && present[HTML_DIV_ALIGN]) me->current_default_alignment = HT_LEFT; else if (me->Division_Level == 0) me->current_default_alignment = HT_LEFT; else if (me->sp[0].tag_number == HTML_UL || me->sp[0].tag_number == HTML_OL || me->sp[0].tag_number == HTML_MENU || me->sp[0].tag_number == HTML_DIR || me->sp[0].tag_number == HTML_LI || me->sp[0].tag_number == HTML_LH || 
me->sp[0].tag_number == HTML_DD) me->current_default_alignment = HT_LEFT; LYHandlePlike(me, present, value, include, HTML_DIV_ALIGN, TRUE); me->DivisionAlignments[me->Division_Level] = (short) me->current_default_alignment; } else if (present && present[HTML_DIV_ALIGN] && non_empty(value[HTML_DIV_ALIGN])) { if (!strcasecomp(value[HTML_DIV_ALIGN], "center")) { me->DivisionAlignments[me->Division_Level] = HT_CENTER; change_paragraph_style(me, styles[HTML_DCENTER]); UPDATE_STYLE; me->current_default_alignment = styles[HTML_DCENTER]->alignment; } else if (!strcasecomp(value[HTML_DIV_ALIGN], "right")) { me->DivisionAlignments[me->Division_Level] = HT_RIGHT; change_paragraph_style(me, styles[HTML_DRIGHT]); UPDATE_STYLE; me->current_default_alignment = styles[HTML_DRIGHT]->alignment; } else { me->DivisionAlignments[me->Division_Level] = HT_LEFT; change_paragraph_style(me, styles[HTML_DLEFT]); UPDATE_STYLE; me->current_default_alignment = styles[HTML_DLEFT]->alignment; } } else { me->DivisionAlignments[me->Division_Level] = HT_LEFT; change_paragraph_style(me, styles[HTML_DLEFT]); UPDATE_STYLE; me->current_default_alignment = styles[HTML_DLEFT]->alignment; } CHECK_ID(HTML_DIV_ID); break; case HTML_H1: case HTML_H2: case HTML_H3: case HTML_H4: case HTML_H5: case HTML_H6: /* * Close the previous style if not done by HTML doc. Added to get rid * of core dumps in BAD HTML on the net. * GAB 07-07-94 * But then again, these are actually allowed to nest. I guess I have * to depend on the HTML writers correct style. * GAB 07-12-94 if (i_prior_style != -1) { HTML_end_element(me, i_prior_style); } i_prior_style = ElementNumber; */ /* * Check whether we have an H# in a list, and if so, treat it as an LH. 
* - FM */ if ((me->List_Nesting_Level >= 0) && (me->sp[0].tag_number == HTML_UL || me->sp[0].tag_number == HTML_OL || me->sp[0].tag_number == HTML_MENU || me->sp[0].tag_number == HTML_DIR || me->sp[0].tag_number == HTML_LI)) { if (HTML_dtd.tags[HTML_LH].contents == SGML_EMPTY) { ElementNumber = HTML_LH; } else { me->new_style = me->sp[0].style; ElementNumber = (HTMLElement) me->sp[0].tag_number; UPDATE_STYLE; } /* * Some authors use H# headers as a substitute for FONT, so check * if this one immediately followed an LI. If so, both me->inP and * me->in_word will be FALSE (though the line might not be empty * due to a bullet and/or nbsp) and we can assume it is just for a * FONT change. We thus will not create another line break nor add * to the current left indentation. - FM */ if (!(me->inP == FALSE && me->in_word == NO)) { HText_appendParagraph(me->text); HTML_put_character(me, HT_NON_BREAK_SPACE); HText_setLastChar(me->text, ' '); me->in_word = NO; me->inP = FALSE; } CHECK_ID(HTML_H_ID); break; } if (present && present[HTML_H_ALIGN] && non_empty(value[HTML_H_ALIGN])) { if (!strcasecomp(value[HTML_H_ALIGN], "center")) change_paragraph_style(me, styles[HTML_HCENTER]); else if (!strcasecomp(value[HTML_H_ALIGN], "right")) change_paragraph_style(me, styles[HTML_HRIGHT]); else if (!strcasecomp(value[HTML_H_ALIGN], "left") || !strcasecomp(value[HTML_H_ALIGN], "justify")) change_paragraph_style(me, styles[HTML_HLEFT]); else change_paragraph_style(me, styles[ElementNumber]); } else if (me->Division_Level >= 0) { if (me->DivisionAlignments[me->Division_Level] == HT_CENTER) { change_paragraph_style(me, styles[HTML_HCENTER]); } else if (me->DivisionAlignments[me->Division_Level] == HT_LEFT) { change_paragraph_style(me, styles[HTML_HLEFT]); } else if (me->DivisionAlignments[me->Division_Level] == HT_RIGHT) { change_paragraph_style(me, styles[HTML_HRIGHT]); } } else { change_paragraph_style(me, styles[ElementNumber]); } UPDATE_STYLE; CHECK_ID(HTML_H_ID); if ((bold_headers == 
TRUE || (ElementNumber == HTML_H1 && bold_H1 == TRUE)) && (styles[ElementNumber]->font & HT_BOLD)) { if (me->inBoldA == FALSE && me->inBoldH == FALSE) { HText_appendCharacter(me->text, LY_BOLD_START_CHAR); } me->inBoldH = TRUE; } break; case HTML_P: LYHandlePlike(me, present, value, include, HTML_P_ALIGN, TRUE); CHECK_ID(HTML_P_ID); break; case HTML_BR: UPDATE_STYLE; CHECK_ID(HTML_GEN_ID); /* Add a \r (new line) if these three conditions are true: * 1. We are not collapsing BR's, and * 2. The previous line has text on it, or * 3. This line has text on it. * Otherwise, don't do anything. -DH 980814, TD 980827 */ if ((LYCollapseBRs == FALSE && !HText_PreviousLineEmpty(me->text, FALSE)) || !HText_LastLineEmpty(me->text, FALSE)) { HText_setLastChar(me->text, ' '); /* absorb white space */ HText_appendCharacter(me->text, '\r'); } me->in_word = NO; me->inP = FALSE; break; case HTML_WBR: UPDATE_STYLE; CHECK_ID(HTML_GEN_ID); HText_setBreakPoint(me->text); break; case HTML_HY: case HTML_SHY: UPDATE_STYLE; CHECK_ID(HTML_GEN_ID); HText_appendCharacter(me->text, LY_SOFT_HYPHEN); break; case HTML_HR: { int width; /* * Start a new line only if we had printable characters following * the previous newline, or remove the previous line if both it and * the last line are blank. - FM */ UPDATE_STYLE; if (!HText_LastLineEmpty(me->text, FALSE)) { HText_setLastChar(me->text, ' '); /* absorb white space */ HText_appendCharacter(me->text, '\r'); } else if (HText_PreviousLineEmpty(me->text, FALSE)) { HText_RemovePreviousLine(me->text); } me->in_word = NO; me->inP = FALSE; /* * Add an ID link if needed. - FM */ CHECK_ID(HTML_HR_ID); /* * Center lines within the current margins, if a right or left * ALIGNment is not specified. If WIDTH="#%" is given and not * garbage, use that to calculate the width, otherwise use the * default width. 
- FM */ if (present && present[HTML_HR_ALIGN] && value[HTML_HR_ALIGN]) { if (!strcasecomp(value[HTML_HR_ALIGN], "right")) { me->sp->style->alignment = HT_RIGHT; } else if (!strcasecomp(value[HTML_HR_ALIGN], "left")) { me->sp->style->alignment = HT_LEFT; } else { me->sp->style->alignment = HT_CENTER; } } else { me->sp->style->alignment = HT_CENTER; } width = LYcolLimit - me->new_style->leftIndent - me->new_style->rightIndent; if (present && present[HTML_HR_WIDTH] && value[HTML_HR_WIDTH] && isdigit(UCH(*value[HTML_HR_WIDTH])) && value[HTML_HR_WIDTH][strlen(value[HTML_HR_WIDTH]) - 1] == '%') { char *percent = NULL; int Percent, Width; StrAllocCopy(percent, value[HTML_HR_WIDTH]); percent[strlen(percent) - 1] = '\0'; Percent = atoi(percent); if (Percent > 100 || Percent < 1) width -= 5; else { Width = (width * Percent) / 100; if (Width < 1) width = 1; else width = Width; } FREE(percent); } else { width -= 5; } for (i = 0; i < width; i++) HTML_put_character(me, '_'); HText_appendCharacter(me->text, '\r'); me->in_word = NO; me->inP = FALSE; /* * Reset the alignment appropriately for the division and/or block. * - FM */ if (me->List_Nesting_Level < 0 && me->Division_Level >= 0) { me->sp->style->alignment = me->DivisionAlignments[me->Division_Level]; } else if (me->sp->style->id == ST_HeadingCenter || me->sp->style->id == ST_Heading1) { me->sp->style->alignment = HT_CENTER; } else if (me->sp->style->id == ST_HeadingRight) { me->sp->style->alignment = HT_RIGHT; } else { me->sp->style->alignment = HT_LEFT; } /* * Add a blank line and set the second line indentation for lists * and addresses, or a paragraph separator for other blocks. - FM */ if (me->List_Nesting_Level >= 0 || me->sp[0].tag_number == HTML_ADDRESS) { HText_setLastChar(me->text, ' '); /* absorb white space */ HText_appendCharacter(me->text, '\r'); } else { HText_appendParagraph(me->text); } } break; case HTML_TAB: if (!present) { /* Bad tag. Must have at least one attribute. 
- FM */ CTRACE((tfp, "HTML: TAB tag has no attributes. Ignored.\n")); break; } /* * If page author is using TAB within a TABLE, it's probably formatted * specifically to work well for Lynx without simple table tracking * code. Cancel tracking, it would only make things worse. - kw */ HText_cancelStbl(me->text); UPDATE_STYLE; CANT_JUSTIFY_THIS_LINE; if (present[HTML_TAB_ALIGN] && value[HTML_TAB_ALIGN] && (strcasecomp(value[HTML_TAB_ALIGN], "left") || !(present[HTML_TAB_TO] || present[HTML_TAB_INDENT]))) { /* * Just ensure a collapsible space, until we have the ALIGN and DP * attributes implemented. - FM */ HTML_put_character(me, ' '); CTRACE((tfp, "HTML: ALIGN not 'left'. Using space instead of TAB.\n")); } else if (!LYoverride_default_alignment(me) && me->current_default_alignment != HT_LEFT) { /* * Just ensure a collapsible space, until we can replace * HText_getCurrentColumn() in GridText.c with code which doesn't * require that the alignment be HT_LEFT. - FM */ HTML_put_character(me, ' '); CTRACE((tfp, "HTML: Not HT_LEFT. Using space instead of TAB.\n")); } else if ((present[HTML_TAB_TO] && non_empty(value[HTML_TAB_TO])) || (present[HTML_TAB_INDENT] && value[HTML_TAB_INDENT] && isdigit(UCH(*value[HTML_TAB_INDENT])))) { int column, target = -1; int enval = 2; column = HText_getCurrentColumn(me->text); if (present[HTML_TAB_TO] && non_empty(value[HTML_TAB_TO])) { /* * TO has priority over INDENT if both are present. - FM */ StrAllocCopy(temp, value[HTML_TAB_TO]); TRANSLATE_AND_UNESCAPE_TO_STD(&temp); if (*temp) { target = HText_getTabIDColumn(me->text, temp); } } else if (isEmpty(temp) && present[HTML_TAB_INDENT] && value[HTML_TAB_INDENT] && isdigit(UCH(*value[HTML_TAB_INDENT]))) { /* * The INDENT value is in "en" (enval per column) units. * Divide it by enval, rounding odd values up. 
- FM */ target = (int) (((1.0 * atoi(value[HTML_TAB_INDENT])) / enval) + (0.5)); } FREE(temp); /* * If we are being directed to a column too far to the left or * right, just add a collapsible space, otherwise, add the * appropriate number of spaces. - FM */ if (target < column || target > HText_getMaximumColumn(me->text)) { HTML_put_character(me, ' '); CTRACE((tfp, "HTML: Column out of bounds. Using space instead of TAB.\n")); } else { for (i = column; i < target; i++) HText_appendCharacter(me->text, ' '); HText_setLastChar(me->text, ' '); /* absorb white space */ } } me->in_word = NO; /* * If we have an ID attribute, save it together with the value of the * column we've reached. - FM */ if (present[HTML_TAB_ID] && non_empty(value[HTML_TAB_ID])) { StrAllocCopy(temp, value[HTML_TAB_ID]); TRANSLATE_AND_UNESCAPE_TO_STD(&temp); if (*temp) HText_setTabID(me->text, temp); FREE(temp); } break; case HTML_BASEFONT: break; case HTML_FONT: /* * FONT *may* have been declared SGML_EMPTY in HTMLDTD.c, and * SGML_character() in SGML.c *may* check for a FONT end tag to call * HTML_end_element() directly (with a check in that to bypass * decrementing of the HTML parser's stack). Or this may have been * really a end tag, for which some incarnations of SGML.c * would fake a start tag instead. - fm & kw * * But if we have an open FONT, DON'T close that one now, since FONT * tags can be legally nested AFAIK, and Lynx currently doesn't do * anything with them anyway... - kw */ #ifdef NOTUSED_FOTEMODS if (me->inFONT == TRUE) HTML_end_element(me, HTML_FONT, &include); #endif /* NOTUSED_FOTEMODS */ /* * Set flag to know we are in a FONT container, and add code to do * something about it, someday. 
- FM */ me->inFONT = TRUE; break; case HTML_B: /* Physical character highlighting */ case HTML_BLINK: case HTML_I: case HTML_U: case HTML_CITE: /* Logical character highlighting */ case HTML_EM: case HTML_STRONG: UPDATE_STYLE; me->Underline_Level++; CHECK_ID(HTML_GEN_ID); /* * Ignore this if inside of a bold anchor or header. Can't display * both underline and bold at same time. */ if (me->inBoldA == TRUE || me->inBoldH == TRUE) { CTRACE((tfp, "Underline Level is %d\n", me->Underline_Level)); break; } if (me->inUnderline == FALSE) { HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); me->inUnderline = TRUE; CTRACE((tfp, "Beginning underline\n")); } else { CTRACE((tfp, "Underline Level is %d\n", me->Underline_Level)); } break; case HTML_ABBR: /* Miscellaneous character containers */ case HTML_ACRONYM: case HTML_AU: case HTML_AUTHOR: case HTML_BIG: case HTML_CODE: case HTML_DFN: case HTML_KBD: case HTML_SAMP: case HTML_SMALL: case HTML_TT: case HTML_VAR: CHECK_ID(HTML_GEN_ID); break; /* ignore */ case HTML_SUP: HText_appendCharacter(me->text, '^'); CHECK_ID(HTML_GEN_ID); break; case HTML_SUB: HText_appendCharacter(me->text, '['); CHECK_ID(HTML_GEN_ID); break; case HTML_DEL: case HTML_S: case HTML_STRIKE: CHECK_ID(HTML_GEN_ID); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); HTML_put_string(me, "[DEL:"); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); HTML_put_character(me, ' '); me->in_word = NO; break; case HTML_INS: CHECK_ID(HTML_GEN_ID); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); HTML_put_string(me, "[INS:"); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); HTML_put_character(me, ' '); me->in_word = NO; break; case HTML_Q: CHECK_ID(HTML_GEN_ID); /* * Should check LANG and/or DIR attributes, and the * me->node_anchor->charset and/or yet to be added structure elements, * to determine whether we should 
use chevrons, but for now we'll * always use double- or single-quotes. - FM */ if (!(me->Quote_Level & 1)) HTML_put_character(me, '"'); else HTML_put_character(me, '`'); me->Quote_Level++; break; case HTML_PRE: /* Formatted text */ /* * Set our inPRE flag to FALSE so that a newline immediately following * the PRE start tag will be ignored. HTML_put_character() will set it * to TRUE when the first character within the PRE block is received. * - FM */ me->inPRE = FALSE; /* FALLTHRU */ case HTML_LISTING: /* Literal text */ /* FALLTHRU */ case HTML_XMP: /* FALLTHRU */ case HTML_PLAINTEXT: change_paragraph_style(me, styles[ElementNumber]); UPDATE_STYLE; CHECK_ID(HTML_GEN_ID); if (me->comment_end) HText_appendText(me->text, me->comment_end); break; case HTML_BLOCKQUOTE: case HTML_BQ: change_paragraph_style(me, styles[ElementNumber]); UPDATE_STYLE; if (me->sp->tag_number == (int) ElementNumber) LYEnsureDoubleSpace(me); CHECK_ID(HTML_BQ_ID); break; case HTML_NOTE: change_paragraph_style(me, styles[ElementNumber]); UPDATE_STYLE; if (me->sp->tag_number == (int) ElementNumber) LYEnsureDoubleSpace(me); CHECK_ID(HTML_NOTE_ID); { char *note = NULL; /* * Indicate the type of NOTE. 
*/ if (present && present[HTML_NOTE_CLASS] && value[HTML_NOTE_CLASS] && (!strcasecomp(value[HTML_NOTE_CLASS], "CAUTION") || !strcasecomp(value[HTML_NOTE_CLASS], "WARNING"))) { StrAllocCopy(note, value[HTML_NOTE_CLASS]); LYUpperCase(note); StrAllocCat(note, ":"); } else if (present && present[HTML_NOTE_ROLE] && value[HTML_NOTE_ROLE] && (!strcasecomp(value[HTML_NOTE_ROLE], "CAUTION") || !strcasecomp(value[HTML_NOTE_ROLE], "WARNING"))) { StrAllocCopy(note, value[HTML_NOTE_ROLE]); LYUpperCase(note); StrAllocCat(note, ":"); } else { StrAllocCopy(note, "NOTE:"); } if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR); HTML_put_string(me, note); if (me->inUnderline == FALSE) HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR); HTML_put_character(me, ' '); CAN_JUSTIFY_START; FREE(note); } CAN_JUSTIFY_START; me->inLABEL = TRUE; me->in_word = NO; me->inP = FALSE; break; case HTML_ADDRESS: change_paragraph_style(me, styles[ElementNumber]); UPDATE_STYLE; if (me->sp->tag_number == (int) ElementNumber) LYEnsureDoubleSpace(me); CHECK_ID(HTML_ADDRESS_ID); break; case HTML_DL: me->List_Nesting_Level++; /* increment the List nesting level */ if (me->List_Nesting_Level <= 0) { change_paragraph_style(me, present && present[HTML_DL_COMPACT] ? styles[HTML_DLC] : styles[HTML_DL]); } else if (me->List_Nesting_Level >= 6) { change_paragraph_style(me, present && present[HTML_DL_COMPACT] ? styles[HTML_DLC6] : styles[HTML_DL6]); } else { change_paragraph_style(me, present && present[HTML_DL_COMPACT] ? 
styles[(HTML_DLC1 - 1) + me->List_Nesting_Level] : styles[(HTML_DL1 - 1) + me->List_Nesting_Level]); } UPDATE_STYLE; /* update to the new style */ CHECK_ID(HTML_DL_ID); break; case HTML_DLC: me->List_Nesting_Level++; /* increment the List nesting level */ if (me->List_Nesting_Level <= 0) { change_paragraph_style(me, styles[HTML_DLC]); } else if (me->List_Nesting_Level >= 6) { change_paragraph_style(me, styles[HTML_DLC6]); } else { change_paragraph_style(me, styles[(HTML_DLC1 - 1) + me->List_Nesting_Level]); } UPDATE_STYLE; /* update to the new style */ CHECK_ID(HTML_DL_ID); break; case HTML_DT: CHECK_ID(HTML_GEN_ID); if (!me->style_change) { BOOL in_line_1 = HText_inLineOne(me->text); HTCoord saved_spaceBefore = me->sp->style->spaceBefore; HTCoord saved_spaceAfter = me->sp->style->spaceAfter; /* * If there are several DT elements and this is not the first, and * the preceding DT element's first (and normally only) line has * not yet been ended, suppress intervening blank line by * temporarily modifying the paragraph style in place. Ugly but * there's ample precedence. - kw */ if (in_line_1) { me->sp->style->spaceBefore = 0; /* temporary change */ me->sp->style->spaceAfter = 0; /* temporary change */ } HText_appendParagraph(me->text); me->sp->style->spaceBefore = saved_spaceBefore; /* undo */ me->sp->style->spaceAfter = saved_spaceAfter; /* undo */ me->in_word = NO; me->sp->style->alignment = HT_LEFT; } me->inP = FALSE; break; case HTML_DD: CHECK_ID(HTML_GEN_ID); HText_setLastChar(me->text, ' '); /* absorb white space */ if (!me->style_change) { if (!HText_LastLineEmpty(me->text, FALSE)) { HText_appendCharacter(me->text, '\r'); } else { HText_NegateLineOne(me->text); } } else { UPDATE_STYLE; HText_appendCharacter(me->text, '\t'); } me->sp->style->alignment = HT_LEFT; me->in_word = NO; me->inP = FALSE; break; case HTML_OL: /* * Set the default TYPE. */ me->OL_Type[(me->List_Nesting_Level < 11 ? 
me->List_Nesting_Level + 1 : 11)] = '1'; /* * Check whether we have a starting sequence number, or want to * continue the numbering from a previous OL in this nest. - FM */ if (present && (present[HTML_OL_SEQNUM] || present[HTML_OL_START])) { int seqnum; /* * Give preference to the valid HTML 3.0 SEQNUM attribute name over * the Netscape START attribute name (too bad the Netscape * developers didn't read the HTML 3.0 specs before re-inventing * the "wheel" as "we'll"). - FM */ if (present[HTML_OL_SEQNUM] && non_empty(value[HTML_OL_SEQNUM])) { seqnum = atoi(value[HTML_OL_SEQNUM]); } else if (present[HTML_OL_START] && non_empty(value[HTML_OL_START])) { seqnum = atoi(value[HTML_OL_START]); } else { seqnum = 1; } /* * Don't allow negative numbers less than or equal to our flags, or * numbers less than 1 if an Alphabetic or Roman TYPE. - FM */ if (present[HTML_OL_TYPE] && value[HTML_OL_TYPE]) { if (*value[HTML_OL_TYPE] == 'A') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'A'; if (seqnum < 1) seqnum = 1; } else if (*value[HTML_OL_TYPE] == 'a') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'a'; if (seqnum < 1) seqnum = 1; } else if (*value[HTML_OL_TYPE] == 'I') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'I'; if (seqnum < 1) seqnum = 1; } else if (*value[HTML_OL_TYPE] == 'i') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'i'; if (seqnum < 1) seqnum = 1; } else { if (seqnum <= OL_VOID) seqnum = OL_VOID + 1; } } else if (seqnum <= OL_VOID) { seqnum = OL_VOID + 1; } me->OL_Counter[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = seqnum; } else if (present && present[HTML_OL_CONTINUE]) { me->OL_Counter[me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11] = OL_CONTINUE; } else { me->OL_Counter[(me->List_Nesting_Level < 11 ? 
me->List_Nesting_Level + 1 : 11)] = 1; if (present && present[HTML_OL_TYPE] && value[HTML_OL_TYPE]) { if (*value[HTML_OL_TYPE] == 'A') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'A'; } else if (*value[HTML_OL_TYPE] == 'a') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'a'; } else if (*value[HTML_OL_TYPE] == 'I') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'I'; } else if (*value[HTML_OL_TYPE] == 'i') { me->OL_Type[(me->List_Nesting_Level < 11 ? me->List_Nesting_Level + 1 : 11)] = 'i'; } } } me->List_Nesting_Level++; if (me->List_Nesting_Level <= 0) { change_paragraph_style(me, styles[ElementNumber]); } else if (me->List_Nesting_Level >= 6) { change_paragraph_style(me, styles[HTML_OL6]); } else { change_paragraph_style(me, styles[HTML_OL1 + me->List_Nesting_Level - 1]); } UPDATE_STYLE; /* update to the new style */ CHECK_ID(HTML_OL_ID); break; case HTML_UL: me->List_Nesting_Level++; if (me->List_Nesting_Level <= 0) { if (!(present && present[HTML_UL_PLAIN]) && !(present && present[HTML_UL_TYPE] && value[HTML_UL_TYPE] && 0 == strcasecomp(value[HTML_UL_TYPE], "PLAIN"))) { change_paragraph_style(me, styles[ElementNumber]); } else { change_paragraph_style(me, styles[HTML_DIR]); ElementNumber = HTML_DIR; } } else if (me->List_Nesting_Level >= 6) { if (!(present && present[HTML_UL_PLAIN]) && !(present && present[HTML_UL_TYPE] && value[HTML_UL_TYPE] && 0 == strcasecomp(value[HTML_UL_TYPE],
#
#
#           The Nim Compiler
#        (c) Copyright 2015 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# This module implements the parser of the standard Nim syntax.
# The parser strictly reflects the grammar ("doc/grammar.txt"); however
# it uses several helper routines to keep the parser small. A special
# efficient algorithm is used for the precedence levels. The parser here can
# be seen as a refinement of the grammar, as it specifies how the AST is built
# from the grammar and how comments belong to the AST.


# In fact the grammar is generated from this file:
when isMainModule:
  # Running this module as a program regenerates the grammar file: every
  # line of this source whose first non-blank text is the grammar marker
  # contributes the rest of that line to "doc/grammar.txt".
  import pegs
  let grammarFile = open("doc/grammar.txt", fmWrite)
  for srcLine in lines("compiler/parser.nim"):
    if srcLine =~ peg" \s* '#| ' {.*}":
      grammarFile.write(matches[0], "\L")
  close(grammarFile)

import
  llstream, lexer, idents, strutils, ast, astalgo, msgs

type
  TParser*{.final.} = object  # A TParser object represents a module that
                              # is being parsed
    currInd: int              # current indentation level; the indentation
                              # helpers compare it against `tok.indent`
    firstTok, strongSpaces: bool # firstTok: Has the first token been read?
                                 # (openParser sets it to true)
                                 # strongSpaces: Is strongSpaces on?
    lex*: TLexer              # The lexer that is used for parsing
    tok*: TToken              # The current token
    inPragma: int             # Pragma level
    inSemiStmtList: int       # incremented by semiStmtList on entry and
                              # decremented again when it is done

# Forward declarations: signatures only, so that the routines below can be
# referenced before their bodies appear. Procs marked with `*` are exported
# from this module.
proc parseAll*(p: var TParser): PNode
proc closeParser*(p: var TParser)
proc parseTopLevelStmt*(p: var TParser): PNode

# helpers for the other parsers
proc isOperator*(tok: TToken): bool
proc getTok*(p: var TParser)
proc parMessage*(p: TParser, msg: TMsgKind, arg: string = "")
proc skipComment*(p: var TParser, node: PNode)
proc newNodeP*(kind: TNodeKind, p: TParser): PNode
proc newIntNodeP*(kind: TNodeKind, intVal: BiggestInt, p: TParser): PNode
proc newFloatNodeP*(kind: TNodeKind, floatVal: BiggestFloat, p: TParser): PNode
proc newStrNodeP*(kind: TNodeKind, strVal: string, p: TParser): PNode
proc newIdentNodeP*(ident: PIdent, p: TParser): PNode
proc expectIdentOrKeyw*(p: TParser)
proc expectIdent*(p: TParser)
proc parLineInfo*(p: TParser): TLineInfo
proc eat*(p: var TParser, tokType: TTokType)
proc skipInd*(p: var TParser)
proc optPar*(p: var TParser)
proc optInd*(p: var TParser, n: PNode)
proc indAndComment*(p: var TParser, n: PNode)
proc setBaseFlags*(n: PNode, base: TNumericalBase)
proc parseSymbol*(p: var TParser, allowNil = false): PNode
# module-private parsers that are needed before their definition
proc parseTry(p: var TParser; isExpr: bool): PNode
proc parseCase(p: var TParser): PNode
proc parseStmtPragma(p: var TParser): PNode
# implementation

proc getTok(p: var TParser) =
  ## Advances the parser by one token: the parser's lexer reads the next
  ## token and stores it in `p.tok`.
  p.lex.rawGetTok(p.tok)

proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream,
                 strongSpaces=false) =
  ## Initializes `p` for parsing `inputStream`: resets the current token,
  ## opens the lexer for the file with index `fileIdx`, reads the first
  ## token and records the `strongSpaces` setting.
  p.tok.initToken()
  p.lex.openLexer(fileIdx, inputStream)
  p.getTok()                  # prime `p.tok` with the first token
  p.firstTok = true
  p.strongSpaces = strongSpaces

proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
                 strongSpaces=false) =
  ## Convenience overload: resolves `filename` to its file index and
  ## delegates to the index based `openParser`.
  let fileIdx = filename.fileInfoIdx
  openParser(p, fileIdx, inputStream, strongSpaces)

proc closeParser(p: var TParser) =
  ## Closes the parser by closing its lexer.
  p.lex.closeLexer()

proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
  ## Reports a message of kind `msg` for the parser's current token;
  ## `arg` is passed along as the message argument.
  p.lex.lexMessageTok(msg, p.tok, arg)

proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) =
  ## Reports a message of kind `msg`, using the pretty-printed form of
  ## `tok` as the message argument.
  let shown = prettyTok(tok)
  parMessage(p, msg, shown)

# Runs `body` with `p.currInd` temporarily set to the indentation of the
# current token and puts the previous value back afterwards.
# NOTE(review): the restore is an ordinary trailing statement, so it is
# skipped if `body` leaves early (e.g. via `return` or an exception).
template withInd(p: expr, body: stmt) {.immediate.} =
  let oldInd = p.currInd      # remember the enclosing indentation level
  p.currInd = p.tok.indent
  body
  p.currInd = oldInd

# The current token is indented deeper than the current indentation level.
template realInd(p): bool =
  p.tok.indent > p.currInd

# The current token sits exactly at the current indentation level.
template sameInd(p): bool =
  p.tok.indent == p.currInd

# Like `sameInd`, but also true when the token's indent is negative.
template sameOrNoInd(p): bool =
  p.tok.indent == p.currInd or p.tok.indent < 0

proc rawSkipComment(p: var TParser, node: PNode) =
  ## If the current token is a comment, appends its text to `node.comment`
  ## and moves on to the next token. A nil `node` is reported as an
  ## internal error; the comment token is skipped in that case as well.
  if p.tok.tokType != tkComment: return
  if node == nil:
    parMessage(p, errInternal, "skipComment")
  else:
    if node.comment == nil: node.comment = ""
    node.comment.add(p.tok.literal)
  getTok(p)

proc skipComment(p: var TParser, node: PNode) =
  ## Attaches a comment token to `node`, but only when the current token's
  ## indent is negative.
  if p.tok.indent >= 0: return
  rawSkipComment(p, node)

proc flexComment(p: var TParser, node: PNode) =
  ## Like `skipComment`, but additionally accepts a comment token that is
  ## indented deeper than the current indentation level.
  let attachable = p.tok.indent < 0 or realInd(p)
  if attachable:
    rawSkipComment(p, node)

proc skipInd(p: var TParser) =
  ## Reports an indentation error if the current token has a non-negative
  ## indent that is not deeper than the current indentation level.
  if p.tok.indent >= 0 and not realInd(p):
    parMessage(p, errInvalidIndentation)

proc optPar(p: var TParser) =
  ## Reports an indentation error if the current token has a non-negative
  ## indent that is smaller than the current indentation level.
  if p.tok.indent >= 0 and p.tok.indent < p.currInd:
    parMessage(p, errInvalidIndentation)

proc optInd(p: var TParser, n: PNode) =
  ## Skips an optional comment (attaching it to `n`) and then checks the
  ## indentation of the token that follows.
  p.skipComment(n)
  p.skipInd()

proc getTokNoInd(p: var TParser) =
  ## Reads the next token and reports an indentation error if that token
  ## carries a non-negative indent.
  p.getTok()
  if p.tok.indent >= 0:
    parMessage(p, errInvalidIndentation)

proc expectIdentOrKeyw(p: TParser) =
  ## Reports "identifier expected" unless the current token is a symbol or
  ## a keyword. The token is not consumed.
  let tokKind = p.tok.tokType
  if tokKind == tkSymbol or isKeyword(tokKind): return
  lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok))

proc expectIdent(p: TParser) =
  ## Reports "identifier expected" unless the current token is a symbol.
  ## The token is not consumed.
  if p.tok.tokType == tkSymbol: return
  lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok))

proc eat(p: var TParser, tokType: TTokType) =
  ## Consumes the current token if it has type `tokType`. Otherwise a
  ## "token expected" error naming `tokType` is reported and the current
  ## token is left in place.
  if p.tok.tokType != tokType:
    parMessage(p, errTokenExpected, TokTypeToStr[tokType])
  else:
    getTok(p)

proc parLineInfo(p: TParser): TLineInfo =
  ## Returns the line information the lexer reports for the parser's
  ## current token.
  p.lex.getLineInfo(p.tok)

proc indAndComment(p: var TParser, n: PNode) =
  ## Handles a trailing comment for `n`. A token indented deeper than the
  ## current level must be a comment (which is attached to `n`); anything
  ## else at that depth is an indentation error. Otherwise the usual
  ## `skipComment` rule applies.
  if p.tok.indent <= p.currInd:
    skipComment(p, n)
  elif p.tok.tokType == tkComment:
    rawSkipComment(p, n)
  else:
    parMessage(p, errInvalidIndentation)

proc newNodeP(kind: TNodeKind, p: TParser): PNode =
  ## Creates a node of kind `kind` that carries the line information of
  ## the parser's current token.
  newNodeI(kind, p.parLineInfo)

proc newIntNodeP(kind: TNodeKind, intVal: BiggestInt, p: TParser): PNode =
  ## Creates a node of kind `kind` at the parser's current position and
  ## stores `intVal` in it.
  result = kind.newNodeP(p)
  result.intVal = intVal

proc newFloatNodeP(kind: TNodeKind, floatVal: BiggestFloat,
                   p: TParser): PNode =
  ## Creates a node of kind `kind` at the parser's current position and
  ## stores `floatVal` in it.
  result = kind.newNodeP(p)
  result.floatVal = floatVal

proc newStrNodeP(kind: TNodeKind, strVal: string, p: TParser): PNode =
  ## Creates a node of kind `kind` at the parser's current position and
  ## stores `strVal` in it.
  result = kind.newNodeP(p)
  result.strVal = strVal

proc newIdentNodeP(ident: PIdent, p: TParser): PNode =
  ## Creates an `nkIdent` node at the parser's current position that
  ## refers to `ident`.
  result = nkIdent.newNodeP(p)
  result.ident = ident

# Forward declarations of module-private parsers that the routines below
# call before the bodies are defined.
proc parseExpr(p: var TParser): PNode
proc parseStmt(p: var TParser): PNode
proc parseTypeDesc(p: var TParser): PNode
proc parseDoBlocks(p: var TParser, call: PNode)
proc parseParamList(p: var TParser, retColon = true): PNode

proc isSigilLike(tok: TToken): bool {.inline.} =
  ## True for operator tokens whose first character is '@'.
  tok.tokType == tkOpr and tok.ident.s[0] == '@'

proc isRightAssociative(tok: TToken): bool {.inline.} =
  ## Determines whether the token is right associative; this holds for
  ## operator tokens whose first character is '^'.
  # A rule for operators ending in '>' is currently disabled:
  # or (let L = tok.ident.s.len; L > 1 and tok.ident.s[L-1] == '>'))
  tok.tokType == tkOpr and tok.ident.s[0] == '^'

proc getPrecedence(tok: TToken, strongSpaces: bool): int =
  ## Calculates the precedence of the given token.
  ##
  ## Operator tokens (`tkOpr`) are classified by their first character,
  ## keyword operators have fixed levels, and every other token yields -10.
  ## With `strongSpaces` enabled the level of an operator is additionally
  ## raised by `100 - tok.strongSpaceA * 10`.
  template considerStrongSpaces(x): expr =
    # `x` unchanged unless strongSpaces is on (see the proc documentation)
    x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0)

  case tok.tokType
  of tkOpr:
    let L = tok.ident.s.len
    let relevantChar = tok.ident.s[0]

    # arrow like?
    # (at least two characters, ending in "->", "~>" or "=>"): level 1
    if L > 1 and tok.ident.s[L-1] == '>' and
      tok.ident.s[L-2] in {'-', '~', '='}: return considerStrongSpaces(1)

    template considerAsgn(value: expr) =
      # an operator that ends in '=' gets level 1 instead of `value`
      result = if tok.ident.s[L-1] == '=': 1 else: value

    case relevantChar
    of '$', '^': considerAsgn(10)
    of '*', '%', '/', '\\': considerAsgn(9)
    of '~': result = 8
    of '+', '-', '|': considerAsgn(8)
    of '&': considerAsgn(7)
    of '=', '<', '>', '!': result = 5   # no '=' suffix rule for these
    of '.': considerAsgn(6)
    of '?': result = 2
    else: considerAsgn(2)
  of tkDiv, tkMod, tkShl, tkShr: result = 9
  of tkIn, tkNotin, tkIs, tkIsnot, tkNot, tkOf, tkAs: result = 5
  of tkDotDot: result = 6
  of tkAnd: result = 4
  of tkOr, tkXor, tkPtr, tkRef: result = 3
  else: return -10            # returned as is, without the spacing bonus
  result = considerStrongSpaces(result)

proc isOperator(tok: TToken): bool =
  ## Determines if the given token is an operator: either a symbolic
  ## operator or one of the operator keywords listed below.
  case tok.tokType
  of tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
     tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor:
    result = true
  else:
    result = false

proc isUnary(p: TParser): bool =
  ## Decides whether the current token is to be parsed as a unary
  ## operator: it must be an operator or '..' token whose `strongSpaceB`
  ## is zero while its `strongSpaceA` is positive.
  let looksUnary = p.tok.tokType in {tkOpr, tkDotDot} and
                   p.tok.strongSpaceB == 0 and
                   p.tok.strongSpaceA > 0
  if looksUnary:
    result = true
    # versions prior to 0.13.0 used to do this:
    when false:
      if p.strongSpaces:
        result = true
      else:
        parMessage(p, warnDeprecated,
          "will be parsed as unary operator; inconsistent spacing")

proc checkBinary(p: TParser) {.inline.} =
  ## Validates the spacing around a binary operator token. Only active in
  ## strongSpaces mode and only for `tkOpr` tokens; reports an error if the
  ## two space counts differ or if the count is not one of 0, 1, 2, 4, 8.
  # we don't check '..' here as that's too annoying
  if not p.strongSpaces or p.tok.tokType != tkOpr: return
  let spA = p.tok.strongSpaceA
  let spB = p.tok.strongSpaceB
  if spB > 0 and spA != spB:
    parMessage(p, errGenerated,
               "Number of spaces around '$#' not consistent" %
               prettyTok(p.tok))
  elif spA notin {0,1,2,4,8}:
    parMessage(p, errGenerated, "Number of spaces must be 0,1,2,4 or 8")

#| module = stmt ^* (';' / IND{=})
#|
#| comma = ',' COMMENT?
#| semicolon = ';' COMMENT?
#| colon = ':' COMMENT?
#| colcom = ':' COMMENT?
#|
#| operator =  OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9
#|          | 'or' | 'xor' | 'and'
#|          | 'is' | 'isnot' | 'in' | 'notin' | 'of'
#|          | 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'static' | '..'
#|
#| prefixOperator = operator
#|
#| optInd = COMMENT?
#| optPar = (IND{>} | IND{=})?
#|
#| simpleExpr = arrowExpr (OP0 optInd arrowExpr)*
#| arrowExpr = assignExpr (OP1 optInd assignExpr)*
#| assignExpr = orExpr (OP2 optInd orExpr)*
#| orExpr = andExpr (OP3 optInd andExpr)*
#| andExpr = cmpExpr (OP4 optInd cmpExpr)*
#| cmpExpr = sliceExpr (OP5 optInd sliceExpr)*
#| sliceExpr = ampExpr (OP6 optInd ampExpr)*
#| ampExpr = plusExpr (OP7 optInd plusExpr)*
#| plusExpr = mulExpr (OP8 optInd mulExpr)*
#| mulExpr = dollarExpr (OP9 optInd dollarExpr)*
#| dollarExpr = primary (OP10 optInd primary)*

proc colcom(p: var TParser, n: PNode) =
  ## Expects a ':' token and then skips an optional comment, attaching it
  ## to `n`.
  p.eat(tkColon)
  p.skipComment(n)

proc parseSymbol(p: var TParser, allowNil = false): PNode =
  ## Parses a symbol and returns its node.
  ##
  ## * A plain symbol, `addr` or `type` yields an identifier node.
  ## * A backtick-quoted symbol yields an `nkAccQuoted` node with one
  ##   identifier child per piece; runs of operator-like tokens are merged
  ##   into a single identifier.
  ## * `nil` yields an `nkNilLit` node, but only if `allowNil` is true.
  ## * Anything else is reported as "identifier expected" and the empty
  ##   node is returned.
  #| symbol = '`' (KEYW|IDENT|literal|(operator|'('|')'|'['|']'|'{'|'}'|'=')+)+ '`'
  #|        | IDENT | 'addr' | 'type'
  case p.tok.tokType
  of tkSymbol, tkAddr, tkType:
    result = newIdentNodeP(p.tok.ident, p)
    getTok(p)
  of tkAccent:
    result = newNodeP(nkAccQuoted, p)
    getTok(p)
    while true:
      case p.tok.tokType
      of tkAccent:
        # closing backtick; an empty quoted symbol is an error
        if result.len == 0:
          parMessage(p, errIdentifierExpected, p.tok)
        break
      of tkOpr, tkDot, tkDotDot, tkEquals, tkParLe..tkParDotRi:
        # glue consecutive operator-like tokens into one identifier
        var accm = ""
        while p.tok.tokType in {tkOpr, tkDot, tkDotDot, tkEquals,
                                tkParLe..tkParDotRi}:
          accm.add(tokToStr(p.tok))
          getTok(p)
        result.add(newIdentNodeP(getIdent(accm), p))
      of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCharLit:
        result.add(newIdentNodeP(getIdent(tokToStr(p.tok)), p))
        getTok(p)
      else:
        parMessage(p, errIdentifierExpected, p.tok)
        # This branch consumes no token, so the loop has to be left here;
        # otherwise the same token would be reported over and over again
        # whenever the error report returns instead of aborting.
        break
    eat(p, tkAccent)
  else:
    if allowNil and p.tok.tokType == tkNil:
      result = newNodeP(nkNilLit, p)
      getTok(p)
    else:
      parMessage(p, errIdentifierExpected, p.tok)
      # BUGFIX: We must consume a token here to prevent endless loops!
      # But: this really sucks for idetools and keywords, so we don't do it
      # if it is a keyword:
      if not isKeyword(p.tok.tokType): getTok(p)
      result = ast.emptyNode

proc indexExpr(p: var TParser): PNode =
  ## Parses a single index expression, which is an ordinary expression.
  #| indexExpr = expr
  parseExpr(p)

proc indexExprList(p: var TParser, first: PNode, k: TNodeKind,
                   endToken: TTokType): PNode =
  ## Builds a node of kind `k` whose first child is `first`, followed by
  ## the comma separated index expressions up to `endToken`. The current
  ## (opening) token is skipped first; `endToken` is consumed at the end.
  #| indexExprList = indexExpr ^+ comma
  result = newNodeP(k, p)
  result.addSon(first)
  getTok(p)
  optInd(p, result)
  while p.tok.tokType != endToken and p.tok.tokType != tkEof:
    let item = indexExpr(p)
    result.addSon(item)
    if p.tok.tokType != tkComma: break
    getTok(p)
    skipComment(p, item)
  optPar(p)
  eat(p, endToken)

proc colonOrEquals(p: var TParser, a: PNode): PNode =
  ## If the current token is ':' or '=', parses the expression after it
  ## and returns an `nkExprColonExpr` respectively `nkExprEqExpr` node with
  ## the children `a` and that expression. Otherwise `a` is returned
  ## unchanged.
  case p.tok.tokType
  of tkColon, tkEquals:
    let pairKind =
      if p.tok.tokType == tkColon: nkExprColonExpr else: nkExprEqExpr
    result = newNodeP(pairKind, p)
    getTok(p)
    #optInd(p, result)
    result.addSon(a)
    result.addSon(parseExpr(p))
  else:
    result = a

proc exprColonEqExpr(p: var TParser): PNode =
  ## Parses an expression that may be followed by ':' or '=' and a second
  ## expression.
  #| exprColonEqExpr = expr (':'|'=' expr)?
  let lhs = parseExpr(p)
  colonOrEquals(p, lhs)

proc exprList(p: var TParser, endTok: TTokType, result: PNode) =
  ## Skips the current token and appends the comma separated expressions
  ## that follow to `result`, stopping at `endTok` (which is not consumed)
  ## or at the end of the file.
  #| exprList = expr ^+ comma
  getTok(p)
  optInd(p, result)
  while p.tok.tokType notin {endTok, tkEof}:
    let item = parseExpr(p)
    result.addSon(item)
    if p.tok.tokType != tkComma: break
    getTok(p)
    optInd(p, item)

proc dotExpr(p: var TParser, a: PNode): PNode =
  ## Parses the `.symbol` part of a dot expression. The current token (the
  ## dot) is skipped; the result is an `nkDotExpr` node, positioned at the
  ## dot, with the children `a` and the parsed symbol.
  #| dotExpr = expr '.' optInd symbol
  let dotInfo = p.parLineInfo   # taken before the dot is consumed
  getTok(p)
  result = newNodeI(nkDotExpr, dotInfo)
  optInd(p, result)
  result.addSon(a)
  result.addSon(parseSymbol(p))

proc qualifiedIdent(p: var TParser): PNode =
  ## Parses a symbol that may be followed by one `.symbol` qualification.
  #| qualifiedIdent = symbol ('.' optInd symbol)?
  result = parseSymbol(p)
  if p.tok.tokType != tkDot: return
  result = dotExpr(p, result)

proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) =
  ## Skips the current (opening) token, appends the comma separated
  ## `exprColonEqExpr` items to `result` and finally consumes `endTok`,
  ## which has to be a closing bracket token.
  assert(endTok in {tkCurlyRi, tkCurlyDotRi, tkBracketRi, tkParRi})
  getTok(p)
  optInd(p, result)
  while p.tok.tokType notin {endTok, tkEof}:
    let item = exprColonEqExpr(p)
    result.addSon(item)
    if p.tok.tokType != tkComma: break
    getTok(p)
    skipComment(p, item)
  optPar(p)
  eat(p, endTok)

proc exprColonEqExprList(p: var TParser, kind: TNodeKind,
                         endTok: TTokType): PNode =
  ## Parses a bracketed list of `exprColonEqExpr` items into a fresh node
  ## of kind `kind`; `endTok` is the closing token of the list.
  #| exprColonEqExprList = exprColonEqExpr (comma exprColonEqExpr)* (comma)?
  result = kind.newNodeP(p)
  p.exprColonEqExprListAux(endTok, result)

proc setOrTableConstr(p: var TParser): PNode =
  ## Parses a `{...}` constructor. The node starts out as `nkCurly` and is
  ## turned into `nkTableConstr` for the form `{:}` or as soon as one item
  ## is a `key: value` pair.
  #| setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}'
  result = newNodeP(nkCurly, p)
  getTok(p) # skip '{'
  optInd(p, result)
  if p.tok.tokType != tkColon:
    while p.tok.tokType != tkCurlyRi and p.tok.tokType != tkEof:
      let item = exprColonEqExpr(p)
      if item.kind == nkExprColonExpr: result.kind = nkTableConstr
      result.addSon(item)
      if p.tok.tokType != tkComma: break
      getTok(p)
      skipComment(p, item)
  else:
    getTok(p) # skip ':'
    result.kind = nkTableConstr
  optPar(p)
  eat(p, tkCurlyRi) # skip '}'

proc parseCast(p: var TParser): PNode =
  ## Parses a cast expression into an `nkCast` node whose children are the
  ## type description and the expression that is cast.
  #| castExpr = 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')'
  result = newNodeP(nkCast, p)
  p.getTok()                  # skip 'cast'
  # '[' typeDesc ']'
  p.eat(tkBracketLe)
  p.optInd(result)
  result.addSon(parseTypeDesc(p))
  p.optPar()
  p.eat(tkBracketRi)
  # '(' expr ')'
  p.eat(tkParLe)
  p.optInd(result)
  result.addSon(parseExpr(p))
  p.optPar()
  p.eat(tkParRi)

proc setBaseFlags(n: PNode, base: TNumericalBase) =
  ## Records the numeric base a literal was written in by adding the
  ## matching flag to `n.flags`. Base 10 needs no flag.
  case base
  of base2: n.flags.incl(nfBase2)
  of base8: n.flags.incl(nfBase8)
  of base16: n.flags.incl(nfBase16)
  of base10: discard

proc parseGStrLit(p: var TParser, a: PNode): PNode =
  ## If the current token is a generalized (triple) string literal, it is
  ## consumed and an `nkCallStrLit` node with the children `a` and the
  ## string literal is returned. Otherwise `a` is returned unchanged.
  if p.tok.tokType notin {tkGStrLit, tkGTripleStrLit}: return a
  let litKind =
    if p.tok.tokType == tkGStrLit: nkRStrLit else: nkTripleStrLit
  result = newNodeP(nkCallStrLit, p)
  result.addSon(a)
  result.addSon(newStrNodeP(litKind, p.tok.literal, p))
  getTok(p)

# Parsing mode for primary expressions. In identOrLiteral the modes
# pmTypeDesc and pmTypeDef make a '(' parse as a plain parenthesised
# exprColonEqExpr list instead of going through parsePar.
# NOTE(review): pmSkipSuffix is used outside the code shown here;
# presumably it suppresses suffix parsing - confirm at its use sites.
type
  TPrimaryMode = enum pmNormal, pmTypeDesc, pmTypeDef, pmSkipSuffix

# forward declarations needed by the parenthesis parsing below
proc complexOrSimpleStmt(p: var TParser): PNode
proc simpleExpr(p: var TParser, mode = pmNormal): PNode

proc semiStmtList(p: var TParser, result: PNode) =
  ## Parses statements separated by ';' and appends them to `result`,
  ## whose kind is changed to `nkStmtListExpr`. `p.inSemiStmtList` is
  ## raised for the duration of the parse.
  inc(p.inSemiStmtList)
  result.add(complexOrSimpleStmt(p))
  while p.tok.tokType == tkSemiColon:
    p.getTok()
    p.optInd(result)
    result.add(complexOrSimpleStmt(p))
  dec(p.inSemiStmtList)
  result.kind = nkStmtListExpr

proc parsePar(p: var TParser): PNode =
  ## Parses a parenthesised construct. The result starts out as an `nkPar`
  ## node; the statement-list branches go through `semiStmtList`, which
  ## changes the node kind to `nkStmtListExpr`.
  #| parKeyw = 'discard' | 'include' | 'if' | 'while' | 'case' | 'try'
  #|         | 'finally' | 'except' | 'for' | 'block' | 'const' | 'let'
  #|         | 'when' | 'var' | 'mixin'
  #| par = '(' optInd
  #|           ( &parKeyw complexOrSimpleStmt ^+ ';'
  #|           | ';' complexOrSimpleStmt ^+ ';'
  #|           | pragmaStmt
  #|           | simpleExpr ( ('=' expr (';' complexOrSimpleStmt ^+ ';' )? )
  #|                        | (':' expr (',' exprColonEqExpr     ^+ ',' )? ) ) )
  #|           optPar ')'
  #
  # unfortunately it's ambiguous: (expr: expr) vs (exprStmt); however a
  # leading ';' could be used to enforce a 'stmt' context ...
  result = newNodeP(nkPar, p)
  getTok(p)                   # skip '('
  optInd(p, result)
  if p.tok.tokType in {tkDiscard, tkInclude, tkIf, tkWhile, tkCase,
                       tkTry, tkDefer, tkFinally, tkExcept, tkFor, tkBlock,
                       tkConst, tkLet, tkWhen, tkVar,
                       tkMixin}:
    # a statement keyword right after '(' selects the statement list form
    # XXX 'bind' used to be an expression, so we exclude it here;
    # tests/reject/tbind2 fails otherwise.
    semiStmtList(p, result)
  elif p.tok.tokType == tkSemiColon:
    # '(;' enforces 'stmt' context:
    getTok(p)
    optInd(p, result)
    semiStmtList(p, result)
  elif p.tok.tokType == tkCurlyDotLe:
    # a pragma statement is the only content
    result.add(parseStmtPragma(p))
  elif p.tok.tokType != tkParRi:
    # anything else starts with an expression; the token after it decides
    # between assignment, statement list and expression/tuple form
    var a = simpleExpr(p)
    if p.tok.tokType == tkEquals:
      # special case: allow assignments
      getTok(p)
      optInd(p, result)
      let b = parseExpr(p)
      let asgn = newNodeI(nkAsgn, a.info, 2)
      asgn.sons[0] = a
      asgn.sons[1] = b
      result.add(asgn)
      if p.tok.tokType == tkSemiColon:
        # the assignment is the first statement of a statement list
        semiStmtList(p, result)
    elif p.tok.tokType == tkSemiColon:
      # stmt context:
      result.add(a)
      semiStmtList(p, result)
    else:
      # expression context: `a`, optionally as `a: expr` or `a = expr`,
      # followed by further comma separated items
      a = colonOrEquals(p, a)
      result.add(a)
      if p.tok.tokType == tkComma:
        getTok(p)
        skipComment(p, a)
        while p.tok.tokType != tkParRi and p.tok.tokType != tkEof:
          var a = exprColonEqExpr(p)  # shadows the outer `a` on purpose
          addSon(result, a)
          if p.tok.tokType != tkComma: break
          getTok(p)
          skipComment(p, a)
  optPar(p)
  eat(p, tkParRi)

proc identOrLiteral(p: var TParser, mode: TPrimaryMode): PNode =
  ## Parses an identifier, a literal or a bracketed constructor, depending
  ## on the current token. For any other token an "expression expected"
  ## error is reported, the token is skipped and the empty node is
  ## returned. `mode` only influences how '(' is parsed.
  #| literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
  #|           | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
  #|           | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
  #|           | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
  #|           | CHAR_LIT
  #|           | NIL
  #| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
  #| identOrLiteral = generalizedLit | symbol | literal
  #|                | par | arrayConstr | setOrTableConstr
  #|                | castExpr
  #| tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')'
  #| arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']'
  template intLit(nodeKind: TNodeKind) =
    # integer literal: value plus numeric base flag, then advance
    result = newIntNodeP(nodeKind, p.tok.iNumber, p)
    setBaseFlags(result, p.tok.base)
    getTok(p)

  template floatLit(nodeKind: TNodeKind) =
    # float literal: value plus numeric base flag, then advance
    result = newFloatNodeP(nodeKind, p.tok.fNumber, p)
    setBaseFlags(result, p.tok.base)
    getTok(p)

  template strLit(nodeKind: TNodeKind) =
    # string literal: literal text, then advance
    result = newStrNodeP(nodeKind, p.tok.literal, p)
    getTok(p)

  case p.tok.tokType
  of tkSymbol, tkType, tkAddr:
    # an identifier may be the prefix of a generalized string literal
    let identNode = newIdentNodeP(p.tok.ident, p)
    getTok(p)
    result = parseGStrLit(p, identNode)
  of tkAccent:
    result = parseSymbol(p)
  # literals
  of tkIntLit: intLit(nkIntLit)
  of tkInt8Lit: intLit(nkInt8Lit)
  of tkInt16Lit: intLit(nkInt16Lit)
  of tkInt32Lit: intLit(nkInt32Lit)
  of tkInt64Lit: intLit(nkInt64Lit)
  of tkUIntLit: intLit(nkUIntLit)
  of tkUInt8Lit: intLit(nkUInt8Lit)
  of tkUInt16Lit: intLit(nkUInt16Lit)
  of tkUInt32Lit: intLit(nkUInt32Lit)
  of tkUInt64Lit: intLit(nkUInt64Lit)
  of tkFloatLit: floatLit(nkFloatLit)
  of tkFloat32Lit: floatLit(nkFloat32Lit)
  of tkFloat64Lit: floatLit(nkFloat64Lit)
  of tkFloat128Lit: floatLit(nkFloat128Lit)
  of tkStrLit: strLit(nkStrLit)
  of tkRStrLit: strLit(nkRStrLit)
  of tkTripleStrLit: strLit(nkTripleStrLit)
  of tkCharLit:
    result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
    getTok(p)
  of tkNil:
    result = newNodeP(nkNilLit, p)
    getTok(p)
  of tkParLe:
    # () constructor
    if mode notin {pmTypeDesc, pmTypeDef}:
      result = parsePar(p)
    else:
      result = exprColonEqExprList(p, nkPar, tkParRi)
  of tkCurlyLe:
    # {} constructor
    result = setOrTableConstr(p)
  of tkBracketLe:
    # [] constructor
    result = exprColonEqExprList(p, nkBracket, tkBracketRi)
  of tkCast:
    result = parseCast(p)
  else:
    parMessage(p, errExprExpected, p.tok)
    getTok(p)  # we must consume a token here to prevent endless loops!
    result = ast.emptyNode

proc namedParams(p: var TParser, callee: PNode,
                 kind: TNodeKind, endTok: TTokType): PNode =
  ## Builds a node of kind `kind` whose first child is `callee`, followed
  ## by the bracketed `exprColonEqExpr` items up to and including `endTok`.
  result = kind.newNodeP(p)
  result.addSon(callee)
  p.exprColonEqExprListAux(endTok, result)

# forward declaration; the body is defined further down in this module
proc parseMacroColon(p: var TParser, x: PNode): PNode
proc primarySuffix(p: var TParser, r: PNode, baseIndent: int): PNode =
  #| primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks?
  #|       | doBlocks
  #|       | '.' optInd symbol generalizedLit?
  #|       | '[' optInd indexExprList optPar ']'