/* * $LynxId: LYPrettySrc.c,v 1.25 2010/10/27 00:15:12 tom Exp $ * * HTML source syntax highlighting * by Vlad Harchev * March 1999 */ #include #include #include #include #include /* This file creates too many "leak detected" entries in Lynx.leaks. */ #define NO_MEMORY_TRACKING #include #ifdef USE_PRETTYSRC BOOL psrc_convert_string = FALSE; BOOL psrc_view = FALSE; /* this is read by SGML_put_character - TRUE when viewing pretty source */ BOOLEAN LYpsrc = FALSE; /* this tells what will be shown on '\': if TRUE, then pretty source, normal source view otherwise. Toggled by -prettysrc commandline option. */ BOOL sgml_in_psrc_was_initialized; BOOL psrc_nested_call; BOOL psrc_first_tag; BOOL mark_htext_as_source = FALSE; /* tagspecs from lynx.cfg are read here. After .lss file is read (is with lss support), the style cache and markup are created before entering the mainloop. */ BOOLEAN psrcview_no_anchor_numbering = FALSE; static const char *HTL_tagspecs_defaults[HTL_num_lexemes] = { /* these values are defaults. They are also listed in comments of distibution's lynx.cfg. */ #ifdef USE_COLOR_STYLE "span.htmlsrc_comment:!span", "span.htmlsrc_tag:!span", "span.htmlsrc_attrib:!span", "span.htmlsrc_attrval:!span", "span.htmlsrc_abracket:!span", "span.htmlsrc_entity:!span", "span.htmlsrc_href:!span", "span.htmlsrc_entire:!span", "span.htmlsrc_badseq:!span", "span.htmlsrc_badtag:!span", "span.htmlsrc_badattr:!span", "span.htmlsrc_sgmlspecial:!span" #else "b:!b", /* comment */ "b:!b", /* tag */ "b:!b", /* attrib */ ":", /* attrval */ "b:!b", /* abracket */ "b:!b", /* entity */ ":", /* href */ ":", /* entire */ "b:!b", /* badseq */ ":", /* badtag */ ":", /* badattr */ "b:!b" /* sgmlspec */ #endif }; char *HTL_tagspecs[HTL_num_lexemes]; /* these are pointers since tagspec can be empty (the pointer will be NULL in that case) */ HT_tagspec *lexeme_start[HTL_num_lexemes]; HT_tagspec *lexeme_end[HTL_num_lexemes]; int tagname_transform = 2; int attrname_transform = 2; static int html_src_tag_index(char *tagname) { HTTag *tag = SGMLFindTag(&HTML_dtd, tagname); return (tag && tag != &HTTag_unrecognized) ? (int) (tag - HTML_dtd.tags) : -1; } typedef enum { HTSRC_CK_normal, HTSRC_CK_seen_excl, HTSRC_CK_after_tagname, HTSRC_CK_seen_dot } html_src_check_state; static void append_close_tag(char *tagname, HT_tagspec ** head, HT_tagspec ** tail) { int idx, nattr; HTTag *tag; HT_tagspec *subj; idx = html_src_tag_index(tagname); tag = HTML_dtd.tags + idx; nattr = tag->number_of_attributes; if (idx == -1) { fprintf(stderr, "internal error: previous check didn't find bad HTML tag %s", tagname); exit_immediately(EXIT_FAILURE); } subj = typecalloc(HT_tagspec); subj->element = (HTMLElement) idx; subj->present = typecallocn(BOOL, (unsigned) nattr); subj->value = typecallocn(char *, (unsigned) nattr); subj->start = FALSE; #ifdef USE_COLOR_STYLE subj->class_name = NULL; #endif if (!*head) { *head = subj; *tail = subj; } else { (*tail)->next = subj; *tail = subj; } } /* this will allocate node, initialize all members, and node append to the list, possibly modifying head and modifying tail */ static void append_open_tag(char *tagname, char *classname GCC_UNUSED, HT_tagspec ** head, HT_tagspec ** tail) { HT_tagspec *subj; #ifdef USE_COLOR_STYLE int hcode; #endif append_close_tag(tagname, head, tail); /* initialize common members */ subj = *tail; subj->start = TRUE; #ifdef USE_COLOR_STYLE hcode = hash_code_lowercase_on_fly(tagname); if (non_empty(classname)) { # if 0 /* * we don't provide a classname as attribute of that tag, since for * plain formatting tags they are not used directly for anything except * style - and we provide style value directly. */ HTTag *tag = HTML_dtd.tags + subj->element; int class_attr_idx = 0; int n = tag->number_of_attributes; attr *attrs = tag->attributes; /*.... */ /* this is not implemented though it's easy */ # endif hcode = hash_code_aggregate_char('.', hcode); hcode = hash_code_aggregate_lower_str(classname, hcode); StrAllocCopy(subj->class_name, classname); } else { StrAllocCopy(subj->class_name, ""); } subj->style = hcode; #endif } #define isLeadP(p) ((isalpha(UCH(*p)) || *p == '_')) #define isNextP(p) ((isalnum(UCH(*p)) || *p == '_')) #define FMT_AT " at column %d:\n\t%s\n" #define TXT_AT (int) (1 + p - ts), ts /* returns FALSE if incorrect */ int html_src_parse_tagspec(char *ts, HTlexeme lexeme, int checkonly, int isstart) { BOOL stop = FALSE; BOOL code = FALSE; char *p = ts; char *tagstart = 0; char *tagend = 0; char *classstart; char *classend; char save, save1; char after_excl = FALSE; html_src_check_state state = HTSRC_CK_normal; HT_tagspec *head = NULL; HT_tagspec *tail = NULL; HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme; while (!stop) { switch (state) { case HTSRC_CK_normal: case HTSRC_CK_seen_excl: switch (*p) { case '\0': stop = TRUE; code = TRUE; break; case ' ': case '\t': break; case '!': if (state == HTSRC_CK_seen_excl) { CTRACE2(TRACE_CFG, (tfp, "second '!'" FMT_AT, TXT_AT)); stop = TRUE; break; } state = HTSRC_CK_seen_excl; after_excl = TRUE; break; default: if (!isLeadP(p)) { CTRACE2(TRACE_CFG, (tfp, "no name starting" FMT_AT, TXT_AT)); stop = TRUE; break; } tagstart = p; while (*p && isNextP(p)) ++p; tagend = p--; state = HTSRC_CK_after_tagname; } break; case HTSRC_CK_after_tagname: switch (*p) { case '\0': stop = TRUE; code = TRUE; /* FALLTHRU */ case ' ': /* FALLTHRU */ case '\t': save = *tagend; *tagend = '\0'; classstart = 0; if (checkonly) { int idx = html_src_tag_index(tagstart); CTRACE2(TRACE_CFG, (tfp, "tag index(%s) = %d\n", tagstart, idx)); *tagend = save; if (idx == -1) { stop = TRUE; break; } } else { if (after_excl) append_close_tag(tagstart, &head, &tail); else append_open_tag(tagstart, NULL, &head, &tail); } state = HTSRC_CK_normal; after_excl = FALSE; break; case '.': if (after_excl) { CTRACE2(TRACE_CFG, (tfp, "dot after '!'" FMT_AT, TXT_AT)); stop = TRUE; break; } state = HTSRC_CK_seen_dot; break; default: CTRACE2(TRACE_CFG, (tfp, "unexpected char '%c' after tagname" FMT_AT, *p, TXT_AT)); stop = TRUE; break; } break; case HTSRC_CK_seen_dot: switch (*p) { case ' ': case '\t': break; case '\0': CTRACE2(TRACE_CFG, (tfp, "expected text after dot" FMT_AT, TXT_AT)); stop = TRUE; break; default: if (!isLeadP(p)) { CTRACE2(TRACE_CFG, (tfp, "no name starting" FMT_AT, TXT_AT)); stop = TRUE; break; } classstart = p; while (*p && isNextP(p)) ++p; classend = p--; save = *classend; *classend = '\0'; save1 = *tagend; *tagend = '\0'; if (checkonly) { int idx = html_src_tag_index(tagstart); *tagend = save1; *classend = save; if (idx == -1) return FALSE; } else { append_open_tag(tagstart, classstart, &head, &tail); } state = HTSRC_CK_normal; after_excl = FALSE; break; } /* of switch(*p) */ break; } /* of switch */ ++p; } if (code && !checkonly) *slot = head; return code; } /*this will clean the data associated with lexeme 'l' */ void html_src_clean_item(HTlexeme l) { int i; if (HTL_tagspecs[l]) FREE(HTL_tagspecs[l]); for (i = 0; i < 2; ++i) { HT_tagspec *cur; HT_tagspec **pts = (i ? lexeme_start : lexeme_end) + l; HT_tagspec *ts = *pts; *pts = NULL; while (ts) { FREE(ts->present); FREE(ts->value); #ifdef USE_COLOR_STYLE if (ts->start) { FREE(ts->class_name); } #endif cur = ts; ts = ts->next; FREE(cur); } } } /*this will be registered with atexit*/ void html_src_clean_data(void) { int i; for (i = 0; i < HTL_num_lexemes; ++i) html_src_clean_item((HTlexeme) i); } void html_src_on_lynxcfg_reload(void) { html_src_clean_data(); HTMLSRC_init_caches(TRUE); } static void failed_init(const char *tag, int lexeme) { fprintf(stderr, gettext("parse-error while caching %s tagspec of lexeme %d\n"), tag, lexeme); fprintf(stderr, gettext("Use -trace -trace-mask=8 to see details in log.\n")); exit_immediately(EXIT_FAILURE); } void HTMLSRC_init_caches(int dont_exit) { int i; char *p; char buf[1000]; CTRACE2(TRACE_CFG, (tfp, "HTMLSRC_init_caches(%d tagspecs)\n", HTL_num_lexemes)); for (i = 0; i < HTL_num_lexemes; ++i) { /*we assume that HT_tagspecs was NULLs at when program started */ LYStrNCpy(buf, HTL_tagspecs[i] ? HTL_tagspecs[i] : HTL_tagspecs_defaults[i], sizeof(buf) - 1); StrAllocCopy(HTL_tagspecs[i], buf); CTRACE2(TRACE_CFG, (tfp, "parsing lexeme %d: %s\n", i + 1, buf)); if ((p = strchr(buf, ':')) != 0) *p = '\0'; if (!html_src_parse_tagspec(buf, (HTlexeme) i, FALSE, TRUE) && !dont_exit) { failed_init("1st", i); } if (!html_src_parse_tagspec(p ? p + 1 : NULL, (HTlexeme) i, FALSE, FALSE) && !dont_exit) { failed_init("2nd", i); } } } #endif /* ifdef USE_PRETTYSRC */