/* HTML source syntax highlighting
by Vlad Harchev
March 1999
*/
#include
#include
#include
#include
#include
/* This file creates too many "leak detected" entries in Lynx.leaks. */
#define NO_MEMORY_TRACKING
#include
#ifdef USE_PRETTYSRC
/*
 * Global flags for the pretty-source (syntax-highlighted HTML source) view.
 * They are defined here but are not used by the tagspec parsing and caching
 * functions below (html_src_parse_tagspec, HTMLSRC_init_caches and their
 * helpers).  The flags declared without an initializer start out as zero,
 * like every file-scope object.
 */
BOOL psrc_convert_string = FALSE;
/* Read by SGML_put_character: TRUE while pretty source is being viewed. */
BOOL psrc_view = FALSE;
/*
 * Selects what is shown on '\': pretty source if TRUE, the normal source
 * view otherwise.  Toggled by the -prettysrc command-line option.
 */
BOOL LYpsrc = FALSE;
BOOL sgml_in_psrc_was_initialized;
BOOL psrc_nested_call;
BOOL psrc_first_tag;
BOOL mark_htext_as_source = FALSE;
/*
 * Tagspecs from lynx.cfg are read here.  After the .lss file is read (if
 * lss support is enabled), the style cache and markup are created before
 * entering the main loop.
 */
BOOL psrcview_no_anchor_numbering = FALSE;
/*
 * Built-in tagspecs, one per lexeme, used by HTMLSRC_init_caches whenever
 * the corresponding HTL_tagspecs[] entry is NULL.
 *
 * Each entry has the form "<start spec>:<end spec>".  HTMLSRC_init_caches
 * splits it at the first ':' and parses the two halves into lexeme_start[]
 * and lexeme_end[].  Within a half, "tag" or "tag.class" names an opening
 * tag and "!tag" a closing tag.  An entry of ":" has two empty halves, so
 * no markup is produced for that lexeme.
 *
 * The entries are indexed by lexeme; the comments in the #else branch name
 * the lexeme each position belongs to.
 */
static char *HTL_tagspecs_defaults[HTL_num_lexemes] =
{
/* These values are the defaults.  They are also listed in the comments of
the distribution's lynx.cfg. */
#ifdef USE_COLOR_STYLE
"span.htmlsrc_comment:!span",
"span.htmlsrc_tag:!span",
"span.htmlsrc_attrib:!span",
"span.htmlsrc_attrval:!span",
"span.htmlsrc_abracket:!span",
"span.htmlsrc_entity:!span",
"span.htmlsrc_href:!span",
"span.htmlsrc_entire:!span",
"span.htmlsrc_badseq:!span",
"span.htmlsrc_badtag:!span",
"span.htmlsrc_badattr:!span",
"span.htmlsrc_sgmlspecial:!span"
#else
"b:!b", /* comment */
"b:!b", /* tag */
"b:!b", /* attrib */
":", /* attrval */
"b:!b", /* abracket */
"b:!b", /* entity */
":", /* href */
":", /* entire */
"b:!b", /* badseq */
":", /* badtag */
":", /* badattr */
"b:!b" /* sgmlspec */
#endif
};
/*
 * Per-lexeme tagspec strings.  A NULL entry makes HTMLSRC_init_caches fall
 * back to HTL_tagspecs_defaults[]; that function then stores a copy of the
 * effective spec here via StrAllocCopy.  html_src_clean_item releases the
 * entry again.
 */
char *HTL_tagspecs[HTL_num_lexemes];
/*
 * Heads of the parsed tag lists emitted before (lexeme_start) and after
 * (lexeme_end) each lexeme, filled in by html_src_parse_tagspec.  These are
 * pointers since a tagspec can be empty (the pointer will be NULL in that
 * case).
 */
HT_tagspec *lexeme_start[HTL_num_lexemes];
HT_tagspec *lexeme_end[HTL_num_lexemes];
/*
 * NOTE(review): neither variable is read by the functions below, and the
 * meaning of the value 2 is not visible here -- confirm against the code
 * that consumes them.
 */
int tagname_transform = 2;
int attrname_transform = 2;
/*
 * Look up 'tagname' in HTML_dtd and return its position in HTML_dtd.tags,
 * or -1 when SGMLFindTag finds nothing or yields the HTTag_unrecognized
 * placeholder.
 */
static int html_src_tag_index(char *tagname)
{
    HTTag *found = SGMLFindTag(&HTML_dtd, tagname);

    if (!found || found == &HTTag_unrecognized)
        return -1;

    return found - HTML_dtd.tags;
}
/* States of the tagspec scanner in html_src_parse_tagspec. */
typedef enum {
/* Between items: whitespace, '!', the start of a tag name, or the end of
the string is acceptable. */
HTSRC_CK_normal,
/* A '!' has been read; handled like the normal state except that a second
'!' is rejected. */
HTSRC_CK_seen_excl,
/* A tag name has just been scanned: whitespace or end of string completes
the item, '.' introduces a class name, anything else is an error. */
HTSRC_CK_after_tagname,
/* A '.' followed the tag name: optional whitespace, then a class name,
must follow. */
HTSRC_CK_seen_dot
} html_src_check_state;
/*
 * Allocate a tagspec node for the closing tag 'tagname' and append it to the
 * singly linked list described by *head / *tail.
 *
 * tagname  NUL-terminated HTML tag name; it must be known to HTML_dtd.
 * head     address of the list head; set when the list was empty.
 * tail     address of the list tail; always updated to the new node.
 *
 * The node gets the tag's index in HTML_dtd.tags, zeroed 'present' and
 * 'value' arrays with one slot per attribute of the tag, and start == FALSE.
 * append_open_tag() reuses this function to initialize the common members.
 *
 * An unknown tag name or a failed allocation is reported on stderr and the
 * program is terminated through exit_immediately().
 */
static void append_close_tag(char *tagname,
                             HT_tagspec ** head,
                             HT_tagspec ** tail)
{
    int idx, nattr;
    HT_tagspec *subj;

    idx = html_src_tag_index(tagname);
    /*
     * Validate the index before it is used: indexing HTML_dtd.tags with -1
     * would read outside the array.
     */
    if (idx == -1) {
        fprintf(stderr,
                "internal error: previous check didn't find bad HTML tag %s", tagname);
        exit_immediately(EXIT_FAILURE);
    }
    nattr = HTML_dtd.tags[idx].number_of_attributes;

    subj = typecalloc(HT_tagspec);
    if (subj != NULL) {
        subj->element = idx;
        subj->present = typecallocn(BOOL, nattr);
        subj->value = typecallocn(char *, nattr);
    }
    /* A zero-sized request may legitimately yield NULL, so only treat NULL
       arrays as a failure when the tag actually has attributes. */
    if (subj == NULL
        || (nattr > 0 && (subj->present == NULL || subj->value == NULL))) {
        fprintf(stderr, "internal error: out of memory in append_close_tag\n");
        exit_immediately(EXIT_FAILURE);
    }
    subj->start = FALSE;
#ifdef USE_COLOR_STYLE
    subj->class_name = NULL;
#endif

    if (!*head) {
        *head = subj;
        *tail = subj;
    } else {
        (*tail)->next = subj;
        *tail = subj;
    }
}
/*
 * Allocate a tagspec node for the opening tag 'tagname', initialize all its
 * members and append it to the list, possibly modifying *head and always
 * modifying *tail.
 *
 * tagname    NUL-terminated HTML tag name known to HTML_dtd.
 * classname  optional class name (NULL or "" for none); only used when
 *            built with USE_COLOR_STYLE.
 * head/tail  list head and tail, updated by append_close_tag().
 *
 * With USE_COLOR_STYLE the node also receives a copy of the class name ("" if
 * none) and a style hash computed from the lower-cased tag name, extended by
 * ".classname" when a class is given.
 */
static void append_open_tag(char *tagname,
                            char *classname GCC_UNUSED,
                            HT_tagspec ** head,
                            HT_tagspec ** tail)
{
    HT_tagspec *subj;

#ifdef USE_COLOR_STYLE
    int hcode;
#endif

    append_close_tag(tagname, head, tail);      /* initialize common members */
    subj = *tail;
    subj->start = TRUE;

#ifdef USE_COLOR_STYLE
    hcode = hash_code_lowercase_on_fly(tagname);
    if (classname && *classname) {
        /*
         * We don't provide the classname as an attribute of the tag, since
         * for plain formatting tags attributes are not used for anything
         * except style - and we provide the style value directly.
         */
        hcode = hash_code_aggregate_char('.', hcode);
        hcode = hash_code_aggregate_lower_str(classname, hcode);
        StrAllocCopy(subj->class_name, classname);
    } else {
        StrAllocCopy(subj->class_name, "");
    }
    subj->style = hcode;
#endif
}
/*
 * Validate or build one half of a tagspec (the part before or after ':').
 *
 * A spec is a sequence of items separated by spaces or tabs:
 *     tag          opening tag
 *     tag.class    opening tag with a class name
 *     !tag         closing tag (a class is not allowed here)
 * Names start with a letter or '_' and continue with letters, digits or '_'.
 * Blanks are tolerated after '!' and after '.'.
 *
 * ts         the spec text; NULL is treated like an empty string.  The text
 *            is temporarily NUL-terminated in place while a name is looked
 *            up, so it must be writable; it is restored before returning.
 * lexeme     index of the lexeme the spec belongs to.
 * checkonly  TRUE: only check syntax and that every tag is known.
 *            FALSE: build the tag list and store it in the lexeme's slot.
 * isstart    TRUE selects lexeme_start[], FALSE lexeme_end[].
 *
 * Returns 1 if the spec is incorrect (syntax error, or an unknown tag in
 * check mode), 0 otherwise.  In build mode an unknown tag terminates the
 * program inside append_close_tag().
 *
 * NOTE(review): when build mode returns 1, nodes appended so far are neither
 * stored nor freed; callers are expected to validate with checkonly first.
 */
int html_src_parse_tagspec(char *ts,
                           HTlexeme lexeme,
                           BOOL checkonly,
                           BOOL isstart)
{
    char *p = ts;
    char *tagstart = 0;
    char *tagend = 0;
    char stop = FALSE, after_excl = FALSE;
    html_src_check_state state = HTSRC_CK_normal;
    HT_tagspec *head = NULL, *tail = NULL;
    HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme;

    /* A missing spec behaves exactly like an empty one. */
    if (ts == NULL) {
        if (!checkonly)
            *slot = NULL;
        return 0;
    }

    while (!stop) {
        switch (state) {
        case HTSRC_CK_normal:
        case HTSRC_CK_seen_excl:
            switch (*p) {
            case '\0':
                stop = TRUE;
                break;
            case ' ':
            case '\t':
                break;
            case '!':
                if (state == HTSRC_CK_seen_excl)
                    return 1;   /* second '!' */
                state = HTSRC_CK_seen_excl;
                after_excl = TRUE;
                break;
            default:
                if (isalpha(UCH(*p)) || *p == '_') {
                    tagstart = p;
                    while (*p && (isalnum(UCH(*p)) || *p == '_'))
                        ++p;
                    tagend = p;
                    state = HTSRC_CK_after_tagname;
                } else
                    return 1;
                continue;       /* p already points past the name */
            }
            break;

        case HTSRC_CK_after_tagname:
            switch (*p) {
            case '\0':
                stop = TRUE;
                /* FALLTHRU */
            case ' ':
                /* FALLTHRU */
            case '\t':
                {
                    char save = *tagend;
                    int idx = 0;

                    *tagend = '\0';
                    if (checkonly) {
                        idx = html_src_tag_index(tagstart);
                    } else if (after_excl) {
                        append_close_tag(tagstart, &head, &tail);
                    } else {
                        append_open_tag(tagstart, NULL, &head, &tail);
                    }
                    /* Undo the temporary terminator in both modes. */
                    *tagend = save;
                    if (idx == -1)
                        return 1;
                    state = HTSRC_CK_normal;
                    after_excl = FALSE;
                }
                break;
            case '.':
                if (after_excl)
                    return 1;
                state = HTSRC_CK_seen_dot;
                break;
            default:
                return 1;
            }
            break;

        case HTSRC_CK_seen_dot:
            switch (*p) {
            case ' ':
            case '\t':
                break;
            case '\0':
                return 1;
            default:
                if (isalpha(UCH(*p)) || *p == '_') {
                    char *classstart = p;
                    char *classend;
                    char save_class, save_tag;
                    int idx = 0;

                    while (*p && (isalnum(UCH(*p)) || *p == '_'))
                        ++p;
                    classend = p;
                    save_class = *classend;
                    *classend = '\0';
                    save_tag = *tagend;
                    *tagend = '\0';
                    if (checkonly) {
                        idx = html_src_tag_index(tagstart);
                    } else {
                        append_open_tag(tagstart, classstart, &head, &tail);
                    }
                    /*
                     * Restore both characters in both modes.  classend is
                     * where p now points, so leaving the terminator there
                     * would end the scan and drop any remaining items.
                     */
                    *tagend = save_tag;
                    *classend = save_class;
                    if (idx == -1)
                        return 1;
                    state = HTSRC_CK_normal;
                    after_excl = FALSE;
                    continue;   /* p already points past the class name */
                }
                return 1;
            }
            break;
        }
        ++p;
    }

    if (!checkonly)
        *slot = head;
    return 0;
}
/*
 * Release everything cached for lexeme 'l': its tagspec string and both of
 * its parsed tag lists.  The list heads are reset to NULL before the nodes
 * are freed.
 */
void html_src_clean_item(HTlexeme l)
{
    HT_tagspec **heads[2];
    int which;

    if (HTL_tagspecs[l] != NULL) {
        FREE(HTL_tagspecs[l]);
    }

    /* Same order as before: the end list first, then the start list. */
    heads[0] = lexeme_end + l;
    heads[1] = lexeme_start + l;

    for (which = 0; which < 2; ++which) {
        HT_tagspec *node = *heads[which];

        *heads[which] = NULL;
        while (node != NULL) {
            HT_tagspec *following = node->next;

            FREE(node->present);
            FREE(node->value);
#ifdef USE_COLOR_STYLE
            /* only opening-tag nodes carry a class name */
            if (node->start) {
                FREE(node->class_name);
            }
#endif
            FREE(node);
            node = following;
        }
    }
}
/*
 * Release the cached data of every lexeme, in ascending lexeme order.
 * Intended to be registered with atexit (the registration itself is not in
 * this part of the file).
 */
void html_src_clean_data(void)
{
    int lexeme = 0;

    while (lexeme < HTL_num_lexemes) {
        html_src_clean_item(lexeme);
        ++lexeme;
    }
}
void html_src_on_lynxcfg_reload(void)
{
html_src_clean_data();
HTMLSRC_init_caches(TRUE);
}
/*
 * Build the parsed tag lists for every lexeme.
 *
 * For each lexeme the effective tagspec is HTL_tagspecs[i] or, if that is
 * NULL, the built-in default.  It is copied into a local buffer (at most
 * sizeof(buf) - 1 characters are requested from LYstrncpy), stored back
 * into HTL_tagspecs[i], and split at the first ':'.  The part before the
 * ':' is parsed into lexeme_start[i], the part after it into lexeme_end[i].
 * A spec without ':' yields an empty end list.
 *
 * dont_exit  FALSE: a parse failure is reported on stderr and the program
 *            is terminated via exit_immediately().
 *            TRUE: parse failures are ignored and processing continues.
 */
void HTMLSRC_init_caches(BOOL dont_exit)
{
    int i;
    char *p;
    char buf[1000];
    char empty[1];

    for (i = 0; i < HTL_num_lexemes; ++i) {
        /* we assume that HTL_tagspecs was all NULLs when the program started */
        /*
         * Work on a stack copy: the source may be HTL_tagspecs[i] itself,
         * which is reassigned just below, and the ':' separator is
         * overwritten in the copy.
         */
        LYstrncpy(buf,
                  HTL_tagspecs[i]
                  ? HTL_tagspecs[i]
                  : HTL_tagspecs_defaults[i],
                  sizeof(buf) - 1);
        StrAllocCopy(HTL_tagspecs[i], buf);

        if ((p = strchr(buf, ':')) != 0)
            *p = '\0';

        if (html_src_parse_tagspec(buf, i, FALSE, TRUE) && !dont_exit) {
            fprintf(stderr,
                    "internal error while caching 1st tagspec of %d lexeme", i);
            exit_immediately(EXIT_FAILURE);
        }

        /*
         * Without a ':' there is no second half.  Hand the parser an empty
         * writable string instead of NULL, which it would dereference.
         */
        empty[0] = '\0';
        if (html_src_parse_tagspec(p ? p + 1 : empty, i, FALSE, FALSE) && !dont_exit) {
            fprintf(stderr,
                    "internal error while caching 2nd tagspec of %d lexeme", i);
            exit_immediately(EXIT_FAILURE);
        }
    }
}
#endif /* ifdef USE_PRETTYSRC */