about summary refs log blame commit diff stats
path: root/src/LYPrettySrc.c
blob: e45c8e005078fc3aa0c06c7e3b51eb5b0fba1f18 (plain) (tree)
1
2
3
4
5
6
7
8
9






                                  
                      
                    
 



                                                                        
                    
                                 






                                                                                                        


                                  

                                  


                                                                               
                                          

                                                     
                                                                                
                













                                    











                                              


      
                                    


                                                                           

                                          
 

                           
 
                                            
 


                                                                          

 
              





                           


                                                

                   

                     

                                      
                              



                                      
                                                                                       
                                       

     
                                  
                        

                                             
 
                        


                            

                 

                     
            

                             




                                                                    



                                                       
 


                     



                      
                                                                               


                       
                                        












                                                                                 


                                                       


                                                     
                                                                

                                                  
                                           




                        
                            



                                           







                                                 

                                                                       


                        




                                
                      






































                                                                 
                                     







                                                                          

                      









                                          
                             





                             
                                         

                                                            
                                           

                                                                         






                                                                       


                                                 
                                              
                                             


                                                                                    

                                                    



                                     
                                                  
                      

                                                                







                     
                                                        
                                    

          
 

                              



                                                               
 
















                                       
                              

          
 



                                         
                                     




                              
                                        

          
            

                   
                                           
                                                                         





                                             
 

                                        


                                                                                
                                           
         


                                                                                      
                                           



         
                                
/* HTML source syntax highlighting
   by Vlad Harchev <hvv@hippo.ru>
   March 1999
*/
#include <HTUtils.h>
#include <LYHash.h>
#include <LYPrettySrc.h>
#include <LYStrings.h>
#include <LYLeaks.h>

 /* This file creates too many "leak detected" entries in Lynx.leaks. */
#define NO_MEMORY_TRACKING
#include <LYLeaks.h>

#ifdef USE_PRETTYSRC
BOOL psrc_convert_string = FALSE;
BOOL psrc_view = FALSE;		/* this is read by SGML_put_character - TRUE

				   when viewing pretty source */
BOOL LYpsrc = FALSE;		/* this tells what will be shown on '\':

				   if TRUE, then pretty source, normal source view otherwise. Toggled by
				   -prettysrc commandline option.  */
BOOL sgml_in_psrc_was_initialized;
BOOL psrc_nested_call;
BOOL psrc_first_tag;
BOOL mark_htext_as_source = FALSE;

  /* tagspecs from lynx.cfg are read here. After .lss file is read (is with lss
     support), the style cache and markup are created before entering the
     mainloop. */
BOOL psrcview_no_anchor_numbering = FALSE;
static char *HTL_tagspecs_defaults[HTL_num_lexemes] =
{
 /* these values are defaults. They are also listed in comments of distibution's
    lynx.cfg. */
#ifdef USE_COLOR_STYLE
    "span.htmlsrc_comment:!span",
    "span.htmlsrc_tag:!span",
    "span.htmlsrc_attrib:!span",
    "span.htmlsrc_attrval:!span",
    "span.htmlsrc_abracket:!span",
    "span.htmlsrc_entity:!span",
    "span.htmlsrc_href:!span",
    "span.htmlsrc_entire:!span",
    "span.htmlsrc_badseq:!span",
    "span.htmlsrc_badtag:!span",
    "span.htmlsrc_badattr:!span",
    "span.htmlsrc_sgmlspecial:!span"
#else
    "b:!b",			/* comment */
    "b:!b",			/* tag     */
    "b:!b",			/* attrib  */
    ":",			/* attrval */
    "b:!b",			/* abracket */
    "b:!b",			/* entity  */
    ":",			/* href    */
    ":",			/* entire  */
    "b:!b",			/* badseq  */
    ":",			/* badtag  */
    ":",			/* badattr */
    "b:!b"			/* sgmlspec */
#endif
};

char *HTL_tagspecs[HTL_num_lexemes];

 /* these are pointers since tagspec can be empty (the pointer will be NULL
    in that case) */
HT_tagspec *lexeme_start[HTL_num_lexemes];
HT_tagspec *lexeme_end[HTL_num_lexemes];

int tagname_transform = 2;
int attrname_transform = 2;

static int html_src_tag_index(char *tagname)
{
    HTTag *tag = SGMLFindTag(&HTML_dtd, tagname);

    return (tag && tag != &HTTag_unrecognized) ? tag - HTML_dtd.tags : -1;
}

typedef enum {
    HTSRC_CK_normal,
    HTSRC_CK_seen_excl,
    HTSRC_CK_after_tagname,
    HTSRC_CK_seen_dot
} html_src_check_state;

static void append_close_tag(char *tagname,
			     HT_tagspec ** head,
			     HT_tagspec ** tail)
{
    int idx, nattr;
    HTTag *tag;
    HT_tagspec *subj;

    idx = html_src_tag_index(tagname);
    tag = HTML_dtd.tags + idx;
    nattr = tag->number_of_attributes;

    if (idx == -1) {
	fprintf(stderr,
		"internal error: previous check didn't find bad HTML tag %s", tagname);
	exit_immediately(EXIT_FAILURE);
    }

    subj = typecalloc(HT_tagspec);
    subj->element = idx;
    subj->present = typecallocn(BOOL, nattr);
    subj->value = typecallocn(char *, nattr);

    subj->start = FALSE;
#ifdef USE_COLOR_STYLE
    subj->class_name = NULL;
#endif

    if (!*head) {
	*head = subj;
	*tail = subj;
    } else {
	(*tail)->next = subj;
	*tail = subj;
    }
}

/* this will allocate node, initialize all members, and node
   append to the list, possibly modifying head and modifying tail */
static void append_open_tag(char *tagname,
			    char *classname GCC_UNUSED,
			    HT_tagspec ** head,
			    HT_tagspec ** tail)
{
    HT_tagspec *subj;
    HTTag *tag;

#ifdef USE_COLOR_STYLE
    int hcode;
#endif

    append_close_tag(tagname, head, tail);	/* initialize common members */
    subj = *tail;
    subj->start = TRUE;

    tag = HTML_dtd.tags + subj->element;

#ifdef USE_COLOR_STYLE
    hcode = hash_code_lowercase_on_fly(tagname);
    if (classname && *classname) {

#  if 0
	/*
	 * we don't provide a classname as attribute of that tag, since for plain
	 * formatting tags they are not used directly for anything except style -
	 * and we provide style value directly.
	 */
	int class_attr_idx = 0;
	int n = tag->number_of_attributes;
	attr *attrs = tag->attributes;

/*.... *//* this is not implemented though it's easy */
#  endif

	hcode = hash_code_aggregate_char('.', hcode);
	hcode = hash_code_aggregate_lower_str(classname, hcode);
	StrAllocCopy(subj->class_name, classname);
    } else {
	StrAllocCopy(subj->class_name, "");
    }
    subj->style = hcode;
#endif
}

/* returns 1 if incorrect */
int html_src_parse_tagspec(char *ts,
			   HTlexeme lexeme,
			   BOOL checkonly,
			   BOOL isstart)
{
    char *p = ts;
    char *tagstart = 0;
    char *tagend = 0;
    char *classstart;
    char *classend;
    char stop = FALSE, after_excl = FALSE;
    html_src_check_state state = HTSRC_CK_normal;
    HT_tagspec *head = NULL, *tail = NULL;
    HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme;

    while (!stop) {
	switch (state) {
	case HTSRC_CK_normal:
	case HTSRC_CK_seen_excl:
	    switch (*p) {
	    case '\0':
		stop = TRUE;
		break;
	    case ' ':
	    case '\t':
		break;
	    case '!':
		if (state == HTSRC_CK_seen_excl)
		    return 1;	/*second '!' */
		state = HTSRC_CK_seen_excl;
		after_excl = TRUE;
		break;
	    default:
		if (isalpha(UCH(*p)) || *p == '_') {
		    tagstart = p;
		    while (*p && (isalnum(UCH(*p)) || *p == '_'))
			++p;
		    tagend = p;
		    state = HTSRC_CK_after_tagname;
		} else
		    return 1;
		continue;
	    }
	    break;
	case HTSRC_CK_after_tagname:
	    switch (*p) {
	    case '\0':
		stop = TRUE;
		/* FALLTHRU */
	    case ' ':
		/* FALLTHRU */
	    case '\t':
		{
		    char save = *tagend;

		    *tagend = '\0';
		    classstart = 0;
		    if (checkonly) {
			int idx = html_src_tag_index(tagstart);

			*tagend = save;
			if (idx == -1)
			    return 1;
		    } else {
			if (after_excl)
			    append_close_tag(tagstart, &head, &tail);
			else
			    append_open_tag(tagstart, NULL, &head, &tail);
		    }
		    state = HTSRC_CK_normal;
		    after_excl = FALSE;
		}
		break;
	    case '.':
		if (after_excl)
		    return 1;
		state = HTSRC_CK_seen_dot;
		break;
	    default:
		return 1;
	    }
	    break;
	case HTSRC_CK_seen_dot:{
		switch (*p) {
		case ' ':
		case '\t':
		    break;
		case '\0':
		    return 1;
		default:{
			char save, save1;

			if (isalpha(UCH(*p)) || *p == '_') {
			    classstart = p;
			    while (*p && (isalnum(UCH(*p)) || *p == '_'))
				++p;
			    classend = p;
			    save = *classend;
			    *classend = '\0';
			    save1 = *tagend;
			    *tagend = '\0';
			    if (checkonly) {
				int idx = html_src_tag_index(tagstart);

				*tagend = save1;
				*classend = save;
				if (idx == -1)
				    return 1;
			    } else {
				append_open_tag(tagstart, classstart, &head, &tail);
			    }
			    state = HTSRC_CK_normal;
			    after_excl = FALSE;
			    continue;
			} else
			    return 1;
		    }
		}		/*of switch(*p) */
		break;
	    }			/* of case HTSRC_CK_seen_dot: */
	}			/* of switch */
	++p;
    }

    if (!checkonly)
	*slot = head;
    return 0;
}

/*this will clean the data associated with lexeme 'l' */
void html_src_clean_item(HTlexeme l)
{
    int i;

    if (HTL_tagspecs[l])
	FREE(HTL_tagspecs[l]);
    for (i = 0; i < 2; ++i) {
	HT_tagspec *cur;
	HT_tagspec **pts = (i ? lexeme_start : lexeme_end) + l;
	HT_tagspec *ts = *pts;

	*pts = NULL;
	while (ts) {
	    FREE(ts->present);
	    FREE(ts->value);
#ifdef USE_COLOR_STYLE
	    if (ts->start) {
		FREE(ts->class_name);
	    }
#endif
	    cur = ts;
	    ts = ts->next;
	    FREE(cur);
	}
    }
}

/*this will be registered with atexit*/
void html_src_clean_data(void)
{
    int i;

    for (i = 0; i < HTL_num_lexemes; ++i)
	html_src_clean_item(i);
}

void html_src_on_lynxcfg_reload(void)
{
    html_src_clean_data();
    HTMLSRC_init_caches(TRUE);
}

void HTMLSRC_init_caches(BOOL dont_exit)
{
    int i;
    char *p;
    char buf[1000];

    for (i = 0; i < HTL_num_lexemes; ++i) {
	/*we assume that HT_tagspecs was NULLs at when program started */
	LYstrncpy(buf,
		  HTL_tagspecs[i]
		  ? HTL_tagspecs[i]
		  : HTL_tagspecs_defaults[i],
		  sizeof(buf) - 1);
	StrAllocCopy(HTL_tagspecs[i], buf);

	if ((p = strchr(buf, ':')) != 0)
	    *p = '\0';
	if (html_src_parse_tagspec(buf, i, FALSE, TRUE) && !dont_exit) {
	    fprintf(stderr,
		    "internal error while caching 1st tagspec of %d lexeme", i);
	    exit_immediately(EXIT_FAILURE);
	}
	if (html_src_parse_tagspec(p ? p + 1 : NULL, i, FALSE, FALSE) && !dont_exit) {
	    fprintf(stderr,
		    "internal error while caching 2nd tagspec of %d lexeme", i);
	    exit_immediately(EXIT_FAILURE);
	}
    }
}

#endif /* ifdef USE_PRETTYSRC */