diff options
Diffstat (limited to 'WWW/Library/Implementation')
-rw-r--r-- | WWW/Library/Implementation/HTDOS.c | 2 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTFile.c | 12 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 31 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.h | 16 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTNews.c | 2 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTParse.c | 3 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTString.c | 74 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTString.h | 8 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTTelnet.c | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/LYLeaks.h | 53 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 293 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.h | 3 |
12 files changed, 350 insertions, 151 deletions
diff --git a/WWW/Library/Implementation/HTDOS.c b/WWW/Library/Implementation/HTDOS.c index fffd3a29..c90702d4 100644 --- a/WWW/Library/Implementation/HTDOS.c +++ b/WWW/Library/Implementation/HTDOS.c @@ -48,7 +48,7 @@ char * HTDOS_wwwName ARGS1(char *, dosname) break; default: *cp_url++ = ch; - break;; + break; } dosname++; } diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index cd8bcdab..b5ad32af 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -161,8 +161,10 @@ PRIVATE char *FormatStr ARGS3( if (*start) { sprintf(fmt, "%%%.*ss", (int) sizeof(fmt) - 3, start); HTSprintf0(bufp, fmt, entry); - } else { - HTSprintf0(bufp, "%s", entry); + } else if (*bufp && !(entry && *entry)) { + **bufp = '\0'; + } else if (entry) { + StrAllocCopy(*bufp, entry); } return *bufp; } @@ -178,7 +180,7 @@ PRIVATE char *FormatNum ARGS3( HTSprintf0(bufp, fmt, entry); } else { sprintf(fmt, "%d", entry); - StrAllocCat(*bufp, fmt); + StrAllocCopy(*bufp, fmt); } return *bufp; } @@ -1127,7 +1129,7 @@ PUBLIC void LYGetFileInfo ARGS7( HTFormat format; HTAtom * myEnc = NULL; HTParentAnchor *file_anchor; - CONST char *file_csname = file_anchor->charset; + CONST char *file_csname; int file_cs; /* @@ -1815,7 +1817,7 @@ PRIVATE int print_local_dir ARGS5( #ifndef DISP_PARTIAL if (num_of_entries_output % HTMAX(display_lines,10) == 0) { if (HTCheckForInterrupt()) { - _HTProgress ("Data transfer interrupted."); + _HTProgress (TRANSFER_INTERRUPTED); status = HT_PARTIAL_CONTENT; break; } diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index 7f8c37ed..9c00e64d 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -1325,7 +1325,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "TD" , td_attr, HTML_TD_ATTRIBUTES, SGML_EMPTY }, */ #define T_TD 0x0400, 0x0FBCF,0x8FFFF,0x00020,0xB7FB7,0x8C75F,0x00001 /* { "TEXTAREA", 
textarea_attr,HTML_TEXTAREA_ATTRIBUTES, SGML_LITTERAL }, */ -#define T_TEXTAREA 0x0040, 0x00000,0x00000,0x07F8F,0x33FBF,0x80F5F,0x00000 +#define T_TEXTAREA 0x0040, 0x00000,0x00000,0x07F8F,0x33FBF,0x80F5F,0x00040 /* { "TEXTFLOW", bodytext_attr,HTML_BODYTEXT_ATTRIBUTES, SGML_MIXED }, */ #define T_TEXTFLOW 0x20000,0x8FBFF,0x9FFFF,0x977B0,0xB7FB7,0x9B00F,0x00003 /* { "TFOOT" , tr_attr, HTML_TR_ATTRIBUTES, SGML_EMPTY }, */ @@ -1383,7 +1383,7 @@ static attr ulist_attr[] = { /* UL attributes */ #define NULL_HTTag NULL_HTTag_ #endif -static CONST HTTag tags_old[HTML_ELEMENTS] = { +static CONST HTTag tags_old[HTML_ALL_ELEMENTS] = { { P("A") , a_attr, HTML_A_ATTRIBUTES, SGML_EMPTY,T_A}, { P("ABBREV") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_ABBREV}, { P("ACRONYM") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_ACRONYM}, @@ -1473,7 +1473,7 @@ static CONST HTTag tags_old[HTML_ELEMENTS] = { { P0("PRE") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_PRE}, { P("Q") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_Q}, { P("S") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_S}, - { P0("SAMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SAMP}, + { P("SAMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SAMP}, { P("SCRIPT") , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_LITTERAL,T_SCRIPT}, { P("SELECT") , select_attr, HTML_SELECT_ATTRIBUTES, SGML_MIXED,T_SELECT}, { P("SHY") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_SHY}, @@ -1502,9 +1502,13 @@ static CONST HTTag tags_old[HTML_ELEMENTS] = { { P("VAR") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_VAR}, { P("WBR") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_WBR}, { P0("XMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL,T_XMP}, + /* additional (alternative variants), not counted in HTML_ELEMENTS: */ +/* This one will be used as a temporary substitute within the parser when + it has been signalled to parse OBJECT content as MIXED. 
- kw */ + { P("OBJECT") , object_attr, HTML_OBJECT_ATTRIBUTES, SGML_MIXED,T_OBJECT_PCDATA}, }; -static CONST HTTag tags_new[HTML_ELEMENTS] = { +static CONST HTTag tags_new[HTML_ALL_ELEMENTS] = { { P("A") , a_attr, HTML_A_ATTRIBUTES, SGML_MIXED,T_A}, { P("ABBREV") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_ABBREV}, { P("ACRONYM") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_ACRONYM}, @@ -1594,7 +1598,7 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P0("PRE") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_PRE}, { P("Q") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_Q}, { P("S") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_S}, - { P0("SAMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SAMP}, + { P("SAMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SAMP}, { P("SCRIPT") , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_CDATA,T_SCRIPT}, { P("SELECT") , select_attr, HTML_SELECT_ATTRIBUTES, SGML_ELEMENT,T_SELECT}, { P("SHY") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_SHY}, @@ -1623,12 +1627,11 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P("VAR") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_VAR}, { P("WBR") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_WBR}, { P0("XMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL,T_XMP}, -}; - + /* additional (alternative variants), not counted in HTML_ELEMENTS: */ /* This one will be used as a temporary substitute within the parser when - it has been signalled to parse OBJECT content (again) as MIXED. - kw */ -PUBLIC HTTag HTTag_mixedObject = - { P("OBJECT") , object_attr, HTML_OBJECT_ATTRIBUTES, SGML_MIXED,T_OBJECT_PCDATA}; + it has been signalled to parse OBJECT content as MIXED. 
- kw */ + { P("OBJECT") , object_attr, HTML_OBJECT_ATTRIBUTES, SGML_MIXED,T_OBJECT_PCDATA}, +}; #undef P #undef P0 @@ -1637,7 +1640,7 @@ PUBLIC HTTag HTTag_mixedObject = /* Dummy space, will be filled with the contents of either tags_new or tags_old on calling HTSwitchDTD - kw */ -static HTTag tags[HTML_ELEMENTS]; +static HTTag tags[HTML_ALL_ELEMENTS]; PUBLIC CONST SGML_dtd HTML_dtd = { tags, @@ -1659,12 +1662,12 @@ PUBLIC void HTSwitchDTD ARGS1( if (TRACE) CTRACE((tfp,"HTMLDTD: Copying DTD element info of size %d, %d * %d\n", (int) (new ? sizeof(tags_new) : sizeof(tags_old)), - HTML_ELEMENTS, + HTML_ALL_ELEMENTS, (int) sizeof(HTTag))); if (new) - memcpy(tags, tags_new, HTML_ELEMENTS * sizeof(HTTag)); + memcpy(tags, tags_new, HTML_ALL_ELEMENTS * sizeof(HTTag)); else - memcpy(tags, tags_old, HTML_ELEMENTS * sizeof(HTTag)); + memcpy(tags, tags_old, HTML_ALL_ELEMENTS * sizeof(HTTag)); } PUBLIC HTTag HTTag_unrecognized = diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h index 9c81774a..5ef77f6a 100644 --- a/WWW/Library/Implementation/HTMLDTD.h +++ b/WWW/Library/Implementation/HTMLDTD.h @@ -170,10 +170,22 @@ typedef enum _HTMLElement { HTML_UL, HTML_VAR, HTML_WBR, - HTML_XMP } HTMLElement; - + HTML_XMP, + HTML_ALT_OBJECT } HTMLElement; + +/* Notes: HTML.c uses a different extension of the HTML_ELEMENTS space + privately, see HTNestedList.h. */ +/* Don't replace HTML_ELEMENTS with TABLESIZE(mumble_dtd.tags). */ +/* Keep the following defines in synch with the above enum! */ + +/* HTML_ELEMENTS: number of elements visible to Lynx code in general, + alphabetic (ASCII) order. */ +/* HTML_ALL_ELEMENTS: number of elements visible to SGML parser, + additional variant(s) at end. 
*/ +#define HTML_ALL_ELEMENTS 119 #define HTML_ELEMENTS 118 + /* Attribute numbers diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c index c98d8329..c37f07e0 100644 --- a/WWW/Library/Implementation/HTNews.c +++ b/WWW/Library/Implementation/HTNews.c @@ -2738,7 +2738,7 @@ Send_NNTP_command: * to occur in normal use, break from the loop without retrying * and without closing the connection. It is unlikely that * these are leftovers from a timed-out connection (but we do - * some checks to see whether the response rorresponds to the + * some checks to see whether the response corresponds to the * last command), or that they will give anything else when * automatically retried. - kw */ diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index 32ae6fa5..afbf2910 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -322,8 +322,7 @@ PUBLIC char * HTParse ARGS3( } } else if (p2 != result) { h = p2; - if (h != result) - h--; /* End of hostname */ + h--; /* End of hostname */ if (*h == '.') { /* ** Slide p2 over h. diff --git a/WWW/Library/Implementation/HTString.c b/WWW/Library/Implementation/HTString.c index 824ab937..85f9123f 100644 --- a/WWW/Library/Implementation/HTString.c +++ b/WWW/Library/Implementation/HTString.c @@ -496,6 +496,16 @@ PRIVATE char *HTAlloc ARGS2(char *, ptr, size_t, length) } /* + * If SAVE_TIME_NOT_SPACE is defined, StrAllocVsprintf will hang on to + * its temporary string buffers instead of allocating and freeing them + * in each invocation. They only grow and never shrink, and won't be + * cleaned up on exit. - kw + */ +#if !(defined(_REENTRANT) || defined(_THREAD_SAFE)) +#define SAVE_TIME_NOT_SPACE +#endif + +/* * Replacement for sprintf, allocates buffer on the fly according to what's needed * for its arguments. Unlike sprintf, this always concatenates to the destination * buffer, so we do not have to provide both flavors. 
@@ -513,27 +523,65 @@ typedef enum { Flags, Width, Prec, Type, Format } PRINTF; #define GROW_EXPR(n) (((n) * 3) / 2) #define GROW_SIZE 256 -PRIVATE char * StrAllocVsprintf ARGS4( +PUBLIC_IF_FIND_LEAKS char * StrAllocVsprintf ARGS4( char **, pstr, size_t, dst_len, CONST char *, fmt, va_list *, ap) { +#ifdef SAVE_TIME_NOT_SPACE + static size_t tmp_len = 0; + static size_t fmt_len = 0; + static char *tmp_ptr = NULL; + static char *fmt_ptr = NULL; +#else size_t tmp_len = GROW_SIZE; - size_t have, need; char *tmp_ptr = 0; char *fmt_ptr; +#endif /* SAVE_TIME_NOT_SPACE */ + size_t have, need; char *dst_ptr = *pstr; CONST char *format = fmt; if (fmt == 0 || *fmt == '\0') return 0; +#ifdef USE_VASPRINTF + if (pstr && !dst_len) { + if (*pstr) + FREE(*pstr); + if (vasprintf(pstr, fmt, *ap) >= 0) { + mark_malloced(*pstr, strlen(*pstr)+1); + return(*pstr); + } + } +#endif /* USE_VASPRINTF */ + need = strlen(fmt) + 1; +#ifdef SAVE_TIME_NOT_SPACE + /* the following assumes that 0 as first arg to realloc works + portably like malloc; if that isn't the case, change to use + HTAlloc. 
- kw */ + if (!fmt_ptr || fmt_len < need*NUM_WIDTH) { + if ((fmt_ptr = realloc(fmt_ptr, need*NUM_WIDTH)) == 0) { + outofmem(__FILE__, "StrAllocVsprintf (fmt_ptr)"); + } else { + fmt_len = need*NUM_WIDTH; + } + } + if (!tmp_ptr || tmp_len < GROW_SIZE) { + if ((tmp_ptr = realloc(tmp_ptr, GROW_SIZE)) == 0) { + outofmem(__FILE__, "StrAllocVsprintf (tmp_ptr)"); + } else { + tmp_len = GROW_SIZE; + } + } +#else if ((fmt_ptr = malloc(need*NUM_WIDTH)) == 0 || (tmp_ptr = malloc(tmp_len)) == 0) { outofmem(__FILE__, "StrAllocVsprintf"); } +#endif /* SAVE_TIME_NOT_SPACE */ if (dst_ptr == 0) { dst_ptr = HTAlloc(dst_ptr, have = GROW_SIZE + need); @@ -703,19 +751,27 @@ PRIVATE char * StrAllocVsprintf ARGS4( } } +#ifndef SAVE_TIME_NOT_SPACE FREE(tmp_ptr); FREE(fmt_ptr); +#endif dst_ptr[dst_len] = '\0'; if (pstr) *pstr = dst_ptr; return (dst_ptr); } +#undef SAVE_TIME_NOT_SPACE /* * Replacement for sprintf, allocates buffer on the fly according to what's needed * for its arguments. Unlike sprintf, this always concatenates to the destination * buffer. */ +/* Note: if making changes, also check the memory tracking version + * LYLeakHTSprintf in LYLeaks.c. - kw */ +#ifdef HTSprintf /* if hidden by LYLeaks stuff */ +#undef HTSprintf +#endif #if ANSI_VARARGS PUBLIC char * HTSprintf (char ** pstr, CONST char * fmt, ...) #else @@ -747,6 +803,11 @@ PUBLIC char * HTSprintf (va_alist) * needed for its arguments. Like sprintf, this always resets the destination * buffer. */ +/* Note: if making changes, also check the memory tracking version + * LYLeakHTSprintf0 in LYLeaks.c. - kw */ +#ifdef HTSprintf0 /* if hidden by LYLeaks stuff */ +#undef HTSprintf0 +#endif #if ANSI_VARARGS PUBLIC char * HTSprintf0 (char ** pstr, CONST char * fmt, ...) 
#else @@ -763,6 +824,15 @@ PUBLIC char * HTSprintf0 (va_alist) char ** pstr = va_arg(ap, char **); CONST char * fmt = va_arg(ap, CONST char *); #endif +#ifdef USE_VASPRINTF + if (pstr) { + if (*pstr) + FREE(*pstr); + if (vasprintf(pstr, fmt, ap) >= 0) /* else call outofmem?? */ + mark_malloced(*pstr, strlen(*pstr)+1); + result = *pstr; + } else +#endif /* USE_VASPRINTF */ result = StrAllocVsprintf(pstr, 0, fmt, &ap); } va_end(ap); diff --git a/WWW/Library/Implementation/HTString.h b/WWW/Library/Implementation/HTString.h index e3a83660..2b167a8c 100644 --- a/WWW/Library/Implementation/HTString.h +++ b/WWW/Library/Implementation/HTString.h @@ -81,6 +81,14 @@ extern char * HTSprintf () GCC_PRINTFLIKE(2,3); extern char * HTSprintf0 () GCC_PRINTFLIKE(2,3); #endif +#if defined(LY_FIND_LEAKS) /* private otherwise */ +extern char * StrAllocVsprintf PARAMS(( + char ** pstr, + size_t len, + CONST char * fmt, + va_list * ap)); +#endif + #if (defined(VMS) || defined(DOSPATH) || defined(__EMX__)) && !defined(__CYGWIN__) #define USE_QUOTED_PARAMETER 0 #else diff --git a/WWW/Library/Implementation/HTTelnet.c b/WWW/Library/Implementation/HTTelnet.c index 77f9ec69..025ede23 100644 --- a/WWW/Library/Implementation/HTTelnet.c +++ b/WWW/Library/Implementation/HTTelnet.c @@ -37,7 +37,11 @@ PRIVATE void do_system ARGS1(char *, command) { CTRACE((tfp, "HTTelnet: Command is: %s\n\n", command)); +#ifdef UNIX /* want LYSystem's signal sanitizing - kw */ + LYSystem(command); +#else /* Non-UNIX should use LYSystem too? - left for now - kw */ system(command); +#endif FREE(command); } diff --git a/WWW/Library/Implementation/LYLeaks.h b/WWW/Library/Implementation/LYLeaks.h index 5ecf8e67..fc2a0dc0 100644 --- a/WWW/Library/Implementation/LYLeaks.h +++ b/WWW/Library/Implementation/LYLeaks.h @@ -32,12 +32,23 @@ ** string created by __FILE__ to not be dynamic in ** nature (don't free it and assume will exist at all ** times during execution). 
+** If you are using LY_FIND_LEAKS and LY_FIND_LEAKS_EXTENDED and +** want only normal memory tracking (not extended for +** HTSprintf/HTSprintf0) to be used in a certain file, +** define NO_EXTENDED_MEMORY_TRACKING and don't define +** NO_MEMORY_TRACKING before including this file. ** Revision History: ** 05-26-94 created for Lynx 2-3-1, Garrett Arch Blythe ** 10-30-97 modified to handle StrAllocCopy() and ** StrAllocCat(). - KW & FM +** 1999-10-17 modified to handle HTSprintf0 and HTSprintf(), +** and to provide mark_malloced, if +** LY_FIND_LEAKS_EXTENDED is defined. - kw */ +/* Undefine this to get no improved HTSprintf0/HTSprintf tracking: */ +#define LY_FIND_LEAKS_EXTENDED + /* ** Required includes */ @@ -146,13 +157,42 @@ typedef struct AllocationList_tag { #endif /* StrAllocCat */ #define StrAllocCat(dest, src) LYLeakSACat(&(dest), src, __FILE__, __LINE__) +#define mark_malloced(a,size) LYLeak_mark_malloced(a,size, __FILE__, __LINE__) + +#if defined(LY_FIND_LEAKS_EXTENDED) && !defined(NO_EXTENDED_MEMORY_TRACKING) +#ifdef HTSprintf0 +#undef HTSprintf0 +#endif /* HTSprintf0 */ +#define HTSprintf0 (Get_htsprintf0_fn(__FILE__,__LINE__)) +#ifdef HTSprintf +#undef HTSprintf +#endif /* HTSprintf */ +#define HTSprintf (Get_htsprintf_fn(__FILE__,__LINE__)) +#endif /* LY_FIND_LEAKS_EXTENDED and not NO_EXTENDED_MEMORY_TRACKING */ + +#else /* LY_FIND_LEAKS && !NO_MEMORY_TRACKING */ + +#define mark_malloced(a,size) /* no-op */ + #endif /* LY_FIND_LEAKS && !NO_MEMORY_TRACKING */ +#if defined(LY_FIND_LEAKS) +#define PUBLIC_IF_FIND_LEAKS PUBLIC +#else +#define PUBLIC_IF_FIND_LEAKS PRIVATE +#endif /* ** Function declarations ** See the appropriate source file for usage. 
*/ extern void LYLeaks NOPARAMS; +#ifdef LY_FIND_LEAKS_EXTENDED +extern AllocationList *LYLeak_mark_malloced PARAMS(( + void * vp_alloced, + size_t st_bytes, + CONST char * cp_File, + CONST short ssi_Line)); +#endif /* LY_FIND_LEAKS_EXTENDED */ extern void *LYLeakMalloc PARAMS(( size_t st_bytes, CONST char * cp_File, @@ -182,4 +222,17 @@ extern char * LYLeakSACat PARAMS(( CONST char * cp_File, CONST short ssi_Line)); +#ifdef LY_FIND_LEAKS_EXTENDED +/* Trick to get tracking of var arg functions without relying + on var arg preprocessor macros: */ + +typedef char * HTSprintflike PARAMS((char **, CONST char *, ...)); +extern HTSprintflike *Get_htsprintf_fn PARAMS(( + CONST char * cp_File, + CONST short ssi_Line)); +extern HTSprintflike *Get_htsprintf0_fn PARAMS(( + CONST char * cp_File, + CONST short ssi_Line)); +#endif /* LY_FIND_LEAKS_EXTENDED */ + #endif /* __LYLEAKS_H */ diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 9b018177..3e2f3389 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -90,9 +90,8 @@ PRIVATE void fake_put_character ARGS2( #define PUTS(str) ((*context->actions->put_string)(context->target, str)) -#define OPT 0 /* don't make it 1 otherwise something wrong will be with - TagSoup parser mode - I was unable to undestand why it works incorrectly -HV*/ -#define OPT1 1 /* set to 1 for several optimizations */ +#define OPT 1 + /*the following macros are used for pretty source view. 
*/ #define IS_C(attr) (attr.type == HTMLA_CLASS) @@ -138,9 +137,10 @@ struct _HTStream { HTStructured *target; /* target object */ HTTag *current_tag; - HTTag *slashtag; + HTTag *slashedtag; CONST HTTag *unknown_tag; BOOL inSELECT; + BOOL no_lynx_specialcodes; int current_attribute_number; HTChunk *string; HTElement *element_stack; @@ -156,7 +156,7 @@ struct _HTStream { S_dollar_paren_sq, S_esc_dq, S_dollar_dq, S_paren_dq, S_nonascii_text_dq, S_dollar_paren_dq, - S_in_kanji, S_junk_tag} state; + S_in_kanji, S_junk_tag, S_junk_pi} state; #ifdef CALLERDATA void * callerData; #endif /* CALLERDATA */ @@ -202,10 +202,10 @@ struct _HTStream { PRIVATE void HTMLSRC_apply_markup ARGS3( HTStream *, context, - HTlexem, lexem, + HTlexeme, lexeme, BOOL, start) { - HT_tagspec* ts = *( ( start ? lexem_start : lexem_end ) + lexem); + HT_tagspec* ts = *( ( start ? lexeme_start : lexeme_end ) + lexeme); while (ts) { #ifdef USE_COLOR_STYLE @@ -216,7 +216,7 @@ PRIVATE void HTMLSRC_apply_markup ARGS3( force_classname = TRUE; } #endif - CTRACE((tfp,ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n",(int)lexem)); + CTRACE((tfp,ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n",(int)lexeme)); if (ts->start) (*context->actions->start_element)( context->target, @@ -409,11 +409,7 @@ PRIVATE void handle_attribute_name ARGS2( FREE(context->value[i]); #ifdef USE_COLOR_STYLE # ifdef USE_PSRC -# if !OPT1 - current_is_class = (!strcasecomp("class", s)); -# else current_is_class = IS_C(attributes[i]); -# endif # else current_is_class = (!strcasecomp("class", s)); # endif @@ -478,6 +474,14 @@ PRIVATE BOOL put_special_unicodes ARGS2( HTStream *, context, UCode_t, code) { + /* (Tgf_nolyspcl) */ + if (context->no_lynx_specialcodes) { + /* + ** We were asked by a "DTD" flag to not generate lynx specials. - kw + */ + return NO; + } + if (code == CH_NBSP) { /* S/390 -- gil -- 0657 */ /* ** Use Lynx special character for nbsp. 
@@ -505,7 +509,8 @@ PRIVATE BOOL put_special_unicodes ARGS2( ** in the context of line wrapping. Unfortunately, if we use ** HT_EN_SPACE we override the chartrans tables for those spaces ** with a single '32' for all (but do line wrapping more fancy). - ** Assume emsp as two ensp (below). + ** + ** We may treat emsp as one or two ensp (below). */ #ifdef USE_PSRC if (!psrc_view) @@ -854,6 +859,36 @@ PRIVATE void handle_sgmlatt ARGS1( return; } +/* + * Convenience macros - tags (elements) are identified sometimes + * by an int or enum value ('TAGNUM'), sometimes + * by a pointer to HTTag ('TAGP'). - kw + */ +#define TAGNUM_OF_TAGP(t) (t - context->dtd->tags) +#define TAGP_OF_TAGNUM(e) (context->dtd->tags + e) + +/* + * The following implement special knowledge about OBJECT. + * As long as HTML_OBJECT is the only tag for which an alternative + * variant exist, they can be simple macros. - kw + */ +/* does 'TAGNUM' e have an alternative (variant) parsing mode? */ +#define HAS_ALT_TAGNUM(e) (e == HTML_OBJECT) + +/* return 'TAGNUM' of the alternative mode for 'TAGNUM' e, if any. */ +#define ALT_TAGNUM(e) ((e == HTML_OBJECT) ? HTML_ALT_OBJECT : e) + +/* return 'TAGNUM' of the normal mode for 'TAGNUM' e which may be alt. */ +#define NORMAL_TAGNUM(e) ((e >= HTML_ELEMENTS) ? HTML_OBJECT : e) + +/* More convenience stuff. - kw */ +#define ALT_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(ALT_TAGNUM(e)) +#define NORMAL_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(NORMAL_TAGNUM(e)) + +#define ALT_TAGP(t) ALT_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) +#define NORMAL_TAGP(t) NORMAL_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) + + #ifdef EXTENDED_HTMLDTD PRIVATE BOOL element_valid_within ARGS3( @@ -907,8 +942,7 @@ PRIVATE void do_close_stacked ARGS1( if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { context->inSELECT = FALSE; } - e = (stacked->tag == &HTTag_mixedObject) ? 
- HTML_OBJECT : stacked->tag - context->dtd->tags; + e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(stacked->tag)); #ifdef USE_PSRC if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif @@ -918,6 +952,8 @@ PRIVATE void do_close_stacked ARGS1( (char **)&context->include); context->element_stack = stacked->next; FREE(stacked); + context->no_lynx_specialcodes = context->element_stack ? + (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; } PRIVATE int is_on_stack ARGS2( @@ -928,8 +964,7 @@ PRIVATE int is_on_stack ARGS2( int i = 1; for (; stacked; stacked = stacked->next, i++) { if (stacked->tag == old_tag || - (stacked->tag == &HTTag_mixedObject && - old_tag - context->dtd->tags == HTML_OBJECT)) + stacked->tag == ALT_TAGP(old_tag)) return i; } return 0; @@ -1034,8 +1069,7 @@ PRIVATE void end_element ARGS2( int status = HT_OK; HTMLElement e; HTElement * N = context->element_stack; - HTTag * t = (N->tag == &HTTag_mixedObject && N->tag != old_tag) ? - context->dtd->tags + HTML_OBJECT : N->tag; + HTTag * t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag; if (old_tag != t) { /* Mismatch: syntax error */ if (context->element_stack->next) { /* This is not the last level */ @@ -1048,8 +1082,7 @@ PRIVATE void end_element ARGS2( } } - e = (t == &HTTag_mixedObject) ? - HTML_OBJECT : t - context->dtd->tags; + e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)); #ifdef USE_PSRC if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ #endif @@ -1066,11 +1099,13 @@ PRIVATE void end_element ARGS2( (char **)&context->include); } else if (status == HT_PARSER_OTHER_CONTENT) { CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name)); - context->element_stack->tag = &HTTag_mixedObject; + context->element_stack->tag = ALT_TAGP_OF_TAGNUM(e); } else { context->element_stack = N->next; /* Remove from stack */ FREE(N); } + context->no_lynx_specialcodes = context->element_stack ? 
+ (context->element_stack->tag->flags & Tgf_nolyspcl) : NO; #ifdef WIND_DOWN_STACK if (old_tag == t) return; /* Correct sequence */ @@ -1093,10 +1128,8 @@ PRIVATE void start_element ARGS1( { int status; HTTag * new_tag = context->current_tag; -#if OPT1 - HTMLElement e = new_tag - context->dtd->tags; + HTMLElement e = TAGNUM_OF_TAGP(new_tag); BOOL ok = FALSE; -#endif #ifdef EXTENDED_HTMLDTD @@ -1194,16 +1227,6 @@ PRIVATE void start_element ARGS1( /* ** Ugh, it is not an OPTION. - FM */ -#if !OPT1 - if (!strcasecomp(new_tag->name, "INPUT") || - !strcasecomp(new_tag->name, "TEXTAREA") || - !strcasecomp(new_tag->name, "SELECT") || - !strcasecomp(new_tag->name, "BUTTON") || - !strcasecomp(new_tag->name, "FIELDSET") || - !strcasecomp(new_tag->name, "LABEL") || - !strcasecomp(new_tag->name, "LEGEND") || - !strcasecomp(new_tag->name, "FORM")) -#else switch (e) { case HTML_INPUT: case HTML_TEXTAREA: case HTML_SELECT: case HTML_BUTTON: case HTML_FIELDSET: case HTML_LABEL: @@ -1214,7 +1237,6 @@ PRIVATE void start_element ARGS1( break; } if (ok) -#endif { /* ** It is another form-related start tag, so terminate @@ -1239,13 +1261,13 @@ PRIVATE void start_element ARGS1( CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name)); status = (*context->actions->start_element)( context->target, - new_tag - context->dtd->tags, + TAGNUM_OF_TAGP(new_tag), context->present, (CONST char**) context->value, /* coerce type for think c */ context->current_tag_charset, (char **)&context->include); if (status == HT_PARSER_OTHER_CONTENT) - new_tag = &HTTag_mixedObject; /* this is only returned for OBJECT */ + new_tag = ALT_TAGP(new_tag); /* this is only returned for OBJECT */ if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ HTElement * N = (HTElement *)malloc(sizeof(HTElement)); if (N == NULL) @@ -1253,11 +1275,9 @@ PRIVATE void start_element ARGS1( N->next = context->element_stack; N->tag = new_tag; context->element_stack = N; -#if OPT + context->no_lynx_specialcodes = (new_tag->flags & 
Tgf_nolyspcl); + } else if (e == HTML_META ) { -#else - } else if (!strcasecomp(new_tag->name, "META")) { -#endif /* ** Check for result of META tag. - KW & FM */ @@ -1338,8 +1358,7 @@ PRIVATE void SGML_free ARGS1( if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ #endif (*context->actions->end_element)(context->target, - (t == &HTTag_mixedObject) ? - HTML_OBJECT : t - context->dtd->tags, + NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), (char **)&context->include); FREE(context->include); } @@ -1729,9 +1748,16 @@ top1: break; case S_tagname_slash: - if (!(c == '>' && context->slashtag && TOASCII(unsign_c) < 127)) { + /* + * We had something like "<name/" so far, set state to S_text + * but keep context->slashedtag as a flag; except if we get + * '>' directly after the "<name/", and really have a tag for + * that name in context->slashedtag, in which case keep state as + * is and let code below deal with it. - kw + */ + if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) { context->state = S_text; - } + } /* fall through in any case! */ case S_text: if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */ @@ -1794,13 +1820,16 @@ top1: context->state = S_litteral; else context->state = S_tag; - context->slashtag = NULL; - } else if (context->slashtag && + context->slashedtag = NULL; + } else if (context->slashedtag && (c == '/' || (c == '>' && context->state == S_tagname_slash)) && TOASCII(unsign_c) < 127) { /* - ** Pending tag close from shortref tag. - kw + ** We got either the second slash of a pending "<NAME/blah blah/" + ** shortref construct, or the '>' of a mere "<NAME/>". In both + ** cases generate a "</NAME>" end tag in the recover buffer for + ** reparsing unless NAME is really an empty element. 
- kw */ #ifdef USE_PSRC if (psrc_view) { @@ -1809,18 +1838,18 @@ top1: PSRCSTOP(abracket); } else #endif - if (context->slashtag != context->unknown_tag && - !ReallyEmptyTag(context->slashtag)) { + if (context->slashedtag != context->unknown_tag && + !ReallyEmptyTag(context->slashedtag)) { if (context->recover == NULL) { StrAllocCopy(context->recover, "</"); context->recover_index = 0; } else { StrAllocCat(context->recover, "</"); } - StrAllocCat(context->recover, context->slashtag->name); + StrAllocCat(context->recover, context->slashedtag->name); StrAllocCat(context->recover, ">"); } - context->slashtag = NULL; + context->slashedtag = NULL; } else if (context->element_stack && (context->element_stack->tag->flags & Tgf_frecyc)) { @@ -1862,6 +1891,7 @@ top1: ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM */ } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ + !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(HT_NON_BREAK_SPACE); /* @@ -1869,6 +1899,7 @@ top1: ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM */ } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ + !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(LY_SOFT_HYPHEN); /* @@ -2051,21 +2082,35 @@ top1: /* ** Found '<' in SGML_PCDATA content; treat this mode nearly like - ** S_litteral, but recognize '<!' to filter out comments etc. - kw + ** S_litteral, but recognize '<!' and '<?' to filter out comments + ** and processing instructions. - kw */ case S_pcdata: - if (c == '!' && !string->size) { /* <! */ - /* - ** Terminate and set up for possible comment, - ** identifier, declaration, or marked section - ** as under S_tag. - kw - */ - context->state = S_exclamation; - context->lead_exclamation = TRUE; - context->doctype_bracket = FALSE; - context->first_bracket = FALSE; - HTChunkPutc(string, c); - break; + if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ + if (c == '!') { /* <! 
*/ + /* + ** Terminate and set up for possible comment, + ** identifier, declaration, or marked section + ** as under S_tag. - kw + */ + context->state = S_exclamation; + context->lead_exclamation = TRUE; + context->doctype_bracket = FALSE; + context->first_bracket = FALSE; + HTChunkPutc(string, c); + break; + } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */ + CTRACE((tfp, + "SGML: Found PI in PCDATA, junking it until '>'\n")); +#ifdef USE_PSRC + if (psrc_view) { + PSRCSTART(abracket);PUTS("<?");PSRCSTOP(abracket); + context->seen_nonwhite_in_junk_tag = TRUE; /* show all */ + } +#endif + context->state = S_junk_pi; + break; + } } /* Fall through to S_litteral - kw */ @@ -2172,7 +2217,10 @@ top1: */ case S_entity: if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ - IsNmChar(c) : isalpha((unsigned char)c))) { + isalnum((unsigned char)c) : isalpha((unsigned char)c))) { + /* Should probably use IsNmStart/IsNmChar above (is that right?), + but the world is not ready for that - there's &nbsp: (note + colon!) and stuff around. */ /* ** Accept valid ASCII character. - FM */ @@ -2900,13 +2948,24 @@ top1: if (c == '/' && t) { /* * Element name was ended by '/'. Remember the tag that - * ended thusly, we'll interpret as either an indication + * ended thusly, we'll interpret this as either an indication * of an empty element (if '>' follows directly) or do * some SGMLshortref-ish treatment. - kw */ - context->slashtag = t; + context->slashedtag = t; } if (!t) { + if (c == '?' 
&& string->size <= 1) { + CTRACE((tfp, "SGML: Found PI, junking it until '>'\n")); +#ifdef USE_PSRC + if (psrc_view) { + PSRCSTART(abracket);PUTS("<?");PSRCSTOP(abracket); + context->seen_nonwhite_in_junk_tag = TRUE; /*show all*/ + } +#endif + context->state = S_junk_pi; + break; + } CTRACE((tfp, "SGML: *** Invalid element %s\n", string->data)); @@ -2921,10 +2980,11 @@ top1: LYUpperCase(string->data); } PUTS(string->data); - if (c == '>' ) { PSRCSTOP(badtag); PSRCSTART(abracket);PUTC('>');PSRCSTOP(abracket); + } else { + PUTC(c); } } #endif @@ -2941,9 +3001,6 @@ top1: } context->current_tag = t; - /* - ** Clear out attributes. - */ #ifdef USE_PSRC if (psrc_view) { PSRCSTART(abracket);PUTC('<');PSRCSTOP(abracket); @@ -2958,29 +3015,26 @@ top1: LYUpperCase(string->data); } PUTS(string->data); + if (t != context->unknown_tag) + PSRCSTOP(tag); + else + PSRCSTOP(badtag); } if (!psrc_view) /*don't waste time */ #endif { -#if !OPT1 - int i; - for (i = 0; i < context->current_tag->number_of_attributes; i++) - context->present[i] = NO; -#else + /* + ** Clear out attributes. + */ memset( (void*)context->present, 0 , sizeof(BOOL)* context->current_tag->number_of_attributes); -#endif } string->size = 0; context->current_attribute_number = INVALID; #ifdef USE_PSRC if (psrc_view) { - if (c == '>' || c == '<' || (c == '/' && context->slashtag)) { - if (t != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); + if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { if (c != '<') { PSRCSTART(abracket); PUTC(c); @@ -2995,7 +3049,7 @@ top1: } } else #endif - if (c == '>' || c == '<' || (c == '/' && context->slashtag)) { + if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { if (context->current_tag->name) start_element(context); context->state = (c == '>') ? 
S_text : @@ -3363,10 +3417,6 @@ top1: start_element(context); #ifdef USE_PSRC if (psrc_view) { - if (context->current_tag != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -3408,18 +3458,12 @@ top1: LYUpperCase(string->data); } PUTS(string->data); - if (c == '=' || c == '>') { - if (c == '=' ) PUTC('='); - if (context->current_attribute_number == INVALID) - PSRCSTOP(badattr); - else - PSRCSTOP(attrib); - } + if (c == '=' ) PUTC('='); + if (context->current_attribute_number == INVALID) + PSRCSTOP(badattr); + else + PSRCSTOP(attrib); if (c == '>') { - if (context->current_tag != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -3445,10 +3489,6 @@ top1: PSRCSTOP(badattr); else PSRCSTOP(attrib); - if (context->current_tag != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -3489,10 +3529,6 @@ top1: CTRACE((tfp, "SGML: found = but no value\n")); #ifdef USE_PSRC if (psrc_view) { - if (context->current_tag != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -3573,10 +3609,6 @@ top1: if (c == '>') { /* End of tag */ #ifdef USE_PSRC if (psrc_view) { - if (context->current_tag != context->unknown_tag) - PSRCSTOP(tag); - else - PSRCSTOP(badtag); PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -3773,8 +3805,13 @@ top1: else LYUpperCase(string->data); } - PUTS(string->data); PSRCSTOP(badtag); - PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); + PUTS(string->data); + if (c != '>') { + PUTC(c); + } else { + PSRCSTOP(badtag); + PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); + } psrc_tagname_processed=TRUE; } } else if (psrc_view) { @@ -3782,14 +3819,14 @@ top1: } else { BOOL tag_OK = (BOOL) (c == '>' || WHITE(c)); #if OPT - HTMLElement e = t - context->dtd->tags; + HTMLElement e = 
TAGNUM_OF_TAGP(t); int branch = 2; /* it can be 0,1,2*/ #endif context->current_tag = t; - if (t - context->dtd->tags == HTML_OBJECT && + if (HAS_ALT_TAGNUM(TAGNUM_OF_TAGP(t)) && context->element_stack && - context->element_stack->tag == &HTTag_mixedObject) - context->element_stack->tag = t; + ALT_TAGP(t) == context->element_stack->tag) + context->element_stack->tag = NORMAL_TAGP(context->element_stack->tag); #if OPT if (tag_OK #ifdef EXTENDED_HTMLDTD @@ -3808,6 +3845,8 @@ top1: case HTML_P: case HTML_STRONG: case HTML_TT: case HTML_U: branch = 1; break; + default: + break; } } #endif @@ -3904,7 +3943,7 @@ top1: #endif (*context->actions->end_element) (context->target, - (context->current_tag - context->dtd->tags), + TAGNUM_OF_TAGP(context->current_tag), (char **)&context->include); } } else if (!strcasecomp(string->data, "P")) { @@ -3932,7 +3971,7 @@ top1: #endif (*context->actions->end_element) (context->target, - (context->current_tag - context->dtd->tags), + TAGNUM_OF_TAGP(context->current_tag), (char **)&context->include); } string->size = 0; @@ -3966,9 +4005,10 @@ top1: LYUpperCase(string->data); } PUTS(string->data); PSRCSTOP(tag); - if ( c != '>' ) + if ( c != '>' ) { PSRCSTART(badtag); - else { + PUTC(c); + } else { PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -4132,10 +4172,13 @@ top1: break; case S_junk_tag: + case S_junk_pi: if (c == '>') { #ifdef USE_PSRC if (psrc_view) { - PSRCSTOP(badtag); + if (context->state == S_junk_tag) { + PSRCSTOP(badtag); + } PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); @@ -4315,10 +4358,12 @@ PUBLIC HTStream* SGML_new ARGS3( context->actions = (CONST HTStructuredClass*)(((HTStream*)target)->isa); /* Ugh: no OO */ context->unknown_tag = &HTTag_unrecognized; - context->current_tag = context->slashtag = NULL; +/* context->extra_tags = dtd->tags + dtd->number_of_tags; */ + context->current_tag = context->slashedtag = NULL; context->state = S_text; context->element_stack = 0; /* empty */ context->inSELECT = 
FALSE; + context->no_lynx_specialcodes = NO; /* special codes normally generated */ #ifdef CALLERDATA context->callerData = (void*) callerData; #endif /* CALLERDATA */ diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h index 85278f44..b15cec36 100644 --- a/WWW/Library/Implementation/SGML.h +++ b/WWW/Library/Implementation/SGML.h @@ -104,6 +104,9 @@ typedef int TagFlags; allows recycling, i.e. don't translate to output (display) character set yet (treat content similar to attribute values) */ +#define Tgf_nolyspcl 0x00040 /* Don't generate lynx special characters + for soft hyphen and various spaces (nbsp, + ensp,..) */ /* A tag structure describes an SGML element. ** ----------------------------------------- |