diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-06 04:08:00 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-06 04:08:00 -0400 |
commit | 1d80538b4b84eadd223c7b61839b950389c2d49d (patch) | |
tree | a46f327e82edb06d8d789b60c3395f873476e040 /WWW/Library | |
parent | 443226a5ffcf805f6ab3ccbcc2a6b4802793b07d (diff) | |
download | lynx-snapshots-1d80538b4b84eadd223c7b61839b950389c2d49d.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-76
Diffstat (limited to 'WWW/Library')
33 files changed, 918 insertions, 439 deletions
diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c index fc46ac4a..64a7e0ba 100644 --- a/WWW/Library/Implementation/HTChunk.c +++ b/WWW/Library/Implementation/HTChunk.c @@ -79,6 +79,74 @@ PUBLIC void HTChunkEnsure ARGS2 (HTChunk *,ch, int,needed) outofmem(__FILE__, "HTChunkEnsure"); } +#ifdef EXP_CHARTRANS + +#define PUTC(code) ch->data[ch->size++] = (char)(code) +#define PUTC2(code) ch->data[ch->size++] = (char)(0x80|(0x3f &(code))) + +PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code) +{ + int utflen; + if (code < 128) + utflen = 1; + else if (code < 0x800L) { + utflen = 2; + } else if (code < 0x10000L) { + utflen = 3; + } else if (code < 0x200000L) { + utflen = 4; + } else if (code < 0x4000000L) { + utflen = 5; + } else if (code<=0x7fffffffL) { + utflen = 6; + } else + utflen = 0; + + if (ch->size + utflen > ch->allocated) { + int growby = (ch->growby >= utflen) ? ch->growby : utflen; + ch->allocated = ch->allocated + growby; + ch->data = ch->data ? (char *)realloc(ch->data, ch->allocated) + : (char *)calloc(1, ch->allocated); + if (!ch->data) + outofmem(__FILE__, "HTChunkPutUtf8Char"); + } + + switch(utflen) { + case 0: + return; + case 1: + ch->data[ch->size++] = (char)code; + return; + case 2: + PUTC(0xc0 | (code>>6)); + break; + case 3: + PUTC(0xe0 | (code>>12)); + break; + case 4: + PUTC(0xf0 | (code>>18)); + break; + case 5: + PUTC(0xf8 | (code>>24)); + break; + case 6: + PUTC(0xfc | (code>>30)); + } + switch(utflen) { + case 6: + PUTC2(code>>24); + case 5: + PUTC2(code>>18); + case 4: + PUTC2(code>>12); + case 3: + PUTC2(code>>6); + case 2: + PUTC2(code); + } +} + +#endif /* EXP_CHARTRANS */ /* Terminate a chunk ** ----------------- diff --git a/WWW/Library/Implementation/HTChunk.h b/WWW/Library/Implementation/HTChunk.h index 260f798a..c7308165 100644 --- a/WWW/Library/Implementation/HTChunk.h +++ b/WWW/Library/Implementation/HTChunk.h @@ -7,6 +7,10 @@ automatically reallocating them as necessary. 
*/ +#ifdef EXP_CHARTRANS +#include "UCMap.h" +#endif + typedef struct { int size; /* In bytes */ int growby; /* Allocation unit in bytes */ @@ -114,8 +118,12 @@ Append a character to a chunk */ extern void HTChunkPutc PARAMS((HTChunk * ch, char c)); -/* +#ifdef EXP_CHARTRANS +extern void HTChunkPutUtf8Char PARAMS((HTChunk * ch, UCode_t code)); +#endif /* EXP_CHARTRANS */ + +/* Append a string to a chunk ON ENTRY, diff --git a/WWW/Library/Implementation/HTDOS.c b/WWW/Library/Implementation/HTDOS.c index 79a8b463..c5687808 100644 --- a/WWW/Library/Implementation/HTDOS.c +++ b/WWW/Library/Implementation/HTDOS.c @@ -1,25 +1,25 @@ -/* DOS specific routines - - */ - -#include <mem.h> -#include <dos.h> -#include "htstring.h" - -/* PUBLIC HTDOS_wwwName() -** CONVERTS DOS Name into WWW Name -** ON ENTRY: -** dosname DOS file specification (NO NODE) -** -** ON EXIT: -** returns www file specification -** -*/ -char * HTDOS_wwwName (char *dosname) -{ - static char wwwname[1024]; - char *cp_url = wwwname; - +/* DOS specific routines + + */ + +#include <mem.h> +#include <dos.h> +#include "htstring.h" + +/* PUBLIC HTDOS_wwwName() +** CONVERTS DOS Name into WWW Name +** ON ENTRY: +** dosname DOS file specification (NO NODE) +** +** ON EXIT: +** returns www file specification +** +*/ +char * HTDOS_wwwName (char *dosname) +{ + static char wwwname[1024]; + char *cp_url = wwwname; + strcpy(wwwname,dosname); for ( ; *cp_url != '\0' ; cp_url++) @@ -36,25 +36,25 @@ char * HTDOS_wwwName (char *dosname) /* if((strlen(wwwname)>2)&&(wwwname[1]==':')) wwwname[1]='|'; - printf("\n\nwww: %s\n\ndos: %s\n\n",wwwname,dosname); - sleep(5); -*/ + printf("\n\nwww: %s\n\ndos: %s\n\n",wwwname,dosname); + sleep(5); +*/ return(wwwname); -} - - -/* PUBLIC HTDOS_name() -** CONVERTS WWW name into a DOS name -** ON ENTRY: -** fn WWW file name -** -** ON EXIT: -** returns dos file specification -** -** Bug: Returns pointer to static -- non-reentrant -*/ -char * HTDOS_name(char *dosname) { - +} + + +/* PUBLIC 
HTDOS_name() +** CONVERTS WWW name into a DOS name +** ON ENTRY: +** fn WWW file name +** +** ON EXIT: +** returns dos file specification +** +** Bug: Returns pointer to static -- non-reentrant +*/ +char * HTDOS_name(char *dosname) { + static char cp_url[1024]; int joe; @@ -92,4 +92,4 @@ char * HTDOS_name(char *dosname) { } } - + diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c index 3cf9dd3d..a3ad9de1 100644 --- a/WWW/Library/Implementation/HTFTP.c +++ b/WWW/Library/Implementation/HTFTP.c @@ -129,7 +129,7 @@ typedef struct _connection { #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) -#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) (*targetClass.end_element)(target, e, 0) #define FREE_TARGET (*targetClass._free)(target) #define ABORT_TARGET (*targetClass._free)(target) @@ -2143,17 +2143,19 @@ PRIVATE EntryInfo * parse_dir_entry ARGS2( ** Get real types eventually. */ if (!entry_info->type) { - char *cp; + CONST char *cp; HTFormat format; HTAtom * encoding; /* @@ not used at all */ - format = HTFileFormat(entry_info->filename, &encoding); + format = HTFileFormat(entry_info->filename, &encoding, &cp); - if (!strncmp(HTAtom_name(format), "application",11)) { - cp = HTAtom_name(format) + 12; - if (!strncmp(cp,"x-",2)) - cp += 2; - } else { - cp = HTAtom_name(format); + if (cp == NULL) { + if (!strncmp(HTAtom_name(format), "application",11)) { + cp = HTAtom_name(format) + 12; + if (!strncmp(cp,"x-",2)) + cp += 2; + } else { + cp = HTAtom_name(format); + } } StrAllocCopy(entry_info->type, cp); @@ -2574,6 +2576,7 @@ PUBLIC int HTFTPLoad ARGS4( HTStream *, sink) { BOOL isDirectory = NO; + HTAtom * encoding = NULL; int status; int retry; /* How many times tried? 
*/ HTFormat format; @@ -2689,7 +2692,6 @@ PUBLIC int HTFTPLoad ARGS4( char *filename = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); char *fname = filename; /** Save for subsequent free() **/ BOOL binary; - HTAtom * encoding; char *type = NULL; char *cp; @@ -2795,16 +2797,16 @@ PUBLIC int HTFTPLoad ARGS4( (cp > (filename + 3) && 0 == strncasecomp((cp - 4), "read.me", 7))) { *cp = '\0'; - format = HTFileFormat(filename, &encoding); + format = HTFileFormat(filename, &encoding, NULL); *cp = '.'; } else { - format = HTFileFormat(filename, &encoding); + format = HTFileFormat(filename, &encoding, NULL); } } else { - format = HTFileFormat(filename, &encoding); + format = HTFileFormat(filename, &encoding, NULL); } } else { - format = HTFileFormat(filename, &encoding); + format = HTFileFormat(filename, &encoding, NULL); } format = HTCharsetFormat(format, anchor, -1); binary = (encoding != HTAtom_for("8bit") && @@ -3218,7 +3220,6 @@ listen: } else { int rv; int len; - HTAtom * encoding; char *FileName = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); /** Clear any login messages **/ @@ -3226,14 +3227,24 @@ listen: /** Fake a Content-Encoding for compressed files. - FM **/ HTUnEscape(FileName); - if ((len = strlen(FileName)) > 2) { + if (!IsUnityEnc(encoding)) { + /* + * We already know from the call to HTFileFormat above that + * this is a compressed file, no need to look at the filename + * again. - kw + */ + StrAllocCopy(anchor->content_type, format->name); + StrAllocCopy(anchor->content_encoding, HTAtom_name(encoding)); + format = HTAtom_for("www/compressed"); + + } else if ((len = strlen(FileName)) > 2) { if ((FileName[len - 1] == 'Z') && (FileName[len - 2] == '.' 
|| FileName[len - 2] == '-' || FileName[len - 2] == '_')) { FileName[len - 2] = '\0'; - format = HTFileFormat(FileName, &encoding); + format = HTFileFormat(FileName, &encoding, NULL); format = HTCharsetFormat(format, anchor, -1); StrAllocCopy(anchor->content_type, format->name); StrAllocCopy(anchor->content_encoding, "x-compress"); @@ -3244,7 +3255,7 @@ listen: FileName[len - 3] == '-' || FileName[len - 3] == '_') { FileName[len - 3] = '\0'; - format = HTFileFormat(FileName, &encoding); + format = HTFileFormat(FileName, &encoding, NULL); format = HTCharsetFormat(format, anchor, -1); StrAllocCopy(anchor->content_type, format->name); StrAllocCopy(anchor->content_encoding, "x-gzip"); diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index 71ea8823..93553bcb 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -87,6 +87,7 @@ typedef struct _HTSuffix { char * suffix; HTAtom * rep; HTAtom * encoding; + char * desc; float quality; } HTSuffix; @@ -102,7 +103,7 @@ typedef struct _HTSuffix { #define PUTC(c) (*target->isa->put_character)(target, c) #define PUTS(s) (*target->isa->put_string)(target, s) -#define START(e) (*target->isa->start_element)(target, e, 0, 0, 0) +#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0) #define END(e) (*target->isa->end_element)(target, e, 0) #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ (*target->isa->end_element)(target, e, 0) @@ -148,8 +149,8 @@ PRIVATE char *HTCacheRoot = "/tmp/W3_Cache_"; /* Where to cache things */ ** Suffix registration. 
*/ PRIVATE HTList * HTSuffixes = 0; -PRIVATE HTSuffix no_suffix = { "*", NULL, NULL, 1.0 }; -PRIVATE HTSuffix unknown_suffix = { "*.*", NULL, NULL, 1.0}; +PRIVATE HTSuffix no_suffix = { "*", NULL, NULL, NULL, 1.0 }; +PRIVATE HTSuffix unknown_suffix = { "*.*", NULL, NULL, NULL, 1.0}; #ifdef _WINDOWS @@ -354,16 +355,21 @@ PRIVATE void LYListFmtParse ARGS5( ** Calling this with suffix set to "*.*" will set the default ** representation for unknown suffix files which contain a ".". ** -** If filename suffix is already defined its previous -** definition is overridden. +** The encoding parameter can give a trivial (8bit, 7bit, binary) +** or real (gzip, compress) encoding. +** +** If filename suffix is already defined with the same encoding +** its previous definition is overridden. */ -PUBLIC void HTSetSuffix ARGS4( +PUBLIC void HTSetSuffix5 ARGS5( CONST char *, suffix, CONST char *, representation, CONST char *, encoding, + CONST char *, desc, float, value) { HTSuffix * suff; + BOOL trivial_enc = IsUnityEncStr(encoding); if (strcmp(suffix, "*") == 0) suff = &no_suffix; @@ -373,7 +379,10 @@ PUBLIC void HTSetSuffix ARGS4( HTList *cur = HTSuffixes; while (NULL != (suff = (HTSuffix*)HTList_nextObject(cur))) { - if (suff->suffix && 0 == strcmp(suff->suffix, suffix)) + if (suff->suffix && 0 == strcmp(suff->suffix, suffix) && + ((trivial_enc && IsUnityEnc(suff->encoding)) || + (!trivial_enc && !IsUnityEnc(suff->encoding) && + strcmp(encoding, HTAtom_name(suff->encoding)) == 0))) break; } if (!suff) { /* Not found -- create a new node */ @@ -396,7 +405,8 @@ PUBLIC void HTSetSuffix ARGS4( } } - suff->rep = HTAtom_for(representation); + if (representation) + suff->rep = HTAtom_for(representation); /* ** Memory leak fixed. @@ -404,7 +414,9 @@ PUBLIC void HTSetSuffix ARGS4( ** Invariant code removed. 
*/ suff->encoding = HTAtom_for(encoding); - + + StrAllocCopy(suff->desc, desc); + suff->quality = value; } @@ -430,6 +442,7 @@ PRIVATE void free_suffixes NOARGS */ suff = (HTSuffix *)HTList_removeLastObject(HTSuffixes); FREE(suff->suffix); + FREE(suff->desc); FREE(suff); } /* @@ -643,15 +656,21 @@ PUBLIC char * WWW_nameOfFile ARGS1( ** ** On entry, ** rep is the atomized MIME style representation +** enc is an encoding, trivial (8bit, binary, etc.) or gzip etc. ** ** On exit: ** Returns a pointer to a suitable suffix string if one has been ** found, else "". */ -PUBLIC CONST char * HTFileSuffix ARGS1( - HTAtom*, rep) +PUBLIC CONST char * HTFileSuffix ARGS2( + HTAtom*, rep, + CONST char *, enc) { HTSuffix * suff; +#ifdef FNAMES_8_3 + HTSuffix * first_found = NULL; +#endif + BOOL trivial_enc; int n; int i; @@ -660,13 +679,44 @@ PUBLIC CONST char * HTFileSuffix ARGS1( if (!HTSuffixes) HTFileInit(); #endif /* !NO_INIT */ + + trivial_enc = IsUnityEncStr(enc); n = HTList_count(HTSuffixes); for (i = 0; i < n; i++) { suff = (HTSuffix *)HTList_objectAt(HTSuffixes, i); - if (suff->rep == rep) { + if (suff->rep == rep && +#if defined(VMS) || defined(FNAMES_8_3) + /* Don't return a suffix whose first char is a dot and which + has more dots or with asterisks, for + these systems - kw */ + (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' 
|| + (strchr(suff->suffix + 1, '.') == NULL && + strchr(suff->suffix + 1, '.') == NULL)) && +#endif + ((trivial_enc && IsUnityEnc(suff->encoding)) || + (!trivial_enc && !IsUnityEnc(suff->encoding) && + strcmp(enc, HTAtom_name(suff->encoding)) == 0))) { +#ifdef FNAMES_8_3 + if (suff->suffix && (strlen(suff->suffix) <= 4)) { + /* + * If length of suffix (including dot) is 4 or smaller, + * return this one even if we found a longer one + * earlier - kw + */ + return suff->suffix; + } else if (!first_found) { + first_found = suff; /* remember this one */ + } +#else return suff->suffix; /* OK -- found */ +#endif } } +#ifdef FNAMES_8_3 + if (first_found) + return first_found->suffix; + else +#endif return ""; /* Dunno */ } @@ -676,11 +726,15 @@ PUBLIC CONST char * HTFileSuffix ARGS1( ** This version will return the representation and also set ** a variable for the encoding. ** +** Encoding may be a unity encoding (binary, 8bit, etc.) or +** a content-coding like gzip, compress. +** ** It will handle for example x.txt, x.txt,Z, x.Z */ -PUBLIC HTFormat HTFileFormat ARGS2( +PUBLIC HTFormat HTFileFormat ARGS3( CONST char *, filename, - HTAtom **, pencoding) + HTAtom **, pencoding, + CONST char**, pdesc) { HTSuffix * suff; int n; @@ -691,7 +745,13 @@ PUBLIC HTFormat HTFileFormat ARGS2( #endif /* VMS */ extern char LYforce_HTML_mode; + if (pencoding) + *pencoding = NULL; + if (pdesc) + *pdesc = NULL; if (LYforce_HTML_mode) { + if (pencoding) + *pencoding = WWW_ENC_8BIT; return WWW_HTML; } @@ -709,8 +769,6 @@ PUBLIC HTFormat HTFileFormat ARGS2( if (!HTSuffixes) HTFileInit(); #endif /* !NO_INIT */ - if (pencoding) - *pencoding = NULL; lf = strlen(filename); n = HTList_count(HTSuffixes); for (i = 0; i < n; i++) { @@ -721,6 +779,8 @@ PUBLIC HTFormat HTFileFormat ARGS2( int j; if (pencoding) *pencoding = suff->encoding; + if (pdesc) + *pdesc = suff->desc; if (suff->rep) { #ifdef VMS if (semicolon != NULL) @@ -732,9 +792,11 @@ PUBLIC HTFormat HTFileFormat ARGS2( int ls2; suff = 
(HTSuffix *)HTList_objectAt(HTSuffixes, j); ls2 = strlen(suff->suffix); - if ((ls <= lf) && 0 == strncasecomp( + if ((ls + ls2 <= lf) && 0 == strncasecomp( suff->suffix, filename + lf - ls -ls2, ls2)) { if (suff->rep) { + if (pdesc && !(*pdesc)) + *pdesc = suff->desc; #ifdef VMS if (semicolon != NULL) *semicolon = ';'; @@ -1419,7 +1481,8 @@ PUBLIC int HTLoadFile ARGS4( HTFormat format; char * nodename = NULL; char * newname = NULL; /* Simplified name of file */ - HTAtom * encoding; /* @@ not used yet */ + HTAtom * encoding; /* @@ not used */ + HTAtom * myEncoding = NULL; /* enc of this file, may be gzip etc. */ int status; #ifdef VMS struct stat stat_info; @@ -1466,17 +1529,25 @@ PUBLIC int HTLoadFile ARGS4( /* ** Determine the format and encoding mapped to any suffix. */ - format = HTFileFormat(filename, &encoding); - + if (anchor->content_type && anchor->content_encoding) { + /* + * If content_type and content_encoding are BOTH already set + * in the anchor object, we believe it and don't try to + * derive format and encoding from the filename. - kw + */ + format = HTAtom_for(anchor->content_type); + myEncoding = HTAtom_for(anchor->content_encoding); + } else { + format = HTFileFormat(filename, &myEncoding, NULL); + /* ** Check the format for an extended MIME charset value, and - ** act on it if present. Otherwise, assume the ISO-8859-1 - ** character set for local files. If it's actually another - ** charset (e.g., ISO-8859-2 or KOI8-R) and the terminal is - ** using that, Lynx users should make the current character - ** set "ISO Latin 1" so that 8-bit characters are passed raw. + ** act on it if present. Otherwise, assume what is indicated + ** by the last parameter (fallback will effectively be + ** UCLYhndl_for_unspec, by default ISO-8859-1). 
- kw */ - format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); + format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); + } #ifdef VMS /* @@ -1576,7 +1647,7 @@ PUBLIC int HTLoadFile ARGS4( vmsname[len - 3] != ':') { StrAllocCopy(cp, vmsname); cp[len - 2] = '\0'; - format = HTFileFormat(cp, &encoding); + format = HTFileFormat(cp, &encoding, NULL); FREE(cp); format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); @@ -1590,7 +1661,7 @@ PUBLIC int HTLoadFile ARGS4( vmsname[len - 3] == '_') { StrAllocCopy(cp, vmsname); cp[len - 3] = '\0'; - format = HTFileFormat(cp, &encoding); + format = HTFileFormat(cp, &encoding, NULL); FREE(cp); format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); @@ -1692,6 +1763,7 @@ PUBLIC int HTLoadFile ARGS4( STRUCT_DIRENT * dirbuf; float best = NO_VALUE_FOUND; /* So far best is bad */ HTFormat best_rep = NULL; /* Set when rep found */ + HTAtom * best_enc = NULL; char * best_name = NULL; /* Best dir entry so far */ char *base = strrchr(localname, '/'); @@ -1722,8 +1794,9 @@ PUBLIC int HTLoadFile ARGS4( continue; /* if the entry is not being used, skip it */ #endif if ((int)strlen(dirbuf->d_name) > baselen && /* Match? 
*/ - !strncmp(dirbuf->d_name, base, baselen)) { - HTFormat rep = HTFileFormat(dirbuf->d_name, NULL); + !strncmp(dirbuf->d_name, base, baselen)) { + HTAtom * enc; + HTFormat rep = HTFileFormat(dirbuf->d_name, &enc, NULL); float filevalue = HTFileValue(dirbuf->d_name); float value = HTStackValue(rep, format_out, filevalue, @@ -1731,12 +1804,13 @@ PUBLIC int HTLoadFile ARGS4( if (value <= 0.0) { char * cp = NULL; int len = strlen(dirbuf->d_name); + enc = NULL; if (len > 2 && dirbuf->d_name[len - 1] == 'Z' && dirbuf->d_name[len - 2] == '.') { StrAllocCopy(cp, dirbuf->d_name); cp[len - 2] = '\0'; - format = HTFileFormat(cp, NULL); + format = HTFileFormat(cp, NULL, NULL); FREE(cp); value = HTStackValue(format, format_out, filevalue, 0); @@ -1756,7 +1830,7 @@ PUBLIC int HTLoadFile ARGS4( dirbuf->d_name[len - 3] == '.') { StrAllocCopy(cp, dirbuf->d_name); cp[len - 3] = '\0'; - format = HTFileFormat(cp, NULL); + format = HTFileFormat(cp, NULL, NULL); FREE(cp); value = HTStackValue(format, format_out, filevalue, 0); @@ -1779,6 +1853,7 @@ PUBLIC int HTLoadFile ARGS4( HTAtom_name(rep), value); if (value > best) { best_rep = rep; + best_enc = enc; best = value; StrAllocCopy(best_name, dirbuf->d_name); } @@ -1790,6 +1865,7 @@ PUBLIC int HTLoadFile ARGS4( if (best_rep) { format = best_rep; + myEncoding = best_enc; base[-1] = '/'; /* Restore directory name */ base[0] = '\0'; StrAllocCat(localname, best_name); @@ -2188,12 +2264,37 @@ PUBLIC int HTLoadFile ARGS4( /* ** Fake a Content-Encoding for compressed files. - FM */ - if ((len = strlen(localname)) > 2) { + if (!IsUnityEnc(myEncoding)) { + /* + * We already know from the call to HTFileFormat above + * that this is a compressed file, no need to look at + * the filename again. 
- kw + */ +#ifdef USE_ZLIB + if (strcmp(format_out->name, "www/download") != 0 && + (!strcmp(HTAtom_name(myEncoding), "gzip") || + !strcmp(HTAtom_name(myEncoding), "x-gzip"))) { + fclose(fp); + gzfp = gzopen(localname, "rb"); + + if (TRACE) + fprintf(stderr, + "HTLoadFile: gzopen of `%s' gives %p\n", + localname, (void*)gzfp); + use_gzread = YES; + } else +#endif /* USE_ZLIB */ + { + StrAllocCopy(anchor->content_type, format->name); + StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding)); + format = HTAtom_for("www/compressed"); + } + } else if ((len = strlen(localname)) > 2) { if (localname[len - 1] == 'Z' && localname[len - 2] == '.') { StrAllocCopy(cp, localname); cp[len - 2] = '\0'; - format = HTFileFormat(cp, &encoding); + format = HTFileFormat(cp, &encoding, NULL); FREE(cp); format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); @@ -2206,7 +2307,7 @@ PUBLIC int HTLoadFile ARGS4( localname[len - 3] == '.') { StrAllocCopy(cp, localname); cp[len - 3] = '\0'; - format = HTFileFormat(cp, &encoding); + format = HTFileFormat(cp, &encoding, NULL); FREE(cp); format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec); diff --git a/WWW/Library/Implementation/HTFile.h b/WWW/Library/Implementation/HTFile.h index 2d37a537..e6a7926a 100644 --- a/WWW/Library/Implementation/HTFile.h +++ b/WWW/Library/Implementation/HTFile.h @@ -90,19 +90,31 @@ extern void HTDirEntry PARAMS(( ** representation is MIME-style content-type ** ** encoding is MIME-style content-transfer-encoding -** (8bit, 7bit, etc) +** (8bit, 7bit, etc) or HTTP-style content-encoding +** (gzip, compress etc.) 
** ** quality an a priori judgement of the quality of such files ** (0.0..1.0) ** -** Example: HTSetSuffix(".ps", "application/postscript", "8bit", 1.0); +** HTSetSuffix5 has one more parameter for a short description of the type +** which is otherwise derived from the representation: +** +** desc is a short textual description, or NULL +** +** Examples: HTSetSuffix(".ps", "application/postscript", "8bit", 1.0); +** Examples: HTSetSuffix(".psz", "application/postscript", "gzip", 1.0); +** A MIME type could also indicate a non-trivial encoding on its own +** ("application/x-compressed-tar"), but in that case don't use encoding +** to also indicate it but use "binary" etc. */ -extern void HTSetSuffix PARAMS(( +extern void HTSetSuffix5 PARAMS(( CONST char * suffix, CONST char * representation, CONST char * encoding, + CONST char * desc, float quality)); - + +#define HTSetSuffix(suff,rep,enc,q) HTSetSuffix5(suff, rep, enc, NULL, q) /* ** HTFileFormat: Get Representation and Encoding from file name. @@ -115,7 +127,8 @@ extern void HTSetSuffix PARAMS(( */ extern HTFormat HTFileFormat PARAMS(( CONST char * filename, - HTAtom ** pEncoding)); + HTAtom ** pEncoding, + CONST char ** pDesc)); /* ** HTCharsetFormat: Revise the file format in relation to the Lynx charset. @@ -155,6 +168,7 @@ extern BOOL HTEditable PARAMS((CONST char * filename)); ** ON ENTRY, ** ** rep is the atomized MIME style representation +** enc is an encoding (8bit, binary, gzip, compress,..) ** ** ON EXIT, ** @@ -162,7 +176,8 @@ extern BOOL HTEditable PARAMS((CONST char * filename)); ** been found, else "". 
*/ extern CONST char * HTFileSuffix PARAMS(( - HTAtom* rep)); + HTAtom* rep, + CONST char* enc)); /* ** The Protocols diff --git a/WWW/Library/Implementation/HTFinger.c b/WWW/Library/Implementation/HTFinger.c index d00b57cb..06188d2b 100644 --- a/WWW/Library/Implementation/HTFinger.c +++ b/WWW/Library/Implementation/HTFinger.c @@ -46,7 +46,7 @@ #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) -#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) (*targetClass.end_element)(target, e, 0) #define FREE_TARGET (*targetClass._free)(target) #define NEXT_CHAR HTGetCharacter() @@ -91,7 +91,7 @@ PRIVATE void start_anchor ARGS1(CONST char *, href) } ((CONST char **)value)[HTML_A_HREF] = href; (*targetClass.start_element)(target, HTML_A, present, - (CONST char **)value, 0); + (CONST char **)value, -1, 0); } diff --git a/WWW/Library/Implementation/HTFormat.c b/WWW/Library/Implementation/HTFormat.c index b5f8584d..3b33fb9a 100644 --- a/WWW/Library/Implementation/HTFormat.c +++ b/WWW/Library/Implementation/HTFormat.c @@ -254,6 +254,7 @@ PUBLIC char HTGetCharacter NOARGS return FROMASCII(ch); } +#ifdef NOT_USED /* Stream the data to an ouput file as binary */ PUBLIC int HTOutputBinary ARGS2( int, input, @@ -271,6 +272,7 @@ PUBLIC int HTOutputBinary ARGS2( int, input, fwrite(input_buffer, sizeof(char), status, output); } while (YES); } +#endif /* NOT_USED */ /* Match maintype to any MIME type starting with maintype, * for example: image/gif should match image diff --git a/WWW/Library/Implementation/HTFormat.h b/WWW/Library/Implementation/HTFormat.h index 812e2fc3..b21a0ec3 100644 --- a/WWW/Library/Implementation/HTFormat.h +++ b/WWW/Library/Implementation/HTFormat.h @@ -125,6 +125,19 @@ typedef HTAtom* HTEncoding; */ #define WWW_ENC_COMPRESS HTAtom_for("compress") +/* + Does a string designate a real encoding, or is it just + 
a "dummy" as for example 7bit, 8bit, and binary? + */ +#define IsUnityEncStr(senc) \ + ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\ + !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit")) + +#define IsUnityEnc(enc) \ + ((enc)==NULL || (enc)==HTAtom_for("identity") ||\ + (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT) + + #include "HTAnchor.h" /* diff --git a/WWW/Library/Implementation/HTGopher.c b/WWW/Library/Implementation/HTGopher.c index 7af74580..c94c89d8 100644 --- a/WWW/Library/Implementation/HTGopher.c +++ b/WWW/Library/Implementation/HTGopher.c @@ -78,7 +78,7 @@ #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) -#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) (*targetClass.end_element)(target, e, 0) #define FREE_TARGET (*targetClass._free)(target) @@ -201,7 +201,7 @@ PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr) HT_Is_Gopher_URL = TRUE; /* tell HTML.c that this is a Gopher URL */ (*targetClass.start_element)(target, HTML_A, present, - (CONST char **)value, 0); + (CONST char **)value, -1, 0); PUTS(text); END(HTML_A); @@ -1773,7 +1773,7 @@ PUBLIC int HTLoadGopher ARGS4( int len; if ((len = strlen(arg)) > 5) { - if (0 == strcmp(&arg[len-6], ":105/2")) { + if (0 == strcmp((CONST char *)&arg[len-6], ":105/2")) { /* Use CSO gateway. 
*/ if (TRACE) fprintf(stderr, "HTGopher: Passing to CSO/PH gateway.\n"); diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c index db6d75b7..0d5c6fdd 100644 --- a/WWW/Library/Implementation/HTMIME.c +++ b/WWW/Library/Implementation/HTMIME.c @@ -2292,26 +2292,60 @@ PUBLIC void HTmmdec_quote ARGS2( strcpy(t, buf); } +/* Generalized HTmmdecode for chartrans - kweide 1997-03-06 */ + PUBLIC void HTmmdecode ARGS2( char *, trg, char *, str) { char buf[BUFLEN], mmbuf[BUFLEN]; - char *s, *t, *u; + char *s, *t, *u, *qm2; int base64, quote; buf[0] = '\0'; +/* encoded-words look like =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= */ + for (s = str, u = buf; *s; ) { - if (!strncasecomp(s, "=?ISO-2022-JP?B?", 16)) { - base64 = 1; - } else { - base64 = 0; - } - if (!strncasecomp(s, "=?ISO-2022-JP?Q?", 16)) { - quote = 1; - } else { - quote = 0; + base64 = quote = 0; + if (*s == '=' && s[1] == '?' && + (s == str || *(s-1) == '(' || WHITE(*(s-1)))) + { /* must be beginning of word */ + qm2 = strchr(s+2, '?'); /* 2nd question mark */ + if (qm2 && + (qm2[1] == 'B' || qm2[1] == 'b' || qm2[1] == 'Q' || qm2[1] == 'q') && + qm2[2] == '?') { /* 3rd question mark */ + char * qm4 = strchr(qm2 + 3, '?'); /* 4th question mark */ + if (qm4 && qm4 - s < 74 && /* RFC 2047 length restriction */ + qm4[1] == '=') { + char *p; + BOOL invalid = NO; + for (p = s+2; p < qm4; p++) + if (WHITE(*p)) { + invalid = YES; + break; + } + if (!invalid) { + int LYhndl; + *qm2 = '\0'; +#ifdef EXP_CHARTRANS + for (p = s+2; *p; p++) + *p = TOLOWER(*p); + invalid = ((LYhndl = UCGetLYhndl_byMIME(s+2)) < 0 || + !UCCanTranslateFromTo(LYhndl, current_char_set)); +#else + invalid = (0!=strncasecomp(s+2, "ISO-2022-JP", 11)); +#endif + *qm2 = '?'; + } + if (!invalid) { + if (qm2[1] == 'B' || qm2[1] == 'b') + base64 = 1; + else if (qm2[1] == 'Q' || qm2[1] == 'q') + quote = 1; + } + } + } } if (base64 || quote) { if (HTmmcont) { @@ -2320,7 +2354,7 @@ PUBLIC void HTmmdecode ARGS2( u--; } 
} - for (s += 16, t = mmbuf; *s; ) { + for (s = qm2 + 3, t = mmbuf; *s; ) { if (s[0] == '?' && s[1] == '=') { break; } else { @@ -2461,7 +2495,7 @@ PUBLIC int HTrjis ARGS2( */ /* * RJIS ( Recover JIS code from broken file ) - * $Header: /usr/build/VCS/lynx/WWW/Library/Implementation/RCS/HTMIME.c,v 1.10 1997/09/19 01:14:00 klaus Exp $ + * @Header: rjis.c,v 0.2 92/09/04 takahasi Exp @ * Copyright (C) 1992 1994 * Hironobu Takahashi (takahasi@tiny.or.jp) * diff --git a/WWW/Library/Implementation/HTML.h b/WWW/Library/Implementation/HTML.h index 7480b678..f1653cf1 100644 --- a/WWW/Library/Implementation/HTML.h +++ b/WWW/Library/Implementation/HTML.h @@ -76,6 +76,8 @@ struct _HTStructured { BOOL select_disabled; HTChunk textarea; /* Grow by 128 */ char * textarea_name; + int textarea_name_cs; + char * textarea_accept_cs; char * textarea_cols; int textarea_rows; int textarea_disabled; @@ -148,6 +150,7 @@ struct _HTStructured { LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ int UCLYhndl; /* tells us what charset we are fed */ UCTransParams T; + int tag_charset; /* charset for attribute values etc. */ #endif }; diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index 7eb1d86b..cef1eaef 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -135,9 +135,8 @@ static CONST char* entities[] = { /* Extra Entity Names ** ------------------ ** -** This table contains Unicodes in addition to the Names. +** This table contains Unicodes in addition to the Names. - kw ** -** Just an idea how it could be done. -kw * * I think in the future the whole entities[] thing above could migrate * to this kind of structure. 
The structured streams to which @@ -160,137 +159,137 @@ static CONST char* entities[] = { - lrm, rlm, zwnj and zwj */ static CONST UC_entity_info extra_entities[] = { - {"Aacute", 0x00c1}, /* A with acute */ - {"Abreve", 0x0102}, /* A with breve */ - {"Acirc", 0x00c2}, /* A with circumflex */ - {"Aogon", 0x0104}, /* A with ogonek */ - {"Auml", 0x00c4}, /* A with diaeresis */ - {"Cacute", 0x0106}, /* C with acute */ - {"Ccaron", 0x010c}, /* C with caron */ - {"Ccedil", 0x00c7}, /* C with cedilla */ - {"Dcaron", 0x010e}, /* D with caron */ - {"Dstrok", 0x0110}, /* D with stroke */ - {"Eacute", 0x00c9}, /* E with acute */ - {"Ecaron", 0x011a}, /* E with caron */ - {"Eogon", 0x0118}, /* E with ogonek */ - {"Euml", 0x00cb}, /* E with diaeresis */ - {"Iacute", 0x00cd}, /* I with acute */ - {"Icirc", 0x00ce}, /* I with circumflex */ - {"Lacute", 0x0139}, /* L with acute */ - {"Lcaron", 0x013d}, /* L with caron */ - {"Lstrok", 0x0141}, /* L with stroke */ - {"Nacute", 0x0143}, /* N with acute */ - {"Ncaron", 0x0147}, /* N with caron */ - {"Oacute", 0x00d3}, /* O with acute */ - {"Ocirc", 0x00d4}, /* O with circumflex */ - {"Odblac", 0x0150}, /* O with double acute */ - {"Ouml", 0x00d6}, /* O with diaeresis */ - {"Racute", 0x0154}, /* R with acute */ - {"Rcaron", 0x0158}, /* R with caron */ - {"Sacute", 0x015a}, /* S with acute */ - {"Scaron", 0x0160}, /* S with caron */ - {"Scedil", 0x015e}, /* S with cedilla */ - {"Tcaron", 0x0164}, /* T with caron */ - {"Tcedil", 0x0162}, /* T with cedilla */ - {"Uacute", 0x00da}, /* U with acute */ - {"Udblac", 0x0170}, /* U with double acute */ - {"Uring", 0x016e}, /* U with ring above */ - {"Uuml", 0x00dc}, /* U with diaeresis */ - {"Yacute", 0x00dd}, /* Y with acute */ - {"Zacute", 0x0179}, /* Z with acute */ - {"Zcaron", 0x017d}, /* Z with caron */ - {"Zdot", 0x017b}, /* Z with dot above */ - {"aacute", 0x00e1}, /* a with acute */ - {"abreve", 0x0103}, /* a with breve */ - {"acirc", 0x00e2}, /* a with circumflex */ - {"acute", 
0x00b4}, /* acuteaccent */ - {"amp", 0x0026}, /* ampersand */ - {"aogon", 0x0105}, /* a with ogonek */ - {"apos", 0x0027}, /* apostrophe */ - {"ast", 0x002a}, /* asterisk */ - {"auml", 0x00e4}, /* a with diaeresis */ - {"breve", 0x02d8}, /* breve */ - {"bsol", 0x005c}, /* reversesolidus */ - {"cacute", 0x0107}, /* c with acute */ - {"caron", 0x02c7}, /* caron */ - {"ccaron", 0x010d}, /* c with caron */ - {"ccedil", 0x00e7}, /* c with cedilla */ - {"cedil", 0x00b8}, /* cedilla */ - {"circ", 0x005e}, /* circumflexaccent */ - {"colon", 0x003a}, /* colon */ - {"comma", 0x002c}, /* comma */ - {"commat", 0x0040}, /* commercialat */ - {"curren", 0x00a4}, /* currencysign */ - {"dblac", 0x02dd}, /* doubleacuteaccent */ - {"dcaron", 0x010f}, /* d with caron */ - {"deg", 0x00b0}, /* degreesign */ - {"divide", 0x00f7}, /* divisionsign */ - {"dollar", 0x0024}, /* dollarsign */ - {"dot", 0x02d9}, /* dotabove */ - {"dstrok", 0x0111}, /* d with stroke */ - {"eacute", 0x00e9}, /* e with acute */ - {"ecaron", 0x011b}, /* e with caron */ - {"eogon", 0x0119}, /* e with ogonek */ - {"equals", 0x003d}, /* equalssign */ - {"euml", 0x00eb}, /* e with diaeresis */ - {"excl", 0x0021}, /* exclamationmark */ - {"grave", 0x0060}, /* graveaccent */ - {"gt", 0x003e}, /* greater-thansign */ - {"hyphen", 0x002d}, /* hyphen-minus */ - {"iacute", 0x00ed}, /* i with acute */ - {"icirc", 0x00ee}, /* i with circumflex */ - {"lacute", 0x013a}, /* l with acute */ - {"lcaron", 0x013e}, /* l with caron */ - {"lcub", 0x007b}, /* leftcurlybracket */ - {"lowbar", 0x005f}, /* lowline */ - {"lpar", 0x0028}, /* leftparenthesis */ - {"lrm", 8206}, /* left-to-right mark */ - {"lsqb", 0x005b}, /* leftsquarebracket */ - {"lstrok", 0x0142}, /* l with stroke */ - {"lt", 0x003c}, /* less-thansign */ - {"nacute", 0x0144}, /* n with acute */ - {"nbsp", 0x00a0}, /* no-breakspace */ - {"ncaron", 0x0148}, /* n with caron */ - {"num", 0x0023}, /* numbersign */ - {"oacute", 0x00f3}, /* o with acute */ - {"ocirc", 0x00f4}, /* 
o with circumflex */ - {"odblac", 0x0151}, /* o with double acute */ - {"ogon", 0x02db}, /* ogonek */ - {"ouml", 0x00f6}, /* o with diaeresis */ - {"percnt", 0x0025}, /* percentsign */ - {"period", 0x002e}, /* fullstop */ - {"plus", 0x002b}, /* plussign */ - {"quest", 0x003f}, /* questionmark */ - {"quot", 0x0022}, /* quotationmark */ - {"racute", 0x0155}, /* r with acute */ - {"rcaron", 0x0159}, /* r with caron */ - {"rcub", 0x007d}, /* rightcurlybracket */ - {"rlm", 8207}, /* right-to-left mark */ - {"rpar", 0x0029}, /* rightparenthesis */ - {"rsqb", 0x005d}, /* rightsquarebracket */ - {"sacute", 0x015b}, /* s with acute */ - {"scaron", 0x0161}, /* s with caron */ - {"scedil", 0x015f}, /* s with cedilla */ - {"sect", 0x00a7}, /* sectionsign */ - {"semi", 0x003b}, /* semicolon */ - {"shy", 0x00ad}, /* softhyphen */ - {"sol", 0x002f}, /* solidus */ - {"tcaron", 0x0165}, /* t with caron */ - {"tcedil", 0x0163}, /* t with cedilla */ - {"tilde", 0x007e}, /* tilde */ - {"times", 0x00d7}, /* multiplicationsign */ - {"uacute", 0x00fa}, /* u with acute */ - {"udblac", 0x0171}, /* u with double acute */ - {"uml", 0x00a8}, /* diaeresis */ - {"uring", 0x016f}, /* u with ring above */ - {"uuml", 0x00fc}, /* u with diaeresis */ - {"verbar", 0x007c}, /* verticalline */ - {"yacute", 0x00fd}, /* y with acute */ - {"zacute", 0x017a}, /* z with acute */ - {"zcaron", 0x017e}, /* z with caron */ - {"zdot", 0x017c}, /* z with dot above */ - {"zwj", 8205}, /* zero width joiner */ - {"zwnj", 8204}, /* zero width non-joiner */ + {"Aacute", 0x00c1}, /* A with acute */ + {"Abreve", 0x0102}, /* A with breve */ + {"Acirc", 0x00c2}, /* A with circumflex */ + {"Aogon", 0x0104}, /* A with ogonek */ + {"Auml", 0x00c4}, /* A with diaeresis */ + {"Cacute", 0x0106}, /* C with acute */ + {"Ccaron", 0x010c}, /* C with caron */ + {"Ccedil", 0x00c7}, /* C with cedilla */ + {"Dcaron", 0x010e}, /* D with caron */ + {"Dstrok", 0x0110}, /* D with stroke */ + {"Eacute", 0x00c9}, /* E with acute */ + 
{"Ecaron", 0x011a}, /* E with caron */ + {"Eogon", 0x0118}, /* E with ogonek */ + {"Euml", 0x00cb}, /* E with diaeresis */ + {"Iacute", 0x00cd}, /* I with acute */ + {"Icirc", 0x00ce}, /* I with circumflex */ + {"Lacute", 0x0139}, /* L with acute */ + {"Lcaron", 0x013d}, /* L with caron */ + {"Lstrok", 0x0141}, /* L with stroke */ + {"Nacute", 0x0143}, /* N with acute */ + {"Ncaron", 0x0147}, /* N with caron */ + {"Oacute", 0x00d3}, /* O with acute */ + {"Ocirc", 0x00d4}, /* O with circumflex */ + {"Odblac", 0x0150}, /* O with double acute */ + {"Ouml", 0x00d6}, /* O with diaeresis */ + {"Racute", 0x0154}, /* R with acute */ + {"Rcaron", 0x0158}, /* R with caron */ + {"Sacute", 0x015a}, /* S with acute */ + {"Scaron", 0x0160}, /* S with caron */ + {"Scedil", 0x015e}, /* S with cedilla */ + {"Tcaron", 0x0164}, /* T with caron */ + {"Tcedil", 0x0162}, /* T with cedilla */ + {"Uacute", 0x00da}, /* U with acute */ + {"Udblac", 0x0170}, /* U with double acute */ + {"Uring", 0x016e}, /* U with ring above */ + {"Uuml", 0x00dc}, /* U with diaeresis */ + {"Yacute", 0x00dd}, /* Y with acute */ + {"Zacute", 0x0179}, /* Z with acute */ + {"Zcaron", 0x017d}, /* Z with caron */ + {"Zdot", 0x017b}, /* Z with dot above */ + {"aacute", 0x00e1}, /* a with acute */ + {"abreve", 0x0103}, /* a with breve */ + {"acirc", 0x00e2}, /* a with circumflex */ + {"acute", 0x00b4}, /* acuteaccent */ + {"amp", 0x0026}, /* ampersand */ + {"aogon", 0x0105}, /* a with ogonek */ + {"apos", 0x0027}, /* apostrophe */ + {"ast", 0x002a}, /* asterisk */ + {"auml", 0x00e4}, /* a with diaeresis */ + {"breve", 0x02d8}, /* breve */ + {"bsol", 0x005c}, /* reversesolidus */ + {"cacute", 0x0107}, /* c with acute */ + {"caron", 0x02c7}, /* caron */ + {"ccaron", 0x010d}, /* c with caron */ + {"ccedil", 0x00e7}, /* c with cedilla */ + {"cedil", 0x00b8}, /* cedilla */ + {"circ", 0x005e}, /* circumflexaccent */ + {"colon", 0x003a}, /* colon */ + {"comma", 0x002c}, /* comma */ + {"commat", 0x0040}, /* commercialat */ 
+ {"curren", 0x00a4}, /* currencysign */ + {"dblac", 0x02dd}, /* doubleacuteaccent */ + {"dcaron", 0x010f}, /* d with caron */ + {"deg", 0x00b0}, /* degreesign */ + {"divide", 0x00f7}, /* divisionsign */ + {"dollar", 0x0024}, /* dollarsign */ + {"dot", 0x02d9}, /* dotabove */ + {"dstrok", 0x0111}, /* d with stroke */ + {"eacute", 0x00e9}, /* e with acute */ + {"ecaron", 0x011b}, /* e with caron */ + {"eogon", 0x0119}, /* e with ogonek */ + {"equals", 0x003d}, /* equalssign */ + {"euml", 0x00eb}, /* e with diaeresis */ + {"excl", 0x0021}, /* exclamationmark */ + {"grave", 0x0060}, /* graveaccent */ + {"gt", 0x003e}, /* greater-thansign */ + {"hyphen", 0x002d}, /* hyphen-minus */ + {"iacute", 0x00ed}, /* i with acute */ + {"icirc", 0x00ee}, /* i with circumflex */ + {"lacute", 0x013a}, /* l with acute */ + {"lcaron", 0x013e}, /* l with caron */ + {"lcub", 0x007b}, /* leftcurlybracket */ + {"lowbar", 0x005f}, /* lowline */ + {"lpar", 0x0028}, /* leftparenthesis */ + {"lrm", 8206}, /* left-to-right mark */ + {"lsqb", 0x005b}, /* leftsquarebracket */ + {"lstrok", 0x0142}, /* l with stroke */ + {"lt", 0x003c}, /* less-thansign */ + {"nacute", 0x0144}, /* n with acute */ + {"nbsp", 0x00a0}, /* no-breakspace */ + {"ncaron", 0x0148}, /* n with caron */ + {"num", 0x0023}, /* numbersign */ + {"oacute", 0x00f3}, /* o with acute */ + {"ocirc", 0x00f4}, /* o with circumflex */ + {"odblac", 0x0151}, /* o with double acute */ + {"ogon", 0x02db}, /* ogonek */ + {"ouml", 0x00f6}, /* o with diaeresis */ + {"percnt", 0x0025}, /* percentsign */ + {"period", 0x002e}, /* fullstop */ + {"plus", 0x002b}, /* plussign */ + {"quest", 0x003f}, /* questionmark */ + {"quot", 0x0022}, /* quotationmark */ + {"racute", 0x0155}, /* r with acute */ + {"rcaron", 0x0159}, /* r with caron */ + {"rcub", 0x007d}, /* rightcurlybracket */ + {"rlm", 8207}, /* right-to-left mark */ + {"rpar", 0x0029}, /* rightparenthesis */ + {"rsqb", 0x005d}, /* rightsquarebracket */ + {"sacute", 0x015b}, /* s with acute */ 
+ {"scaron", 0x0161}, /* s with caron */ + {"scedil", 0x015f}, /* s with cedilla */ + {"sect", 0x00a7}, /* sectionsign */ + {"semi", 0x003b}, /* semicolon */ + {"shy", 0x00ad}, /* softhyphen */ + {"sol", 0x002f}, /* solidus */ + {"tcaron", 0x0165}, /* t with caron */ + {"tcedil", 0x0163}, /* t with cedilla */ + {"tilde", 0x007e}, /* tilde */ + {"times", 0x00d7}, /* multiplicationsign */ + {"uacute", 0x00fa}, /* u with acute */ + {"udblac", 0x0171}, /* u with double acute */ + {"uml", 0x00a8}, /* diaeresis */ + {"uring", 0x016f}, /* u with ring above */ + {"uuml", 0x00fc}, /* u with diaeresis */ + {"verbar", 0x007c}, /* verticalline */ + {"yacute", 0x00fd}, /* y with acute */ + {"zacute", 0x017a}, /* z with acute */ + {"zcaron", 0x017e}, /* z with caron */ + {"zdot", 0x017c}, /* z with dot above */ + {"zwj", 8205}, /* zero width joiner */ + {"zwnj", 8204}, /* zero width non-joiner */ }; #endif /* EXP_CHARTRANS */ @@ -608,6 +607,7 @@ static attr font_attr[] = { /* FONT attributes */ }; static attr form_attr[] = { /* FORM attributes */ + { "ACCEPT-CHARSET"}, /* HTML 4.0 draft - kw */ { "ACTION"}, { "CLASS" }, { "CLEAR" }, @@ -738,6 +738,7 @@ static attr img_attr[] = { /* IMG attributes */ static attr input_attr[] = { /* INPUT attributes */ { "ACCEPT" }, + { "ACCEPT-CHARSET" }, /* RFC 2070 HTML i18n - kw */ { "ALIGN" }, { "ALT" }, { "CHECKED" }, @@ -822,6 +823,7 @@ static attr legend_attr[] = { /* LEGEND attributes */ }; static attr link_attr[] = { /* LINK attributes */ + { "CHARSET" }, /* RFC 2070 HTML i18n -- hint for UA -- - kw */ { "CLASS" }, { "HREF" }, { "ID" }, @@ -1126,6 +1128,7 @@ static attr td_attr[] = { /* TD and TH attributes */ }; static attr textarea_attr[] = { /* TEXTAREA attributes */ + { "ACCEPT-CHARSET" }, /* RFC 2070 HTML i18n - kw */ { "ALIGN" }, { "CLASS" }, { "CLEAR" }, @@ -1471,6 +1474,8 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "XMP" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL }, */ #define T_XMP 0x0800, 
0x00000,0x00000,0x367E0,0x36FFF,0x0875F,0x00001 +#define T__UNREC_ 0x0000, 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000 + /* Elements ** -------- ** @@ -1762,6 +1767,9 @@ PUBLIC void HTSwitchDTD ARGS1( memcpy(tags, tags_old, HTML_ELEMENTS * sizeof(HTTag)); } +PUBLIC CONST HTTag HTTag_unrecognized = + { NULL, NULL, 0, SGML_EMPTY,T__UNREC_}; + /* ** Utility Routine: Useful for people building HTML objects. */ @@ -1799,7 +1807,7 @@ PUBLIC void HTStartAnchor ARGS3( value[HTML_A_HREF] = (CONST char *)href; } - (*obj->isa->start_element)(obj, HTML_A, present, value, 0); + (*obj->isa->start_element)(obj, HTML_A, present, value, -1, 0); } PUBLIC void HTStartIsIndex ARGS3( @@ -1823,5 +1831,5 @@ PUBLIC void HTStartIsIndex ARGS3( value[HTML_ISINDEX_HREF] = (CONST char *)href; } - (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, 0); + (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, -1, 0); } diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h index 98816060..1e5646fd 100644 --- a/WWW/Library/Implementation/HTMLDTD.h +++ b/WWW/Library/Implementation/HTMLDTD.h @@ -178,7 +178,7 @@ Attribute numbers */ #define HTML_A_ACCESSKEY 0 -#define HTML_A_CHARSET 1 /* i18n draft, added tentatively - kw */ +#define HTML_A_CHARSET 1 /* RFC 2070 HTML i18n - kw */ #define HTML_A_CLASS 2 #define HTML_A_CLEAR 3 #define HTML_A_COORDS 4 @@ -455,21 +455,22 @@ Attribute numbers #define HTML_FONT_STYLE 9 #define HTML_FONT_ATTRIBUTES 10 -#define HTML_FORM_ACTION 0 -#define HTML_FORM_CLASS 1 -#define HTML_FORM_CLEAR 2 -#define HTML_FORM_DIR 3 -#define HTML_FORM_ENCTYPE 4 -#define HTML_FORM_ID 5 -#define HTML_FORM_LANG 6 -#define HTML_FORM_METHOD 7 -#define HTML_FORM_ONSUBMIT 8 -#define HTML_FORM_SCRIPT 9 -#define HTML_FORM_STYLE 10 -#define HTML_FORM_SUBJECT 11 -#define HTML_FORM_TARGET 12 -#define HTML_FORM_TITLE 13 -#define HTML_FORM_ATTRIBUTES 14 +#define HTML_FORM_ACCEPT_CHARSET 0 /* HTML 4.0 draft - kw */ +#define HTML_FORM_ACTION 
1 +#define HTML_FORM_CLASS 2 +#define HTML_FORM_CLEAR 3 +#define HTML_FORM_DIR 4 +#define HTML_FORM_ENCTYPE 5 +#define HTML_FORM_ID 6 +#define HTML_FORM_LANG 7 +#define HTML_FORM_METHOD 8 +#define HTML_FORM_ONSUBMIT 9 +#define HTML_FORM_SCRIPT 10 +#define HTML_FORM_STYLE 11 +#define HTML_FORM_SUBJECT 12 +#define HTML_FORM_TARGET 13 +#define HTML_FORM_TITLE 14 +#define HTML_FORM_ATTRIBUTES 15 #define HTML_FRAME_ID 0 #define HTML_FRAME_MARGINHEIGHT 1 @@ -557,37 +558,38 @@ Attribute numbers #define HTML_IMG_ATTRIBUTES 18 #define HTML_INPUT_ACCEPT 0 -#define HTML_INPUT_ALIGN 1 -#define HTML_INPUT_ALT 2 -#define HTML_INPUT_CHECKED 3 -#define HTML_INPUT_CLASS 4 -#define HTML_INPUT_CLEAR 5 -#define HTML_INPUT_DIR 6 -#define HTML_INPUT_DISABLED 7 -#define HTML_INPUT_ERROR 8 -#define HTML_INPUT_HEIGHT 9 -#define HTML_INPUT_ID 10 -#define HTML_INPUT_LANG 11 -#define HTML_INPUT_MAX 12 -#define HTML_INPUT_MAXLENGTH 13 -#define HTML_INPUT_MD 14 -#define HTML_INPUT_MIN 15 -#define HTML_INPUT_NAME 16 -#define HTML_INPUT_NOTAB 17 -#define HTML_INPUT_ONBLUR 18 -#define HTML_INPUT_ONCHANGE 19 -#define HTML_INPUT_ONCLICK 20 -#define HTML_INPUT_ONFOCUS 21 -#define HTML_INPUT_ONSELECT 22 -#define HTML_INPUT_SIZE 23 -#define HTML_INPUT_SRC 24 -#define HTML_INPUT_STYLE 25 -#define HTML_INPUT_TABINDEX 26 -#define HTML_INPUT_TITLE 27 -#define HTML_INPUT_TYPE 28 -#define HTML_INPUT_VALUE 29 -#define HTML_INPUT_WIDTH 30 -#define HTML_INPUT_ATTRIBUTES 31 +#define HTML_INPUT_ACCEPT_CHARSET 1 /* RFC 2070 HTML i18n - kw */ +#define HTML_INPUT_ALIGN 2 +#define HTML_INPUT_ALT 3 +#define HTML_INPUT_CHECKED 4 +#define HTML_INPUT_CLASS 5 +#define HTML_INPUT_CLEAR 6 +#define HTML_INPUT_DIR 7 +#define HTML_INPUT_DISABLED 8 +#define HTML_INPUT_ERROR 9 +#define HTML_INPUT_HEIGHT 10 +#define HTML_INPUT_ID 11 +#define HTML_INPUT_LANG 12 +#define HTML_INPUT_MAX 13 +#define HTML_INPUT_MAXLENGTH 14 +#define HTML_INPUT_MD 15 +#define HTML_INPUT_MIN 16 +#define HTML_INPUT_NAME 17 +#define HTML_INPUT_NOTAB 18 
+#define HTML_INPUT_ONBLUR 19 +#define HTML_INPUT_ONCHANGE 20 +#define HTML_INPUT_ONCLICK 21 +#define HTML_INPUT_ONFOCUS 22 +#define HTML_INPUT_ONSELECT 23 +#define HTML_INPUT_SIZE 24 +#define HTML_INPUT_SRC 25 +#define HTML_INPUT_STYLE 26 +#define HTML_INPUT_TABINDEX 27 +#define HTML_INPUT_TITLE 28 +#define HTML_INPUT_TYPE 29 +#define HTML_INPUT_VALUE 30 +#define HTML_INPUT_WIDTH 31 +#define HTML_INPUT_ATTRIBUTES 32 #define HTML_ISINDEX_ACTION 0 /* Treat as synonym for HREF. - FM */ #define HTML_ISINDEX_DIR 1 @@ -646,17 +648,18 @@ Attribute numbers #define HTML_LI_VALUE 12 #define HTML_LI_ATTRIBUTES 13 -#define HTML_LINK_CLASS 0 -#define HTML_LINK_HREF 1 -#define HTML_LINK_ID 2 -#define HTML_LINK_MEDIA 3 -#define HTML_LINK_REL 4 -#define HTML_LINK_REV 5 -#define HTML_LINK_STYLE 6 -#define HTML_LINK_TARGET 7 -#define HTML_LINK_TITLE 8 -#define HTML_LINK_TYPE 9 -#define HTML_LINK_ATTRIBUTES 10 +#define HTML_LINK_CHARSET 0 /* RFC 2070 HTML i18n - kw */ +#define HTML_LINK_CLASS 1 +#define HTML_LINK_HREF 2 +#define HTML_LINK_ID 3 +#define HTML_LINK_MEDIA 4 +#define HTML_LINK_REL 5 +#define HTML_LINK_REV 6 +#define HTML_LINK_STYLE 7 +#define HTML_LINK_TARGET 8 +#define HTML_LINK_TITLE 9 +#define HTML_LINK_TYPE 10 +#define HTML_LINK_ATTRIBUTES 11 #define HTML_MAP_CLASS 0 #define HTML_MAP_CLEAR 1 @@ -897,26 +900,27 @@ Attribute numbers #define HTML_TD_VALIGN 16 #define HTML_TD_ATTRIBUTES 17 -#define HTML_TEXTAREA_ALIGN 0 -#define HTML_TEXTAREA_CLASS 1 -#define HTML_TEXTAREA_CLEAR 2 -#define HTML_TEXTAREA_COLS 3 -#define HTML_TEXTAREA_DIR 4 -#define HTML_TEXTAREA_DISABLED 5 -#define HTML_TEXTAREA_ERROR 6 -#define HTML_TEXTAREA_ID 7 -#define HTML_TEXTAREA_LANG 8 -#define HTML_TEXTAREA_NAME 9 -#define HTML_TEXTAREA_NOTAB 10 -#define HTML_TEXTAREA_ONBLUR 11 -#define HTML_TEXTAREA_ONCHANGE 12 -#define HTML_TEXTAREA_ONFOCUS 13 -#define HTML_TEXTAREA_ONSELECT 14 -#define HTML_TEXTAREA_ROWS 15 -#define HTML_TEXTAREA_STYLE 16 -#define HTML_TEXTAREA_TABINDEX 17 -#define 
HTML_TEXTAREA_TITLE 18 -#define HTML_TEXTAREA_ATTRIBUTES 19 +#define HTML_TEXTAREA_ACCEPT_CHARSET 0 /* RFC 2070 HTML i18n - kw */ +#define HTML_TEXTAREA_ALIGN 1 +#define HTML_TEXTAREA_CLASS 2 +#define HTML_TEXTAREA_CLEAR 3 +#define HTML_TEXTAREA_COLS 4 +#define HTML_TEXTAREA_DIR 5 +#define HTML_TEXTAREA_DISABLED 6 +#define HTML_TEXTAREA_ERROR 7 +#define HTML_TEXTAREA_ID 8 +#define HTML_TEXTAREA_LANG 9 +#define HTML_TEXTAREA_NAME 10 +#define HTML_TEXTAREA_NOTAB 11 +#define HTML_TEXTAREA_ONBLUR 12 +#define HTML_TEXTAREA_ONCHANGE 13 +#define HTML_TEXTAREA_ONFOCUS 14 +#define HTML_TEXTAREA_ONSELECT 15 +#define HTML_TEXTAREA_ROWS 16 +#define HTML_TEXTAREA_STYLE 17 +#define HTML_TEXTAREA_TABINDEX 18 +#define HTML_TEXTAREA_TITLE 19 +#define HTML_TEXTAREA_ATTRIBUTES 20 #define HTML_TR_ALIGN 0 #define HTML_TR_CHAR 1 @@ -954,6 +958,8 @@ extern CONST SGML_dtd HTML_dtd; extern void HTSwitchDTD PARAMS(( BOOL new)); +extern CONST HTTag HTTag_unrecognized; + /* Start anchor element diff --git a/WWW/Library/Implementation/HTMLGen.c b/WWW/Library/Implementation/HTMLGen.c index e3ca3e9d..ef5c2faa 100644 --- a/WWW/Library/Implementation/HTMLGen.c +++ b/WWW/Library/Implementation/HTMLGen.c @@ -66,7 +66,8 @@ struct _HTStructured { ** ------------ */ -PRIVATE void flush_breaks (HTStructured * me) +PRIVATE void flush_breaks ARGS1( + HTStructured *, me) { int i; for (i=0; i<= MAX_CLEANNESS; i++) { @@ -91,7 +92,10 @@ PRIVATE void HTMLGen_flush ARGS1( ** We keep track of all the breaks for when we chop the line */ -PRIVATE void allow_break (HTStructured * me, int new_cleanness, BOOL dlbc) +PRIVATE void allow_break ARGS3( + HTStructured *, me, + int, new_cleanness, + BOOL, dlbc) { if (dlbc && me->write_pointer == me->buffer) dlbc = NO; me->line_break[new_cleanness] = @@ -254,11 +258,12 @@ PRIVATE void HTMLGen_write ARGS3( ** Within the opening tag, there may be spaces ** and the line may be broken at these spaces. 
*/ -PRIVATE void HTMLGen_start_element ARGS5( +PRIVATE void HTMLGen_start_element ARGS6( HTStructured *, me, int, element_number, CONST BOOL*, present, CONST char **, value, + int, charset, char **, insert) { int i; @@ -433,6 +438,7 @@ PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */ */ extern int LYcols; /* LYCurses.h, set in LYMain.c */ extern BOOL dump_output_immediately; /* TRUE if no interactive user */ +extern int dump_output_width; /* -width instead of 80 */ extern BOOLEAN LYPreparsedSource; /* Show source as preparsed? */ PUBLIC HTStructured * HTMLGenerator ARGS1( @@ -460,17 +466,19 @@ PUBLIC HTStructured * HTMLGenerator ARGS1( */ if (!LYPreparsedSource) { me->buffer_maxchars = 80; /* work as before - kw */ + } else if (dump_output_width > 1) { + me->buffer_maxchars = dump_output_width; /* try to honor -width - kw */ } else if (dump_output_immediately) { - me->buffer_maxchars = 80; /* work as before - kw */ + me->buffer_maxchars = 80; /* try to honor -width - kw */ } else { me->buffer_maxchars = LYcols - 2; if (me->buffer_maxchars < 38) /* too narrow, let GridText deal */ me->buffer_maxchars = 40; - if (me->buffer_maxchars > 900) /* likely not true - kw */ - me->buffer_maxchars = 78; - if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */ - me->buffer_maxchars = BUFFER_SIZE - 2; } + if (me->buffer_maxchars > 900) /* likely not true - kw */ + me->buffer_maxchars = 78; + if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! 
*/ + me->buffer_maxchars = BUFFER_SIZE - 2; /* * If dump_output_immediately is set, there likely isn't anything @@ -513,10 +521,10 @@ PUBLIC HTStream* HTPlainToHTML ARGS3( HTParentAnchor *, anchor, HTStream *, sink) { - HTStructured* me = (HTStructured*)malloc(sizeof(*me)); + HTStructured *me = (HTStructured *)malloc(sizeof(*me)); if (me == NULL) outofmem(__FILE__, "PlainToHTML"); - me->isa = (CONST HTStructuredClass*) &PlainToHTMLConversion; + me->isa = (CONST HTStructuredClass *)&PlainToHTMLConversion; /* * Copy pointers to routines for speed. @@ -528,8 +536,10 @@ PUBLIC HTStream* HTPlainToHTML ARGS3( me->cleanness = 0; me->overflowed = NO; me->delete_line_break_char[0] = NO; - me->buffer_maxchars = 80; - + /* try to honor -width - kw */ + me->buffer_maxchars = (dump_output_width > 1 ? + dump_output_width : 80); + HTMLGen_put_string(me, "<HTML>\n<BODY>\n<PRE>\n"); me->preformatted = YES; me->escape_specials = NO; diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c index bcf2993e..bfd64e5a 100644 --- a/WWW/Library/Implementation/HTNews.c +++ b/WWW/Library/Implementation/HTNews.c @@ -82,7 +82,7 @@ PRIVATE HTList *NNTP_AuthInfo = NULL; /* AUTHINFO database */ #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) -#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) (*targetClass.end_element)(target, e, 0) #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ (*targetClass.end_element)(target, e, 0) @@ -639,7 +639,7 @@ PRIVATE void start_anchor ARGS1(CONST char *, href) } ((CONST char **)value)[HTML_A_HREF] = href; (*targetClass.start_element)(target, HTML_A , present, - (CONST char **)value, 0); + (CONST char **)value, -1, 0); } /* Start link element @@ -658,7 +658,7 @@ PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev) ((CONST char 
**)value)[HTML_LINK_HREF] = href; ((CONST char **)value)[HTML_LINK_REV] = rev; (*targetClass.start_element)(target, HTML_LINK, present, - (CONST char **)value, 0); + (CONST char **)value, -1, 0); } /* Start list element @@ -677,7 +677,7 @@ PRIVATE void start_list ARGS1(int, seqnum) ((CONST char **)value)[HTML_OL_SEQNUM] = SeqNum; ((CONST char **)value)[HTML_OL_START] = SeqNum; (*targetClass.start_element)(target, HTML_OL , present, - (CONST char **)value, 0); + (CONST char **)value, -1, 0); } /* Paste in an Anchor @@ -1010,6 +1010,9 @@ PRIVATE int read_article NOARGS HTmmdecode(subject, subject); HTrjis(subject, subject); } +#ifdef NOTUSED_CHARTRANS + else HTmmdecode(subject, subject); +#endif } else if (match(full_line, "DATE:")) { StrAllocCopy(date, HTStrip(strchr(full_line,':')+1)); @@ -1021,6 +1024,9 @@ PRIVATE int read_article NOARGS HTmmdecode(organization, organization); HTrjis(organization, organization); } +#ifdef NOTUSED_CHARTRANS + else HTmmdecode(organization, organization); +#endif } else if (match(full_line, "FROM:")) { StrAllocCopy(from, HTStrip(strchr(full_line,':')+1)); @@ -1028,6 +1034,9 @@ PRIVATE int read_article NOARGS HTmmdecode(from, from); HTrjis(from, from); } +#ifdef NOTUSED_CHARTRANS + else HTmmdecode(from, from); +#endif } else if (match(full_line, "REPLY-TO:")) { StrAllocCopy(replyto, HTStrip(strchr(full_line,':')+1)); @@ -1035,6 +1044,9 @@ PRIVATE int read_article NOARGS HTmmdecode(replyto, replyto); HTrjis(replyto, replyto); } +#ifdef NOTUSED_CHARTRANS + else HTmmdecode(replyto, replyto); +#endif } else if (match(full_line, "NEWSGROUPS:")) { StrAllocCopy(newsgroups, HTStrip(strchr(full_line,':')+1)); @@ -1750,6 +1762,11 @@ PRIVATE int read_group ARGS3( case 's': if (match(line, "SUBJECT:")) { strcpy(subject, line+9);/* Save subject */ +#ifdef NOTUSED_CHARTRANS + HTmmdecode(subject, subject); + if (HTCJK == JAPANESE) + HTrjis(subject, subject); +#endif if (HTCJK == JAPANESE) { HTmmdecode(subject, subject); HTrjis(subject, subject); @@ 
-1772,6 +1789,11 @@ PRIVATE int read_group ARGS3( char * p; strcpy(author, author_name(strchr(line,':')+1)); +#ifdef NOTUSED_CHARTRANS + HTmmdecode(author, author); + if (HTCJK == JAPANESE) + HTrjis(author, author); +#endif if (HTCJK == JAPANESE) { HTmmdecode(author, author); HTrjis(author, author); diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index 350107c2..e69b77ad 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -594,7 +594,7 @@ PRIVATE CONST unsigned char isAcceptable[96] = 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */ 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */ 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */ - 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */ + 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{|}~ DEL */ PRIVATE char *hex = "0123456789ABCDEF"; #define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask)) @@ -749,3 +749,63 @@ PUBLIC char * HTUnEscapeSome ARGS2( return str; } /* HTUnEscapeSome */ + +PRIVATE CONST unsigned char crfc[96] = + +/* Bit 0 xalpha -- need "quoting" +** Bit 1 xpalpha -- need \escape if quoted +*/ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + { 1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0, /* 2x !"#$%&'()*+,-./ */ + 0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0, /* 3x 0123456789:;<=>? 
*/ + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4x @ABCDEFGHIJKLMNO */ + 0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0, /* 5X PQRSTUVWXYZ[\]^_ */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6x `abcdefghijklmno */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }; /* 7X pqrstuvwxyz{|}~ DEL */ + +PUBLIC void HTMake822Word ARGS1( + char **, str) +{ + CONST char * p; + char * q; + char * result; + unsigned char a; + int added = 0; + if (!(*str) || !(**str)) { + StrAllocCopy(*str, "\"\""); + return; + } + for (p = *str; *p; p++) { + a = *p; + if (a < 32 || a >= 128 || + ((crfc[a-32]) & 1)) { + if (!added) + added = 2; + if (a >= 160 || a == '\t') + continue; + if (a == '\r' || a == '\n') + added += 2; + else if ((a & 127) < 32 || ((crfc[a-32]) & 2)) + added++; + } + } + if (!added) + return; + result = (char *) malloc(p-(*str) + added + 1); + if (result == NULL) + outofmem(__FILE__, "HTMake822Word"); + result[0] = '"'; + for (q = result + 1, p = *str; *p; p++) { + a = TOASCII(*p); + if ((a != '\t') && ((a & 127) < 32 || + ( a < 128 && ((crfc[a-32]) & 2)))) + *q++ = '\''; + *q++ = *p; + if (a == '\n' || (a == '\r' && (TOASCII(*(p+1)) != '\n'))) + *q++ = ' '; + } + *q++ = '"'; + *q++ = '\0'; /* Terminate */ + FREE(*str); + *str = result; +} diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h index 2e01c271..63c84739 100644 --- a/WWW/Library/Implementation/HTParse.h +++ b/WWW/Library/Implementation/HTParse.h @@ -154,6 +154,10 @@ extern char * HTUnEscapeSome PARAMS(( char * str, CONST char * do_trans)); +/* Turn a string which is not a RFC 822 token into a quoted-string - kw */ +extern void HTMake822Word PARAMS(( + char ** str)); + #endif /* HTPARSE_H */ /* diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index a76616e5..c95f55a8 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -34,6 +34,7 @@ extern CONST char * LYchar_set_names[]; extern CONST char **LYCharSets[]; #ifdef EXP_CHARTRANS extern 
int LYlowest_eightbit[]; +extern BOOLEAN LYRawMode; #endif /* EXP_CHARTRANS */ extern CONST char * HTMLGetEntityName PARAMS((int i)); extern BOOL HTPassEightBitRaw; @@ -361,16 +362,6 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ } else if (unsign_c == 173) { continue; -#ifdef EXP_CHARTRANS - } else if (me->T.strip_raw_char_in && - (unsigned char)*p >= 0xc0 && - (unsigned char)*p < 255) { - /* - ** KOI special: strip high bit, gives - ** (somewhat) readable ASCII. - */ - HText_appendCharacter(me->text, (char)(*p & 0x7f)); -#endif /* EXP_CHARTRANS */ /* ** If we get to here, pass the displayable ASCII characters. - FM */ @@ -419,9 +410,22 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HText_appendText(me->text, me->utf_buf); me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; + } else if (me->T.strip_raw_char_in && + (unsigned char)*p >= 0xc0 && + (unsigned char)*p < 255) { + /* + ** KOI special: strip high bit, gives + ** (somewhat) readable ASCII. 
+ */ + HText_appendCharacter(me->text, (char)(*p & 0x7f)); } else if (me->T.trans_from_uni && unsign_c > 255) { - sprintf(replace_buf, "U%.2lx", unsign_c); - HText_appendText(me->text, replace_buf); + if (PASSHI8BIT && PASSHICTRL && LYRawMode && + (unsigned char)*p >= LYlowest_eightbit[me->htext_char_set]) { + HText_appendCharacter(me->text, *p); + } else { + sprintf(replace_buf, "U%.2lx", unsign_c); + HText_appendText(me->text, replace_buf); + } #endif /* EXP_CHARTRANS */ /* @@ -493,6 +497,12 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; + } else if (LYRawMode && + me->in_char_set != me->htext_char_set && + (PASSHI8BIT || PASSHICTRL) && + (unsigned char)c_p >= + LYlowest_eightbit[me->htext_char_set]) { + HText_appendCharacter(me->text, c_p); } else if (me->T.trans_from_uni && unsign_c >= 127) { sprintf(replace_buf,"U%.2lx",unsign_c); HText_appendText(me->text, replace_buf); diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c index 9c08a170..f4009328 100644 --- a/WWW/Library/Implementation/HTTP.c +++ b/WWW/Library/Implementation/HTTP.c @@ -317,9 +317,9 @@ try_again: for (i = 0; line[i]; i++) line[i] = TOLOWER(line[i]); if (strstr(line, "iso-8859-1") == NULL) - strcat(line, ", iso-8859-1;q=0.001"); + strcat(line, ", iso-8859-1;q=0.01"); if (strstr(line, "us-ascii") == NULL) - strcat(line, ", us-ascii;q=0.001"); + strcat(line, ", us-ascii;q=0.01"); StrAllocCat(command, line); sprintf(line, "%c%c", CR, LF); StrAllocCat(command, line); @@ -830,7 +830,7 @@ try_again: if (TRACE) fprintf (stderr, "--- Talking HTTP0.\n"); - format_in = HTFileFormat(url, &encoding); + format_in = HTFileFormat(url, &encoding, NULL); /* ** Treat all plain text as HTML. 
** This sucks but its the only solution without @@ -842,6 +842,23 @@ try_again: "HTTP: format_in being changed to text/HTML\n"); format_in = WWW_HTML; } + if (!IsUnityEnc(encoding)) { + /* + ** Change the format to that for "www/compressed". + */ + if (TRACE) { + fprintf(stderr, + "HTTP: format_in is '%s',\n", HTAtom_name(format_in)); + } + StrAllocCopy(anAnchor->content_type, HTAtom_name(format_in)); + StrAllocCopy(anAnchor->content_encoding, HTAtom_name(encoding)); + format_in = HTAtom_for("www/compressed"); + if (TRACE) { + fprintf(stderr, + " Treating as '%s' with encoding '%s'\n", + "www/compressed", HTAtom_name(encoding)); + } + } start_of_data = line_kept_clean; } else { @@ -893,6 +910,14 @@ try_again: /* ** Good: Got MIME object! (Successful) - FM */ + if (do_head) { + /* + * If HEAD was requested, show headers (and possibly + * bogus body) for all 2xx status codes as text/plain - KW + */ + HTProgress(line_buffer); + break; + } switch (server_status) { case 204: /* diff --git a/WWW/Library/Implementation/HTUtils.h b/WWW/Library/Implementation/HTUtils.h index a0c93655..ec7265fb 100644 --- a/WWW/Library/Implementation/HTUtils.h +++ b/WWW/Library/Implementation/HTUtils.h @@ -21,6 +21,10 @@ /* Explicit system-configure */ #ifdef VMS #define NO_SIZECHANGE +#define NO_UNISTD_H +#define EXP_CHARTRANS +#define NO_KEYPAD +#define NO_UTMP #endif /* FIXME: these will be removed after completing auto-configure script */ diff --git a/WWW/Library/Implementation/HTVMSUtils.c b/WWW/Library/Implementation/HTVMSUtils.c index 495c71ca..118f9517 100644 --- a/WWW/Library/Implementation/HTVMSUtils.c +++ b/WWW/Library/Implementation/HTVMSUtils.c @@ -711,7 +711,7 @@ long status; #include "HTML.h" #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) -#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) 
(*targetClass.end_element)(target, e, 0) #define FREE_TARGET (*targetClass._free)(target) #define ABORT_TARGET (*targetClass._free)(target) @@ -1098,15 +1098,18 @@ PUBLIC int HTVMSBrowseDir ARGS4( entry_info->display = TRUE; /* Get the type */ - format = HTFileFormat(dirbuf->d_name, &encoding); - if(!strncmp(HTAtom_name(format), "application",11)) - { - cp = HTAtom_name(format) + 12; - if(!strncmp(cp,"x-", 2)) + format = HTFileFormat(dirbuf->d_name, &encoding, + (CONST char **)&cp); + if (!cp) { + if(!strncmp(HTAtom_name(format), "application",11)) + { + cp = HTAtom_name(format) + 12; + if(!strncmp(cp,"x-", 2)) cp += 2; - } - else - cp = HTAtom_name(format); + } + else + cp = HTAtom_name(format); + } StrAllocCopy(entry_info->type, cp); StrAllocCopy(entry_info->filename, dirbuf->d_name); diff --git a/WWW/Library/Implementation/HTWAIS.c b/WWW/Library/Implementation/HTWAIS.c index 2bc4c4f6..bd293c2f 100644 --- a/WWW/Library/Implementation/HTWAIS.c +++ b/WWW/Library/Implementation/HTWAIS.c @@ -105,7 +105,7 @@ PRIVATE char line[2048]; /* For building strings to display */ #define PUTC(c) (*target->isa->put_character)(target, c) #define PUTS(s) (*target->isa->put_string)(target, s) -#define START(e) (*target->isa->start_element)(target, e, 0, 0, 0) +#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0) #define END(e) (*target->isa->end_element)(target, e, 0) #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ (*target->isa->end_element)(target, e, 0) diff --git a/WWW/Library/Implementation/HTWSRC.c b/WWW/Library/Implementation/HTWSRC.c index 3d3647b3..731ac5c7 100644 --- a/WWW/Library/Implementation/HTWSRC.c +++ b/WWW/Library/Implementation/HTWSRC.c @@ -37,7 +37,7 @@ struct _HTStructured { #define PUTC(c) (*me->target->isa->put_character)(me->target, c) #define PUTS(s) (*me->target->isa->put_string)(me->target, s) -#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0, 0) +#define START(e) 
(*me->target->isa->start_element)(me->target, e, 0, 0, -1, 0) #define END(e) (*me->target->isa->end_element)(me->target, e, 0) #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ (*me->target->isa->end_element)(me->target, e, 0) diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index e1f56166..51434a66 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -80,6 +80,7 @@ struct _HTStream { HTStructured *target; /* target object */ HTTag *current_tag; + CONST HTTag *unknown_tag; int current_attribute_number; HTChunk *string; HTElement *element_stack; @@ -112,7 +113,7 @@ struct _HTStream { #ifdef EXP_CHARTRANS HTParentAnchor * node_anchor; - LYUCcharset * UCI; /* anchor UCInfo */ + LYUCcharset * UCI; /* pointer to anchor UCInfo */ int in_char_set; /* charset we are fed */ LYUCcharset * htmlUCI; /* anchor UCInfo for target */ int html_char_set; /* feed it to target stream */ @@ -121,6 +122,7 @@ struct _HTStream { char utf_buf[7]; char * utf_buf_p; UCTransParams T; + int current_tag_charset; /* charset to pass attributes */ #endif /* EXP_CHARTRANS */ char * recover; @@ -159,6 +161,20 @@ PRIVATE void set_chartrans_handling ARGS3( UCSetTransParams(&context->T, context->in_char_set, context->UCI, context->html_char_set, context->htmlUCI); + if (HTCJK != NOCJK) { + context->current_tag_charset = -1; + } else if (context->T.transp) { + context->current_tag_charset = context->in_char_set; + } else if (context->T.decode_utf8) { + context->current_tag_charset = context->in_char_set; + } else if (context->T.do_8bitraw || + context->T.use_raw_char_in) { + context->current_tag_charset = context->in_char_set; + } else if (context->T.trans_from_uni || context->T.output_utf8) { + context->current_tag_charset = UCGetLYhndl_byMIME("unicode-1-1-utf-8"); + } else { + context->current_tag_charset = 0; + } } PRIVATE void change_chartrans_handling ARGS1( @@ -222,6 +238,10 @@ PRIVATE void 
handle_attribute_name ARGS2( attr * attributes = tag->attributes; int high, low, i, diff; /* Binary search for attribute name */ + if (tag == context->unknown_tag) { + return; + } + for (low = 0, high = tag->number_of_attributes; high > low; diff < 0 ? (low = i+1) : (high = i)) { @@ -409,16 +429,16 @@ PRIVATE void handle_entity ARGS2( return; } else if ((rc == -4) && /* Not found; look for replacement string */ - (rc = UCTransUniCharStr(replace_buf,60, - extra_entities[i].code, - current_char_set, 0) >= 0 ) ) { - CONST char *p; - for (p=replace_buf; *p; p++) - PUTC(*p); - FoundEntity = TRUE; - return; - } - rc = (*context->actions->put_entity)(context->target, + (rc = UCTransUniCharStr(replace_buf, 60, + extra_entities[i].code, + current_char_set, 0) >= 0)) { + CONST char *p; + for (p = replace_buf; *p; p++) + PUTC(*p); + FoundEntity = TRUE; + return; + } + rc = (*context->actions->put_entity)(context->target, i+context->dtd->number_of_entities); if (rc != HT_CANNOT_TRANSLATE) { FoundEntity = TRUE; @@ -580,7 +600,7 @@ extern BOOL New_DTD; typedef enum { close_NO = 0, close_error = 1, - close_valid = 2, + close_valid = 2 } canclose_t; PRIVATE canclose_t can_close ARGS2( @@ -811,6 +831,7 @@ PRIVATE void start_element ARGS1( new_tag - context->dtd->tags, context->present, (CONST char**) context->value, /* coerce type for think c */ + context->current_tag_charset, (char **)&context->include); if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */ HTElement * N = (HTElement *)malloc(sizeof(HTElement)); @@ -821,7 +842,10 @@ PRIVATE void start_element ARGS1( context->element_stack = N; } #ifdef EXP_CHARTRANS - else { /* check for result of META tag. */ + else if (!strcasecomp(new_tag->name, "META")) { + /* + ** Check for result of META tag. 
- KW & FM + */ change_chartrans_handling(context); } #endif /* EXP_CHARTRANS */ @@ -854,6 +878,10 @@ PUBLIC HTTag * SGMLFindTag ARGS2( return &dtd->tags[i]; } } + if (isalpha((unsigned char)string[0])) { + /* unrecognized, but may be valid - kw */ + return (HTTag *)&HTTag_unrecognized; + } return NULL; } @@ -977,7 +1005,7 @@ PUBLIC void SGML_character ARGS2( HTChunk *string = context->string; CONST char * EntityName; extern int current_char_set; - extern CONST char *LYchar_set_names[]; + extern CONST char * LYchar_set_names[]; extern CONST char * HTMLGetEntityName PARAMS((int i)); #ifdef EXP_CHARTRANS @@ -1118,7 +1146,7 @@ PUBLIC void SGML_character ARGS2( c = replace_buf[0]; if (c && replace_buf[1]) { if (context->state == S_text) { - for (p=replace_buf; *p; p++) + for (p = replace_buf; *p; p++) PUTC(*p); return; } @@ -1272,7 +1300,7 @@ top1: /* ** Not found; look for replacement string. - KW */ - (uck = UCTransUniCharStr(replace_buf,60, clong, + (uck = UCTransUniCharStr(replace_buf, 60, clong, context->html_char_set, 0) >= 0)) { /* @@ -1401,7 +1429,8 @@ top1: ** Handle possible named entity. */ case S_entity: - if (unsign_c < 127 && isalnum((unsigned char)c)) { + if (unsign_c < 127 && (string->size ? + isalnum((unsigned char)c) : isalpha((unsigned char)c))) { /* ** Accept valid ASCII character. - FM */ @@ -1610,16 +1639,6 @@ top1: context->state = S_text; goto top1; } - } else if (value == 160) { - /* - ** Use Lynx special character for 160 (nbsp). - FM - */ - PUTC(HT_NON_BREAK_SPACE); - } else if (value == 173) { - /* - ** Use Lynx special character for 173 (shy) - FM - */ - PUTC(LY_SOFT_HYPHEN); } else if (value < 161 || HTPassEightBitNum || !strncmp(LYchar_set_names[current_char_set], "ISO Latin 1", 11)) { @@ -1712,7 +1731,8 @@ top1: ** Tag */ case S_tag: /* new tag */ - if (unsign_c < 127 && isalnum((unsigned char)c)) { + if (unsign_c < 127 && (string->size ? + isalnum((unsigned char)c) : isalpha((unsigned char)c))) { /* ** Add valid ASCII character. 
- FM */ @@ -1728,9 +1748,16 @@ top1: context->first_bracket = FALSE; HTChunkPutc(string, c); break; - } else if (!string->size && (WHITE(c) || c == '=')) {/* <WHITE or <= */ + } else if (!string->size && + (unsign_c <= 160 && + (c != '/' && c != '?' && c != '_' && c != ':'))) { /* - ** Recover the '<' and WHITE or '=' character. - FM & KW + ** '<' must be followed by an ASCII letter to be a valid + ** start tag. Here it isn't, nor do we have a '/' for an + ** end tag, nor one of some other characters with a + ** special meaning for SGML or which are likely to be legal + ** Name Start characters in XML or some other extension. + ** So recover the '<' and following character as data. - FM & KW */ context->state = S_text; PUTC('<'); @@ -1750,29 +1777,35 @@ top1: HTChunkTerminate(string) ; t = SGMLFindTag(dtd, string->data); - if (!t) { - if (c == ':' && 0 == strcasecomp(string->data, "URL")) { - /* - ** Treat <URL: as text rather than a junk tag, - ** so we display it and the URL (Lynxism 8-). - FM - */ - int i; - PUTC('<'); - for (i = 0; i < 3; i++) /* recover */ - PUTC(string->data[i]); - PUTC(c); - if (TRACE) - fprintf(stderr, "SGML: Treating <%s%c as text\n", - string->data, c); - string->size = 0; - context->state = S_text; - } else { - if (TRACE) - fprintf(stderr, "SGML: *** Unknown element %s\n", - string->data); - context->state = (c == '>') ? S_text : S_junk_tag; - } + if (t == context->unknown_tag && c == ':' && + 0 == strcasecomp(string->data, "URL")) { + /* + ** Treat <URL: as text rather than a junk tag, + ** so we display it and the URL (Lynxism 8-). - FM + */ + int i; + PUTC('<'); + for (i = 0; i < 3; i++) /* recover */ + PUTC(string->data[i]); + PUTC(c); + if (TRACE) + fprintf(stderr, "SGML: Treating <%s%c as text\n", + string->data, c); + string->size = 0; + context->state = S_text; break; + } else if (!t) { + if (TRACE) + fprintf(stderr, "SGML: *** Invalid element %s\n", + string->data); + context->state = (c == '>') ? 
S_text : S_junk_tag; + break; + } else if (t == context->unknown_tag) { + if (TRACE) + fprintf(stderr, "SGML: *** Unknown element %s\n", + string->data); + /* Fall through and treat like valid tag for attribute + parsing - kw */ } context->current_tag = t; @@ -2153,6 +2186,18 @@ top1: break; } else context->state = S_tag_gap; +#ifdef EXP_CHARTRANS + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (HTCJK == NOCJK && (context->T.output_utf8 || + context->T.trans_from_uni)) { + HTChunkPutUtf8Char(string, clong); + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); +#endif /* EXP_CHARTRANS */ } else { HTChunkPutc(string, c); } @@ -2171,6 +2216,18 @@ top1: */ context->state = S_esc_sq; HTChunkPutc(string, c); +#ifdef EXP_CHARTRANS + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (HTCJK == NOCJK && (context->T.output_utf8 || + context->T.trans_from_uni)) { + HTChunkPutUtf8Char(string, clong); + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); +#endif /* EXP_CHARTRANS */ } else { HTChunkPutc(string, c); } @@ -2193,6 +2250,18 @@ top1: */ context->state = S_esc_dq; HTChunkPutc(string, c); +#ifdef EXP_CHARTRANS + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (HTCJK == NOCJK && (context->T.output_utf8 || + context->T.trans_from_uni)) { + HTChunkPutUtf8Char(string, clong); + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); +#endif /* EXP_CHARTRANS */ } else { HTChunkPutc(string, c); } @@ -2210,7 +2279,7 @@ top1: } else { t = 
SGMLFindTag(dtd, string->data); } - if (!t) { + if (!t || t == context->unknown_tag) { if (TRACE) fprintf(stderr, "Unknown end tag </%s>\n", string->data); } else { @@ -2589,6 +2658,7 @@ PUBLIC HTStream* SGML_new ARGS3( context->target = target; context->actions = (HTStructuredClass*)(((HTStream*)target)->isa); /* Ugh: no OO */ + context->unknown_tag = &HTTag_unrecognized; context->state = S_text; context->element_stack = 0; /* empty */ #ifdef CALLERDATA diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h index 42524cbe..a3ea248a 100644 --- a/WWW/Library/Implementation/SGML.h +++ b/WWW/Library/Implementation/SGML.h @@ -207,6 +207,7 @@ typedef struct _HTStructuredClass{ int element_number, CONST BOOL* attribute_present, CONST char** attribute_value, + int charset, char ** include)); void (*end_element) PARAMS(( diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h index ac7a5439..ffe0a652 100644 --- a/WWW/Library/Implementation/UCAux.h +++ b/WWW/Library/Implementation/UCAux.h @@ -4,6 +4,7 @@ extern BOOL UCCanUniTranslateFrom PARAMS((int from)); extern BOOL UCCanTranslateUniTo PARAMS((int to)); extern BOOL UCCanTranslateFromTo PARAMS((int from, int to)); +extern BOOL UCNeedNotTranslate PARAMS((int from, int to)); struct _UCTransParams { diff --git a/WWW/Library/Implementation/UCDefs.h b/WWW/Library/Implementation/UCDefs.h index bbfdc33a..876fc075 100644 --- a/WWW/Library/Implementation/UCDefs.h +++ b/WWW/Library/Implementation/UCDefs.h @@ -57,7 +57,7 @@ typedef struct _LYUCcharset { #define UCT_R_8BIT UCT_R_LAT1 | UCT_R_HIGHCTRL /* full 8bit range */ /* - * For the following some coments are in HTAnchor.c. + * For the following some comments are in HTAnchor.c. 
*/ #define UCT_STAGE_MIME 0 #define UCT_STAGE_PARSER 1 /* What the parser (SGML.c) gets to see */ @@ -67,9 +67,10 @@ typedef struct _LYUCcharset { #define UCT_SETBY_NONE 0 #define UCT_SETBY_DEFAULT 1 -#define UCT_SETBY_STRUCTURED 2 /* structured stream stage */ -#define UCT_SETBY_PARSER 3 /* set by SGML parser or similar */ -#define UCT_SETBY_MIME 4 /* set explicitly by MIME charset parameter */ +#define UCT_SETBY_LINK 2 /* set by A or LINK CHARSET= hint */ +#define UCT_SETBY_STRUCTURED 3 /* structured stream stage (HTML.c) */ +#define UCT_SETBY_PARSER 4 /* set by SGML parser or similar */ +#define UCT_SETBY_MIME 5 /* set explicitly by MIME charset parameter */ typedef struct _UCStageInfo { diff --git a/WWW/Library/Implementation/UCMap.h b/WWW/Library/Implementation/UCMap.h index de196752..017ebc92 100644 --- a/WWW/Library/Implementation/UCMap.h +++ b/WWW/Library/Implementation/UCMap.h @@ -17,6 +17,7 @@ extern int UCTransChar PARAMS(( char ch_in, int charset_in, int charset_out)); +PUBLIC int UCReverseTransChar PARAMS((char ch_out, int charset_in, int charset_out)); extern int UCTransCharStr PARAMS(( char * outbuf, int buflen, diff --git a/WWW/Library/Implementation/tcp.h b/WWW/Library/Implementation/tcp.h index e94bc0eb..d8c723fe 100644 --- a/WWW/Library/Implementation/tcp.h +++ b/WWW/Library/Implementation/tcp.h @@ -65,6 +65,7 @@ typedef struct sockaddr_in SockA; /* See netinet/in.h */ #define STDIO_H #endif /* !STDIO_H */ +#ifndef VMS #include <sys/types.h> #if HAVE_DIRENT_H @@ -84,7 +85,8 @@ typedef struct sockaddr_in SockA; /* See netinet/in.h */ # if HAVE_NDIR_H # include <ndir.h> # endif -#endif +#endif /* HAVE_DIRENT_H */ +#endif /* !VMS */ #if TIME_WITH_SYS_TIME # include <sys/time.h> diff --git a/WWW/Library/unix/Makefile b/WWW/Library/unix/Makefile index e894af92..5fea7dd7 100644 --- a/WWW/Library/unix/Makefile +++ b/WWW/Library/unix/Makefile @@ -8,7 +8,7 @@ WWW_MACH = unix ASIS_MACH = hardware/os -CFLAGS = -O -DDEBUG +CFLAGS = -g -DDEBUG LFLAGS = CC = 
cc diff --git a/WWW/Library/vms/descrip.mms b/WWW/Library/vms/descrip.mms index b178e538..d34fe347 100644 --- a/WWW/Library/vms/descrip.mms +++ b/WWW/Library/vms/descrip.mms @@ -176,9 +176,9 @@ CC = gcc ! HTVMSUtils.h, ufc-crypt.h, patchlevel.h MODULES = HTParse, HTAccess, HTTP, HTFile, HTBTree, HTFTP, HTTCP, HTString, - - SGML, HTMLDTD, HTChunk, HTPlain, HTWriter, HTFWriter, HTMLGen, - + SGML, HTMLDTD, HTChunk, HTPlain, HTWriter, HTMLGen, - HTAtom, HTAnchor, HTStyle, HTList, HTAlert, HTRules, HTFormat, - - HTInit, HTMIME, HTHistory, HTNews, HTGopher, HTTelnet, HTFinger, - + HTMIME, HTHistory, HTNews, HTGopher, HTTelnet, HTFinger, - HTWSRC, HTAAUtil, HTAABrow, HTAAServ, HTAAFile, HTPasswd, HTGroup, - HTACL, HTAuth, HTAAProt, HTAssoc, HTLex, HTUU, HTVMSUtils, getpass, - getline, crypt, crypt_util, HTWAIS, HTVMS_WaisUI, HTVMS_WaisProt @@ -218,11 +218,9 @@ clean : !HTMLDTD.obj : HTMLDTD.c HTMLDTD.h SGML.h !HTPlain.obj : HTPlain.c HTPlain.h HTStream.h !HTWriter.obj : HTWriter.c HTWriter.h HTStream.h -!HTFWriter.obj : HTFWriter.c HTFWriter.h HTStream.h !HTMLGen.obj : HTMLGen.c HTMLGen.h HTUtils.h HTMLDTD.h !HTAlert.obj : HTAlert.c HTAlert.h HTUtils.h Version.make !HTRules.obj : HTRules.c HTRules.h HTUtils.h Version.make -!HTInit.obj : HTInit.c HTInit.h HTUtils.h HTList.h !HTMIME.obj : HTMIME.c HTMIME.h HTUtils.h HTList.h !HTTelnet.obj : HTTelnet.c HTTelnet.h HTUtils.h !HTWAIS.obj : HTWAIS.c HTWAIS.h HTUtils.h HTList.h diff --git a/WWW/Library/vms/libmake.com b/WWW/Library/vms/libmake.com index 75cb23de..7d812cfd 100644 --- a/WWW/Library/vms/libmake.com +++ b/WWW/Library/vms/libmake.com @@ -137,7 +137,6 @@ $ cc [-.Implementation]HTMLDTD.c $ cc [-.Implementation]HTChunk.c $ cc [-.Implementation]HTPlain.c $ cc [-.Implementation]HTWriter.c -$ cc [-.Implementation]HTFWriter.c $ cc [-.Implementation]HTMLGen.c $ cc [-.Implementation]HTAtom.c $ cc [-.Implementation]HTAnchor.c @@ -146,7 +145,6 @@ $ cc [-.Implementation]HTList.c $ cc [-.Implementation]HTAlert.c $ cc 
[-.Implementation]HTRules.c $ cc [-.Implementation]HTFormat.c -$ cc [-.Implementation]HTInit.c $ cc [-.Implementation]HTMIME.c $ cc [-.Implementation]HTHistory.c $ cc [-.Implementation]HTNews.c |