diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-17 18:00:00 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-17 18:00:00 -0400 |
commit | cbcc3a1e1a82b01eea370bf7841e6b5f4d1e46c1 (patch) | |
tree | 2df907a422b75fb41590f113d3a0a3365bc667be /src | |
parent | 1d80538b4b84eadd223c7b61839b950389c2d49d (diff) | |
download | lynx-snapshots-cbcc3a1e1a82b01eea370bf7841e6b5f4d1e46c1.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-84
Diffstat (limited to 'src')
-rw-r--r-- | src/AttrList.h | 3 | ||||
-rw-r--r-- | src/GridText.c | 680 | ||||
-rw-r--r-- | src/GridText.h | 5 | ||||
-rw-r--r-- | src/HTFWriter.c | 2 | ||||
-rw-r--r-- | src/HTForms.h | 1 | ||||
-rw-r--r-- | src/HTInit.c | 66 | ||||
-rw-r--r-- | src/HTML.c | 70 | ||||
-rw-r--r-- | src/LYCharSets.c | 3 | ||||
-rw-r--r-- | src/LYCharUtils.c | 887 | ||||
-rw-r--r-- | src/LYCharUtils.h | 2 | ||||
-rw-r--r-- | src/LYCookie.c | 2 | ||||
-rw-r--r-- | src/LYCurses.c | 3 | ||||
-rw-r--r-- | src/LYEditmap.c | 2 | ||||
-rw-r--r-- | src/LYForms.c | 16 | ||||
-rw-r--r-- | src/LYGetFile.c | 15 | ||||
-rw-r--r-- | src/LYHash.c | 11 | ||||
-rw-r--r-- | src/LYHash.h | 16 | ||||
-rw-r--r-- | src/LYKeymap.c | 2 | ||||
-rw-r--r-- | src/LYLeaks.c | 77 | ||||
-rw-r--r-- | src/LYMain.c | 76 | ||||
-rw-r--r-- | src/LYMainLoop.c | 65 | ||||
-rw-r--r-- | src/LYOptions.c | 6 | ||||
-rw-r--r-- | src/LYStrings.c | 33 | ||||
-rw-r--r-- | src/LYStrings.h | 1 | ||||
-rw-r--r-- | src/LYStyle.c | 54 | ||||
-rw-r--r-- | src/LYUtils.c | 53 | ||||
-rw-r--r-- | src/LYUtils.h | 1 | ||||
-rw-r--r-- | src/LYexit.c | 2 | ||||
-rw-r--r-- | src/UCAux.c | 50 | ||||
-rw-r--r-- | src/chrtrans/def7_uni.tbl | 9 | ||||
-rw-r--r-- | src/chrtrans/makefile.dos | 92 |
31 files changed, 1317 insertions, 988 deletions
diff --git a/src/AttrList.h b/src/AttrList.h index d0a95c3c..bc5710db 100644 --- a/src/AttrList.h +++ b/src/AttrList.h @@ -48,10 +48,13 @@ typedef struct { int cattr; /* attributes to go with the color */ } HTCharStyle; +#ifdef NOT_USED + typedef struct _linkedlist { char name[64]; struct _linkedlist *next; } linked_list; +#endif #define HText_characterStyle if(TRACE)fprintf(stderr,"HTC called from %s/%d\n",__FILE__,__LINE__);_internal_HTC diff --git a/src/GridText.c b/src/GridText.c index fe0d5c73..f7dfd402 100644 --- a/src/GridText.c +++ b/src/GridText.c @@ -199,6 +199,7 @@ struct _HText { state; /* Escape sequence? */ char kanji_buf; /* Lead multibyte */ int in_sjis; /* SJIS flag */ + int halted; /* emergency halt */ BOOL have_8bit_chars; /* Any non-ASCII chars? */ #ifdef EXP_CHARTRANS @@ -245,6 +246,124 @@ PRIVATE int HText_TrueLineSize PARAMS(( HText * text, BOOL IgnoreSpaces)); +#ifndef VMS /* VMS has a better way - right? - kw */ +#define CHECK_FREE_MEM +#endif + +#ifdef CHECK_FREE_MEM + +/* + * text->halted = 1: have set fake 'Z' and output a message + * 2: next time when HText_appendCharacter is called + * it will append *** MEMORY EXHAUSTED ***, then set + * to 3. + * 3: normal text output will be suppressed (but not anchors, + * form fields etc.) + */ +PRIVATE void HText_halt NOARGS +{ + if (HTFormNumber > 0) + HText_DisableCurrentForm(); + if (!HTMainText) + return; + if (HTMainText->halted < 2) + HTMainText->halted = 2; +} + +#define MIN_NEEDED_MEM 5000 + +/* + * Check whether factor*min(bytes,MIN_NEEDED_MEM) is available, + * or bytes if factor is 0. + * MIN_NEEDED_MEM and factor together represent a security margin, + * to take account of all the memory allocations where we don't check + * and of buffers which may be emptied before HTCheckForInterupt() + * is (maybe) called and other things happening, with some chance of + * success. 
+ * This just tries to malloc() the to-be-checked-for amount of memory, + * which might make the situation worse depending how allocation works. + * There should be a better way... - kw + */ +PRIVATE BOOL mem_is_avail ARGS2( + size_t, factor, + size_t, bytes) +{ + void *p; + if (bytes < MIN_NEEDED_MEM && factor > 0) + bytes = MIN_NEEDED_MEM; + if (factor == 0) + factor = 1; + p = malloc(factor * bytes); + if (p) { + FREE(p); + return YES; + } else { + return NO; + } +} + +/* + * Replacement for calloc which checks for "enough" free memory + * (with some security margins) and tries various recovery actions + * if deemed necessary. - kw + */ +PRIVATE void * LY_check_calloc ARGS2( + size_t, nmemb, + size_t, size) +{ + int i, n; + if (mem_is_avail(4, nmemb * size)) { + return (calloc(nmemb, size)); + } + n = HTList_count(loaded_texts); + for (i = n - 1; i > 0; i--) { + HText * t = HTList_objectAt(loaded_texts, i); + if (t == HTMainText) + t = NULL; /* shouldn't happen */ + if (TRACE) { + fprintf(stderr, + "\r *** Emergeny freeing document %d/%d for '%s'%s!\n", + i + 1, n, + ((t && t->node_anchor && + t->node_anchor->address) ? + t->node_anchor->address : "unknown anchor"), + ((t && t->node_anchor && + t->node_anchor->post_data) ? 
+ " with POST data" : "")); + } + HTList_removeObjectAt(loaded_texts, i); + HText_free(t); + if (mem_is_avail(4, nmemb * size)) { + return (calloc(nmemb, size)); + } + } + LYFakeZap(YES); + if (!HTMainText || HTMainText->halted <= 1) { + if (!mem_is_avail(2, nmemb * size)) { + HText_halt(); + if (mem_is_avail(0, 700)) { + HTAlert("Memory exhausted, display interrupted!"); + } + } else { + if ((!HTMainText || HTMainText->halted == 0) && + mem_is_avail(0, 700)) { + HTAlert("Memory exhausted, will interrupt transfer!"); + if (HTMainText) + HTMainText->halted = 1; + } + } + } + return (calloc(nmemb, size)); +} + +#define LY_CALLOC LY_check_calloc + +#else + +#define LY_CALLOC calloc + +#endif /* CHECK_FREE_MEM */ + #ifdef EXP_CHARTRANS PRIVATE void HText_getChartransInfo ARGS1( HText *, me) @@ -369,7 +488,8 @@ PUBLIC HText * HText_new ARGS1( * Check the kcode setting if the anchor has a charset element. - FM */ if (anchor->charset) - HText_setKcode(self, anchor->charset); + HText_setKcode(self, anchor->charset, + HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT)); /* * Memory leak fixed. 
@@ -487,6 +607,8 @@ PUBLIC void HText_free ARGS1( FREE(l->input_field->submit_action); FREE(l->input_field->submit_enctype); FREE(l->input_field->submit_title); + + FREE(l->input_field->accept_cs); FREE(l->input_field); } @@ -506,6 +628,7 @@ PUBLIC void HText_free ARGS1( while (NULL != (Tab = (HTTabID *)HTList_nextObject(cur))) { FREE(Tab->name); + FREE(Tab); } HTList_delete(self->tabs); self->tabs = NULL; @@ -1499,9 +1622,9 @@ PRIVATE void split_line ARGS2( HTLine * previous = text->last_line; int ctrl_chars_on_previous_line = 0; char * cp; - HTLine * line = (HTLine *)calloc(1, LINE_SIZE(MAX_LINE)); + HTLine * line = (HTLine *)LY_CALLOC(1, LINE_SIZE(MAX_LINE)); if (line == NULL) - outofmem(__FILE__, "split_line"); + outofmem(__FILE__, "split_line_1"); ctrl_chars_on_this_line = 0; /*reset since we are going to a new line*/ text->LastChar = ' '; @@ -1774,9 +1897,9 @@ PRIVATE void split_line ARGS2( previous->size--; TailTrim++; } - temp = (HTLine *)calloc(1, LINE_SIZE(previous->size)); + temp = (HTLine *)LY_CALLOC(1, LINE_SIZE(previous->size)); if (temp == NULL) - outofmem(__FILE__, "split_line"); + outofmem(__FILE__, "split_line_2"); memcpy(temp, previous, LINE_SIZE(previous->size)); FREE(previous); previous = temp; @@ -1935,6 +2058,15 @@ PUBLIC void HText_appendCharacter ARGS2( if (!text) return; + if (text->halted > 1) { + if (text->halted == 2) { + text->halted = 0; + text->kanji_buf = '\0'; + HText_appendText(text, " *** MEMORY EXHAUSTED ***"); + } + text->halted = 3; + return; + } /* * Make sure we don't hang on escape sequences. */ @@ -2148,6 +2280,12 @@ PUBLIC void HText_appendCharacter ARGS2( if (ch == '\n') { new_line(text); text->in_line_1 = YES; /* First line of new paragraph */ + /* + * There are some pages written in + * different kanji codes. 
- TA & kw + */ + if (HTCJK == JAPANESE) + text->kcode = NOKANJI; return; } @@ -2166,6 +2304,12 @@ PUBLIC void HText_appendCharacter ARGS2( if (ch == '\r') { new_line(text); text->in_line_1 = NO; + /* + * There are some pages written in + * different kanji codes. - TA & kw + */ + if (HTCJK == JAPANESE) + text->kcode = NOKANJI; return; } @@ -2238,7 +2382,7 @@ PUBLIC void HText_appendCharacter ARGS2( if (ch == ' ') { text->permissible_split = (int)line->size; /* Can split here */ /* - * There are some pages witten in + * There are some pages written in * different kanji codes. - TA */ if (HTCJK == JAPANESE) @@ -2928,6 +3072,9 @@ PUBLIC void HText_appendText ARGS2( if (str == NULL) return; + if (text->halted == 3) + return; + for (p = str; *p; p++) { HText_appendCharacter(text, *p); } @@ -2976,6 +3123,11 @@ PUBLIC void HText_endAppend ARGS1( */ new_line(text); + if (text->halted) { + LYFakeZap(NO); + text->halted = 0; + } + /* * Get the first line. */ @@ -5404,7 +5556,8 @@ PUBLIC void HText_setTabID ARGS2( return; /* Already set. Keep the first value. */ last = cur; } - cur = last; + if (last) + cur = last; } if (!Tab) { /* New name. Create a new node */ Tab = (HTTabID *)calloc(1, sizeof(HTTabID)); @@ -6290,6 +6443,11 @@ PUBLIC int HText_beginInput ARGS3( f->value_cs = I->value_cs; } else if (f->type != F_OPTION_LIST_TYPE) { StrAllocCopy(f->value, ""); + /* + * May be an empty INPUT field. The text entered will then + * probably be in the current display character set. - kw + */ + f->value_cs = current_char_set; } /* @@ -6344,6 +6502,16 @@ PUBLIC int HText_beginInput ARGS3( } /* + * Store accept-charset if present. - kw + */ + if (I->accept_cs) { + StrAllocCopy(f->accept_cs, I->accept_cs); + collapse_spaces(f->accept_cs); + for (i = 0; f->accept_cs[i]; i++) + f->accept_cs[i] = TOLOWER(f->accept_cs[i]); + } + + /* * Add numbers to form fields if needed. 
- LE & FM */ switch (f->type) { @@ -6494,12 +6662,14 @@ PUBLIC void HText_SubmitForm ARGS4( char *Boundary = NULL; char *MultipartContentType = NULL; int target_cs = -1; + CONST char *out_csname; CONST char *target_csname = NULL; char *name_used; #ifdef EXP_CHARTRANS BOOL form_has_8bit = NO, form_has_special = NO; BOOL field_has_8bit = NO, field_has_special = NO; BOOL name_has_8bit = NO, name_has_special = NO; + BOOL have_accept_cs = NO; BOOL success; BOOL had_chartrans_warning = NO; char *val_used; @@ -6558,8 +6728,30 @@ PUBLIC void HText_SubmitForm ARGS4( Boundary = "xnyLAaB03X"; } + /* + * Determine in what character encoding (aka. charset) we should + * submit. We call this target_cs and the MIME name for it + * target_csname. + * TODO: - actually use ACCEPT-CHARSET stuff from FORM + * TODO: - deal with list in ACCEPT-CHARSET, find a "best" + * charset to submit + */ #ifdef EXP_CHARTRANS - if (HTMainText->node_anchor->charset && + + if (submit_item->accept_cs && + strcasecomp(submit_item->accept_cs, "UNKNOWN")) + have_accept_cs = YES; + if (submit_item->accept_cs && *submit_item->accept_cs && + strcmp(submit_item->accept_cs, "*") && + strcasecomp(submit_item->accept_cs, "UNKNOWN")) { + target_cs = UCGetLYhndl_byMIME(submit_item->accept_cs); + if (target_cs >= 0) { + target_csname = submit_item->accept_cs; + } + } + + if (target_cs < 0 && + HTMainText->node_anchor->charset && *HTMainText->node_anchor->charset) { target_cs = UCGetLYhndl_byMIME(HTMainText->node_anchor->charset); if (target_cs >= 0) { @@ -6609,7 +6801,9 @@ PUBLIC void HText_SubmitForm ARGS4( len += 32; /* plus and ampersand + safety net */ #ifdef EXP_CHARTRANS - for (p = val; p && *p && !field_has_8bit; p++) + for (p = val; + p && *p && !(field_has_8bit && field_has_special); + p++) if ((*p == HT_NON_BREAK_SPACE) || (*p == HT_EM_SPACE) || (*p == LY_SOFT_HYPHEN)) { @@ -6617,12 +6811,22 @@ PUBLIC void HText_SubmitForm ARGS4( } else if ((*p & 0x80) != 0) { field_has_8bit = YES; } - for (p = 
form_ptr->name; p && *p && !field_has_8bit; p++) - field_has_8bit = ((*p & 0x80) != 0); - if (field_has_8bit) + for (p = form_ptr->name; + p && *p && !(name_has_8bit && name_has_special); + p++) + if ((*p == HT_NON_BREAK_SPACE) || + (*p == HT_EM_SPACE) || + (*p == LY_SOFT_HYPHEN)) { + name_has_special = YES; + } else if ((*p & 0x80) != 0) { + name_has_8bit = YES; + } + + if (field_has_8bit || name_has_8bit) form_has_8bit = YES; - if (field_has_special) + if (field_has_special || name_has_special) form_has_special = YES; + if (!field_has_8bit && !field_has_special) { /* already ok */ } else if (target_cs < 0) { @@ -6641,6 +6845,26 @@ PUBLIC void HText_SubmitForm ARGS4( } else { target_cs = -1; /* don't know what to do */ } + + /* Same for name */ + if (!name_has_8bit && !name_has_special) { + /* already ok */ + } else if (target_cs < 0) { + /* already confused */ + } else if (!name_has_8bit && + (LYCharSet_UC[target_cs].enc == UCT_ENC_8859 || + (LYCharSet_UC[target_cs].like8859 & UCT_R_8859SPECL))) { + /* those specials will be trivial */ + } else if (UCNeedNotTranslate(form_ptr->name_cs, target_cs)) { + /* already ok */ + } else if (UCCanTranslateFromTo(form_ptr->name_cs, target_cs)) { + /* also ok */ + } else if (UCCanTranslateFromTo(target_cs, form_ptr->name_cs)) { + target_cs = form_ptr->value_cs; /* try this */ + target_csname = NULL; /* will be set after loop */ + } else { + target_cs = -1; /* don't know what to do */ + } #endif /* EXP_CHARTRANS */ } else if (anchor_ptr->input_field->number > form_number) { @@ -6707,126 +6931,49 @@ PUBLIC void HText_SubmitForm ARGS4( "application/x-www-form-urlencoded"); } - -#ifndef EXP_CHARTRANS /* - * Append the exended charset info if known, and it is not - * ISO-8859-1 or US-ASCII. We'll assume the user has the - * matching character set selected, or a download offer would - * have been forced and we would not be processing the form - * here. 
We don't yet want to do this unless the server - * indicated the charset in the original transmission, because - * otherwise it might be an old server and CGI script which - * will not parse out the extended charset info, and reject - * the POST Content-Type as invalid. If the ENCTYPE is - * multipart/form-data and the charset is known, set up a - * Content-Type string for the text fields and append the - * charset even if it is ISO-8859-1 or US-ASCII, but don't - * append it to the post_content_type header. Note that we do - * not yet have a way to vary the charset among multipart form - * fields, so this code assumes it is the same for all of the - * text fields. - FM + * If the ENCTYPE is not multipart/form-data, append the + * charset we'll be sending to the post_content_type, IF + * (1) there was an explicit accept-charset attribute, OR + * (2) we have 8-bit or special chars, AND the document had + * an explicit (recognized and accepted) charset parameter, + * AND it or target_csname is different from iso-8859-1, + * OR + * (3) we have 8-bit or special chars, AND the document had + * no explicit (recognized and accepted) charset parameter, + * AND target_cs is different from the currently effective + * assumed charset (which should have been set by the user + * so that it reflects what the server is sending, if the + * document is rendered correctly). + * For multipart/form-data the equivalent will be done later, + * separately for each form field. 
- kw */ - if (HTMainText->node_anchor->charset != NULL && - *HTMainText->node_anchor->charset != '\0') { - if (Boundary == NULL && - strcasecomp(HTMainText->node_anchor->charset, "iso-8859-1") && - strcasecomp(HTMainText->node_anchor->charset, "us-ascii")) { - StrAllocCat(doc->post_content_type, "; charset="); - StrAllocCat(doc->post_content_type, - HTMainText->node_anchor->charset); - } else if (Boundary != NULL) { - MultipartContentType = (char *)calloc(1, - (40 + strlen(HTMainText->node_anchor->charset))); - if (query == NULL) - outofmem(__FILE__, "HText_SubmitForm"); - sprintf(MultipartContentType, - "\r\nContent-Type: text/plain; charset=%s", - HTMainText->node_anchor->charset); - ct_charset_startpos = strchr(MultipartContentType, ';'); - } - } -#else /* EXP_CHARTRANS */ - if (target_cs >= 0 && (form_has_8bit || form_has_special)) { - if (Boundary == NULL) { - if (target_csname && - (strcasecomp(target_csname, "iso-8859-1") || - (HTMainText->node_anchor->charset != NULL && - strcasecomp(HTMainText->node_anchor->charset, - "iso-8859-1")))) { - StrAllocCat(doc->post_content_type, "; charset="); - StrAllocCat(doc->post_content_type, target_csname); + if (have_accept_cs || + (form_has_8bit || form_has_special)) { + if (target_cs >= 0 && target_csname) { + if (Boundary == NULL) { + if ((HTMainText->node_anchor->charset && + (strcmp(HTMainText->node_anchor->charset, + "iso-8859-1") || + strcmp(target_csname, "iso-8859-1"))) || + (!HTMainText->node_anchor->charset && + target_cs != UCLYhndl_for_unspec)) { + StrAllocCat(doc->post_content_type, "; charset="); + StrAllocCat(doc->post_content_type, target_csname); + } } + } else { + had_chartrans_warning = YES; + _user_message( + CANNOT_TRANSCODE_FORM, + target_csname ? 
target_csname : "UNKNOWN"); + sleep(AlertSecs); } } -#endif /* EXP_CHARTRANS */ } -#if 0 /* 000000 */ - { - if (HTMainText->node_anchor->charset != NULL && - *HTMainText->node_anchor->charset != '\0') { -#ifdef EXP_CHARTRANS - /* - * For now, don't send charset if we may have translated. - * Although this is when it would be most needed (unless - * we translate back to the server's charset, which is - * currently not done). But currently there aren't many - * servers or scripts which understand it anyway, so at - * least we try not to lie. - kw - */ -#if 0 - if (!UCNeedNotTranslate(current_char_set, - UCGetLYhndl_byMIME( - HTMainText->node_anchor->charset))); -#endif - if (target_cs < 0) { - /* Do nothing */ - } else -#endif - if (Boundary == NULL && -#ifdef EXP_CHARTRANS - form_has_8bit && - target_cs >= 0 && -#endif - (strcasecomp(HTMainText->node_anchor->charset, "iso-8859-1") || - strcasecomp(target_csname, "iso-8859-1"))) { - StrAllocCat(doc->post_content_type, "; charset="); - StrAllocCat(doc->post_content_type, - HTMainText->node_anchor->charset); - } else - if (Boundary == NULL && -#ifdef EXP_CHARTRANS - target_cs >= 0 && -#endif - strcasecomp(HTMainText->node_anchor->charset, "iso-8859-1") && - strcasecomp(HTMainText->node_anchor->charset, "us-ascii")) { - StrAllocCat(doc->post_content_type, "; charset="); - StrAllocCat(doc->post_content_type, - HTMainText->node_anchor->charset); - } else if (Boundary != NULL) { - MultipartContentType = (char *)calloc(1, - (40 + strlen(HTMainText->node_anchor->charset))); - if (query == NULL) - outofmem(__FILE__, "HText_SubmitForm"); - sprintf(MultipartContentType, - "\r\nContent-Type: text/plain; charset=%s", - HTMainText->node_anchor->charset); - ct_charset_startpos = strchr(MultipartContentType, ';'); - } - } -#ifdef EXP_CHARTRANS - } else if (Boundary == NULL && - form_has_8bit && - target_cs >= 0 && - strcasecomp(target_csname, "iso-8859-1")) { - StrAllocCat(doc->post_content_type, "; charset="); - 
StrAllocCat(doc->post_content_type, - HTMainText->node_anchor->charset); -#endif /* EXP_CHARTRANS */ - } -#endif /* 000000 */ + out_csname = target_csname; /* * Reset anchor->ptr. @@ -6840,7 +6987,6 @@ PUBLIC void HText_SubmitForm ARGS4( if (anchor_ptr->input_field->number == form_number) { char *p; int out_cs; - CONST char * out_csname; form_ptr = anchor_ptr->input_field; if (form_ptr->type != F_TEXTAREA_TYPE) @@ -6890,9 +7036,7 @@ PUBLIC void HText_SubmitForm ARGS4( case F_HIDDEN_TYPE: #ifdef EXP_CHARTRANS /* - * Charset-translate value now, because we need - * to know the charset parameter for multipart - * bodyparts. - kw + * Be sure to actually look at the option submit value. */ if (form_ptr->cp_submit_value != NULL) { val_used = form_ptr->cp_submit_value; @@ -6900,9 +7044,16 @@ PUBLIC void HText_SubmitForm ARGS4( val_used = form_ptr->value; } + /* + * Charset-translate value now, because we need + * to know the charset parameter for multipart + * bodyparts. - kw + */ field_has_8bit = NO; field_has_special = NO; - for (p = val_used; p && *p && !field_has_8bit; p++) { + for (p = val_used; + p && *p && !(field_has_8bit && field_has_special); + p++) { if ((*p == HT_NON_BREAK_SPACE) || (*p == HT_EM_SPACE) || (*p == LY_SOFT_HYPHEN)) { @@ -6933,18 +7084,6 @@ PUBLIC void HText_SubmitForm ARGS4( if (success) { val_used = copied_val_used; } - if (Boundary) { - if (!success) { - StrAllocCopy(MultipartContentType, ""); - target_csname = NULL; - } else { - if (!target_csname) - target_csname = LYCharSet_UC[target_cs].MIMEname; - StrAllocCopy(MultipartContentType, - "\r\nContent-Type: text/plain; charset="); - StrAllocCat(MultipartContentType, target_csname); - } - } } else { /* We can use the value directly. */ if (TRACE) { fprintf(stderr, @@ -6953,22 +7092,22 @@ PUBLIC void HText_SubmitForm ARGS4( target_cs, target_csname ? 
target_csname : "???"); } - copied_val_used = NULL; success = YES; } if (!success) { if (!had_chartrans_warning) { had_chartrans_warning = YES; _user_message( - "Cannot convert form data to charset %s!", + CANNOT_TRANSCODE_FORM, target_csname ? target_csname : "UNKNOWN"); sleep(AlertSecs); } out_cs = form_ptr->value_cs; - out_csname = LYCharSet_UC[out_cs].MIMEname; } else { out_cs = target_cs; } + if (out_cs >= 0) + out_csname = LYCharSet_UC[out_cs].MIMEname; if (Boundary) { if (!success && form_ptr->value_cs < 0) { /* This is weird. */ @@ -6976,10 +7115,9 @@ PUBLIC void HText_SubmitForm ARGS4( "\r\nContent-Type: text/plain; charset="); StrAllocCat(MultipartContentType, "UNKNOWN-8BIT"); } else if (!success) { - target_csname = LYCharSet_UC[form_ptr->value_cs].MIMEname; StrAllocCopy(MultipartContentType, "\r\nContent-Type: text/plain; charset="); - StrAllocCat(MultipartContentType, target_csname); + StrAllocCat(MultipartContentType, out_csname); target_csname = NULL; } else { if (!target_csname) { @@ -6987,7 +7125,7 @@ PUBLIC void HText_SubmitForm ARGS4( } StrAllocCopy(MultipartContentType, "\r\nContent-Type: text/plain; charset="); - StrAllocCat(MultipartContentType, target_csname); + StrAllocCat(MultipartContentType, out_csname); } } @@ -7009,7 +7147,9 @@ PUBLIC void HText_SubmitForm ARGS4( name_has_8bit = NO; name_has_special = NO; - for (p = name_used; p && *p && !name_has_8bit; p++) { + for (p = name_used; + p && *p && !(name_has_8bit && name_has_special); + p++) { if ((*p == HT_NON_BREAK_SPACE) || (*p == HT_EM_SPACE) || (*p == LY_SOFT_HYPHEN)) { @@ -7047,9 +7187,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { if (!target_csname) target_csname = LYCharSet_UC[target_cs].MIMEname; - StrAllocCopy(MultipartContentType, - "\r\nContent-Type: text/plain; charset="); - StrAllocCat(MultipartContentType, target_csname); } } } else { /* We can use the name directly. 
*/ @@ -7069,12 +7206,24 @@ PUBLIC void HText_SubmitForm ARGS4( if (!had_chartrans_warning) { had_chartrans_warning = YES; _user_message( - "Cannot convert form name to charset %s!", + CANNOT_TRANSCODE_FORM, target_csname ? target_csname : "UNKNOWN"); sleep(AlertSecs); } } if (Boundary) { + /* + * According to RFC 1867, Non-ASCII field names + * "should be encoded according to the prescriptions + * of RFC 1522 [...]. I don't think RFC 1522 actually + * is meant to apply to parameters like this, and it + * is unknown wheter any server would make sense of + * it, so for now just use some quoting/escaping and + * otherwise leave 8-bit values as they are. + * Non-ASCII characters in form field names submitted + * as multipart/form-data can only occur if the form + * provider specifically asked for it anyway. - kw + */ HTMake822Word(&copied_name_used); name_used = copied_name_used; } @@ -7109,7 +7258,7 @@ PUBLIC void HText_SubmitForm ARGS4( (form_ptr->type == F_TEXT_SUBMIT_TYPE || (form_ptr->value && *form_ptr->value != '\0' && !strcmp(form_ptr->value, link_value)))) { - int cdisp_name_startpos; + int cdisp_name_startpos = 0; if (first_one) { if (Boundary) { sprintf(&query[strlen(query)], @@ -7143,60 +7292,6 @@ PUBLIC void HText_SubmitForm ARGS4( escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } -#ifndef EXP_CHARTRANS - /* - * Be sure to actually look at - * the option submit value. - */ - if (form_ptr->cp_submit_value != NULL) { - for (i = 0; form_ptr->cp_submit_value[i]; i++) { - if (form_ptr->cp_submit_value[i] == - HT_NON_BREAK_SPACE || - form_ptr->cp_submit_value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->cp_submit_value[i] = ' '; - } else { - form_ptr->cp_submit_value[i] = 160; - } - } else if (form_ptr->cp_submit_value[i] == - LY_SOFT_HYPHEN) { - form_ptr->cp_submit_value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, - (form_ptr->cp_submit_value ? 
- form_ptr->cp_submit_value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, - URL_XALPHAS); - } - } else { - for (i = 0; form_ptr->value[i]; i++) { - if (form_ptr->value[i] == - HT_NON_BREAK_SPACE || - form_ptr->value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->value[i] = ' '; - } else { - form_ptr->value[i] = 160; - } - } else if (form_ptr->value[i] == - LY_SOFT_HYPHEN) { - form_ptr->value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? - form_ptr->value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->value, - URL_XALPHAS); - } - } -#else /* EXP_CHARTRANS */ if (PlainText || Boundary) { StrAllocCopy(escaped2, (val_used ? @@ -7204,7 +7299,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { escaped2 = HTEscapeSP(val_used, URL_XALPHAS); } -#endif /* EXP_CHARTRANS */ if (form_ptr->type == F_IMAGE_SUBMIT_TYPE) { /* @@ -7293,60 +7387,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } -#ifndef EXP_CHARTRANS - /* - * Be sure to use the submit option value. - */ - if (form_ptr->cp_submit_value != NULL) { - for (i = 0; form_ptr->cp_submit_value[i]; i++) { - if (form_ptr->cp_submit_value[i] == - HT_NON_BREAK_SPACE || - form_ptr->cp_submit_value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->cp_submit_value[i] = ' '; - } else { - form_ptr->cp_submit_value[i] = 160; - } - } else if (form_ptr->cp_submit_value[i] == - LY_SOFT_HYPHEN) { - form_ptr->cp_submit_value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, - (form_ptr->cp_submit_value ? 
- form_ptr->cp_submit_value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, - URL_XALPHAS); - } - } else { - for (i = 0; form_ptr->value[i]; i++) { - if (form_ptr->value[i] == - HT_NON_BREAK_SPACE || - form_ptr->value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->value[i] = ' '; - } else { - form_ptr->value[i] = 160; - } - } else if (form_ptr->value[i] == - LY_SOFT_HYPHEN) { - form_ptr->value[i] = 173; - - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? - form_ptr->value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->value, - URL_XALPHAS); - } - } -#else /* EXP_CHARTRANS */ if (PlainText || Boundary) { StrAllocCopy(escaped2, (val_used ? @@ -7354,7 +7394,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { escaped2 = HTEscapeSP(val_used, URL_XALPHAS); } -#endif /* EXP_CHARTRANS */ sprintf(&query[strlen(query)], "%s%s%s%s%s", @@ -7376,26 +7415,6 @@ PUBLIC void HText_SubmitForm ARGS4( break; case F_TEXTAREA_TYPE: -#ifndef EXP_CHARTRANS - for (i = 0; form_ptr->value[i]; i++) { - if (form_ptr->value[i] == HT_NON_BREAK_SPACE || - form_ptr->value[i] == HT_EM_SPACE) { - if (PlainText) { - form_ptr->value[i] = ' '; - } else { - form_ptr->value[i] = 160; - } - } else if (form_ptr->value[i] == LY_SOFT_HYPHEN) { - form_ptr->value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? - form_ptr->value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->value, URL_XALPHAS); - } -#else /* EXP_CHARTRANS */ if (PlainText || Boundary) { StrAllocCopy(escaped2, (val_used ? @@ -7403,7 +7422,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { escaped2 = HTEscapeSP(val_used, URL_XALPHAS); } -#endif /* EXP_CHARTRANS */ if (!last_textarea_name || strcmp(last_textarea_name, form_ptr->name)) { @@ -7532,59 +7550,6 @@ PUBLIC void HText_SubmitForm ARGS4( escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } -#ifndef EXP_CHARTRANS - /* - * Be sure to actually look at the option submit value. 
- */ - if (form_ptr->cp_submit_value != NULL) { - for (i = 0; form_ptr->cp_submit_value[i]; i++) { - if (form_ptr->cp_submit_value[i] == - HT_NON_BREAK_SPACE || - form_ptr->cp_submit_value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->cp_submit_value[i] = ' '; - } else { - form_ptr->cp_submit_value[i] = 160; - } - } else if (form_ptr->cp_submit_value[i] == - LY_SOFT_HYPHEN) { - form_ptr->cp_submit_value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, - (form_ptr->cp_submit_value ? - form_ptr->cp_submit_value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, - URL_XALPHAS); - } - } else { - for (i = 0; form_ptr->value[i]; i++) { - if (form_ptr->value[i] == - HT_NON_BREAK_SPACE || - form_ptr->value[i] == - HT_EM_SPACE) { - if (PlainText) { - form_ptr->value[i] = ' '; - } else { - form_ptr->value[i] = 160; - } - } else if (form_ptr->value[i] == - LY_SOFT_HYPHEN) { - form_ptr->value[i] = 173; - } - } - if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? - form_ptr->value : "")); - } else { - escaped2 = HTEscapeSP(form_ptr->value, - URL_XALPHAS); - } - } -#else /* EXP_CHARTRANS */ if (PlainText || Boundary) { StrAllocCopy(escaped2, (val_used ? @@ -7592,7 +7557,6 @@ PUBLIC void HText_SubmitForm ARGS4( } else { escaped2 = HTEscapeSP(val_used, URL_XALPHAS); } -#endif /* EXP_CHARTRANS */ sprintf(&query[strlen(query)], "%s%s%s%s%s", @@ -7892,16 +7856,36 @@ PUBLIC BOOL HText_hasUTF8OutputSet ARGS1( /* ** Check charset and set the kcode element. - FM +** Info on the input charset may be passed in in two forms, +** as a string (if given explicitly) and as a pointer to +** a LYUCcharset (from chartrans mechanism); either can be NULL. +** For Japanes the kcode will be reset at a space or explicit +** line or paragraph break, so what we set here may not last for +** long. It's potentially more important not to set HTCJK to +** NOCJK unless we are sure. 
- kw */ -PUBLIC void HText_setKcode ARGS2( +PUBLIC void HText_setKcode ARGS3( HText *, text, - CONST char *, charset) + CONST char *, charset, + LYUCcharset *, p_in) { if (!text) return; /* - ** Check whether we have a sepecified charset. - FM + ** Check whether we have some king of info. - kw + */ + if (!charset && !p_in) { + return; + } + /* + ** If no explicit charset string, use the implied one. - kw + */ + if (!charset || *charset == '\0') { + charset = p_in->MIMEname; + } + /* + ** Check whether we have a specified charset. - FM */ if (!charset || *charset == '\0') { return; @@ -7915,7 +7899,9 @@ PUBLIC void HText_setKcode ARGS2( */ if (!strcmp(charset, "shift_jis")) { text->kcode = SJIS; - } else if (!strcmp(charset, "euc-jp") || + } else if ((p_in && (p_in->enc == UCT_ENC_CJK)) || + !strcmp(charset, "euc-jp") || + !strncmp(charset, "x-euc-", 6) || !strcmp(charset, "iso-2022-jp") || !strcmp(charset, "iso-2022-jp-2") || !strcmp(charset, "euc-kr") || @@ -7927,10 +7913,14 @@ PUBLIC void HText_setKcode ARGS2( text->kcode = EUC; } else { /* - ** If we get to here, it's not CJK, so disable that. - FM + ** If we get to here, it's not CJK, so disable that if + ** it is enabled. But only if we are quite sure. 
- FM & kw */ text->kcode = NOKANJI; - HTCJK = NOCJK; + if (HTCJK != NOCJK) { + if (!p_in || p_in->enc != UCT_ENC_CJK) + HTCJK = NOCJK; + } } return; diff --git a/src/GridText.h b/src/GridText.h index a3fe5fa5..9247e14d 100644 --- a/src/GridText.h +++ b/src/GridText.h @@ -192,7 +192,10 @@ extern void HText_setNoCache PARAMS((HText *text)); extern BOOL HText_hasNoCacheSet PARAMS((HText *text)); extern BOOL HText_hasUTF8OutputSet PARAMS((HText *text)); -extern void HText_setKcode PARAMS((HText *text, CONST char *charset)); +extern void HText_setKcode PARAMS(( + HText * text, + CONST char * charset, + LYUCcharset * p_in)); extern void HText_setBreakPoint PARAMS((HText *text)); diff --git a/src/HTFWriter.c b/src/HTFWriter.c index 719ab8df..a6295ee9 100644 --- a/src/HTFWriter.c +++ b/src/HTFWriter.c @@ -1102,7 +1102,7 @@ Compressed_tempname: /* * Make command to process file. - FM */ -#if USE_ZLIB +#ifdef USE_ZLIB if (compress_suffix[0] == 'g' && /* must be gzip */ !me->viewer_command) { /* diff --git a/src/HTForms.h b/src/HTForms.h index 7e384cd5..b140f4d7 100644 --- a/src/HTForms.h +++ b/src/HTForms.h @@ -78,6 +78,7 @@ typedef struct _FormInfo { int disabled; /* If YES, can't change values */ int name_cs; int value_cs; + char * accept_cs; } FormInfo; #define HYPERTEXT_ANCHOR 1 diff --git a/src/HTInit.c b/src/HTInit.c index 532f9c5b..540c1c67 100644 --- a/src/HTInit.c +++ b/src/HTInit.c @@ -180,6 +180,11 @@ PUBLIC void HTFormatInit NOARGS HTLoadTypesConfigFile(buffer); } + /* + * Put text/html and text/plain at beginning of list. 
- kw + */ + HTReorderPresentation(WWW_PLAINTEXT, WWW_PRESENT); + HTReorderPresentation(WWW_HTML, WWW_PRESENT); } PUBLIC void HTPreparsedFormatInit NOARGS @@ -232,7 +237,7 @@ PRIVATE int ExitWithError PARAMS((char *txt)); PRIVATE int PassesTest PARAMS((struct MailcapEntry *mc)); #define LINE_BUF_SIZE 2000 -#define TMPFILE_NAME_SIZE 127 +#define TMPFILE_NAME_SIZE 256 PRIVATE char *GetCommand ARGS2(char *,s, char **,t) { @@ -282,8 +287,8 @@ PRIVATE char *Cleanse ARGS1(char *,s) *tmp = TOLOWER ((unsigned char)*tmp); } /* strip trailing white space */ - while (*--tmp && isspace((unsigned char) *tmp)) - *tmp = 0; + while ((tmp > news) && *--tmp && isspace((unsigned char) *tmp)) + *tmp = '\0'; return(news); } @@ -444,13 +449,17 @@ assign_presentation: return(1); } -PRIVATE void BuildCommand ARGS3(char *,Buf, char *,controlstring, - char *,TmpFileName) +PRIVATE void BuildCommand ARGS5( + char **, pBuf, + size_t, Bufsize, + char *, controlstring, + char *, TmpFileName, + size_t, TmpFileLen) { char *from, *to; int prefixed = 0; - for (from = controlstring, to = Buf; *from != '\0'; from++) { + for (from = controlstring, to = *pBuf; *from != '\0'; from++) { if (prefixed) { prefixed = 0; switch(*from) { @@ -465,7 +474,17 @@ PRIVATE void BuildCommand ARGS3(char *,Buf, char *,controlstring, controlstring); } case 's': - if (TmpFileName) { + if (TmpFileLen && TmpFileName) { + if ((to - *pBuf) + TmpFileLen + 1 > Bufsize) { + *to = '\0'; + if (TRACE) { + fprintf(stderr, + "Too long mailcap \"test\" clause, ignoring: %s%s...\n", + *pBuf, TmpFileName); + } + **pBuf = '\0'; + return; + } strcpy(to, TmpFileName); to += strlen(TmpFileName); } @@ -483,6 +502,16 @@ PRIVATE void BuildCommand ARGS3(char *,Buf, char *,controlstring, } else { *to++ = *from; } + if (to >= *pBuf + Bufsize) { + (*pBuf)[Bufsize - 1] = '\0'; + if (TRACE) { + fprintf(stderr, + "Too long mailcap \"test\" clause, ignoring: %s...\n", + *pBuf); + } + **pBuf = '\0'; + return; + } } *to = 0; } @@ -561,7 +590,9 @@ PRIVATE 
int PassesTest ARGS1(struct MailcapEntry *,mc) cmd = (char *)malloc(1024); if (!cmd) ExitWithError("Out of memory"); - BuildCommand(cmd, mc->testcommand, TmpFileName); + BuildCommand(&cmd, 1024, + mc->testcommand, + TmpFileName, strlen(TmpFileName)); if (TRACE) fprintf(stderr,"Executing test command: %s\n", cmd); result = system(cmd); @@ -882,23 +913,30 @@ PUBLIC void HTFileInit NOARGS /* The following is lifted from NCSA httpd 1.0a1, by Rob McCool; NCSA httpd is in the public domain, as is this code. */ +/* modified Oct 97 - kw */ #define MAX_STRING_LEN 256 PRIVATE int HTGetLine ARGS3(char *,s, int,n, FILE *,f) { - register int i = 0; + register int i = 0, r; if (!f) return(1); while (1) { - s[i] = (char)fgetc(f); - - if (s[i] == CR) - s[i] = fgetc(f); + r = fgetc(f); + s[i] = (char)r; + + if (s[i] == CR) { + r = fgetc(f); + if (r == LF) + s[i] = r; + else if (r != EOF) + ungetc(r, f); + } - if ((s[i] == EOF) || (s[i] == LF) || (i == (n-1))) { + if ((r == EOF) || (s[i] == LF) || (s[i] == CR) || (i == (n-1))) { s[i] = '\0'; return (feof(f) ? 
1 : 0); } diff --git a/src/HTML.c b/src/HTML.c index f3040bb4..5da10c39 100644 --- a/src/HTML.c +++ b/src/HTML.c @@ -528,7 +528,7 @@ char prevailing_class[TEMPSTRINGSIZE]; LYUCFullyTranslateString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML) #define TRANSLATE_AND_UNESCAPE_ENTITIES4(s, cs_to, p, h) \ - LYUCFullyTranslateString(s, ATTR_CS_IN, cs_to, YES, p, h, st_HTML) + LYUCFullyTranslateString(s, ATTR_CS_IN, cs_to, YES, p, h, st_HTML) /* not used */ #define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \ LYUCFullyTranslateString(s, cs_from, cs_to, YES, p, h, st_HTML) @@ -548,11 +548,51 @@ char prevailing_class[TEMPSTRINGSIZE]; #else /* !EXP_CHARTRANS */ +#ifdef OLDSTUFF #define ATTR_CS_IN 0 +#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \ + if (current_char_set) LYExpandString(s); LYUnEscapeEntities(*(s), p, h) +/* if (current_char_set) LYExpandString(s); LYUnEscapeEntities(*(s), p, FALSE) */ + +#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \ + LYUnEscapeEntities(*(s), p, h) +/* LYUnEscapeEntities(*(s), TRUE, h) */ +/* LYUnEscapeEntities(*(s), TRUE, FALSE) */ + + +#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \ + if (me->UsePlainSpace && !me->HiddenValue) LYExpandString(me, s);\ + LYUnEscapeEntities(*(s), me->UsePlainSpace, me->HiddenValue) +/* if (current_char_set && (p && !h)) LYExpandString(me, *(s));\ + LYUnEscapeEntities(*(s), p, h) */ + #define TRANSLATE_AND_UNESCAPE_TO_STD(s) \ LYUnEscapeToLatinOne(s, TRUE) /* for now */ #define UNESCAPE_FIELDNAME_TO_STD(s) ; /* no-op */ +#endif /* OLDSTUFF */ + +#ifdef NOTUSED_FOTEMODS +/* Roughly (and untested!), the equivalents if one would use the + * LYCharUtils.c from FOTEMODS 1997-10-06 instead: */ +#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \ + LYExpandString(me, s); LYUnEscapeEntities(me, s) + LYUCFullyTranslateString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML) + +#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \ + 
LYUnEscapeEntities(me, s) + +#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \ + if (me->UsePlainSpace && !me->HiddenValue) LYExpandString(me, s);\ + LYUnEscapeEntities(me, s) + +#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \ + LYUnEscapeToLatinOne(me, s, TRUE) + +#define UNESCAPE_FIELDNAME_TO_STD(s) ; /* no-op (?) */ + +#endif /* NOTUSED_FOTEMODS */ + #endif /* !EXP_CHARTRANS */ #define CHECK_ID(code) LYCheckForID(me, present, value, (int)code) @@ -653,7 +693,7 @@ PRIVATE void HTML_start_element ARGS6( fprintf(stderr, " ca=%d\n", hashStyles[hcode].color); } - if (displayStyles[element_number].color > -1) /* actually set */ + if (displayStyles[element_number + STARTAT].color > -2) /* actually set */ { if (TRACE) fprintf(stderr, "CSSTRIM: start_element: top <%s>\n", HTML_dtd.tags[element_number].name); @@ -1085,7 +1125,7 @@ PRIVATE void HTML_start_element ARGS6( HText_beginAnchor(me->text, me->inUnderline, me->CurrentA); if (me->inBoldH == FALSE) HText_appendCharacter(me->text, LY_BOLD_START_CHAR); -#if USE_COLOR_STYLE +#ifdef USE_COLOR_STYLE if (present && present[HTML_LINK_CLASS] && value && *value[HTML_LINK_CLASS]!='\0') { @@ -1823,6 +1863,7 @@ PRIVATE void HTML_start_element ARGS6( */ target = ((1.0 * atoi(value[HTML_TAB_INDENT])) / enval) + 0.5; } + FREE(temp); /* * If we are being directed to a column too far to the left * or right, just add a collapsible space, otherwise, add the @@ -4936,10 +4977,13 @@ PRIVATE void HTML_start_element ARGS6( StrAllocCopy(me->textarea_name, ""); } - if (present && present[HTML_TEXTAREA_ACCEPT_CHARSET] && - value[HTML_TEXTAREA_ACCEPT_CHARSET]) { - StrAllocCopy(me->textarea_accept_cs, value[HTML_TEXTAREA_ACCEPT_CHARSET]); - TRANSLATE_AND_UNESCAPE_TO_STD(&me->textarea_accept_cs); + if (present && present[HTML_TEXTAREA_ACCEPT_CHARSET]) { + if (value[HTML_TEXTAREA_ACCEPT_CHARSET]) { + StrAllocCopy(me->textarea_accept_cs, value[HTML_TEXTAREA_ACCEPT_CHARSET]); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->textarea_accept_cs); 
+ } else { + StrAllocCopy(me->textarea_accept_cs, "UNKNOWN"); + } } else { FREE(me->textarea_accept_cs); } @@ -7030,7 +7074,9 @@ End_Object: /* reset the prevailing class to the previous one */ { char *dot=strrchr(Style_className,'.'); - strcpy (prevailing_class, dot ? (char*)(dot+1) : ""); + LYstrncpy(prevailing_class, + dot ? (char*)(dot+1) : "", + (TEMPSTRINGSIZE - 1)); } #endif } @@ -7326,7 +7372,7 @@ PRIVATE void get_styles NOARGS */ PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */ { - "text/html", + "Lynx_HTML_Handler", HTML_free, HTML_abort, HTML_put_character, HTML_put_string, HTML_write, @@ -7500,6 +7546,12 @@ PUBLIC HTStructured* HTML_new ARGS3( me->comment_start = NULL; me->comment_end = NULL; +#ifdef USE_COLOR_STYLE + Style_className[0] = '\0'; + class_string[0] = '\0'; + prevailing_class[0] = '\0'; +#endif + #ifdef EXP_CHARTRANS LYGetChartransInfo(me); UCTransParams_clear(&me->T); diff --git a/src/LYCharSets.c b/src/LYCharSets.c index 41b5a138..17fdd3af 100644 --- a/src/LYCharSets.c +++ b/src/LYCharSets.c @@ -13,6 +13,7 @@ #include "HTFont.h" #include "GridText.h" #include "LYCurses.h" +#include "LYStrings.h" #include "LYexit.h" #include "LYLeaks.h" @@ -2314,6 +2315,8 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) } #endif /* USE_SLANG */ + ena_csi((LYlowest_eightbit[current_char_set] > 155)); + return; } diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c index b25b30ac..25ca634d 100644 --- a/src/LYCharUtils.c +++ b/src/LYCharUtils.c @@ -4,6 +4,7 @@ */ #include "HTUtils.h" #include "tcp.h" +#include "SGML.h" #define Lynx_HTML_Handler #include "HTChunk.h" @@ -351,8 +352,9 @@ PUBLIC void LYFillLocalFileURL ARGS2( * directory listing for the current default. 
- FM */ if (!strcmp(*href, "file://localhost")) { + char *temp2; #ifdef VMS - StrAllocCat(*href, HTVMS_wwwName(getenv("PATH"))); + temp2 = HTVMS_wwwName(getenv("PATH")); #else char curdir[DIRNAMESIZE]; #if HAVE_GETCWD @@ -361,11 +363,24 @@ PUBLIC void LYFillLocalFileURL ARGS2( getwd (curdir); #endif /* NO_GETCWD */ #ifdef DOSPATH - StrAllocCat(*href, HTDOS_wwwName(curdir)); + temp2 = HTDOS_wwwName(curdir); #else - StrAllocCat(*href, curdir); + temp2 = curdir; #endif /* DOSPATH */ #endif /* VMS */ + if (temp2[0] != '/') + StrAllocCat(*href, "/"); + /* + * Check for pathological cases - current dir has chars which + * MUST BE URL-escaped - kw + */ + if (strchr(temp2, '%') != NULL || strchr(temp2, '#') != NULL) { + FREE(temp); + temp = HTEscape(temp2, URL_PATH); + StrAllocCat(*href, temp); + } else { + StrAllocCat(*href, temp2); + } } #ifdef VMS @@ -823,85 +838,6 @@ PUBLIC void LYGetChartransInfo ARGS1( #endif /* EXP_CHARTRANS */ -/* -** This function reallocates an allocated string with -** 8-bit printable Latin characters (>= 160) converted -** to their HTML entity names and then translated for -** the current character set. - FM -*/ -PUBLIC void LYExpandString_old ARGS1( - char **, str) -{ - char *p = *str; - char *q = *str; - CONST char *name; - int i, j, value, high, low, diff = 0; - - /* - ** Don't do anything if we have no string - ** or are in CJK mode. - FM - */ - if (!p || *p == '\0' || - HTCJK != NOCJK) - return; - - /* - ** Start a clean copy of the string, without - ** invalidating our pointer to the original. - FM - */ - *str = NULL; - StrAllocCopy(*str, ""); - - /* - ** Check each character in the original string, - ** and add the characters or substitutions to - ** our clean copy. - FM - */ - for (i = 0; p[i]; i++) { - /* - ** Substitute Lynx special character for - ** 160 (nbsp) if HTPassHighCtrlRaw is not - ** set. 
- FM - */ - if (((unsigned char)p[i]) == 160 && - !HTPassHighCtrlRaw) { - p[i] = HT_NON_BREAK_SPACE; - /* - ** Substitute Lynx special character for - ** 173 (shy) if HTPassHighCtrlRaw is not - ** set. - FM - */ - } else if (((unsigned char)p[i]) == 173 && - !HTPassHighCtrlRaw) { - p[i] = LY_SOFT_HYPHEN; - /* - ** Substitute other 8-bit characters based on - ** the LYCharsets.c tables if HTPassEightBitRaw - ** is not set. - FM - */ - } else if (((unsigned char)p[i]) > 160 && - !HTPassEightBitRaw) { - value = (int)(((unsigned char)p[i]) - 160); - p[i] = '\0'; - StrAllocCat(*str, q); - q = &p[i+1]; - name = HTMLGetEntityName(value); - for (low = 0, high = HTML_dtd.number_of_entities; - high > low; - diff < 0 ? (low = j+1) : (high = j)) { - /* Binary search */ - j = (low + (high-low)/2); - diff = strcmp(HTML_dtd.entity_names[j], name); - if (diff == 0) { - StrAllocCat(*str, p_entity_values[j]); - break; - } - } - } - } - StrAllocCat(*str, q); - free_and_clear(&p); -} /* ** Get UCS character code for one character from UTF-8 encoded string. @@ -1017,35 +953,83 @@ PRIVATE char * UCPutUtf8ToBuffer ARGS3(char *, q, UCode_t, code, BOOL, terminate } } + /* as in HTParse.c, saves some calls - kw */ PRIVATE char *hex = "0123456789ABCDEF"; /* -** This function converts HTML named entities within a string -** to their translations in the active LYCharSets.c array. -** It also converts numeric entities to their HTML entity names -** and then similarly translates those. The string is converted -** in place, on the assumption that the conversion strings will -** not be longer than the entity strings, such that the overall -** string will never grow. This assumption is true for the -** current LYCharSets arrays. Make sure it stays true! If -** plain_space is TRUE, nbsp (160) will be treated as an ASCII +** This function translates a string from charset +** `cs_from' to charset `cs_to', reallocating it if necessary. 
+** If `do_ent' is YES, it also converts HTML named entities +** and numeric character references (NCRs) to their `cs_to' +** replacements. +** If plain_space is TRUE, nbsp (160) will be treated as an ASCII ** space (32). If hidden is TRUE, entities will be translated -** but escape sequences will be passed unaltered. - FM +** (if `do_ent' is YES) but escape sequences will be passed unaltered. +** If `hidden' is FALSE, some characters are converted to Lynx special +** codes (160, 173, .. @@ need list @@) (or ASCII space if `plain_space' +** applies). @@ is `use_lynx_specials' needed, does it have any effect? @@ +** If `use_lynx_specials' is YES, translate byte values 160 and 173 +** meaning U+00A0 and U+00AD given as or converted from raw char input +** are converted to HT_NON_BREAK_SPACE and LY_SOFT_HYPHEN, respectively +** (unless input and output charset are both iso-8859-1, for compatibility +** with previous usage in HTML.c) even if `hidden' or `plain_space' is set. +** +** If `Back' is YES, the reverse is done instead i.e. Lynx special codes +** in the input are translated back to character values. +** +** If `Back' is YES, an attempt is made to use UCReverseTransChar() for +** back translation which may be more efficient. (?) +** +** Named entities may be converted to their translations in the +** active LYCharSets.c array for `cs_out' or looked up as a Unicode +** value which is then passed to the chartrans functions (see UCdomap.c). +** @@ order? @@ +** NCRs with values in the ISO-8859-1 range 160-255 may be converted +** to their HTML entity names and then translated according to the +** LYCharSets.c array for `cs_out', in general NCRs are translated +** by UCdomap.c chartrans functions if necessary. +** +** If `stype' is st_URL, non-ASCII characters are URL-encoded instead. 
+** The sequence of bytes being URL-encoded is the raw input character if +** we couldn't translate it from `cs_in' (CJK etc.); otherwise it is the +** UTF-8 representation if either `cs_to' requires this or if the +** character's Unicode value is > 255, otherwise it should be the iso-8859-1 +** representation. +** No general URL-encoding occurs for displayable ASCII characters and +** spaces and some C0 controls valid in HTML (LF, TAB), it is expected +** that other functions will take care of that as appropriate. +** +** Escape characters (0x1B, '\033') are +** - URL-encoded if `stype' is st_URL, otherwise +** - dropped if `stype' is st_other, otherwise (i.e. st_HTML) +** - passed if `hidden' is TRUE or HTCJK is set, otherwise +** - dropped. */ -PRIVATE char ** LYUnEscapeEntities ARGS5( +/* +** Returns pointer to the char** passed in +** if string translated or translation unnecessary, +** NULL otherwise +** (in which case something probably went wrong.) +*/ + +PRIVATE char ** LYUCFullyTranslateString_1 ARGS9( char **, str, + int, cs_from, int, cs_to, + BOOLEAN, do_ent, + BOOL, use_lynx_specials, BOOLEAN, plain_space, BOOLEAN, hidden, + BOOL, Back, CharUtil_st, stype) { char * p; - char *q, *Str; + char *q, *qs; + HTChunk *chunk = NULL; char * cp; char cpe; char *esc = NULL; - char buf[2]; - char replace_buf[61]; + char replace_buf[64]; int uck; int lowest_8; UCode_t code; @@ -1056,6 +1040,10 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( CONST char ** entities = HTML_dtd.entity_names; CONST UC_entity_info * extra_entities = HTML_dtd.extra_entity_info; CONST char * name; + BOOLEAN no_bytetrans; + UCTransParams T; + BOOL from_is_utf8; + char * puni; enum _state { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren, S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_check_name, S_named, @@ -1072,40 +1060,92 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( if (!str || *str == NULL || **str == '\0') return str; /* + ** Don't do byte translation + ** if original 
AND target character sets + ** are both iso-8859-1, + ** or if we are in CJK mode. + */ + no_bytetrans = ((cs_to <= 0 && cs_from == cs_to) || + HTCJK != NOCJK); + + /* No need to translate or examine the string any further */ + if (!no_bytetrans) + no_bytetrans = (!use_lynx_specials && !Back && + UCNeedNotTranslate(cs_from, cs_to)); + + /* ** Save malloc/calloc overhead in simple case - kw */ - if (hidden && (stype != st_URL) && (strchr(*str, '&') == NULL)) + if (do_ent && hidden && (stype != st_URL) && (strchr(*str, '&') == NULL)) + do_ent = FALSE; + + /* Can't do, caller should figure out what to do... */ + if (!UCCanTranslateFromTo(cs_from, cs_to)) { + if (cs_to < 0) + return NULL; + if (!do_ent && no_bytetrans) + return NULL; + no_bytetrans = TRUE; + } else if (cs_to < 0) { + do_ent = FALSE; + } + + if (!do_ent && no_bytetrans) return str; p = *str; - if (cs_to < 0) - return NULL; - output_utf8 = (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 || - HText_hasUTF8OutputSet(HTMainText)); -#if 0 - cs_to = current_char_set; /* for now */ - lowest_8 = LYlowest_eightbit[HTAnchor_getUCLYhndl( - me->node_anchor, - UCT_STAGE_STRUCTURED)]; - repl_translated_C0 = me->T.repl_translated_C0; -#endif + if (!no_bytetrans) { + UCTransParams_clear(&T); + UCSetTransParams(&T, cs_from, &LYCharSet_UC[cs_from], + cs_to, &LYCharSet_UC[cs_to]); + from_is_utf8 = (LYCharSet_UC[cs_from].enc == UCT_ENC_UTF8); + output_utf8 = T.output_utf8; + repl_translated_C0 = T.repl_translated_C0; + puni = p; + } else if (do_ent) { + output_utf8 = (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 || + HText_hasUTF8OutputSet(HTMainText)); + repl_translated_C0 = (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0); + } + lowest_8 = LYlowest_eightbit[cs_to]; - repl_translated_C0 = (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0); /* ** Create a buffer string seven times the length of the original, ** so we have plenty of room for expansions. 
- FM */ +#ifdef OLDSTUFF len = (strlen(p) * 7) + 1; if (len < 16) len = 16; if ((Str = (char *)calloc(1, len)) == NULL) { fprintf(stderr, - "LYUnEscapeEntities: calloc(1, %lu) failed for '%s'\r\n", + "LYUCFullyTranslateString_1: calloc(1, %lu) failed for '%s'\r\n", (unsigned long)len, *str); - outofmem(__FILE__, "LYUnEscapeEntities"); + outofmem(__FILE__, "LYUCFullyTranslateString_1"); } q = Str; +#else + len = strlen(p) + 16; + q = p; +#endif /* OLDSTUFF */ + + qs = q; + +/* Create the HTChunk only if we need it */ +#define CHUNK (chunk ? chunk : (chunk = HTChunkCreate2(128, len+1))) + +#define REPLACE_STRING(s) \ + if (q != qs) HTChunkPutb(CHUNK, qs, q-qs); \ + HTChunkPuts(CHUNK, s); \ + qs = q = *str + +#define REPLACE_CHAR(c) if (q > p) { \ + HTChunkPutb(CHUNK, qs, q-qs); \ + qs = q = *str; \ + *q++ = c; \ + } else \ + *q++ = c /* * Loop through string, making conversions as needed. @@ -1123,7 +1163,7 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) { state = S_esc; if (stype == st_URL) { - *q++ = '%'; *q++ = '1'; *q++ = 'B'; + REPLACE_STRING("%1B"); p++; continue; } else if (stype != st_HTML) { @@ -1140,10 +1180,10 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( */ state = S_next_char; } else { - state = S_check_ent; + state = S_trans_byte; } } else { - state = S_check_ent; + state = (do_ent ? 
S_check_ent : S_trans_byte); } break; @@ -1203,7 +1243,7 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) { state = S_esc; if (stype == st_URL) { - *q++ = '%'; *q++ = '1'; *q++ = 'B'; + REPLACE_STRING("%1B"); p++; continue; } else if (stype != st_HTML) { @@ -1216,8 +1256,107 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( continue; case S_trans_byte: - /* character translation could go here */ - state = S_got_outchar; + /* character translation goes here */ + /* + ** Don't do anything if we have no string, + ** or if original AND target character sets + ** are both iso-8859-1, + ** or if we are in CJK mode. + */ + if (*p == '\0' || no_bytetrans) { + state = S_got_outchar; + break; + } + + if (Back) { + int rev_c; + if ((*p) == HT_NON_BREAK_SPACE || + (*p) == HT_EM_SPACE) { + if (plain_space) { + code = *p = ' '; + state = S_got_outchar; + break; + } else { + *p = 160; + code = 160; + if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) { + state = S_got_outchar; + break; + } + } + } else if ((*p) == LY_SOFT_HYPHEN) { + *p = 173; + code = 173; + if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) { + state = S_got_outchar; + break; + } + } else if (code < 127 || T.transp) { + state = S_got_outchar; + break; + } + rev_c = UCReverseTransChar(*p, cs_to, cs_from); + if (rev_c > 127) { + *p = rev_c; + code = rev_c; + state = S_got_outchar; + break; + } + } else if (code < 127) { + state = S_got_outchar; + break; + } + + if (from_is_utf8) { + if (((*p)&0xc0)==0xc0) { + puni = p; + code = UCGetUniFromUtf8String(&puni); + if (code <= 0) { + code = (unsigned char)(*p); + } else { + what = P_utf8; + } + } + } else if (use_lynx_specials && !Back && + (code == 160 || code == 173) && + (LYCharSet_UC[cs_from].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) { + if (code == 160) + code = *p = HT_NON_BREAK_SPACE; + 
else if (code == 173) + code = *p = LY_SOFT_HYPHEN; + state = S_got_outchar; + break; + } else if (T.trans_to_uni) { + code = UCTransToUni(*p, cs_from); + if (code <= 0) { + /* What else can we do? */ + code = (unsigned char)(*p); + } + } else if (T.strip_raw_char_in && + (unsigned char)(*p) >= 0xc0 && + (unsigned char)(*p) < 255) { + code = ((*p & 0x7f)); + state = S_got_outchar; + break; + } else if (!T.trans_from_uni) { + state = S_got_outchar; + break; + } + /* + ** Substitute Lynx special character for + ** 160 (nbsp) if use_lynx_specials is set. + */ + if (use_lynx_specials && !Back && + (code == 160 || code == 173)) { + code = ((code==160 ? HT_NON_BREAK_SPACE : LY_SOFT_HYPHEN)); + state = S_got_outchar; + break; + } + + state = S_check_uni; break; case S_check_ent: @@ -1281,11 +1420,13 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( sscanf(cp, "%ld", &lcode)) != 1) || lcode > 0x7fffffffL || lcode < 0) { state = S_recover; + break; } else { code = lcode; state = S_check_uni; } break; + case S_check_uni: /* ** Show the numeric entity if the value: @@ -1503,11 +1644,28 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( if (cpe != '\0') *(p-1) = cpe; p = cp; + state = S_done; } else if (what == P_named) { *cp = cpe; *q++ = '&'; + state = S_done; + } else { + if (T.strip_raw_char_in && + (unsigned char)(*p) >= 0xc0 && + (unsigned char)(*p) < 255) { + code = (((*p) & 0x7f)); + state = S_got_outchar; + } else if (!T.output_utf8 && stype == st_HTML && !hidden && + !(HTPassEightBitRaw && + (unsigned char)(*p) >= lowest_8)) { + sprintf(replace_buf, "U%.2lX", code); + state = S_got_outstring; + } else { + puni = p; + code = (unsigned char)(*p); + state = S_got_outchar; + } } - state = S_done; break; case S_named: @@ -1661,16 +1819,9 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( } else if (what == P_named) { *cp = cpe; p = (*cp != ';') ? 
(cp - 1) : cp; + } else if (what == P_utf8) { + p = puni; } -#if 0 - if (what == P_decimal || what == P_hex) { - if (cpe != ';' && cpe != '\0') - *(--p) = cpe; - } else if (what == P_named) { - *cp = cpe; - p = (*cp != ';') ? cp : (cp + 1); - } -#endif if (replace_buf[0] == '\0') { state = S_next_char; break; @@ -1682,14 +1833,12 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( state = S_put_urlstring; } } - for (i = 0; replace_buf[i]; i++) - *q++ = replace_buf[i]; + REPLACE_STRING(replace_buf); state = S_next_char; break; case S_put_urlstring: esc = HTEscape(replace_buf, URL_XALPHAS); - for (i = 0; esc[i]; i++) - *q++ = esc[i]; + REPLACE_STRING(esc); FREE(esc); state = S_next_char; break; @@ -1701,22 +1850,32 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( } else if (what == P_named) { *cp = cpe; p = (*cp != ';') ? (cp - 1) : cp; + } else if (what == P_utf8) { + p = puni; } - if (stype == st_URL) { + if (stype == st_URL && /* Not a full HTEscape, only for 8bit and ctrl chars */ - if (code >= 127 || - (code < 32 && (code != 9 && code != 10))) { + (code >= 127 || + (code < 32 && (code != 9 && code != 10)))) { state = S_put_urlchar; break; - } + } else if (!hidden && code == 10 && *p == 10 + && q != qs && *(q-1) == 13) { + /* + ** If this is not a hidden string, and the current char is + ** the LF ('\n') of a CRLF pair, drop the CR ('\r'). 
- KW + */ + *(q-1) = *p++; + state = S_done; + break; } *q++ = (char)code; state = S_next_char; break; case S_put_urlchar: *q++ = '%'; - *q++ = hex[(code >> 4) & 15]; - *q++ = hex[(code & 15)]; + REPLACE_CHAR(hex[(code >> 4) & 15]); + REPLACE_CHAR(hex[(code & 15)]); /* fall through */ case S_next_char: p++; /* fall through */ @@ -2644,344 +2803,32 @@ PRIVATE char ** LYUnEscapeEntities ARGS5( } } #endif /* 0 */ - - *q = '\0'; - if (stype == st_URL) { - LYTrimHead(Str); - LYTrimTail(Str); - } - StrAllocCopy(*str, Str); - FREE(Str); - return str; -} - -/* -** This function converts any named or numeric character -** references in allocated strings to their ISO-8858-1 -** values or to our substitutes if they are not part of -** that charset. If the isURL flag is TRUE, it also -** hex escapes ESC and any characters greater than 159, -** and trims any leading or trailing blanks. Otherwise, -** it strips out ESC, as would be done when the -** "ISO Latin 1" Character Set is selected. - FM -*/ -PRIVATE void LYUnEscapeToLatinOne_unused ARGS2( - char **, str, - CharUtil_st, stype) -{ - char *p = *str; - char *q = NULL; - char *url = NULL; - char *esc = NULL; - char buf[2]; - int e; - char *cp; - char cpe; - int len, value; - int high, low, diff = 0, i; - enum _state - { S_text, S_esc, S_dollar, S_paren, - S_nonascii_text, S_dollar_paren } state = S_text; - /* - ** Make sure we have a non-empty string. - FM - */ - if (!p || *p == '\0') - return; - buf[0] = buf[1] = '\0'; - - /* - ** If the isURL flag is TRUE, set up for hex escaping. 
- FM - */ - if (stype == st_URL) { - if ((url = (char *)calloc(1, ((strlen(p) * 3) + 1))) == NULL) { - outofmem(__FILE__, "LYUnEscapeToLatinOne"); + *q = '\0'; + if (chunk) { + HTChunkPutb(CHUNK, qs, q-qs + 1); /* also terminates */ + if (stype == st_URL) { + LYTrimHead(chunk->data); + LYTrimTail(chunk->data); } - q = url; + StrAllocCopy(*str, chunk->data); + HTChunkFree(chunk); } else { - q = p; - } - - /* - ** Loop through string, making conversions as needed. - FM - */ - while (*p) { - /* - ** Handle any CJK escape sequences. - FM - */ - switch(state) { - case S_text: - if (*p == '\033') { - state = S_esc; - if (stype == st_URL) { - buf[0] = *p; - esc = HTEscape(buf, URL_XALPHAS); - for (e = 0; esc[e]; e++) - *q++ = esc[e]; - FREE(esc); - } - p++; - continue; - } - break; - - case S_esc: - if (*p == '$') { - state = S_dollar; - *q++ = *p++; - continue; - } else if (*p == '(') { - state = S_paren; - *q++ = *p++; - continue; - } else { - state = S_text; - } - - case S_dollar: - if (*p == '@' || *p == 'B' || *p == 'A') { - state = S_nonascii_text; - *q++ = *p++; - continue; - } else if (*p == '(') { - state = S_dollar_paren; - *q++ = *p++; - continue; - } else { - state = S_text; - } - break; - - case S_dollar_paren: - if (*p == 'C') { - state = S_nonascii_text; - *q++ = *p++; - continue; - } else { - state = S_text; - } - break; - - case S_paren: - if (*p == 'B' || *p == 'J' || *p == 'T') { - state = S_text; - *q++ = *p++; - continue; - } else if (*p == 'I') { - state = S_nonascii_text; - *q++ = *p++; - continue; - } else { - state = S_text; - } - break; - - case S_nonascii_text: - if (*p == '\033') { - state = S_esc; - if (stype == st_URL) { - buf[0] = *p; - esc = HTEscape(buf, URL_XALPHAS); - for (e = 0; esc[e]; e++) - *q++ = esc[e]; - FREE(esc); - } - p++; - } else { - *q++ = *p++; - } - continue; - break; - - default: - p++; - continue; + if (stype == st_URL) { + LYTrimHead(qs); + LYTrimTail(qs); } - - /* - ** Check for a numeric or named entity. 
- FM - */ - if (*p == '&') { - p++; - len = strlen(p); - /* - ** Check for a numeric entity. - FM - */ - if (*p == '#' && len > 2 && - (unsigned char)*(p+1) < 127 && - isdigit((unsigned char)*(p+1))) { - cp = ++p; - while (*p && (unsigned char)*p < 127 && - isdigit((unsigned char)*p)) - p++; - cpe = *p; - if (*p) - *p++ = '\0'; - /* - ** Show the numeric entity if the value: - ** (1) Is greater than 255 (until we support Unicode). - ** (2) Is less than 32 and not valid. - ** (3) Is 127. - ** (4) Is 128 - 159. - */ - if ((sscanf(cp, "%d", &value) != 1) || - (value > 255) || - (value < 32 && - value != 9 && value != 10 && value != 13) || - (value == 127) || - (value > 127 && value < 160)) { - /* - ** Illegal or not yet handled value. - ** Recover the "&#" and continue - ** from there. - FM - */ - *q++ = '&'; - *q++ = '#'; - if (cpe != '\0') - *(p-1) = cpe; - p = cp; - continue; - } - /* - ** Convert the value as an unsigned char, - ** hex escaped if isURL is set and it's - ** 8-bit, and then recycle the terminator - ** if it is not a semicolon. - FM - */ - if (value > 159 && stype == st_URL) { - buf[0] = value; - esc = HTEscape(buf, URL_XALPHAS); - for (e = 0; esc[e]; e++) - *q++ = esc[e]; - FREE(esc); - } else { - *q++ = (unsigned char)value; - } - if (cpe != ';' && cpe != '\0') { - p--; - *p = cpe; - } - continue; - /* - ** Check for a named entity. - FM - */ - } else if ((unsigned char)*p < 127 && - isalnum((unsigned char)*p)) { - cp = p; - while (*cp && (unsigned char)*cp < 127 && - isalnum((unsigned char)*cp)) - cp++; - cpe = *cp; - *cp = '\0'; - for (low = 0, high = HTML_dtd.number_of_entities; - high > low ; - diff < 0 ? (low = i+1) : (high = i)) { - /* - ** Binary search. - */ - i = (low + (high-low)/2); - diff = strcmp(HTML_dtd.entity_names[i], p); - if (diff == 0) { - /* - ** Found the entity. Convert it to - ** an ISO-8859-1 character, or our - ** substitute for any non-ISO-8859-1 - ** character, hex escaped if isURL - ** is set and it's 8-bit. 
- FM - */ - buf[0] = HTMLGetLatinOneValue(i); - if (buf[0] == '\0') { - /* - ** The entity does not have an 8859-1 - ** representation of exactly one char length. - ** Try to deal with it anyway - either HTEscape - ** the whole mess, or pass through raw. So - ** make sure the ISO_Latin1 table, which is the - ** first table in LYCharSets, has reasonable - ** substitution strings! (if it really must - ** have any longer than one char) - KW - */ - if (!LYCharSets[0][i][0]) { - /* - ** Totally empty, skip. - KW - */ - ; /* do nothing */ - } else if (stype == st_URL) { - /* - ** All will be HTEscape'd. - KW - */ - esc = HTEscape(LYCharSets[0][i], URL_XALPHAS); - for (e = 0; esc[e]; e++) - *q++ = esc[e]; - FREE(esc); - } else { - /* - ** Nothing will be HTEscape'd. - KW - */ - for (e = 0; LYCharSets[0][i][e]; e++) { - *q++ = - (unsigned char)(LYCharSets[0][i][e]); - } - } - } else if ((unsigned char)buf[0] > 159 && - stype == st_URL) { - esc = HTEscape(buf, URL_XALPHAS); - for (e = 0; esc[e]; e++) - *q++ = esc[e]; - FREE(esc); - } else { - *q++ = buf[0]; - } - /* - ** Recycle the terminator if it isn't - ** the standard ';' for HTML. - FM - */ - *cp = cpe; - if (*cp != ';') - p = cp; - else - p = (cp+1); - break; - } - } - *cp = cpe; - if (diff != 0) { - /* - ** Entity not found. Add the '&' and - ** continue processing from there. - FM - */ - *q++ = '&'; - } - continue; - /* - ** If we get to here, it's a raw ampersand. - FM - */ - } else { - *q++ = '&'; - continue; - } - /* - ** Not an entity. Use the character. - FM - */ - } else { - *q++ = *p++; - } - } - - /* - ** Clean up and return. - FM - */ - *q = '\0'; - if (stype == st_URL) { - LYTrimHead(url); - LYTrimTail(url); - StrAllocCopy(*str, url); - FREE(url); } + return str; } +#undef REPLACE_CHAR +#undef REPLACE_STRING + +#ifdef OLDSTUFF + /* -** This is a generalized version of LYExpandString. +** This is a generalized version of what was previously LYExpandString. 
** ** This function translates a string from charset ** cs_from to charset cs_to, reallocating it if necessary. @@ -3208,6 +3055,8 @@ PRIVATE BOOL LYUCTranslateString ARGS7( return YES; } +#endif /* OLDSTUFF */ + PUBLIC BOOL LYUCFullyTranslateString ARGS7( char **, str, int, cs_from, @@ -3219,12 +3068,16 @@ PUBLIC BOOL LYUCFullyTranslateString ARGS7( { BOOL ret = YES; /* May reallocate *str even if cs_to == 0 */ +#ifdef OLDSTUFF if (!LYUCTranslateString(str, cs_from, cs_to, use_lynx_specials, FALSE, NO, stype)) { LYExpandString_old(str); ret = NO; } +#endif - if (!LYUnEscapeEntities(str, cs_to, plain_space, hidden, stype)) { + if (!LYUCFullyTranslateString_1(str, cs_from, cs_to, TRUE, + use_lynx_specials, plain_space, hidden, + NO, stype)) { ret = NO; } return ret; @@ -3236,29 +3089,17 @@ PUBLIC BOOL LYUCTranslateBackFormData ARGS4( int, cs_to, BOOLEAN, plain_space) { - /* May reallocate *str even if cs_to == 0 */ + char ** ret; + /* May reallocate *str */ +#ifdef OLDSTUFF return (LYUCTranslateString(str, cs_from, cs_to, NO, plain_space, YES, st_HTML)); - -} - -#ifdef NOTUSED -PUBLIC BOOL LYUCFullyTranslateString ARGS6( - char **, str, - int, cs_from, - int, cs_to, - BOOL, use_lynx_specials, - BOOLEAN, plain_space, - BOOLEAN, hidden) -{ - if (cs_to) { - if (!LYUCTranslateString(str, cs_from, cs_to, use_lynx_specials)) - LYExpandString_old(str); - } - /* Note that it is guaranteed that *str is not reallocated - if cs_to == 0 */ - LYUnEscapeEntities(*str, plain_space, hidden); +#else + ret = (LYUCFullyTranslateString_1(str, cs_from, cs_to, FALSE, + NO, plain_space, YES, + YES, st_HTML)); + return (ret != NULL); +#endif } -#endif /* NOTUSED */ /* ** This function processes META tags in HTML streams. - FM @@ -3478,6 +3319,7 @@ PUBLIC void LYHandleMETA ARGS4( } else if (!(me->node_anchor->charset && *me->node_anchor->charset) && !strcasecomp((http_equiv ? 
http_equiv : ""), "Content-Type")) { #ifdef EXP_CHARTRANS + LYUCcharset * p_in = NULL; LYUCFullyTranslateString(&content, me->tag_charset, me->tag_charset, NO, NO, YES, st_other); #else @@ -3541,14 +3383,13 @@ PUBLIC void LYHandleMETA ARGS4( } FREE(cp3); if (chartrans_ok) { - LYUCcharset * p_in = - HTAnchor_getUCInfoStage(me->node_anchor, - UCT_STAGE_PARSER); LYUCcharset * p_out = HTAnchor_setUCInfoStage(me->node_anchor, current_char_set, UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); + p_in = HTAnchor_getUCInfoStage(me->node_anchor, + UCT_STAGE_PARSER); if (!p_out) { /* * Try again. @@ -3658,7 +3499,7 @@ PUBLIC void LYHandleMETA ARGS4( /* * Set the kcode element based on the charset. - FM */ - HText_setKcode(me->text, me->node_anchor->charset); + HText_setKcode(me->text, me->node_anchor->charset, p_in); /* * Check for a Refresh directive. - FM @@ -4577,19 +4418,67 @@ PUBLIC void LYResetParagraphAlignment ARGS1( return; } +/* +** This is an example interface for accessing elements of the HTML +** parser's HTStructured object by the SGML parser. The SGML parser +** knows the HTML parser's HTStructured objects as context->target +** elements, and, homologously, uses them as an argument in calls to +** HTML_start_element() and HTML_end_element(), but should not +** manipulate the context->target's elements, itself, because those +** are defined in the Lynx_HTML_Handler section of HTML.h, to which +** SGML.c is blind. This *example* function checks whether the +** node_anchor element of an HTML parser HTStructured object has +** an address with a file scheme, and if so, loads it into +** the SGML parser's context->url element, which was passed as +** the second argument. The handle_comment() calling function in +** SGML.c then calls LYDoCSI() in LYUtils.c to insert HTML markup +** into the corresponding stream, homologously to an SSI by an +** HTTP server.
- FM +** Since SGML.c is blind to the HTML parser's HTStructured object +** details it may be calling this function inappropriately when +** its context->target isn't actually the HTStructured object +** implemented in HTML.c but one in HTMLDTD.c. Therefore this +** function checks for the expected HTStructuredClass's signature +** in me->isa->name before doing anything else which requires +** access to the expected HTStructured object's data fields, and +** returns a failure indication if it finds that it has been called +** inappropriately. - kw +** Functions such as this also could +** be used to set the values of elements (existing ones, or any +** new ones needed for future development) in the HTML parser's +** HTStructured objects, so that they will be accessible to all +** Lynx_HTML_Handler utility functions, thus avoiding the need +** to increase the number of arguments in the calls to those +** functions, and keeping them object-specific via their 'me' +** arguments. - FM +** But to generalize the SGML.c -> HTStructured calling mechanism +** so that it can deal with more than one implementation of what +** SGML.c sees as context->target, new functions will either have +** to check whether they are called for the expected kind of target +** object (such as here), or they have to come in several object- +** specific versions to work across all possible target object classes; +** in the latter case they should become new member functions of the +** Structured Object definition in SGML.h.
- kw +*/ PUBLIC BOOLEAN LYCheckForCSI ARGS2( - HTParentAnchor *, anchor, + HTStructured *, me, char **, url) { - if (!(anchor && anchor->address)) + if (!me) + return FALSE; + + if (!me->isa || strcmp(me->isa->name, "Lynx_HTML_Handler")) + return FALSE; + + if (!(me->node_anchor && me->node_anchor->address)) return FALSE; - if (strncasecomp(anchor->address, "file:", 5)) + if (strncasecomp(me->node_anchor->address, "file:", 5)) return FALSE; - if (!LYisLocalHost(anchor->address)) + if (!LYisLocalHost(me->node_anchor->address)) return FALSE; - - StrAllocCopy(*url, anchor->address); + + StrAllocCopy(*url, me->node_anchor->address); return TRUE; } diff --git a/src/LYCharUtils.h b/src/LYCharUtils.h index c1c58f40..f276ad0a 100644 --- a/src/LYCharUtils.h +++ b/src/LYCharUtils.h @@ -109,7 +109,7 @@ extern void LYEnsureSingleSpace PARAMS(( extern void LYResetParagraphAlignment PARAMS(( HTStructured * me)); extern BOOLEAN LYCheckForCSI PARAMS(( - HTParentAnchor * anchor, + HTStructured * me, char ** url)); #endif /* Lynx_HTML_Handler */ diff --git a/src/LYCookie.c b/src/LYCookie.c index 284df163..ea1e25b1 100644 --- a/src/LYCookie.c +++ b/src/LYCookie.c @@ -63,6 +63,8 @@ #include "LYCharUtils.h" #include "LYCookie.h" +#include "LYLeaks.h" + #define FREE(x) if (x) {free(x); x = NULL;} /* diff --git a/src/LYCurses.c b/src/LYCurses.c index c3fc98ae..d134f4b9 100644 --- a/src/LYCurses.c +++ b/src/LYCurses.c @@ -200,6 +200,7 @@ PUBLIC void setHashStyle ARGS5(int,style,int,color,int,cattr,int,mono,char*,elem ds->cattr=cattr; ds->mono=mono; ds->code=style; + FREE(ds->name); ds->name=malloc(sizeof(char)*(strlen(element)+2)); strcpy(ds->name, element); } @@ -766,7 +767,7 @@ PUBLIC void lynx_enable_mouse ARGS1(int,state) button 1 is clicked */ #ifndef _WINDOWS if (state) - mousemask(BUTTON1_CLICKED, NULL); + mousemask(BUTTON1_CLICKED | BUTTON2_CLICKED, NULL); else mousemask(0, NULL); #else diff --git a/src/LYEditmap.c b/src/LYEditmap.c index 0ecfcbef..4cba958a 100644 --- 
a/src/LYEditmap.c +++ b/src/LYEditmap.c @@ -63,7 +63,7 @@ LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_DELP, -/* 80..99 ISO-8859-1 8-bit escape characters. */ +/* 80..9F ISO-8859-1 8-bit escape characters. */ LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, LYE_CHAR, diff --git a/src/LYForms.c b/src/LYForms.c index a92b0918..26f54f74 100644 --- a/src/LYForms.c +++ b/src/LYForms.c @@ -5,6 +5,7 @@ #include "HTAlert.h" #include "LYCurses.h" #include "GridText.h" +#include "LYCharSets.h" #include "LYUtils.h" #include "LYStructs.h" /* includes HTForms.h */ #include "LYStrings.h" @@ -355,7 +356,8 @@ again: action = EditBinding(ch); if (action == LYE_ENTER) break; - if (action == LYE_AIX && HTCJK == NOCJK) + if (action == LYE_AIX && + (HTCJK == NOCJK && LYlowest_eightbit[current_char_set] > 0x97)) break; if (action == LYE_TAB) { ch = (int)('\t'); @@ -456,6 +458,18 @@ breakfor: while ((p != form->value) && (p[-1] == ' ')) p--; *p = '\0'; + + /* + * If the field has been changed, assume that it is now in + * current display character set, even if for some reason + * it wasn't! Hopefully a user will only submit the form + * if the non-ASCII characters are displayed correctly, which + * means (assuming that the display character set has been set + * truthfully) the user confirms by changing the field that + * the character encoding is right. - kw + */ + if (*p) + form->value_cs = current_char_set; } return(ch); } diff --git a/src/LYGetFile.c b/src/LYGetFile.c index b29584aa..e8787cfd 100644 --- a/src/LYGetFile.c +++ b/src/LYGetFile.c @@ -106,6 +106,11 @@ PUBLIC BOOLEAN getfile ARGS1( LYCancelDownload = FALSE; } + /* + * Reset fake 'Z' to prevent unwanted delayed effect. - kw + */ + LYFakeZap(NO); + Try_Redirected_URL: /* * Load the WWWDoc struct in case we need to use it. 
@@ -824,7 +829,7 @@ Try_Redirected_URL: } StrAllocCat(use_this_url_instead, pound); } - if (TRACE) + if (TRACE && LYTraceLogFP == NULL) sleep(MessageSecs); _user_message(WWW_USING_MESSAGE, use_this_url_instead); @@ -963,7 +968,7 @@ Try_Redirected_URL: } } } else { - if (TRACE) + if (TRACE && LYTraceLogFP == NULL) sleep(MessageSecs); _user_message(WWW_BAD_ADDR_MESSAGE, doc->address); if (TRACE) @@ -1340,7 +1345,8 @@ PRIVATE int fix_http_urls ARGS1( doc->address[strlen(doc->address)-1] = '\0'; if (TRACE) { fprintf(stderr, " changed to '%s'\n", doc->address); - sleep(MessageSecs); + if (!LYTraceLogFP) + sleep(MessageSecs); } } @@ -1357,7 +1363,8 @@ PRIVATE int fix_http_urls ARGS1( StrAllocCat(doc->address, "/"); if (TRACE) { fprintf(stderr, " changed to '%s'\n",doc->address); - sleep(MessageSecs); + if (!LYTraceLogFP) + sleep(MessageSecs); } return(1); diff --git a/src/LYHash.c b/src/LYHash.c index 7fbb55ad..a6eea95f 100644 --- a/src/LYHash.c +++ b/src/LYHash.c @@ -10,9 +10,10 @@ #include "LYHash.h" -PUBLIC int hash_table[HASHSIZE]; /* 32K should be big enough */ - #ifdef NOT_USED + +PUBLIC int hash_table[CSHASHSIZE]; /* 32K should be big enough */ + PUBLIC int hash_code_rp ARGS1(char*,string) { char* hash_ptr = string; @@ -23,7 +24,7 @@ PUBLIC int hash_code_rp ARGS1(char*,string) hash_tmp ^= (((*hash_ptr)<<4) ^ ((*hash_ptr)<<12)); hash_tmp >>= 1; } - return (hash_tmp % HASHSIZE); + return (hash_tmp % CSHASHSIZE); } #endif @@ -32,7 +33,11 @@ PUBLIC int hash_code_rp ARGS1(char*,string) * but with a different value for HASH_SIZE. */ +#ifdef NOT_USED #define HASH_SIZE 8193 /* Arbitrary prime. 
Memory/speed tradeoff */ +#else +#define HASH_SIZE CSHASHSIZE +#endif PUBLIC int hash_code ARGS1 (char*, string) { diff --git a/src/LYHash.h b/src/LYHash.h index b466f2aa..00f8066c 100644 --- a/src/LYHash.h +++ b/src/LYHash.h @@ -11,16 +11,22 @@ struct _hashbucket { }; typedef struct _hashbucket bucket; - -#if !defined(HASHSIZE) -#define HASHSIZE 32768 + +#if !defined(CSHASHSIZE) +#ifdef NOT_USED +#define CSHASHSIZE 32768 +#else +#define CSHASHSIZE 8193 +#endif #endif #define NOSTYLE -1 -extern bucket hashStyles[HASHSIZE]; +extern bucket hashStyles[CSHASHSIZE]; extern int hash_code PARAMS((char* string)); -extern int hash_table[HASHSIZE]; /* 32K should be big enough */ +#ifdef NOT_USED +extern int hash_table[CSHASHSIZE]; /* 32K should be big enough */ +#endif extern int s_alink, s_a, s_status, s_label, s_value, s_high, diff --git a/src/LYKeymap.c b/src/LYKeymap.c index c47c3611..1ceef654 100644 --- a/src/LYKeymap.c +++ b/src/LYKeymap.c @@ -462,7 +462,7 @@ PRIVATE struct rmap revmap[] = { { "RAW_TOGGLE", "toggle raw 8-bit translations or CJK mode ON or OFF" }, { "COOKIE_JAR", "examine the Cookie Jar" }, { "F_LINK_NUM", "invoke the 'Follow link (or page) number:' prompt" }, -{ "SWITCH_DTD", "switch between old and new parsing of HTML" }, +{ "SWITCH_DTD", "switch between two ways of parsing HTML" }, #ifdef USE_EXTERNALS { "EXTERN", "run external program with url" }, #endif diff --git a/src/LYLeaks.c b/src/LYLeaks.c index fed79414..ee69621a 100644 --- a/src/LYLeaks.c +++ b/src/LYLeaks.c @@ -25,7 +25,8 @@ PRIVATE void AddToList PARAMS((AllocationList *ALp_new)); PRIVATE AllocationList *FindInList PARAMS((void *vp_find)); PRIVATE void RemoveFromList PARAMS((AllocationList *ALp_del)); -PUBLIC void LYLeaks NOARGS { +PUBLIC void LYLeaks NOARGS +{ /* * Purpose: Print a report of all memory left unallocated by * Lynx code or attempted unallocations on @@ -173,7 +174,8 @@ PUBLIC void LYLeaks NOARGS { } PUBLIC void *LYLeakMalloc ARGS3(size_t, st_bytes, CONST char *, cp_File, 
CONST - short, ssi_Line) { + short, ssi_Line) +{ /* * Purpose: Capture allocations using malloc (stdlib.h) and track * the information in a list. @@ -230,7 +232,8 @@ PUBLIC void *LYLeakMalloc ARGS3(size_t, st_bytes, CONST char *, cp_File, CONST } PUBLIC void *LYLeakCalloc ARGS4(size_t, st_number, size_t, st_bytes, CONST char - *, cp_File, CONST short, ssi_Line) { + *, cp_File, CONST short, ssi_Line) +{ /* * Purpose: Capture allocations by calloc (stdlib.h) and * save relevant information in a list. @@ -287,7 +290,8 @@ PUBLIC void *LYLeakCalloc ARGS4(size_t, st_number, size_t, st_bytes, CONST char } PUBLIC void *LYLeakRealloc ARGS4(void *, vp_Alloced, size_t, st_newBytes, CONST - char *, cp_File, CONST short, ssi_Line) { + char *, cp_File, CONST short, ssi_Line) +{ /* * Purpose: Capture any realloc (stdlib.h) calls in order to * properly keep track of our run time allocation @@ -378,7 +382,8 @@ PUBLIC void *LYLeakRealloc ARGS4(void *, vp_Alloced, size_t, st_newBytes, CONST } PUBLIC void LYLeakFree ARGS3(void *, vp_Alloced, CONST char *, cp_File, CONST - short, ssi_Line) { + short, ssi_Line) +{ /* * Purpose: Capture all requests to free information and also * remove items from the allocation list. @@ -441,7 +446,61 @@ PUBLIC void LYLeakFree ARGS3(void *, vp_Alloced, CONST char *, cp_File, CONST } } -PRIVATE void AddToList ARGS1(AllocationList *, ALp_new) { +/* Allocates a new copy of a string, and returns it. + * Tracks allocations by using other LYLeakFoo functions. + * Equivalent to HTSACopy in HTUtils.c - kw + * +*/ +PUBLIC char * LYLeakSACopy ARGS4( + char **, dest, + CONST char *, src, + CONST char *, cp_File, + CONST short, ssi_Line) +{ + if (*dest) { + LYLeakFree(*dest, cp_File, ssi_Line); + *dest = NULL; + } + if (src) { + *dest = (char *) LYLeakMalloc (strlen(src) + 1, cp_File, ssi_Line); + if (*dest == NULL) + outofmem(__FILE__, "LYLeakSACopy"); + strcpy (*dest, src); + } + return *dest; +} + +/* String Allocate and Concatenate. 
+ * Tracks allocations by using other LYLeakFoo functions. + * Equivalent to HTSACat in HTUtils.c - kw +*/ +PUBLIC char * LYLeakSACat ARGS4( + char **, dest, + CONST char *, src, + CONST char *, cp_File, + CONST short, ssi_Line) +{ + if (src && *src) { + if (*dest) { + int length = strlen(*dest); + *dest = (char *)LYLeakRealloc(*dest, length + strlen(src) + 1, + cp_File, ssi_Line); + if (*dest == NULL) + outofmem(__FILE__, "LYLeakSACat"); + strcpy (*dest + length, src); + } else { + *dest = (char *)LYLeakMalloc(strlen(src) + 1, + cp_File, ssi_Line); + if (*dest == NULL) + outofmem(__FILE__, "LYLeakSACat"); + strcpy (*dest, src); + } + } + return *dest; +} + +PRIVATE void AddToList ARGS1(AllocationList *, ALp_new) +{ /* * Purpose: Add a new allocation item to the list. * Arguments: ALp_new The new item to add. @@ -460,7 +519,8 @@ PRIVATE void AddToList ARGS1(AllocationList *, ALp_new) { ALp_RunTimeAllocations = ALp_new; } -PRIVATE AllocationList *FindInList ARGS1(void *, vp_find) { +PRIVATE AllocationList *FindInList ARGS1(void *, vp_find) +{ /* * Purpose: Find the place in the list where vp_find is currently * tracked. @@ -491,7 +551,8 @@ PRIVATE AllocationList *FindInList ARGS1(void *, vp_find) { return(ALp_find); } -PRIVATE void RemoveFromList ARGS1(AllocationList *, ALp_del) { +PRIVATE void RemoveFromList ARGS1(AllocationList *, ALp_del) +{ /* * Purpose: Remove the specified item from the list. * Arguments: ALp_del The item to remove from the list. 
diff --git a/src/LYMain.c b/src/LYMain.c index 7f9c8800..bcb69040 100644 --- a/src/LYMain.c +++ b/src/LYMain.c @@ -390,6 +390,10 @@ PUBLIC BOOLEAN LYNoCore = NO_FORCED_CORE_DUMP; PRIVATE void FatalProblem PARAMS((int sig)); #endif /* !VMS */ +#if defined(USEHASH) + char *lynx_lss_file=NULL; +#endif + PRIVATE void free_lynx_globals NOARGS { int i; @@ -470,6 +474,9 @@ PRIVATE void free_lynx_globals NOARGS FREE(URLDomainSuffixes); FREE(XLoadImageCommand); FREE(LYTraceLogPath); +#if defined(USEHASH) + FREE(lynx_lss_file); +#endif for (i = 0; i < nlinks; i++) { FREE(links[i].lname); } @@ -478,10 +485,6 @@ PRIVATE void free_lynx_globals NOARGS return; } -#if defined(USEHASH) - char *lynx_lss_file=NULL; -#endif - /* * This function frees the LYStdinArgs list. - FM @@ -780,7 +783,7 @@ PUBLIC int main ARGS2( if (!LYValidate) parse_restrictions("default"); anon_restrictions_set = TRUE; - } else if (strcmp(argv[0], "-validate") == 0) { + } else if (strcmp(argv[i], "-validate") == 0) { /* * Follow only http URLs. */ @@ -796,6 +799,18 @@ PUBLIC int main ARGS2( StrAllocCopy(lynx_cfg_file, argv[i+1]); i++; } + +#if defined(USEHASH) + } else if (strncmp(argv[i], "-lss", 4) == 0) { + if ((cp=strchr(argv[i],'=')) != NULL) + StrAllocCopy(lynx_lss_file, cp+1); + else { + StrAllocCopy(lynx_lss_file, argv[i+1]); + i++; + } + fprintf(stderr, "LYMain found -lss flag, lss file is %s\n", + lynx_lss_file ? lynx_lss_file : "<NONE>"); +#endif } } @@ -863,6 +878,22 @@ PUBLIC int main ARGS2( if (*cp) StrAllocCopy(lynx_cfg_file, cp); } + #if defined(USEHASH) + } else if (strncmp(buf, "-lss", 4) == 0) { + if ((cp = strchr(buf,'=')) != NULL) { + StrAllocCopy(lynx_lss_file, cp+1); + } else { + cp = buf; + while (*cp && !isspace((unsigned char)*cp)) + cp++; + while (*cp && isspace((unsigned char)*cp)) + cp++; + if (*cp) + StrAllocCopy(lynx_cfg_file, cp); + } + fprintf(stderr, "LYMain found -lss flag, lss file is %s\n", + lynx_lss_file ? 
lynx_lss_file : "<NONE>"); +#endif } else if (strcmp(buf, "-get_data") == 0) { /* * User data for GET form. @@ -1134,9 +1165,6 @@ PUBLIC int main ARGS2( * Convert a '~' in the lynx-style file path to $HOME. */ if ((cp = strchr(lynx_lss_file, '~'))) { - char *temp = NULL; - int len; - *(cp++) = '\0'; StrAllocCopy(temp, lynx_lss_file); if ((len=strlen(temp)) > 0 && temp[len-1] == '/') @@ -1663,6 +1691,7 @@ PUBLIC int main ARGS2( if (display != NULL && *display != '\0') { LYisConfiguredForX = TRUE; } + ena_csi((LYlowest_eightbit[current_char_set] > 155)); status = mainloop(); cleanup(); } @@ -2143,17 +2172,6 @@ PRIVATE void parse_arg ARGS3( if (strncmp(argv[0], "-link", 5) == 0) { if (nextarg) ccount = atoi(cp); -#if defined(USEHASH) - } else if (strncmp(argv[0], "-lss", 4) == 0) { - if ((cp=strchr(argv[0],'=')) != NULL) - StrAllocCopy(lynx_lss_file, cp+1); - else { - StrAllocCopy(lynx_lss_file, argv[1]); - i++; - } - fprintf(stderr, "LYMain found -lss flag, lss file is %s\n", - lynx_lss_file ? 
lynx_lss_file : "<NONE>"); -#endif } else if (strncmp(argv[0], "-localhost", 10) == 0) { local_host_only = TRUE; @@ -2163,6 +2181,16 @@ PRIVATE void parse_arg ARGS3( local_exec_on_local_files = TRUE; #endif /* EXEC_LINKS || EXEC_SCRIPTS */ +#if defined(USEHASH) + } else if (strncmp(argv[0], "-lss", 4) == 0) { + /* + * Already read the alternate lynx-style file + * so just check whether we need to increment i + */ + if (nextarg) + ; /* do nothing */ +#endif + } else { goto Output_Error_and_Help_List; } @@ -2497,16 +2525,6 @@ PRIVATE void parse_arg ARGS3( HTAtom_for("www/download") : HTAtom_for("www/dump")); LYcols=999; -#if defined(USEHASH) - } else if (strncmp(argv[0], "-lss", 4) == 0) { - /* - * Already read the alternate lynx-style file - * so just check whether we need to increment i - */ - if (nextarg) - ; /* do nothing */ -#endif - } else if (strncmp(argv[0], "-stack_dump", 11) == 0) { stack_dump = TRUE; diff --git a/src/LYMainLoop.c b/src/LYMainLoop.c index 95166360..f541e0ba 100644 --- a/src/LYMainLoop.c +++ b/src/LYMainLoop.c @@ -853,8 +853,10 @@ try_again: break; } /* end switch */ - if (TRACE) - sleep(AlertSecs); /* allow me to look at the results */ + if (TRACE) { + if (!LYTraceLogFP || trace_mode_flag) + sleep(AlertSecs); /* allow me to look at the results */ + } /* * Set the files the same. @@ -1062,7 +1064,8 @@ try_again: } if (TRACE) { refresh_screen = TRUE; - sleep(AlertSecs); + if (!LYTraceLogFP || trace_mode_flag) + sleep(AlertSecs); } } @@ -1713,6 +1716,7 @@ new_cmd: /* } else { if (HText_getOwner()) StrAllocCopy(ownerS_address, HText_getOwner()); + LYUCPushAssumed(HTMainAnchor); HTOutputFormat = WWW_SOURCE; } HTuncache_current_document(); @@ -1906,9 +1910,7 @@ new_cmd: /* else New_DTD = YES; HTSwitchDTD(New_DTD); - _statusline(New_DTD ? - "Now using the experimental DTD!" : "Now using the old Lynx DTD."); -/* SOFT_DOUBLE_QUOTE_ON : SOFT_DOUBLE_QUOTE_OFF);*/ + _statusline(New_DTD ? 
USING_DTD_0 : USING_DTD_1); sleep(MessageSecs); break; @@ -1966,12 +1968,13 @@ new_cmd: /* case LYK_UP_TWO: if (Newline > 1) { - Newline -= 2; + int scrollamount = (Newline > 2 ? 2 : 1); + Newline -= scrollamount; if (nlinks > 0 && curdoc.link > -1) { - if (links[curdoc.link].ly <= (display_lines - 2)) { + if (links[curdoc.link].ly + scrollamount <= display_lines) { newdoc.link = curdoc.link + HText_LinksInLines(HTMainText, - Newline, 2); + Newline, scrollamount); } else { arrowup = TRUE; } @@ -2001,13 +2004,16 @@ new_cmd: /* case LYK_UP_HALF: if (Newline > 1) { - Newline -= display_lines/2; + int scrollamount = display_lines/2; + if (Newline - scrollamount < 1) + scrollamount = Newline - 1; + Newline -= scrollamount; if (nlinks > 0 && curdoc.link > -1) { - if (links[curdoc.link].ly <= (display_lines/2)) { + if (links[curdoc.link].ly + scrollamount <= display_lines) { newdoc.link = curdoc.link + HText_LinksInLines(HTMainText, Newline, - (display_lines/2)); + scrollamount); } else { arrowup = TRUE; } @@ -2084,8 +2090,18 @@ new_cmd: /* /* * Go back to the previous page. */ - Newline -= (display_lines); - arrowup = TRUE; + int scrollamount = (Newline > display_lines ? + display_lines : Newline - 1); + Newline -= scrollamount; + if (scrollamount < display_lines && + nlinks > 0 && curdoc.link == 0 && + links[0].ly - 1 + scrollamount <= display_lines) { + newdoc.link = HText_LinksInLines(HTMainText, + 1, + scrollamount) - 1; + } else { + arrowup = TRUE; + } } else if (old_c != real_c) { old_c = real_c; @@ -2138,7 +2154,11 @@ new_cmd: /* break; case LYK_UP_LINK: - if (curdoc.link > 0) { /* more links above? */ + if (curdoc.link > 0 && + (links[0].ly != links[curdoc.link].ly || + !HText_LinksInLines(HTMainText, 1, Newline - 1))) { + /* more links before this on screen, and first of them on + a different line or no previous links before this screen? 
*/ int newlink = -1; for (i = curdoc.link; i >= 0; i--) { if (links[i].ly < links[curdoc.link].ly) { @@ -2155,14 +2175,13 @@ new_cmd: /* curdoc.link = (nlinks-1); } else if (more) { /* next page */ Newline += (display_lines); - } #else } else if (old_c != real_c) { old_c = real_c; _statusline(NO_LINKS_ABOVE); sleep(MessageSecs); - } #endif /* NOTDEFINED */ + } #ifdef NOTDEFINED /* @@ -2175,8 +2194,18 @@ new_cmd: /* #endif /* NOTDEFINED */ } else if (curdoc.line > 1 && Newline > 1) { /* previous page */ - Newline -= (display_lines); + int scrollamount = (Newline > display_lines ? + display_lines : Newline - 1); + Newline -= scrollamount; + if (scrollamount < display_lines && + nlinks > 0 && curdoc.link > -1 && + links[0].ly -1 + scrollamount <= display_lines) { + newdoc.link = HText_LinksInLines(HTMainText, + 1, + scrollamount) - 1; + } else { arrowup = TRUE; + } } else if (old_c != real_c) { old_c = real_c; diff --git a/src/LYOptions.c b/src/LYOptions.c index 8a03ec78..cbc4912b 100644 --- a/src/LYOptions.c +++ b/src/LYOptions.c @@ -271,7 +271,8 @@ draw_options: addstr(UCAssume_MIMEcharset); else addstr((UCLYhndl_for_unspec >= 0) ? 
- LYCharSet_UC[UCLYhndl_for_unspec].MIMEname : "NONE"); + (char *)LYCharSet_UC[UCLYhndl_for_unspec].MIMEname + : "NONE"); } move(L_Rawmode, 5); @@ -813,7 +814,8 @@ draw_options: move(L_ASSUME_CHARSET, COL_OPTION_VALUES); clrtoeol(); if (UCLYhndl_for_unspec >= 0) - addstr(LYCharSet_UC[UCLYhndl_for_unspec].MIMEname); + addstr((char *) + LYCharSet_UC[UCLYhndl_for_unspec].MIMEname); #endif /* VMS || USE_SLANG */ } diff --git a/src/LYStrings.c b/src/LYStrings.c index dd07fe98..6d9a68c7 100644 --- a/src/LYStrings.c +++ b/src/LYStrings.c @@ -12,6 +12,7 @@ #include "LYMail.h" #include "LYNews.h" #include "LYOptions.h" +#include "LYCharSets.h" #include <ctype.h> @@ -261,7 +262,7 @@ PRIVATE int sl_parse_mouse_event ARGS3(int *, x, int *, y, int *, button) } #endif -#ifdef USE_SLANG_MOUSE +#if defined(USE_SLANG_MOUSE) || defined(NCURSES_MOUSE_VERSION) PRIVATE int map_function_to_key ARGS1(char, keysym) { int i; @@ -278,6 +279,12 @@ PRIVATE int map_function_to_key ARGS1(char, keysym) } #endif +PRIVATE BOOLEAN csi_is_csi = TRUE; +PUBLIC void ena_csi ARGS1( + BOOLEAN, flag) +{ + csi_is_csi = flag; +} /* * LYgetch() translates some escape sequences and may fake noecho. */ @@ -371,7 +378,7 @@ re_read: } #endif /* USE_SLANG */ - if (c == 27 || c == 155) { /* handle escape sequence */ + if (c == 27 || (csi_is_csi && c == 155)) { /* handle escape sequence */ b = GetChar(); if (b == '[' || b == 'O') { @@ -499,7 +506,8 @@ re_read: default: if (TRACE) { fprintf(stderr,"Unknown key sequence: %d:%d:%d\n",c,b,a); - sleep(MessageSecs); + if (!LYTraceLogFP) + sleep(MessageSecs); } } if (isdigit(a) && (b == '[' || c == 155) && d != -1 && d != '~') @@ -585,6 +593,8 @@ re_read: err=getmouse(&event); if (event.bstate & BUTTON1_CLICKED) { c = set_clicked_link(event.x, event.y); + } else if (event.bstate & BUTTON2_CLICKED) { + c = map_function_to_key (LYK_PREV_DOC); } #else /* pdcurses version */ int left,right; @@ -718,10 +728,11 @@ PUBLIC int LYEdit1 ARGS4( case LYE_AIX: /* * Hex 97. 
- * Fall through as a character for CJK. + * Fall through as a character for CJK, or if this is a valid + * character in the current display character set. * Otherwise, we treat this as LYE_ENTER. */ - if (HTCJK == NOCJK) + if (HTCJK == NOCJK && LYlowest_eightbit[current_char_set] > 0x97) return(ch); case LYE_CHAR: /* @@ -930,7 +941,10 @@ PUBLIC void LYRefreshEdit ARGS1( for (i = 0; i < nrdisplayed; i++) if ((buffer[0] = str[i]) == 1 || buffer[0] == 2 || ((unsigned char)buffer[0] == 160 && - !(HTPassHighCtrlRaw || HTCJK != NOCJK))) { + !(HTPassHighCtrlRaw || HTCJK != NOCJK || + (LYCharSet_UC[current_char_set].enc != UCT_ENC_8859 && + !(LYCharSet_UC[current_char_set].like8859 + & UCT_R_8859SPECL))))) { addch(' '); } else { /* For CJK strings, by Masanobu Kimura */ @@ -1013,10 +1027,13 @@ again: case LYE_AIX: /* * Hex 97. - * Treat as a character for CJK. + * Treat as a character for CJK, or if this is a valid + * character in the current display character set. * Otherwise, we treat this as LYE_ENTER. 
*/ - if (HTCJK != NOCJK && ch != '\t') { + if (ch != '\t' && + (HTCJK != NOCJK || + LYlowest_eightbit[current_char_set] <= 0x97)) { LYLineEdit(&MyEdit,ch, FALSE); break; } diff --git a/src/LYStrings.h b/src/LYStrings.h index 9d26458c..5dc64c06 100644 --- a/src/LYStrings.h +++ b/src/LYStrings.h @@ -11,6 +11,7 @@ extern char * LYstrncpy PARAMS(( char * dst, CONST char * src, int n)); +extern void ena_csi PARAMS((BOOLEAN flag)); extern int LYgetch NOPARAMS; extern int LYgetstr PARAMS(( char * inputline, diff --git a/src/LYStyle.c b/src/LYStyle.c index 8e36c09d..3f529264 100644 --- a/src/LYStyle.c +++ b/src/LYStyle.c @@ -1,6 +1,6 @@ /* character level styles for Lynx * (c) 1996 Rob Partington -- donated to the Lyncei (if they want it :-) - * $Id: LYStyle.c,v 1.3 1997/09/19 01:14:00 klaus Exp $ + * @Id: LYStyle.c 1.7 Wed, 17 Sep 1997 17:34:13 -0600 dickey @ */ #include "HTUtils.h" #include "HTML.h" @@ -20,10 +20,11 @@ #include "LYStyle.h" #include "LYexit.h" +#include "LYLeaks.h" #ifdef USE_COLOR_STYLE -PUBLIC bucket hashStyles[HASHSIZE]; +PUBLIC bucket hashStyles[CSHASHSIZE]; /* definitions for the mono attributes we can use */ static int ncursesMono[7] = { @@ -62,6 +63,8 @@ PUBLIC int s_alink=NOSTYLE, s_a=NOSTYLE, s_status=NOSTYLE, /* start somewhere safe */ PRIVATE int colorPairs=0; +PRIVATE int last_fA=COLOR_WHITE, last_bA=COLOR_BLACK; + #define FREE(x) if (x) {free(x); x = NULL;} @@ -125,15 +128,21 @@ PRIVATE void parse_attributes ARGS5(char*,mono,char*,fg,char*,bg,int,style,char* */ if (lynx_has_color && colorPairs < COLOR_PAIRS-1 && fA!=-1) { - colorPairs++; - init_pair(colorPairs, fA, bA); - setStyle(style, COLOR_PAIR(colorPairs)|cA, cA, mA); + if (colorPairs <= 0 || fA != last_fA || bA != last_bA) { + colorPairs++; + init_pair(colorPairs, fA, bA); + last_fA = fA; + last_bA = bA; + } + if (style < DSTYLE_ELEMENTS) + setStyle(style, COLOR_PAIR(colorPairs)|cA, cA, mA); setHashStyle(newstyle, COLOR_PAIR(colorPairs)|cA, cA, mA, element); } else { /* only mono is set 
*/ - setStyle(style, -1, -1, mA); + if (style < DSTYLE_ELEMENTS) + setStyle(style, -1, -1, mA); setHashStyle(newstyle, -1, -1, mA, element); } } @@ -251,11 +260,26 @@ where OBJECT is one of EM,STRONG,B,I,U,BLINK etc.\n\n", buffer); } } #else - parse_attributes(mono,fg,bg,hash_code(element),element); + int element_number = -1; + HTTag * t = SGMLFindTag(&HTML_dtd, element); + if (t && t->name) { + element_number = t - HTML_dtd.tags; + } + if (element_number >= HTML_A && + element_number < HTML_ELEMENTS) + parse_attributes(mono,fg,bg, element_number+STARTAT,element); + else + parse_attributes(mono,fg,bg, DSTYLE_ELEMENTS,element); #endif } } +PRIVATE void free_colorstylestuff NOARGS +{ + style_initialiseHashTable(); + style_deleteStyleList(); +} + /* * initialise the default style sheet * This should be able to be read from a file in CSS format :-) @@ -268,13 +292,21 @@ PRIVATE void initialise_default_stylesheet NOARGS PUBLIC void style_initialiseHashTable NOARGS { int i; + static int firsttime = 1; - for (i=0; i<HASHSIZE; i++) + for (i=0; i<CSHASHSIZE; i++) { + if (firsttime) hashStyles[i].name=NULL; - hashStyles[i].color=-1; - hashStyles[i].cattr=-1; - hashStyles[i].mono=-1; + else + FREE(hashStyles[i].name); + hashStyles[i].color=-1; + hashStyles[i].cattr=-1; + hashStyles[i].mono=-1; + } + if (firsttime) { + firsttime = 0; + atexit(free_colorstylestuff); } s_high=hash_code("high"); s_alink=hash_code("alink"); diff --git a/src/LYUtils.c b/src/LYUtils.c index 58005020..117d44b3 100644 --- a/src/LYUtils.c +++ b/src/LYUtils.c @@ -1987,6 +1987,30 @@ PUBLIC void noviceline ARGS1( return; } +PRIVATE int fake_zap = 0; + +PUBLIC void LYFakeZap ARGS1( + BOOL, set) +{ + if (set && fake_zap < 1) { + if (TRACE) { + fprintf(stderr, "\r *** Set simulated 'Z'"); + if (fake_zap) + fprintf(stderr, ", %d pending", fake_zap); + fprintf(stderr, " ***\n"); + } + fake_zap++; + } else if (!set && fake_zap) { + if (TRACE) { + fprintf(stderr, "\r *** Unset simulated 'Z'"); + fprintf(stderr, 
", %d pending", fake_zap); + fprintf(stderr, " ***\n"); + } + fake_zap = 0; + } + +} + PUBLIC int HTCheckForInterrupt NOARGS { #ifndef VMS /* UNIX stuff: */ @@ -1997,6 +2021,17 @@ PUBLIC int HTCheckForInterrupt NOARGS fd_set readfds; #endif /* !USE_SLANG */ + if (fake_zap > 0) { + fake_zap--; + if (TRACE) { + fprintf(stderr, "\r *** Got simulated 'Z' ***\n"); + fflush(stderr); + if (!LYTraceLogFP) + sleep(AlertSecs); + } + return((int)TRUE); + } + /** Curses or slang setup was not invoked **/ if (dump_output_immediately) return((int)FALSE); @@ -2053,6 +2088,17 @@ PUBLIC int HTCheckForInterrupt NOARGS extern BOOLEAN HadVMSInterrupt; extern int typeahead(); + if (fake_zap > 0) { + fake_zap--; + if (TRACE) { + fprintf(stderr, "\r *** Got simulated 'Z' ***\n"); + fflush(stderr); + if (!LYTraceLogFP) + sleep(AlertSecs); + } + return((int)TRUE); + } + /** Curses or slang setup was not invoked **/ if (dump_output_immediately) return((int)FALSE); @@ -4121,8 +4167,8 @@ have_VMS_URL: } } else { /* - * Normal absolute path. Simplify, trim any - * residual relative elements, and append it. - FM + * Normal absolute path in URL syntax. Simplify, trim + * any residual relative elements, and append it. 
- FM */ StrAllocCopy(temp, old_string); LYTrimRelFromAbsPath(temp); @@ -4137,7 +4183,8 @@ have_VMS_URL: FREE(old_string); if (TRACE) { /* Pause so we can read the messages before invoking curses */ - sleep(AlertSecs); + if (!LYTraceLogFP) + sleep(AlertSecs); } } diff --git a/src/LYUtils.h b/src/LYUtils.h index 3896624e..7af31349 100644 --- a/src/LYUtils.h +++ b/src/LYUtils.h @@ -16,6 +16,7 @@ extern char * strip_trailing_slash PARAMS((char * dirname)); extern void statusline PARAMS((CONST char *text)); extern void toggle_novice_line NOPARAMS; extern void noviceline PARAMS((int more_flag)); +extern void LYFakeZap PARAMS((BOOL set)); extern int HTCheckForInterrupt NOPARAMS; extern BOOLEAN LYisLocalFile PARAMS((char *filename)); extern BOOLEAN LYisLocalHost PARAMS((char *filename)); diff --git a/src/LYexit.c b/src/LYexit.c index 027b1549..625c05e3 100644 --- a/src/LYexit.c +++ b/src/LYexit.c @@ -137,6 +137,8 @@ void (*function)(); * Check for available space. */ if (topOfStack == ATEXITSIZE) { + if (TRACE) + fprintf(stderr, "(LY)atexit: Too many functions, ignoring one!\n"); return(-1); } diff --git a/src/UCAux.c b/src/UCAux.c index 3ee93929..d63e04e0 100644 --- a/src/UCAux.c +++ b/src/UCAux.c @@ -9,62 +9,68 @@ extern HTCJKlang HTCJK; extern LYUCcharset LYCharSet_UC[]; -PUBLIC BOOL UCCanUniTranslateFrom ARGS1( +PUBLIC UCTQ_t UCCanUniTranslateFrom ARGS1( int, from) { if (from < 0) - return NO; + return TQ_NO; if (LYCharSet_UC[from].enc == UCT_ENC_7BIT || LYCharSet_UC[from].enc == UCT_ENC_UTF8) - return YES; + return TQ_EXCELLENT; if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1)) - return YES; - return (LYCharSet_UC[from].UChndl >= 0); + return TQ_EXCELLENT; + return ((LYCharSet_UC[from].UChndl >= 0) ? TQ_GOOD : TQ_NO); } -PUBLIC BOOL UCCanTranslateUniTo ARGS1( +PUBLIC UCTQ_t UCCanTranslateUniTo ARGS1( int, to) { if (to < 0) - return NO; - return YES; /* well at least some characters... 
*/ + return TQ_NO; + if (LYCharSet_UC[to].enc == UCT_ENC_7BIT) + return TQ_POOR; + if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) + return TQ_EXCELLENT; + if (LYCharSet_UC[to].enc == UCT_ENC_CJK) + return TQ_POOR; + if (LYCharSet_UC[to].UChndl >= 0) + return TQ_GOOD; + return TQ_GOOD; /* at least some characters, we don't know more */ } -PUBLIC BOOL UCCanTranslateFromTo ARGS2( +PUBLIC UCTQ_t UCCanTranslateFromTo ARGS2( int, from, int, to) { if (from == to) - return YES; + return TQ_EXCELLENT; if (from < 0 || to < 0) - return NO; + return TQ_NO; if (from == 0) return UCCanTranslateUniTo(to); - if (to == 0) + if (to == 0 || LYCharSet_UC[to].enc == UCT_ENC_UTF8) return UCCanUniTranslateFrom(from); - if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) { - return (LYCharSet_UC[from].UChndl >= 0); - } { CONST char * fromname = LYCharSet_UC[from].MIMEname; CONST char * toname = LYCharSet_UC[to].MIMEname; + UCTQ_t tqmin = TQ_NO, tqmax = TQ_GOOD; if (!strcmp(fromname, "x-transparent") || !strcmp(toname, "x-transparent")) { - return YES; + return TQ_GOOD; } if (LYCharSet_UC[from].enc == UCT_ENC_CJK) { if (HTCJK == NOCJK) /* use that global flag, for now */ - return NO; + return TQ_NO; if (HTCJK == JAPANESE && (!strcmp(fromname, "euc-jp") || !strncmp(fromname, "iso-2022-jp",11) || !strcmp(fromname, "shift_jis"))) - return YES; - return NO; /* if not handled by (from == to) above */ + return TQ_GOOD; + return TQ_NO; /* if not handled by (from == to) above */ } if (!strcmp(fromname, "koi8-r")) { /* * Will try to use stripping of high bit... */ - return YES; + tqmin = TQ_POOR; } if (!strcmp(fromname, "koi8-r") || /* from cyrillic */ @@ -76,10 +82,10 @@ PUBLIC BOOL UCCanTranslateFromTo ARGS2( strcmp(toname, "koi8-r") && strcmp(toname, "cp866") && strcmp(toname, "windows-1251")) - return NO; + tqmax = TQ_POOR; } + return ((LYCharSet_UC[from].UChndl >= 0) ? 
tqmax : tqmin); } - return (LYCharSet_UC[from].UChndl >= 0); } /* Returns YES if no tranlation necessary (because charsets diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index 0f6b6030..dcbb2f01 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -414,7 +414,7 @@ U+03e1:p3 U+03f4:'% U+03f5:j3 # Cyrillic capital letters -0x6e U+0401 +0x65 U+0401 U+0402:D% U+0403:G% U+0404:IE @@ -1301,10 +1301,14 @@ U+208b:_- U+208c:_= U+208d:( U+208e:) +# Old euro currency sign glyph: +U+20A0:CE U+20a3:Ff U+20a4:Li U+20a7:Pt U+20a9:W= +# New euro currency sign glyph ? +# U+20AC:EUR U+2103:oC U+2105:c/o U+2109:oF @@ -1314,6 +1318,7 @@ U+211e:Rx U+2120:(SM) U+2122:(TM) U+2126:Ohm +U+212E:est. 0x4b U+212A # Kelvin sign - K U+212b:Ang. U+2153: 1/3 @@ -2067,6 +2072,7 @@ U+fef9:lh- U+fefa:lh. U+fefb:la- U+fefc:la. +# Symbols for C0 and C1 control characters, in case they get through... U+0000:NU U+0001:SH U+0002:SX @@ -2132,6 +2138,7 @@ U+009c:ST U+009d:OC U+009e:PM U+009f:AC +# Unassigned stuff in private zone (?) U+e000:"3 U+e001:"1 U+e002:"! diff --git a/src/chrtrans/makefile.dos b/src/chrtrans/makefile.dos new file mode 100644 index 00000000..6a99e87f --- /dev/null +++ b/src/chrtrans/makefile.dos @@ -0,0 +1,92 @@ +# +# Makefile for the makeuctb and unicode tables +# for use with DJGPP. +# +# Type make to build makeuctb and all character translation maps. +# Type make fontmap to build makeuctb and translation map iso8859-1. +# Type make makeuctb.exe to build makeuctb only. +# Type make clean to remove makeuctb and character translation maps. +# Type make distclean to remove makeuctb, character translation maps +# and .bak files. 
+# +CFLAGS = $(MCFLAGS) + +CC = gcc +MCFLAGS = -O3 -DEXP_CHARTRANS -DDOSPATH -DNO_TTYTYP \ +-I../../WWW/library/implement -I../../djgpp/tcplib/include \ +-I../../djgpp/tcplib/include/tcp + +.SUFFIXES: .tbl +# +# This file contains the font map for the default (hardware) font +# + +FONTMAP_INC = iso01_un.h + +CHRTR= + +TABLES= $(CHRTR)iso01_un.h \ + $(CHRTR)iso02_un.h \ + $(CHRTR)def7_uni.h \ + $(CHRTR)iso03_un.h \ + $(CHRTR)iso04_un.h \ + $(CHRTR)iso05_un.h \ + $(CHRTR)iso06_un.h \ + $(CHRTR)iso07_un.h \ + $(CHRTR)iso08_un.h \ + $(CHRTR)iso09_un.h \ + $(CHRTR)iso10_un.h \ + $(CHRTR)koi8r_un.h \ + $(CHRTR)cp437_un.h \ + $(CHRTR)cp850_un.h \ + $(CHRTR)cp852_un.h \ + $(CHRTR)cp866_un.h \ + $(CHRTR)cp1250_u.h \ + $(CHRTR)cp1251_u.h \ + $(CHRTR)cp1252_u.h \ + $(CHRTR)viscii_u.h \ + $(CHRTR)utf8_uni.h \ + $(CHRTR)rfc_suni.h \ + $(CHRTR)mnemonic.h \ + $(CHRTR)mnem_sun.h + +default: $(TABLES) + +fontmap: $(FONTMAP_INC) + +makeuctb.exe: makeuctb.c UCkd.h + $(CC) $(CFLAGS) -o makeuctb.exe makeuctb.c + strip makeuctb.exe + +.tbl.h: + ./makeuctb $*.tbl > $@ + +iso01_un.h: iso01_un.tbl makeuctb.exe +iso02_un.h: iso02_un.tbl makeuctb.exe +def7_uni.h: def7_uni.tbl makeuctb.exe +iso03_un.h: iso03_un.tbl makeuctb.exe +iso04_un.h: iso04_un.tbl makeuctb.exe +iso05_un.h: iso05_un.tbl makeuctb.exe +iso06_un.h: iso06_un.tbl makeuctb.exe +iso07_un.h: iso07_un.tbl makeuctb.exe +iso08_un.h: iso08_un.tbl makeuctb.exe +iso09_un.h: iso09_un.tbl makeuctb.exe +iso10_un.h: iso10_un.tbl makeuctb.exe +koi8r_un.h: koi8r_un.tbl makeuctb.exe +cp437_un.h: cp437_un.tbl makeuctb.exe +cp850_un.h: cp850_un.tbl makeuctb.exe +cp852_un.h: cp852_un.tbl makeuctb.exe +cp1250_u.h: cp1250_u.tbl makeuctb.exe +cp1251_u.h: cp1251_u.tbl makeuctb.exe +cp1252_u.h: cp1252_u.tbl makeuctb.exe +utf8_uni.h: utf8_uni.tbl makeuctb.exe +mnemonic.h: mnemonic.tbl makeuctb.exe +mnem_sun.h: mnem_sun.tbl makeuctb.exe +rfc_suni.h: rfc_suni.tbl makeuctb.exe + +clean: + rm -f makeuctb.exe makeuctb *.o *un.h *u.h *c.h *i.h + 
+distclean: clean + -rm -f *.bak + |