diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-06 04:08:00 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-10-06 04:08:00 -0400 |
commit | 1d80538b4b84eadd223c7b61839b950389c2d49d (patch) | |
tree | a46f327e82edb06d8d789b60c3395f873476e040 /src | |
parent | 443226a5ffcf805f6ab3ccbcc2a6b4802793b07d (diff) | |
download | lynx-snapshots-1d80538b4b84eadd223c7b61839b950389c2d49d.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-76
Diffstat (limited to 'src')
-rw-r--r-- | src/GridText.c | 887 | ||||
-rw-r--r-- | src/GridText.h | 16 | ||||
-rw-r--r-- | src/HTFWriter.c | 96 | ||||
-rw-r--r-- | src/HTForms.h | 6 | ||||
-rw-r--r-- | src/HTInit.c | 12 | ||||
-rw-r--r-- | src/HTML.c | 445 | ||||
-rw-r--r-- | src/HTML.h | 3 | ||||
-rw-r--r-- | src/LYCharSets.c | 11 | ||||
-rw-r--r-- | src/LYCharSets.h | 2 | ||||
-rw-r--r-- | src/LYCharUtils.c | 1818 | ||||
-rw-r--r-- | src/LYCharUtils.h | 24 | ||||
-rw-r--r-- | src/LYForms.c | 4 | ||||
-rw-r--r-- | src/LYGetFile.c | 13 | ||||
-rw-r--r-- | src/LYGlobalDefs.h | 1 | ||||
-rw-r--r-- | src/LYKeymap.c | 6 | ||||
-rw-r--r-- | src/LYLocal.c | 2 | ||||
-rw-r--r-- | src/LYMail.h | 16 | ||||
-rw-r--r-- | src/LYMain.c | 25 | ||||
-rw-r--r-- | src/LYMainLoop.c | 6 | ||||
-rw-r--r-- | src/LYOptions.c | 212 | ||||
-rw-r--r-- | src/LYOptions.h | 1 | ||||
-rw-r--r-- | src/LYPrint.c | 14 | ||||
-rw-r--r-- | src/LYStrings.h | 51 | ||||
-rw-r--r-- | src/LYUtils.c | 25 | ||||
-rw-r--r-- | src/LYrcFile.c | 10 | ||||
-rw-r--r-- | src/UCAux.c | 54 | ||||
-rw-r--r-- | src/UCdomap.c | 167 | ||||
-rw-r--r-- | src/UCdomap.h | 28 | ||||
-rw-r--r-- | src/chrtrans/def7_uni.tbl | 5 | ||||
-rw-r--r-- | src/chrtrans/makeuctb.c | 41 |
30 files changed, 3395 insertions, 606 deletions
diff --git a/src/GridText.c b/src/GridText.c index 77d23ac1..fe0d5c73 100644 --- a/src/GridText.c +++ b/src/GridText.c @@ -32,9 +32,11 @@ #include "LYMail.h" #include "LYList.h" #include "LYCharSets.h" +#include "LYCharUtils.h" /* LYUCTranslateBack... */ #ifdef EXP_CHARTRANS #include "UCDefs.h" #include "UCAux.h" +#include "UCMap.h" #ifdef EXP_CHARTRANS_AUTOSWITCH #include "UCAuto.h" #endif /* EXP_CHARTRANS_AUTOSWITCH */ @@ -95,6 +97,7 @@ PUBLIC char * HTAppVersion = LYNX_VERSION; /* Application version */ PUBLIC int HTFormNumber = 0; PUBLIC int HTFormFields = 0; PUBLIC char * HTCurSelectGroup = NULL; /* Form select group name */ +PRIVATE int HTCurSelectGroupCharset = -1; /* ... and name's charset */ PUBLIC int HTCurSelectGroupType = F_RADIO_TYPE; /* Group type */ PUBLIC char * HTCurSelectGroupSize = NULL; /* Length of select */ PRIVATE char * HTCurSelectedOptionValue = NULL; /* Select choice */ @@ -2282,8 +2285,14 @@ check_IgnoreExcess: */ new_line(text); } + } else if ((int)line->size >= (int)(MAX_LINE-1)) { + /* + * Never overrun memory if LYcols is set to a large value - kw + */ + new_line(text); } + /* * Insert normal characters. */ @@ -5512,13 +5521,15 @@ PRIVATE int HTFormMethod; PRIVATE char * HTFormAction = NULL; PRIVATE char * HTFormEnctype = NULL; PRIVATE char * HTFormTitle = NULL; +PRIVATE char * HTFormAcceptCharset = NULL; /* !!! NEED TO DO SOMETHING WITH IT */ PRIVATE BOOLEAN HTFormDisabled = FALSE; -PUBLIC void HText_beginForm ARGS4( +PUBLIC void HText_beginForm ARGS5( char *, action, char *, method, char *, enctype, - char *, title) + char *, title, + CONST char *, accept_cs) { HTFormMethod = URL_GET_METHOD; HTFormNumber++; @@ -5545,6 +5556,12 @@ PUBLIC void HText_beginForm ARGS4( HTFormMethod = URL_POST_METHOD; /* + * Check the ACCEPT_CHARSET. - kw + */ + if (accept_cs != NULL) + StrAllocCopy(HTFormAcceptCharset, accept_cs); + + /* * Check the ENCTYPE. 
- FM */ if ((enctype != NULL) && *enctype) { @@ -5566,12 +5583,14 @@ PUBLIC void HText_beginForm ARGS4( if (TRACE) fprintf(stderr, - "BeginForm: action:%s Method:%d%s%s%s%s\n", + "BeginForm: action:%s Method:%d%s%s%s%s%s%s\n", HTFormAction, HTFormMethod, (HTFormTitle ? " Title:" : ""), (HTFormTitle ? HTFormTitle : ""), (HTFormEnctype ? " Enctype:" : ""), - (HTFormEnctype ? HTFormEnctype : "")); + (HTFormEnctype ? HTFormEnctype : ""), + (HTFormAcceptCharset ? " Accept-charset:" : ""), + (HTFormAcceptCharset ? HTFormAcceptCharset : "")); } PUBLIC void HText_endForm ARGS1( @@ -5617,12 +5636,14 @@ PUBLIC void HText_endForm ARGS1( FREE(HTFormAction); FREE(HTFormEnctype); FREE(HTFormTitle); + FREE(HTFormAcceptCharset); HTFormFields = 0; HTFormDisabled = FALSE; } -PUBLIC void HText_beginSelect ARGS3( +PUBLIC void HText_beginSelect ARGS4( char *, name, + int, name_cs, BOOLEAN, multiple, char *, size) { @@ -5630,6 +5651,7 @@ PUBLIC void HText_beginSelect ARGS3( * Save the group name. */ StrAllocCopy(HTCurSelectGroup, name); + HTCurSelectGroupCharset = name_cs; /* * If multiple then all options are actually checkboxes. @@ -5647,13 +5669,20 @@ PUBLIC void HText_beginSelect ARGS3( */ StrAllocCopy(HTCurSelectGroupSize, size); - if (TRACE) + if (TRACE) { fprintf(stderr,"HText_beginSelect: name=%s type=%d size=%s\n", ((HTCurSelectGroup == NULL) ? "<NULL>" : HTCurSelectGroup), HTCurSelectGroupType, ((HTCurSelectGroupSize == NULL) ? "<NULL>" : HTCurSelectGroupSize)); +#ifdef EXP_CHARTRANS + fprintf(stderr,"HText_beginSelect: name_cs=%d \"%s\"\n", + HTCurSelectGroupCharset, + (HTCurSelectGroupCharset >= 0 ? + LYCharSet_UC[HTCurSelectGroupCharset].MIMEname : "<UNKNOWN>")); +#endif + } } /* @@ -5740,20 +5769,28 @@ PRIVATE char * HText_skipOptionNumPrefix ARGS1( ** tag so we have to do it now. Assume that the last anchor ** was the previous options tag. 
*/ -PUBLIC char * HText_setLastOptionValue ARGS5( +PUBLIC char * HText_setLastOptionValue ARGS7( HText *, text, char *, value, char*, submit_value, int, order, - BOOLEAN, checked) + BOOLEAN, checked, + int, val_cs, + int, submit_val_cs) { char *cp, *cp1; + char *ret_Value = NULL; unsigned char *tmp = NULL; int number = 0, i, j; if (!(text && text->last_anchor && - text->last_anchor->link_type == INPUT_ANCHOR)) + text->last_anchor->link_type == INPUT_ANCHOR)) { + if (TRACE) + fprintf(stderr, + "HText_setLastOptionValue: invalid call! value:%s!\n", + (value ? value : "<NULL>")); return NULL; + } if (TRACE) fprintf(stderr, @@ -5801,6 +5838,7 @@ PUBLIC char * HText_setLastOptionValue ARGS5( if (HTCurSelectGroupType == F_CHECKBOX_TYPE) { StrAllocCopy(text->last_anchor->input_field->value, cp); + text->last_anchor->input_field->value_cs = val_cs; /* * Put the text on the screen as well. */ @@ -5809,6 +5847,8 @@ PUBLIC char * HText_setLastOptionValue ARGS5( } else if (LYSelectPopups == FALSE) { StrAllocCopy(text->last_anchor->input_field->value, (submit_value ? submit_value : cp)); + text->last_anchor->input_field->value_cs = (submit_value ? + submit_val_cs : val_cs); /* * Put the text on the screen as well. */ @@ -5868,8 +5908,10 @@ PUBLIC char * HText_setLastOptionValue ARGS5( (tmp = (unsigned char *)calloc(1, strlen(cp)+1))) { if (kanji_code == EUC) { TO_EUC((unsigned char *)cp, tmp); + val_cs = current_char_set; } else if (kanji_code == SJIS) { TO_SJIS((unsigned char *)cp, tmp); + val_cs = current_char_set; } else { for (i = 0, j = 0; cp[i]; i++) { if (cp[i] != '\033') { @@ -5886,6 +5928,7 @@ PUBLIC char * HText_setLastOptionValue ARGS5( StrAllocCopy(new_ptr->cp_submit_value, (submit_value ? submit_value : HText_skipOptionNumPrefix(new_ptr->name))); + new_ptr->value_cs = (submit_value ? 
submit_val_cs : val_cs); if (first_option) { StrAllocCopy(HTCurSelectedOptionValue, new_ptr->name); @@ -5898,6 +5941,8 @@ PUBLIC char * HText_setLastOptionValue ARGS5( text->last_anchor->input_field->select_list->cp_submit_value; text->last_anchor->input_field->orig_submit_value = text->last_anchor->input_field->select_list->cp_submit_value; + text->last_anchor->input_field->value_cs = + new_ptr->value_cs; } else { int newlen = strlen(new_ptr->name); int curlen = strlen(HTCurSelectedOptionValue); @@ -5923,6 +5968,8 @@ PUBLIC char * HText_setLastOptionValue ARGS5( new_ptr->cp_submit_value; text->last_anchor->input_field->orig_submit_value = new_ptr->cp_submit_value; + text->last_anchor->input_field->value_cs = + new_ptr->value_cs; StrAllocCopy(HTCurSelectedOptionValue, new_ptr->name); if (newlen > curlen) StrAllocCat(HTCurSelectedOptionValue, @@ -5938,15 +5985,34 @@ PUBLIC char * HText_setLastOptionValue ARGS5( */ text->last_anchor->input_field->size = strlen(HTCurSelectedOptionValue); - return(HTCurSelectedOptionValue); - } else - return(NULL); + ret_Value = HTCurSelectedOptionValue; + } } - if (TRACE) - fprintf(stderr,"HText_setLastOptionValue: value=%s\n", value); - - return(NULL); + if (TRACE) { + fprintf(stderr,"HText_setLastOptionValue:%s value=%s", + (order == LAST_ORDER) ? " LAST_ORDER" : "", + value); +#ifdef EXP_CHARTRANS + fprintf(stderr," val_cs=%d \"%s\"", + val_cs, + (val_cs >= 0 ? + LYCharSet_UC[val_cs].MIMEname : "<UNKNOWN>")); + if (submit_value) { + fprintf(stderr, " (submit_val_cs %d \"%s\") submit_value%s=%s\n", + submit_val_cs, + (submit_val_cs >= 0 ? + LYCharSet_UC[submit_val_cs].MIMEname : "<UNKNOWN>"), + (HTCurSelectGroupType == F_CHECKBOX_TYPE) ? 
+ "(ignored)" : "", + submit_value); + } + else { + fprintf(stderr,"\n"); + } +#endif + } + return(ret_Value); } /* @@ -5989,6 +6055,7 @@ PUBLIC int HText_beginInput ARGS3( HTCurSelectGroupType == F_RADIO_TYPE && LYSelectPopups == FALSE) { I->type = "RADIO"; I->name = HTCurSelectGroup; + I->name_cs = HTCurSelectGroupCharset; } if (I->name && I->type && !strcasecomp(I->type, "radio")) { if (!text->last_anchor) { @@ -6057,8 +6124,10 @@ PUBLIC int HText_beginInput ARGS3( if ((tmp = (unsigned char *)calloc(1, (strlen(IValue) + 1)))) { if (kanji_code == EUC) { TO_EUC((unsigned char *)IValue, tmp); + I->value_cs = current_char_set; } else if (kanji_code == SJIS) { TO_SJIS((unsigned char *)IValue, tmp); + I->value_cs = current_char_set; } else { for (i = 0, j = 0; IValue[i]; i++) { if (IValue[i] != '\033') { @@ -6083,6 +6152,7 @@ PUBLIC int HText_beginInput ARGS3( else I->type = "CHECKBOX"; I->name = HTCurSelectGroup; + I->name_cs = HTCurSelectGroupCharset; /* * The option's size parameter actually gives the length and not @@ -6174,6 +6244,7 @@ PUBLIC int HText_beginInput ARGS3( */ if (I->name != NULL) { StrAllocCopy(f->name,I->name); + f->name_cs = I->name_cs; } else { if (f->type == F_RESET_TYPE || f->type == F_SUBMIT_TYPE || @@ -6216,6 +6287,7 @@ PUBLIC int HText_beginInput ARGS3( } else { StrAllocCopy(f->value, IValue); } + f->value_cs = I->value_cs; } else if (f->type != F_OPTION_LIST_TYPE) { StrAllocCopy(f->value, ""); } @@ -6371,12 +6443,29 @@ PUBLIC int HText_beginInput ARGS3( */ text->last_anchor = a; - if (TRACE) + if (TRACE) { fprintf(stderr,"Input link: name=%s\nvalue=%s\nsize=%d\n", f->name, ((f->value != NULL) ? f->value : ""), f->size); - +#ifdef EXP_CHARTRANS + fprintf(stderr,"Input link: name_cs=%d \"%s\" (from %d \"%s\")\n", + f->name_cs, + (f->name_cs >= 0 ? + LYCharSet_UC[f->name_cs].MIMEname : "<UNKNOWN>"), + I->name_cs, + (I->name_cs >= 0 ? 
+ LYCharSet_UC[I->name_cs].MIMEname : "<UNKNOWN>")); + fprintf(stderr," value_cs=%d \"%s\" (from %d \"%s\")\n", + f->value_cs, + (f->value_cs >= 0 ? + LYCharSet_UC[f->value_cs].MIMEname : "<UNKNOWN>"), + I->value_cs, + (I->value_cs >= 0 ? + LYCharSet_UC[I->value_cs].MIMEname : "<UNKNOWN>")); +#endif + } + /* * Return the SIZE of the input field. */ @@ -6393,16 +6482,30 @@ PUBLIC void HText_SubmitForm ARGS4( TextAnchor *anchor_ptr; int form_number = submit_item->number; FormInfo *form_ptr; - int len, i; + int len; char *query = NULL; char *escaped1 = NULL, *escaped2 = NULL; int first_one = 1; char *last_textarea_name = NULL; + int textarea_lineno = 0; char *previous_blanks = NULL; BOOLEAN PlainText = FALSE; BOOLEAN SemiColon = FALSE; char *Boundary = NULL; char *MultipartContentType = NULL; + int target_cs = -1; + CONST char *target_csname = NULL; + char *name_used; +#ifdef EXP_CHARTRANS + BOOL form_has_8bit = NO, form_has_special = NO; + BOOL field_has_8bit = NO, field_has_special = NO; + BOOL name_has_8bit = NO, name_has_special = NO; + BOOL success; + BOOL had_chartrans_warning = NO; + char *val_used; + char *copied_val_used = NULL; + char *copied_name_used = NULL; +#endif if (!HTMainText) return; @@ -6455,15 +6558,44 @@ PUBLIC void HText_SubmitForm ARGS4( Boundary = "xnyLAaB03X"; } +#ifdef EXP_CHARTRANS + if (HTMainText->node_anchor->charset && + *HTMainText->node_anchor->charset) { + target_cs = UCGetLYhndl_byMIME(HTMainText->node_anchor->charset); + if (target_cs >= 0) { + target_csname = HTMainText->node_anchor->charset; + } else { + target_cs = UCLYhndl_for_unspec; + if (target_cs >= 0) + target_csname = LYCharSet_UC[target_cs].MIMEname; + } + } + if (target_cs < 0) { + target_cs = UCLYhndl_for_unspec; + } +#else /* EXP_CHARTRANS */ + target_cs = LYRawMode ? current_char_set : 0; + target_csname = HTMainText->node_anchor->charset; +#endif /* EXP_CHARTRANS */ + /* * Go through list of anchors and get size first. */ + /* + * also get a "max." 
charset parameter - kw + */ anchor_ptr = HTMainText->first_anchor; while (anchor_ptr) { if (anchor_ptr->link_type == INPUT_ANCHOR) { if (anchor_ptr->input_field->number == form_number) { + char *p; + char * val; form_ptr = anchor_ptr->input_field; + val = form_ptr->cp_submit_value != NULL ? + form_ptr->cp_submit_value : form_ptr->value; + field_has_8bit = NO; + field_has_special = NO; len += (strlen(form_ptr->name) + (Boundary ? 100 : 10)); /* @@ -6474,7 +6606,42 @@ PUBLIC void HText_SubmitForm ARGS4( } else { len += (strlen(form_ptr->value) + 10); } - len += 32; /* plus and ampersand + safty net */ + len += 32; /* plus and ampersand + safety net */ + +#ifdef EXP_CHARTRANS + for (p = val; p && *p && !field_has_8bit; p++) + if ((*p == HT_NON_BREAK_SPACE) || + (*p == HT_EM_SPACE) || + (*p == LY_SOFT_HYPHEN)) { + field_has_special = YES; + } else if ((*p & 0x80) != 0) { + field_has_8bit = YES; + } + for (p = form_ptr->name; p && *p && !field_has_8bit; p++) + field_has_8bit = ((*p & 0x80) != 0); + if (field_has_8bit) + form_has_8bit = YES; + if (field_has_special) + form_has_special = YES; + if (!field_has_8bit && !field_has_special) { + /* already ok */ + } else if (target_cs < 0) { + /* already confused */ + } else if (!field_has_8bit && + (LYCharSet_UC[target_cs].enc == UCT_ENC_8859 || + (LYCharSet_UC[target_cs].like8859 & UCT_R_8859SPECL))) { + /* those specials will be trivial */ + } else if (UCNeedNotTranslate(form_ptr->value_cs, target_cs)) { + /* already ok */ + } else if (UCCanTranslateFromTo(form_ptr->value_cs, target_cs)) { + /* also ok */ + } else if (UCCanTranslateFromTo(target_cs, form_ptr->value_cs)) { + target_cs = form_ptr->value_cs; /* try this */ + target_csname = NULL; /* will be set after loop */ + } else { + target_cs = -1; /* don't know what to do */ + } +#endif /* EXP_CHARTRANS */ } else if (anchor_ptr->input_field->number > form_number) { break; @@ -6487,6 +6654,17 @@ PUBLIC void HText_SubmitForm ARGS4( anchor_ptr = anchor_ptr->next; } +#ifdef 
EXP_CHARTRANS + if (target_csname == NULL && target_cs >= 0) { + if (form_has_8bit) { + target_csname = LYCharSet_UC[target_cs].MIMEname; + } else if (form_has_special) { + target_csname = LYCharSet_UC[target_cs].MIMEname; + } else { + target_csname = "us-ascii"; + } + } +#endif /* * Get query ready. */ @@ -6529,6 +6707,8 @@ PUBLIC void HText_SubmitForm ARGS4( "application/x-www-form-urlencoded"); } + +#ifndef EXP_CHARTRANS /* * Append the exended charset info if known, and it is not * ISO-8859-1 or US-ASCII. We'll assume the user has the @@ -6563,9 +6743,90 @@ PUBLIC void HText_SubmitForm ARGS4( sprintf(MultipartContentType, "\r\nContent-Type: text/plain; charset=%s", HTMainText->node_anchor->charset); + ct_charset_startpos = strchr(MultipartContentType, ';'); + } + } +#else /* EXP_CHARTRANS */ + if (target_cs >= 0 && (form_has_8bit || form_has_special)) { + if (Boundary == NULL) { + if (target_csname && + (strcasecomp(target_csname, "iso-8859-1") || + (HTMainText->node_anchor->charset != NULL && + strcasecomp(HTMainText->node_anchor->charset, + "iso-8859-1")))) { + StrAllocCat(doc->post_content_type, "; charset="); + StrAllocCat(doc->post_content_type, target_csname); + } + } + } +#endif /* EXP_CHARTRANS */ + } + + +#if 0 /* 000000 */ + { + if (HTMainText->node_anchor->charset != NULL && + *HTMainText->node_anchor->charset != '\0') { +#ifdef EXP_CHARTRANS + /* + * For now, don't send charset if we may have translated. + * Although this is when it would be most needed (unless + * we translate back to the server's charset, which is + * currently not done). But currently there aren't many + * servers or scripts which understand it anyway, so at + * least we try not to lie. 
- kw + */ +#if 0 + if (!UCNeedNotTranslate(current_char_set, + UCGetLYhndl_byMIME( + HTMainText->node_anchor->charset))); +#endif + if (target_cs < 0) { + /* Do nothing */ + } else +#endif + if (Boundary == NULL && +#ifdef EXP_CHARTRANS + form_has_8bit && + target_cs >= 0 && +#endif + (strcasecomp(HTMainText->node_anchor->charset, "iso-8859-1") || + strcasecomp(target_csname, "iso-8859-1"))) { + StrAllocCat(doc->post_content_type, "; charset="); + StrAllocCat(doc->post_content_type, + HTMainText->node_anchor->charset); + } else + if (Boundary == NULL && +#ifdef EXP_CHARTRANS + target_cs >= 0 && +#endif + strcasecomp(HTMainText->node_anchor->charset, "iso-8859-1") && + strcasecomp(HTMainText->node_anchor->charset, "us-ascii")) { + StrAllocCat(doc->post_content_type, "; charset="); + StrAllocCat(doc->post_content_type, + HTMainText->node_anchor->charset); + } else if (Boundary != NULL) { + MultipartContentType = (char *)calloc(1, + (40 + strlen(HTMainText->node_anchor->charset))); + if (query == NULL) + outofmem(__FILE__, "HText_SubmitForm"); + sprintf(MultipartContentType, + "\r\nContent-Type: text/plain; charset=%s", + HTMainText->node_anchor->charset); + ct_charset_startpos = strchr(MultipartContentType, ';'); } } +#ifdef EXP_CHARTRANS + } else if (Boundary == NULL && + form_has_8bit && + target_cs >= 0 && + strcasecomp(target_csname, "iso-8859-1")) { + StrAllocCat(doc->post_content_type, "; charset="); + StrAllocCat(doc->post_content_type, + HTMainText->node_anchor->charset); +#endif /* EXP_CHARTRANS */ } +#endif /* 000000 */ /* * Reset anchor->ptr. 
@@ -6577,17 +6838,263 @@ PUBLIC void HText_SubmitForm ARGS4( while (anchor_ptr) { if (anchor_ptr->link_type == INPUT_ANCHOR) { if (anchor_ptr->input_field->number == form_number) { - + char *p; + int out_cs; + CONST char * out_csname; form_ptr = anchor_ptr->input_field; - switch(form_ptr->type) { + if (form_ptr->type != F_TEXTAREA_TYPE) + textarea_lineno = 0; + switch(form_ptr->type) { case F_RESET_TYPE: break; - case F_SUBMIT_TYPE: case F_TEXT_SUBMIT_TYPE: case F_IMAGE_SUBMIT_TYPE: + if (!(form_ptr->name && *form_ptr->name != '\0' && + !strcmp(form_ptr->name, link_name))) { + if (TRACE) { + fprintf(stderr, + "SubmitForm: skipping submit field with "); + fprintf(stderr, + "name \"%s\" for link_name \"%s\", %s.", + form_ptr->name ? form_ptr->name : "???", + link_name ? link_name : "???", + (form_ptr->name && *form_ptr->name) ? + "not current link" : "no field name"); + } + break; + } + if (!(form_ptr->type == F_TEXT_SUBMIT_TYPE || + (form_ptr->value && *form_ptr->value != '\0' && + !strcmp(form_ptr->value, link_value)))) { + if (TRACE) { + fprintf(stderr, + "SubmitForm: skipping submit field with "); + fprintf(stderr, + "name \"%s\" for link_name \"%s\", %s!", + form_ptr->name ? form_ptr->name : "???", + link_name ? link_name : "???", + "values are different"); + } + break; + } + /* fall through */ + case F_RADIO_TYPE: + case F_CHECKBOX_TYPE: + case F_TEXTAREA_TYPE: + case F_PASSWORD_TYPE: + case F_TEXT_TYPE: + case F_OPTION_LIST_TYPE: + case F_HIDDEN_TYPE: +#ifdef EXP_CHARTRANS + /* + * Charset-translate value now, because we need + * to know the charset parameter for multipart + * bodyparts. 
- kw + */ + if (form_ptr->cp_submit_value != NULL) { + val_used = form_ptr->cp_submit_value; + } else { + val_used = form_ptr->value; + } + + field_has_8bit = NO; + field_has_special = NO; + for (p = val_used; p && *p && !field_has_8bit; p++) { + if ((*p == HT_NON_BREAK_SPACE) || + (*p == HT_EM_SPACE) || + (*p == LY_SOFT_HYPHEN)) { + field_has_special = YES; + } else if ((*p & 0x80) != 0) { + field_has_8bit = YES; + } + } + + if (field_has_8bit || field_has_special) { + /* We should translate back. */ + StrAllocCopy(copied_val_used, val_used); + success = LYUCTranslateBackFormData(&copied_val_used, + form_ptr->value_cs, + target_cs, PlainText); + if (TRACE) { + fprintf(stderr, + "SubmitForm: field \"%s\" %d %s -> %d %s %s\n", + form_ptr->name ? form_ptr->name : "", + form_ptr->value_cs, + form_ptr->value_cs >= 0 ? + LYCharSet_UC[form_ptr->value_cs].MIMEname : + "???", + target_cs, + target_csname ? target_csname : "???", + success ? "OK" : "FAILED"); + } + if (success) { + val_used = copied_val_used; + } + if (Boundary) { + if (!success) { + StrAllocCopy(MultipartContentType, ""); + target_csname = NULL; + } else { + if (!target_csname) + target_csname = LYCharSet_UC[target_cs].MIMEname; + StrAllocCopy(MultipartContentType, + "\r\nContent-Type: text/plain; charset="); + StrAllocCat(MultipartContentType, target_csname); + } + } + } else { /* We can use the value directly. */ + if (TRACE) { + fprintf(stderr, + "SubmitForm: field \"%s\" %d %s OK\n", + form_ptr->name ? form_ptr->name : "", + target_cs, + target_csname ? target_csname : "???"); + } + copied_val_used = NULL; + success = YES; + } + if (!success) { + if (!had_chartrans_warning) { + had_chartrans_warning = YES; + _user_message( + "Cannot convert form data to charset %s!", + target_csname ? 
target_csname : "UNKNOWN"); + sleep(AlertSecs); + } + out_cs = form_ptr->value_cs; + out_csname = LYCharSet_UC[out_cs].MIMEname; + } else { + out_cs = target_cs; + } + if (Boundary) { + if (!success && form_ptr->value_cs < 0) { + /* This is weird. */ + StrAllocCopy(MultipartContentType, + "\r\nContent-Type: text/plain; charset="); + StrAllocCat(MultipartContentType, "UNKNOWN-8BIT"); + } else if (!success) { + target_csname = LYCharSet_UC[form_ptr->value_cs].MIMEname; + StrAllocCopy(MultipartContentType, + "\r\nContent-Type: text/plain; charset="); + StrAllocCat(MultipartContentType, target_csname); + target_csname = NULL; + } else { + if (!target_csname) { + target_csname = LYCharSet_UC[target_cs].MIMEname; + } + StrAllocCopy(MultipartContentType, + "\r\nContent-Type: text/plain; charset="); + StrAllocCat(MultipartContentType, target_csname); + } + } + + /* + * Charset-translate name now, because we need + * to know the charset parameter for multipart + * bodyparts. - kw + */ + if (form_ptr->type == F_TEXTAREA_TYPE) { + textarea_lineno++; + if (textarea_lineno > 1 && + last_textarea_name && form_ptr->name && + !strcmp(last_textarea_name, form_ptr->name)) { + break; + } + } + name_used = (form_ptr->name ? + form_ptr->name : ""); + + name_has_8bit = NO; + name_has_special = NO; + for (p = name_used; p && *p && !name_has_8bit; p++) { + if ((*p == HT_NON_BREAK_SPACE) || + (*p == HT_EM_SPACE) || + (*p == LY_SOFT_HYPHEN)) { + name_has_special = YES; + } else if ((*p & 0x80) != 0) { + name_has_8bit = YES; + } + } + + if (name_has_8bit || name_has_special) { + /* We should translate back. */ + StrAllocCopy(copied_name_used, name_used); + success = LYUCTranslateBackFormData(&copied_name_used, + form_ptr->name_cs, + target_cs, PlainText); + if (TRACE) { + fprintf(stderr, + "SubmitForm: name \"%s\" %d %s -> %d %s %s\n", + form_ptr->name ? form_ptr->name : "", + form_ptr->name_cs, + form_ptr->name_cs >= 0 ? 
+ LYCharSet_UC[form_ptr->name_cs].MIMEname : + "???", + target_cs, + target_csname ? target_csname : "???", + success ? "OK" : "FAILED"); + } + if (success) { + name_used = copied_name_used; + } + if (Boundary) { + if (!success) { + StrAllocCopy(MultipartContentType, ""); + target_csname = NULL; + } else { + if (!target_csname) + target_csname = LYCharSet_UC[target_cs].MIMEname; + StrAllocCopy(MultipartContentType, + "\r\nContent-Type: text/plain; charset="); + StrAllocCat(MultipartContentType, target_csname); + } + } + } else { /* We can use the name directly. */ + if (TRACE) { + fprintf(stderr, + "SubmitForm: name \"%s\" %d %s OK\n", + form_ptr->name ? form_ptr->name : "", + target_cs, + target_csname ? target_csname : "???"); + } + success = YES; + if (Boundary) { + StrAllocCopy(copied_name_used, name_used); + } + } + if (!success) { + if (!had_chartrans_warning) { + had_chartrans_warning = YES; + _user_message( + "Cannot convert form name to charset %s!", + target_csname ? target_csname : "UNKNOWN"); + sleep(AlertSecs); + } + } + if (Boundary) { + HTMake822Word(&copied_name_used); + name_used = copied_name_used; + } + +#endif /* EXP_CHARTRANS */ + break; + default: + if (TRACE) + fprintf(stderr, "SubmitForm: What type is %d?\n", + form_ptr->type); + } + + switch(form_ptr->type) { + + case F_RESET_TYPE: + break; + + case F_SUBMIT_TYPE: + case F_TEXT_SUBMIT_TYPE: + case F_IMAGE_SUBMIT_TYPE: /* * If it has a non-zero length name (e.g., because * it's IMAGE_SUBMIT_TYPE to be handled homologously @@ -6598,58 +7105,59 @@ PUBLIC void HText_SubmitForm ARGS4( * name.y=0 pairs for IMAGE_SUBMIT_TYPE. 
- FM */ if ((form_ptr->name && *form_ptr->name != '\0' && - !strcmp(form_ptr->name, link_name)) && + !strcmp(form_ptr->name, link_name)) && (form_ptr->type == F_TEXT_SUBMIT_TYPE || - (form_ptr->value && *form_ptr->value != '\0' && - !strcmp(form_ptr->value, link_value)))) { - if (first_one) { + (form_ptr->value && *form_ptr->value != '\0' && + !strcmp(form_ptr->value, link_value)))) { + int cdisp_name_startpos; + if (first_one) { if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "--%s\r\n", Boundary); } - first_one=FALSE; - } else { + first_one=FALSE; + } else { if (PlainText) { - strcat(query, "\n"); + strcat(query, "\n"); } else if (SemiColon) { - strcat(query, ";"); + strcat(query, ";"); } else if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "\r\n--%s\r\n", Boundary); } else { - strcat(query, "&"); + strcat(query, "&"); } } if (PlainText) { - StrAllocCopy(escaped1, (form_ptr->name ? - form_ptr->name : "")); + StrAllocCopy(escaped1, name_used); } else if (Boundary) { StrAllocCopy(escaped1, - "Content-Disposition: form-data; name="); - StrAllocCat(escaped1, (form_ptr->name ? - form_ptr->name : "")); + "Content-Disposition: form-data; name="); + cdisp_name_startpos = strlen(escaped1); + StrAllocCat(escaped1, name_used); if (MultipartContentType) - StrAllocCat(escaped1, MultipartContentType); + StrAllocCat(escaped1, MultipartContentType); StrAllocCat(escaped1, "\r\n\r\n"); } else { - escaped1 = HTEscapeSP(form_ptr->name,URL_XALPHAS); + escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } - /* - * Be sure to actually look at +#ifndef EXP_CHARTRANS + /* + * Be sure to actually look at * the option submit value. 
- */ - if (form_ptr->cp_submit_value != NULL) { + */ + if (form_ptr->cp_submit_value != NULL) { for (i = 0; form_ptr->cp_submit_value[i]; i++) { - if (form_ptr->cp_submit_value[i] == + if (form_ptr->cp_submit_value[i] == HT_NON_BREAK_SPACE || form_ptr->cp_submit_value[i] == - HT_EM_SPACE) { + HT_EM_SPACE) { if (PlainText) { - form_ptr->cp_submit_value[i] = ' '; + form_ptr->cp_submit_value[i] = ' '; } else { - form_ptr->cp_submit_value[i] = 160; + form_ptr->cp_submit_value[i] = 160; } } else if (form_ptr->cp_submit_value[i] == LY_SOFT_HYPHEN) { @@ -6657,23 +7165,23 @@ PUBLIC void HText_SubmitForm ARGS4( } } if (PlainText || Boundary) { - StrAllocCopy(escaped2, + StrAllocCopy(escaped2, (form_ptr->cp_submit_value ? form_ptr->cp_submit_value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, + escaped2 = HTEscapeSP(form_ptr->cp_submit_value, URL_XALPHAS); } - } else { + } else { for (i = 0; form_ptr->value[i]; i++) { - if (form_ptr->value[i] == + if (form_ptr->value[i] == HT_NON_BREAK_SPACE || form_ptr->value[i] == - HT_EM_SPACE) { + HT_EM_SPACE) { if (PlainText) { - form_ptr->value[i] = ' '; + form_ptr->value[i] = ' '; } else { - form_ptr->value[i] = 160; + form_ptr->value[i] = 160; } } else if (form_ptr->value[i] == LY_SOFT_HYPHEN) { @@ -6681,13 +7189,22 @@ PUBLIC void HText_SubmitForm ARGS4( } } if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? + StrAllocCopy(escaped2, (form_ptr->value ? form_ptr->value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->value, + escaped2 = HTEscapeSP(form_ptr->value, URL_XALPHAS); } - } + } +#else /* EXP_CHARTRANS */ + if (PlainText || Boundary) { + StrAllocCopy(escaped2, + (val_used ? + val_used : "")); + } else { + escaped2 = HTEscapeSP(val_used, URL_XALPHAS); + } +#endif /* EXP_CHARTRANS */ if (form_ptr->type == F_IMAGE_SUBMIT_TYPE) { /* @@ -6696,14 +7213,14 @@ PUBLIC void HText_SubmitForm ARGS4( * typically returns the image's default. 
- FM */ if (Boundary) { - escaped1[(strlen(escaped1) - 4)] = '\0'; - sprintf(&query[strlen(query)], + escaped1[cdisp_name_startpos] = '\0'; + sprintf(&query[strlen(query)], "%s.x\r\n\r\n0\r\n--%s\r\n%s.y\r\n\r\n0", escaped1, Boundary, escaped1); } else { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "%s.x=0%s%s.y=0%s", escaped1, (PlainText ? @@ -6711,7 +7228,7 @@ PUBLIC void HText_SubmitForm ARGS4( ";" : "&")), escaped1, ((PlainText && *escaped1) ? - "\n" : "")); + "\n" : "")); } } else { /* @@ -6722,83 +7239,86 @@ PUBLIC void HText_SubmitForm ARGS4( "%s%s%s%s%s", escaped1, (Boundary ? - "" : "="), + "" : "="), (PlainText ? - "\n" : ""), + "\n" : ""), escaped2, ((PlainText && *escaped2) ? - "\n" : "")); + "\n" : "")); } - FREE(escaped1); - FREE(escaped2); + FREE(escaped1); + FREE(escaped2); } - break; +#ifdef EXP_CHARTRANS + FREE(copied_name_used); + FREE(copied_val_used); +#endif + break; - case F_RADIO_TYPE: - case F_CHECKBOX_TYPE: + case F_RADIO_TYPE: + case F_CHECKBOX_TYPE: /* * Only add if selected. */ if (form_ptr->num_value) { - if (first_one) { + if (first_one) { if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "--%s\r\n", Boundary); } - first_one=FALSE; - } else { + first_one=FALSE; + } else { if (PlainText) { - strcat(query, "\n"); + strcat(query, "\n"); } else if (SemiColon) { - strcat(query, ";"); + strcat(query, ";"); } else if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "\r\n--%s\r\n", Boundary); } else { - strcat(query, "&"); + strcat(query, "&"); } } if (PlainText) { - StrAllocCopy(escaped1, (form_ptr->name ? - form_ptr->name : "")); + StrAllocCopy(escaped1, name_used); } else if (Boundary) { StrAllocCopy(escaped1, - "Content-Disposition: form-data; name="); + "Content-Disposition: form-data; name="); StrAllocCat(escaped1, - (form_ptr->name ? 
- form_ptr->name : "")); + name_used); if (MultipartContentType) - StrAllocCat(escaped1, MultipartContentType); + StrAllocCat(escaped1, MultipartContentType); StrAllocCat(escaped1, "\r\n\r\n"); } else { - escaped1 = HTEscapeSP(form_ptr->name, URL_XALPHAS); + escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } +#ifndef EXP_CHARTRANS /* * Be sure to use the submit option value. */ if (form_ptr->cp_submit_value != NULL) { for (i = 0; form_ptr->cp_submit_value[i]; i++) { - if (form_ptr->cp_submit_value[i] == + if (form_ptr->cp_submit_value[i] == HT_NON_BREAK_SPACE || form_ptr->cp_submit_value[i] == - HT_EM_SPACE) { + HT_EM_SPACE) { if (PlainText) { - form_ptr->cp_submit_value[i] = ' '; + form_ptr->cp_submit_value[i] = ' '; } else { - form_ptr->cp_submit_value[i] = 160; + form_ptr->cp_submit_value[i] = 160; } } else if (form_ptr->cp_submit_value[i] == - LY_SOFT_HYPHEN) { + LY_SOFT_HYPHEN) { form_ptr->cp_submit_value[i] = 173; } } if (PlainText || Boundary) { - StrAllocCopy(escaped2, + StrAllocCopy(escaped2, (form_ptr->cp_submit_value ? form_ptr->cp_submit_value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, + escaped2 = HTEscapeSP(form_ptr->cp_submit_value, URL_XALPHAS); } } else { @@ -6806,28 +7326,37 @@ PUBLIC void HText_SubmitForm ARGS4( if (form_ptr->value[i] == HT_NON_BREAK_SPACE || form_ptr->value[i] == - HT_EM_SPACE) { + HT_EM_SPACE) { if (PlainText) { - form_ptr->value[i] = ' '; + form_ptr->value[i] = ' '; } else { - form_ptr->value[i] = 160; + form_ptr->value[i] = 160; } } else if (form_ptr->value[i] == - LY_SOFT_HYPHEN) { + LY_SOFT_HYPHEN) { form_ptr->value[i] = 173; } } if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? + StrAllocCopy(escaped2, (form_ptr->value ? form_ptr->value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->value, + escaped2 = HTEscapeSP(form_ptr->value, URL_XALPHAS); } } +#else /* EXP_CHARTRANS */ + if (PlainText || Boundary) { + StrAllocCopy(escaped2, + (val_used ? 
+ val_used : "")); + } else { + escaped2 = HTEscapeSP(val_used, URL_XALPHAS); + } +#endif /* EXP_CHARTRANS */ - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "%s%s%s%s%s", escaped1, (Boundary ? @@ -6837,72 +7366,89 @@ PUBLIC void HText_SubmitForm ARGS4( escaped2, ((PlainText && *escaped2) ? "\n" : "")); - FREE(escaped1); - FREE(escaped2); + FREE(escaped1); + FREE(escaped2); } +#ifdef EXP_CHARTRANS + FREE(copied_name_used); + FREE(copied_val_used); +#endif break; - + case F_TEXTAREA_TYPE: +#ifndef EXP_CHARTRANS for (i = 0; form_ptr->value[i]; i++) { if (form_ptr->value[i] == HT_NON_BREAK_SPACE || form_ptr->value[i] == HT_EM_SPACE) { if (PlainText) { - form_ptr->value[i] = ' '; + form_ptr->value[i] = ' '; } else { - form_ptr->value[i] = 160; + form_ptr->value[i] = 160; } } else if (form_ptr->value[i] == LY_SOFT_HYPHEN) { form_ptr->value[i] = 173; } } if (PlainText || Boundary) { - StrAllocCopy(escaped2, (form_ptr->value ? + StrAllocCopy(escaped2, (form_ptr->value ? form_ptr->value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->value, URL_XALPHAS); + escaped2 = HTEscapeSP(form_ptr->value, URL_XALPHAS); } +#else /* EXP_CHARTRANS */ + if (PlainText || Boundary) { + StrAllocCopy(escaped2, + (val_used ? + val_used : "")); + } else { + escaped2 = HTEscapeSP(val_used, URL_XALPHAS); + } +#endif /* EXP_CHARTRANS */ if (!last_textarea_name || strcmp(last_textarea_name, form_ptr->name)) { + textarea_lineno = 1; /* * Names are different so this is the first * textarea or a different one from any before * it. 
*/ - FREE(previous_blanks); - if (first_one) { + if (Boundary) { + StrAllocCopy(previous_blanks, "\r\n"); + } else { + FREE(previous_blanks); + } + if (first_one) { if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "--%s\r\n", Boundary); } - first_one=FALSE; - } else { + first_one=FALSE; + } else { if (PlainText) { - strcat(query, "\n"); + strcat(query, "\n"); } else if (SemiColon) { - strcat(query, ";"); + strcat(query, ";"); } else if (Boundary) { - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "\r\n--%s\r\n", Boundary); } else { - strcat(query, "&"); + strcat(query, "&"); } } if (PlainText) { - StrAllocCopy(escaped1, (form_ptr->name ? - form_ptr->name : "")); + StrAllocCopy(escaped1, name_used); } else if (Boundary) { StrAllocCopy(escaped1, - "Content-Disposition: form-data; name="); - StrAllocCat(escaped1, (form_ptr->name ? - form_ptr->name : "")); + "Content-Disposition: form-data; name="); + StrAllocCat(escaped1, name_used); if (MultipartContentType) - StrAllocCat(escaped1, MultipartContentType); + StrAllocCat(escaped1, MultipartContentType); StrAllocCat(escaped1, "\r\n\r\n"); } else { - escaped1 = HTEscapeSP(form_ptr->name, URL_XALPHAS); + escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } - sprintf(&query[strlen(query)], + sprintf(&query[strlen(query)], "%s%s%s%s%s", escaped1, (Boundary ? @@ -6912,7 +7458,7 @@ PUBLIC void HText_SubmitForm ARGS4( escaped2, ((PlainText && *escaped2) ? 
"\n" : "")); - FREE(escaped1); + FREE(escaped1); last_textarea_name = form_ptr->name; } else { /* @@ -6925,74 +7471,76 @@ PUBLIC void HText_SubmitForm ARGS4( FREE(previous_blanks); } if (PlainText) { - sprintf(&query[strlen(query)], "%s\n", + sprintf(&query[strlen(query)], "%s\n", escaped2); } else if (Boundary) { - sprintf(&query[strlen(query)], "%s\r\n", + sprintf(&query[strlen(query)], "%s\r\n", escaped2); } else { - sprintf(&query[strlen(query)], "%%0a%s", + sprintf(&query[strlen(query)], "%%0a%s", escaped2); } } else { if (PlainText) { - StrAllocCat(previous_blanks, "\n"); + StrAllocCat(previous_blanks, "\n"); } else if (Boundary) { - StrAllocCat(previous_blanks, "\r\n"); + StrAllocCat(previous_blanks, "\r\n"); } else { - StrAllocCat(previous_blanks, "%0a"); + StrAllocCat(previous_blanks, "%0a"); } } } - FREE(escaped2); - break; + FREE(escaped2); +#ifdef EXP_CHARTRANS + FREE(copied_val_used); +#endif + break; - case F_PASSWORD_TYPE: - case F_TEXT_TYPE: + case F_PASSWORD_TYPE: + case F_TEXT_TYPE: case F_OPTION_LIST_TYPE: case F_HIDDEN_TYPE: - if (first_one) { + if (first_one) { if (Boundary) { sprintf(&query[strlen(query)], "--%s\r\n", Boundary); } - first_one=FALSE; - } else { - if (PlainText) { + first_one=FALSE; + } else { + if (PlainText) { strcat(query, "\n"); } else if (SemiColon) { strcat(query, ";"); } else if (Boundary) { sprintf(&query[strlen(query)], - "\r\n--%s\r\n", Boundary); + "\r\n--%s\r\n", Boundary); } else { - strcat(query, "&"); + strcat(query, "&"); } } - + if (PlainText) { - StrAllocCopy(escaped1, (form_ptr->name ? - form_ptr->name : "")); + StrAllocCopy(escaped1, name_used); } else if (Boundary) { StrAllocCopy(escaped1, - "Content-Disposition: form-data; name="); - StrAllocCat(escaped1, (form_ptr->name ? 
- form_ptr->name : "")); + "Content-Disposition: form-data; name="); + StrAllocCat(escaped1, name_used); if (MultipartContentType) StrAllocCat(escaped1, MultipartContentType); StrAllocCat(escaped1, "\r\n\r\n"); } else { - escaped1 = HTEscapeSP(form_ptr->name, URL_XALPHAS); + escaped1 = HTEscapeSP(name_used, URL_XALPHAS); } +#ifndef EXP_CHARTRANS /* * Be sure to actually look at the option submit value. */ if (form_ptr->cp_submit_value != NULL) { for (i = 0; form_ptr->cp_submit_value[i]; i++) { if (form_ptr->cp_submit_value[i] == - HT_NON_BREAK_SPACE || - form_ptr->cp_submit_value[i] == + HT_NON_BREAK_SPACE || + form_ptr->cp_submit_value[i] == HT_EM_SPACE) { if (PlainText) { form_ptr->cp_submit_value[i] = ' '; @@ -7006,17 +7554,17 @@ PUBLIC void HText_SubmitForm ARGS4( } if (PlainText || Boundary) { StrAllocCopy(escaped2, - (form_ptr->cp_submit_value ? + (form_ptr->cp_submit_value ? form_ptr->cp_submit_value : "")); } else { - escaped2 = HTEscapeSP(form_ptr->cp_submit_value, - URL_XALPHAS); + escaped2 = HTEscapeSP(form_ptr->cp_submit_value, + URL_XALPHAS); } } else { for (i = 0; form_ptr->value[i]; i++) { if (form_ptr->value[i] == - HT_NON_BREAK_SPACE || - form_ptr->value[i] == + HT_NON_BREAK_SPACE || + form_ptr->value[i] == HT_EM_SPACE) { if (PlainText) { form_ptr->value[i] = ' '; @@ -7030,25 +7578,38 @@ PUBLIC void HText_SubmitForm ARGS4( } if (PlainText || Boundary) { StrAllocCopy(escaped2, (form_ptr->value ? - form_ptr->value : "")); + form_ptr->value : "")); } else { escaped2 = HTEscapeSP(form_ptr->value, - URL_XALPHAS); + URL_XALPHAS); } } +#else /* EXP_CHARTRANS */ + if (PlainText || Boundary) { + StrAllocCopy(escaped2, + (val_used ? + val_used : "")); + } else { + escaped2 = HTEscapeSP(val_used, URL_XALPHAS); + } +#endif /* EXP_CHARTRANS */ - sprintf(&query[strlen(query)], - "%s%s%s%s%s", + sprintf(&query[strlen(query)], + "%s%s%s%s%s", escaped1, (Boundary ? - "" : "="), + "" : "="), (PlainText ? "\n" : ""), escaped2, ((PlainText && *escaped2) ? 
- "\n" : "")); + "\n" : "")); FREE(escaped1); FREE(escaped2); +#ifdef EXP_CHARTRANS + FREE(copied_name_used); + FREE(copied_val_used); +#endif break; } } else if (anchor_ptr->input_field->number > form_number) { @@ -7061,6 +7622,9 @@ PUBLIC void HText_SubmitForm ARGS4( anchor_ptr = anchor_ptr->next; } +#ifdef EXP_CHARTRANS + FREE(copied_name_used); +#endif if (Boundary) { sprintf(&query[strlen(query)], "\r\n--%s--\r\n", Boundary); } @@ -7269,6 +7833,7 @@ PRIVATE void free_all_texts NOARGS FREE(HTFormAction); FREE(HTFormEnctype); FREE(HTFormTitle); + FREE(HTFormAcceptCharset); return; } diff --git a/src/GridText.h b/src/GridText.h index 0e6080e6..a3fe5fa5 100644 --- a/src/GridText.h +++ b/src/GridText.h @@ -138,16 +138,22 @@ extern void HText_beginForm PARAMS(( char * action, char * method, char * enctype, - char * title)); + char * title, + CONST char * accept_cs)); extern void HText_endForm PARAMS((HText *text)); -extern void HText_beginSelect PARAMS((char *name, BOOLEAN multiple, char *len)); +extern void HText_beginSelect PARAMS((char *name, + int name_cs, + BOOLEAN multiple, + char *len)); extern int HText_getOptionNum PARAMS((HText *text)); extern char * HText_setLastOptionValue PARAMS(( HText * text, char * value, char * submit_value, int order, - BOOLEAN checked)); + BOOLEAN checked, + int val_cs, + int submit_val_cs)); extern int HText_beginInput PARAMS(( HText * text, BOOL underline, @@ -165,7 +171,9 @@ extern HTList * search_queries; /* Previous isindex and whereis queries */ extern void HTSearchQueries_free NOPARAMS; extern void HTAddSearchQuery PARAMS((char *query)); -extern void user_message PARAMS((CONST char * message, CONST char * argument)); +extern void user_message PARAMS(( + CONST char * message, + CONST char * argument)); #define _user_message(msg, arg) mustshow = TRUE, user_message(msg, arg) diff --git a/src/HTFWriter.c b/src/HTFWriter.c index 3c3e45aa..719ab8df 100644 --- a/src/HTFWriter.c +++ b/src/HTFWriter.c @@ -77,6 +77,9 @@ struct 
_HTStream { HTFormat output_format; /* Original pres->rep_out */ HTParentAnchor * anchor; /* Original stream's anchor. */ HTStream * sink; /* Original stream's sink. */ +#ifdef FNAMES_8_3 + int idash; /* remember position to become '.'*/ +#endif }; @@ -220,6 +223,29 @@ PRIVATE void HTFWriter_free ARGS1(HTStream *, me) * for the uncompressed file and invoke * HTLoadFile() to handle it. - FM */ +#ifdef FNAMES_8_3 + /* + * Assuming we have just uncompressed e.g. + * FILE-mpeg.gz -> FILE-mpeg, restore/shorten + * the name to be fit for passing to an external + * viewer, by renaming FILE-mpeg -> FILE.mpe - kw + */ + if (skip_loadfile) { + char *new_path = NULL; + if (me->idash > 1 && path[me->idash] == '-') { + StrAllocCopy(new_path, path); + new_path[me->idash] = '.'; + if (strlen(new_path + me->idash) > 4) + new_path[me->idash + 4] = '\0'; + if (rename(path, new_path) == 0) { + FREE(path); + path = new_path; + } else { + FREE(new_path); + } + } + } +#endif /* FNAMES_8_3 */ StrAllocCopy(addr, "file://localhost"); #ifdef DOSPATH StrAllocCat(addr, "/"); @@ -233,7 +259,7 @@ PRIVATE void HTFWriter_free ARGS1(HTStream *, me) #endif /* DOSPATH */ if (!use_gzread) { StrAllocCopy(me->anchor->FileCache, path); - FREE(me->anchor->content_encoding); + StrAllocCopy(me->anchor->content_encoding, "binary"); } FREE(path); #ifdef EXP_CHARTRANS @@ -560,19 +586,23 @@ SaveAndExecute_tempname: */ *cp = '\0'; if (!strcasecomp(pres->rep->name, "text/html")) { - strcat(fnam, ".html"); + strcat(fnam, HTML_SUFFIX); } else if (!strcasecomp(pres->rep->name, "text/plain")) { strcat(fnam, ".txt"); } else if (!strcasecomp(pres->rep->name, "application/octet-stream")) { strcat(fnam, ".bin"); - } else if ((suffix = HTFileSuffix(pres->rep)) && *suffix == '.') { + } else if ((suffix = HTFileSuffix(pres->rep, anchor->content_encoding)) + && *suffix == '.') { strcat(fnam, suffix); /* * It's not one of the suffixes checked for a * spoof in tempname(), so check it now. 
- FM */ - if ((fp = fopen(fnam, "r")) != NULL) { + if (strcmp(suffix, HTML_SUFFIX) && + strcmp(suffix, ".txt") && + strcmp(suffix, ".bin") && + (fp = fopen(fnam, "r")) != NULL) { fclose(fp); fp = NULL; goto SaveAndExecute_tempname; @@ -744,19 +774,23 @@ SaveToFile_tempname: */ *cp = '\0'; if (!strcasecomp(pres->rep->name, "text/html")) { - strcat(fnam, ".html"); + strcat(fnam, HTML_SUFFIX); } else if (!strcasecomp(pres->rep->name, "text/plain")) { strcat(fnam, ".txt"); } else if (!strcasecomp(pres->rep->name, "application/octet-stream")) { strcat(fnam, ".bin"); - } else if ((suffix = HTFileSuffix(pres->rep)) && *suffix == '.') { + } else if ((suffix = HTFileSuffix(pres->rep, + anchor->content_encoding)) && *suffix == '.') { strcat(fnam, suffix); /* * It's not one of the suffixes checked for a * spoof in tempname(), so check it now. - FM */ - if ((fp = fopen(fnam, "r")) != NULL) { + if (strcmp(suffix, HTML_SUFFIX) && + strcmp(suffix, ".txt") && + strcmp(suffix, ".bin") && + (fp = fopen(fnam, "r")) != NULL) { fclose(fp); fp = NULL; goto SaveToFile_tempname; @@ -885,6 +919,7 @@ PUBLIC HTStream* HTCompressed ARGS3( char *uncompress_mask = NULL; char *compress_suffix = ""; char *cp; + CONST char *middle; FILE *fp = NULL; /* @@ -916,7 +951,7 @@ PUBLIC HTStream* HTCompressed ARGS3( * It's compressed with the modern gzip. 
- FM */ StrAllocCopy(uncompress_mask, GZIP_PATH); - StrAllocCat(uncompress_mask, " -d %s"); + StrAllocCat(uncompress_mask, " -d --no-name %s"); compress_suffix = "gz"; } else if (!strcasecomp(anchor->content_encoding, "x-compress") || !strcasecomp(anchor->content_encoding, "compress")) { @@ -985,42 +1020,45 @@ PUBLIC HTStream* HTCompressed ARGS3( Compressed_tempname: tempname(fnam, NEW_FILE); if ((cp = strrchr(fnam, '.')) != NULL) { - *cp = '\0'; + middle = NULL; if (!strcasecomp(anchor->content_type, "text/html")) { -#ifdef VMS - strcat(fnam, ".html-"); -#else - strcat(fnam, ".html."); -#endif /* VMS */ + middle = HTML_SUFFIX; + middle++; /* point to 'h' of .htm(l) - kw */ } else if (!strcasecomp(anchor->content_type, "text/plain")) { -#ifdef VMS - strcat(fnam, ".txt-"); -#else - strcat(fnam, ".txt."); -#endif /* VMS */ + middle = "txt"; } else if (!strcasecomp(anchor->content_type, "application/octet-stream")) { -#ifdef VMS - strcat(fnam, ".bin-"); -#else - strcat(fnam, ".bin."); -#endif /* VMS */ + middle = "bin"; } else if ((suffix = - HTFileSuffix(HTAtom_for(anchor->content_type))) && + HTFileSuffix(HTAtom_for(anchor->content_type), NULL)) && *suffix == '.') { - strcat(fnam, suffix); +#if defined(VMS) || defined(FNAMES_8_3) + if (strchr(suffix + 1, '.') == NULL) +#endif + middle = suffix + 1; + } + if (middle) { + *cp = '\0'; +#ifdef FNAMES_8_3 + me->idash = strlen(fnam); /* remember position of '-' - kw */ + strcat(fnam, "-"); /* NAME-htm, NAME-txt, etc. - hack for DOS */ +#else + strcat(fnam, "."); /* NAME.html, NAME-txt etc. 
*/ +#endif /* FNAMES_8_3 */ + strcat(fnam, middle); #ifdef VMS - strcat(fnam, "-"); + strcat(fnam, "-"); /* NAME.html-gz, NAME.txt-gz, NAME.txt-Z etc.*/ #else - strcat(fnam, "."); + strcat(fnam, "."); /* NAME-htm.gz (DOS), NAME.html.gz (UNIX)etc.*/ #endif /* VMS */ } else { - strcat(fnam, "."); + *(cp + 1) = '\0'; } } else { strcat(fnam, "."); } strcat(fnam, compress_suffix); + /* * It's not one of the suffixes checked for a * spoof in tempname(), so check it now. - FM diff --git a/src/HTForms.h b/src/HTForms.h index 9f2503b0..7e384cd5 100644 --- a/src/HTForms.h +++ b/src/HTForms.h @@ -34,6 +34,9 @@ typedef struct _InputFieldData { CONST char *type; char *value; CONST char *width; + int name_cs; /* charset handle for name */ + int value_cs; /* charset handle for value */ + CONST char *accept_cs; } InputFieldData; /* The OptionType structure is for a linked list of option entries @@ -41,6 +44,7 @@ typedef struct _InputFieldData { typedef struct _OptionType { char * name; /* the name of the entry */ char * cp_submit_value; /* the value to submit */ + int value_cs; /* charset value is in */ struct _OptionType * next; /* the next entry */ } OptionType; @@ -72,6 +76,8 @@ typedef struct _FormInfo { char * orig_submit_value; /* original submit value */ int size_l; /* The length of the option list */ int disabled; /* If YES, can't change values */ + int name_cs; + int value_cs; } FormInfo; #define HYPERTEXT_ANCHOR 1 diff --git a/src/HTInit.c b/src/HTInit.c index 9de4f24a..532f9c5b 100644 --- a/src/HTInit.c +++ b/src/HTInit.c @@ -699,8 +699,8 @@ PUBLIC void HTFileInit NOARGS HTSetSuffix("_gz", "application/GNU Compressed", "binary", 1.0); HTSetSuffix(".gz", "application/GNU Compressed", "binary", 1.0); - HTSetSuffix(".tar.gz", "application/GNU Compr. Tar", "binary", 1.0); - HTSetSuffix(".tgz", "application/GNU Compr. Tar", "binary", 1.0); + HTSetSuffix5(".tar.gz", "application/x-tar", "binary", "GNU Compr. 
Tar", 1.0); + HTSetSuffix5(".tgz", "application/x-tar", "gzip", "GNU Compr. Tar", 1.0); HTSetSuffix(".src", "application/x-WAIS-source", "8bit", 1.0); HTSetSuffix(".wsrc", "application/x-WAIS-source", "8bit", 1.0); @@ -714,9 +714,9 @@ PUBLIC void HTFileInit NOARGS HTSetSuffix(".o", "application/x-Prog. Object", "binary", 1.0); HTSetSuffix(".a", "application/x-Prog. Library", "binary", 1.0); - HTSetSuffix(".oda", "application/ODA", "binary", 1.0); + HTSetSuffix5(".oda", "application/oda", "binary", "ODA", 1.0); - HTSetSuffix(".pdf", "application/PDF", "binary", 1.0); + HTSetSuffix5(".pdf", "application/pdf", "binary", "PDF", 1.0); HTSetSuffix(".eps", "application/Postscript", "8bit", 1.0); HTSetSuffix(".ai", "application/Postscript", "8bit", 1.0); @@ -792,7 +792,7 @@ PUBLIC void HTFileInit NOARGS HTSetSuffix(".sv4cpio", "application/x-sv4cpio", "binary", 1.0); HTSetSuffix(".sv4crc", "application/x-sv4crc", "binary", 1.0); - HTSetSuffix(".tar", "application/x-Tar File", "binary", 1.0); + HTSetSuffix5(".tar", "application/x-tar", "binary", "Tar File", 1.0); HTSetSuffix(".ustar", "application/x-ustar", "binary", 1.0); HTSetSuffix(".snd", "audio/basic", "binary", 1.0); @@ -849,12 +849,12 @@ PUBLIC void HTFileInit NOARGS HTSetSuffix(".text", "text/plain", "8bit", 1.0); HTSetSuffix(".txt", "text/plain", "8bit", 1.0); - HTSetSuffix(".htm", "text/html", "8bit", 1.0); HTSetSuffix(".html3", "text/html", "8bit", 1.0); HTSetSuffix(".ht3", "text/html", "8bit", 1.0); HTSetSuffix(".phtml", "text/html", "8bit", 1.0); HTSetSuffix(".shtml", "text/html", "8bit", 1.0); HTSetSuffix(".htmlx", "text/html", "8bit", 1.0); + HTSetSuffix(".htm", "text/html", "8bit", 1.0); HTSetSuffix(".html", "text/html", "8bit", 1.0); /* These should override the default extensions as necessary. 
*/ diff --git a/src/HTML.c b/src/HTML.c index eb09a0f9..f3040bb4 100644 --- a/src/HTML.c +++ b/src/HTML.c @@ -520,16 +520,52 @@ char prevailing_class[TEMPSTRINGSIZE]; int hcode; #endif +#ifdef EXP_CHARTRANS +/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */ +#define ATTR_CS_IN me->tag_charset + +#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \ + LYUCFullyTranslateString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML) + +#define TRANSLATE_AND_UNESCAPE_ENTITIES4(s, cs_to, p, h) \ + LYUCFullyTranslateString(s, ATTR_CS_IN, cs_to, YES, p, h, st_HTML) + +#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \ + LYUCFullyTranslateString(s, cs_from, cs_to, YES, p, h, st_HTML) + +#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \ + LYUCFullyTranslateString(s, cs_from, cs_to, spcls, p, h, st_HTML) + +/* + * Strings from attributes which should be converted to some kind + * of "standard" representation (character encoding), was Latin-1, + * esp. URLs (incl. #fragments) and HTML NAME and ID stuff. 
+ */ +#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \ + LYUCFullyTranslateString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL) +#define UNESCAPE_FIELDNAME_TO_STD(s) \ + LYUCFullyTranslateString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML) + +#else /* !EXP_CHARTRANS */ + +#define ATTR_CS_IN 0 + +#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \ + LYUnEscapeToLatinOne(s, TRUE) /* for now */ +#define UNESCAPE_FIELDNAME_TO_STD(s) ; /* no-op */ +#endif /* !EXP_CHARTRANS */ + #define CHECK_ID(code) LYCheckForID(me, present, value, (int)code) /* Start Element ** ------------- */ -PRIVATE void HTML_start_element ARGS5( +PRIVATE void HTML_start_element ARGS6( HTStructured *, me, int, element_number, CONST BOOL*, present, CONST char **, value, + int, tag_charset, char **, include) { char *alt_string = NULL; @@ -537,6 +573,7 @@ PRIVATE void HTML_start_element ARGS5( char *href = NULL; char *map_href = NULL; char *title = NULL; + char *I_value = NULL, *I_name = NULL; char *temp = NULL; #ifdef EXP_CHARTRANS int dest_char_set = -1; @@ -557,6 +594,13 @@ PRIVATE void HTML_start_element ARGS5( } } +#ifdef EXP_CHARTRANS + if (tag_charset < 0) + me->tag_charset = me->UCLYhndl; + else + me->tag_charset = tag_charset; +#endif + /* this should be done differently */ #if defined(USE_COLOR_STYLE) strcat (Style_className, ";"); @@ -805,6 +849,9 @@ PRIVATE void HTML_start_element ARGS5( value[HTML_LINK_TITLE] && *value[HTML_LINK_TITLE] != '\0') { StrAllocCopy(title, value[HTML_LINK_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* @@ -812,6 +859,7 @@ PRIVATE void HTML_start_element ARGS5( * or decimal escaping. 
- FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); if (*title != '\0') @@ -962,12 +1010,16 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_LINK_TITLE] && value[HTML_LINK_TITLE] && *value[HTML_LINK_TITLE] != '\0') { StrAllocCopy(title, value[HTML_LINK_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* * Convert any HTML entities or decimal escaping. - FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); } @@ -1003,6 +1055,19 @@ PRIVATE void HTML_start_element ARGS5( if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, title); dest = NULL; +#ifdef EXP_CHARTRANS + if (present[HTML_A_CHARSET] && + value[HTML_A_CHARSET] && *value[HTML_A_CHARSET] != '\0') { + dest_char_set = UCGetLYhndl_byMIME(value[HTML_A_CHARSET]); + if (dest_char_set < 0) + dest_char_set = UCLYhndl_for_unrec; + } + if (dest && dest_char_set >= 0) + HTAnchor_setUCInfoStage(dest, dest_char_set, + UCT_STAGE_PARSER, + UCT_SETBY_LINK); + dest_char_set = -1; +#endif /* EXP_CHARTRANS */ } UPDATE_STYLE; if (!HText_hasToolbar(me->text) && @@ -1107,12 +1172,16 @@ PRIVATE void HTML_start_element ARGS5( present[HTML_ISINDEX_PROMPT] && value[HTML_ISINDEX_PROMPT] && *value[HTML_ISINDEX_PROMPT]) { StrAllocCopy(temp, value[HTML_ISINDEX_PROMPT]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&temp, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&temp); /* * Convert any HTML entities or decimal escaping. 
- FM */ LYUnEscapeEntities(temp, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(temp); LYTrimTail(temp); if (*temp != '\0') { @@ -1174,12 +1243,16 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_FRAME_NAME] && value[HTML_FRAME_NAME] && *value[HTML_FRAME_NAME]) { StrAllocCopy(id_string, value[HTML_FRAME_NAME]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&id_string, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&id_string); /* * Convert any HTML entities or decimal escaping. - FM */ LYUnEscapeEntities(id_string, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(id_string); LYTrimTail(id_string); } @@ -1255,12 +1328,16 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_IFRAME_NAME] && value[HTML_IFRAME_NAME] && *value[HTML_IFRAME_NAME]) { StrAllocCopy(id_string, value[HTML_IFRAME_NAME]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&id_string, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&id_string); /* * Convert any HTML entities or decimal escaping. - FM */ LYUnEscapeEntities(id_string, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(id_string); LYTrimTail(id_string); } @@ -1733,7 +1810,7 @@ PRIVATE void HTML_start_element ARGS5( * TO has priority over INDENT if both are present. 
- FM */ StrAllocCopy(temp, value[HTML_TAB_TO]); - LYUnEscapeToLatinOne(&temp, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&temp); if (*temp) { target = HText_getTabIDColumn(me->text, temp); } @@ -1772,7 +1849,7 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_TAB_ID] && value[HTML_TAB_ID] && *value[HTML_TAB_ID]) { StrAllocCopy(temp, value[HTML_TAB_ID]); - LYUnEscapeToLatinOne(&temp, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&temp); if (*temp) HText_setTabID(me->text, temp); FREE(temp); @@ -2443,7 +2520,7 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(id_string, value[HTML_A_NAME]); } if (id_string) { - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); if (*id_string == '\0') { FREE(id_string); } @@ -2543,12 +2620,16 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_A_TITLE] && value[HTML_A_TITLE] && *value[HTML_A_TITLE] != '\0') { StrAllocCopy(title, value[HTML_A_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* * Convert any HTML entities or decimal escaping. - FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); if (*title == '\0') { @@ -2580,7 +2661,7 @@ PRIVATE void HTML_start_element ARGS5( if (dest && dest_char_set >= 0) HTAnchor_setUCInfoStage(dest, dest_char_set, UCT_STAGE_PARSER, - UCT_SETBY_DEFAULT); + UCT_SETBY_LINK); dest_char_set = -1; #endif /* EXP_CHARTRANS */ dest = NULL; @@ -2727,12 +2808,16 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_IMG_TITLE] && value[HTML_IMG_TITLE] && *value[HTML_IMG_TITLE]) { StrAllocCopy(title, value[HTML_IMG_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* * Convert any HTML entities or decimal escaping. 
- FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); if (*title == '\0') { @@ -2751,6 +2836,10 @@ PRIVATE void HTML_start_element ARGS5( ((clickable_images || map_href) && *value[HTML_IMG_ALT] != '\0'))) { StrAllocCopy(alt_string, value[HTML_IMG_ALT]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&alt_string, + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&alt_string); /* @@ -2758,6 +2847,7 @@ PRIVATE void HTML_start_element ARGS5( */ LYUnEscapeEntities(alt_string, me->UsePlainSpace, me->HiddenValue); +#endif /* !EXP_CHARTRANS */ /* * If it's all spaces and we are making SRC or * USEMAP links, treat it as zero-length. - FM @@ -2825,7 +2915,7 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_IMG_ID] && value[HTML_IMG_ID] && *value[HTML_IMG_ID]) { StrAllocCopy(id_string, value[HTML_IMG_ID]); - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); if (*id_string == '\0') { FREE(id_string); } @@ -3119,7 +3209,7 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(id_string, value[HTML_MAP_ID]); } if (id_string) { - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); if (*id_string == '\0') { FREE(id_string); } @@ -3146,12 +3236,16 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_MAP_TITLE] && value[HTML_MAP_TITLE] && *value[HTML_MAP_TITLE] != '\0') { StrAllocCopy(title, value[HTML_MAP_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* * Convert any HTML entities or decimal escaping. 
- FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); if (*title == '\0') { @@ -3225,6 +3319,10 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(alt_string, value[HTML_AREA_TITLE]); } if (alt_string != NULL) { +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&alt_string, + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&alt_string); /* @@ -3232,6 +3330,7 @@ PRIVATE void HTML_start_element ARGS5( */ LYUnEscapeEntities(alt_string, me->UsePlainSpace, me->HiddenValue); +#endif /* !EXP_CHARTRANS */ /* * Make sure it's not just space(s). - FM */ @@ -3365,7 +3464,7 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP] && *value[HTML_OBJECT_USEMAP]) { StrAllocCopy(me->object_usemap, value[HTML_OBJECT_USEMAP]); - LYUnEscapeToLatinOne(&me->object_usemap, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->object_usemap); if (*me->object_usemap == '\0') { FREE(me->object_usemap); } @@ -3373,7 +3472,7 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_OBJECT_ID] && value[HTML_OBJECT_ID] && *value[HTML_OBJECT_ID]) { StrAllocCopy(me->object_id, value[HTML_OBJECT_ID]); - LYUnEscapeToLatinOne(&me->object_id, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->object_id); if (*me->object_id == '\0') { FREE(me->object_id); } @@ -3381,9 +3480,13 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_OBJECT_TITLE] && value[HTML_OBJECT_TITLE] && *value[HTML_OBJECT_TITLE]) { StrAllocCopy(me->object_title, value[HTML_OBJECT_TITLE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&me->object_title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&me->object_title); LYUnEscapeEntities(me->object_title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(me->object_title); LYTrimTail(me->object_title); if (me->object_title == '\0') { @@ -3393,7 +3496,7 @@ PRIVATE void 
HTML_start_element ARGS5( if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA] && *value[HTML_OBJECT_DATA]) { StrAllocCopy(me->object_data, value[HTML_OBJECT_DATA]); - LYUnEscapeToLatinOne(&me->object_data, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->object_data); if (*me->object_data == '\0') { FREE(me->object_data); } @@ -3401,9 +3504,13 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_OBJECT_TYPE] && value[HTML_OBJECT_TYPE] && *value[HTML_OBJECT_TYPE]) { StrAllocCopy(me->object_type, value[HTML_OBJECT_TYPE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&me->object_type, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&me->object_type); LYUnEscapeEntities(me->object_type, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(me->object_type); LYTrimTail(me->object_type); if (me->object_type == '\0') { @@ -3415,9 +3522,13 @@ PRIVATE void HTML_start_element ARGS5( *value[HTML_OBJECT_CLASSID]) { StrAllocCopy(me->object_classid, value[HTML_OBJECT_CLASSID]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&me->object_classid, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&me->object_classid); LYUnEscapeEntities(me->object_classid, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(me->object_classid); LYTrimTail(me->object_classid); if (me->object_classid == '\0') { @@ -3429,7 +3540,7 @@ PRIVATE void HTML_start_element ARGS5( *value[HTML_OBJECT_CODEBASE]) { StrAllocCopy(me->object_codebase, value[HTML_OBJECT_CODEBASE]); - LYUnEscapeToLatinOne(&me->object_codebase, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->object_codebase); if (*me->object_codebase == '\0') { FREE(me->object_codebase); } @@ -3439,9 +3550,13 @@ PRIVATE void HTML_start_element ARGS5( *value[HTML_OBJECT_CODETYPE]) { StrAllocCopy(me->object_codetype, value[HTML_OBJECT_CODETYPE]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&me->object_codetype, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if 
(current_char_set) LYExpandString(&me->object_codetype); LYUnEscapeEntities(me->object_codetype, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(me->object_codetype); LYTrimTail(me->object_codetype); if (me->object_codetype == '\0') { @@ -3451,9 +3566,13 @@ PRIVATE void HTML_start_element ARGS5( if (present[HTML_OBJECT_NAME] && value[HTML_OBJECT_NAME] && *value[HTML_OBJECT_NAME]) { StrAllocCopy(me->object_name, value[HTML_OBJECT_NAME]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&me->object_name, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&me->object_name); LYUnEscapeEntities(me->object_name, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(me->object_name); LYTrimTail(me->object_name); if (me->object_name == '\0') { @@ -3544,7 +3663,7 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(id_string, value[HTML_APPLET_NAME]); } if (id_string) { - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); LYHandleID(me, id_string); FREE(id_string); } @@ -3558,6 +3677,10 @@ PRIVATE void HTML_start_element ARGS5( (!clickable_images || (clickable_images && *value[HTML_APPLET_ALT] != '\0'))) { StrAllocCopy(alt_string, value[HTML_APPLET_ALT]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&alt_string, + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&alt_string); /* @@ -3565,6 +3688,7 @@ PRIVATE void HTML_start_element ARGS5( */ LYUnEscapeEntities(alt_string, me->UsePlainSpace, me->HiddenValue); +#endif /* !EXP_CHARTRANS */ /* * If it's all spaces and we are making sources links, * treat it as zero-length. - FM @@ -3599,7 +3723,7 @@ PRIVATE void HTML_start_element ARGS5( value[HTML_APPLET_CODEBASE] && *value[HTML_APPLET_CODEBASE]) { StrAllocCopy(base, value[HTML_APPLET_CODEBASE]); collapse_spaces(base); - LYUnEscapeToLatinOne(&base, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&base); /* * Force it to be a directory. 
- FM */ @@ -3767,7 +3891,7 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(id_string, value[HTML_EMBED_NAME]); } if (id_string) { - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); LYHandleID(me, id_string); FREE(id_string); } @@ -3782,6 +3906,10 @@ PRIVATE void HTML_start_element ARGS5( (!clickable_images || (clickable_images && *value[HTML_EMBED_ALT] != '\0'))) { StrAllocCopy(alt_string, value[HTML_EMBED_ALT]); +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&alt_string, + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&alt_string); /* @@ -3789,6 +3917,7 @@ PRIVATE void HTML_start_element ARGS5( */ LYUnEscapeEntities(alt_string, me->UsePlainSpace, me->HiddenValue); +#endif /* !EXP_CHARTRANS */ /* * If it's all spaces and we are making sources links, * treat it as zero-length. - FM @@ -3948,14 +4077,16 @@ PRIVATE void HTML_start_element ARGS5( char * action = NULL; char * method = NULL; char * enctype = NULL; + CONST char * accept_cs = NULL; + HTChildAnchor * source; HTAnchor *link_dest; if (!me->text) UPDATE_STYLE; /* - * FORM was declared SGML_EMPTY in HTMLDTD.c, and - * SGML_character() in SGML.c checks for a FORM end + * FORM may have been declared SGML_EMPTY in HTMLDTD.c, and + * SGML_character() in SGML.c may check for a FORM end * tag to call HTML_end_element() directly (with a * check in that to bypass decrementing of the HTML * parser's stack), so if we have an open FORM, close @@ -3975,6 +4106,10 @@ PRIVATE void HTML_start_element ARGS5( */ me->inFORM = TRUE; + if (present && present[HTML_FORM_ACCEPT_CHARSET]) { + accept_cs = value[HTML_FORM_ACCEPT_CHARSET] ? 
+ value[HTML_FORM_ACCEPT_CHARSET] : "UNKNOWN"; + } if (present && present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) { /* @@ -4062,12 +4197,16 @@ PRIVATE void HTML_start_element ARGS5( StrAllocCopy(title, value[HTML_FORM_SUBJECT]); } if (title != NULL && *title != '\0') { +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&title, TRUE, FALSE); +#else /* !EXP_CHARTRANS */ if (current_char_set) LYExpandString(&title); /* * Convert any HTML entities or decimal escaping. - FM */ LYUnEscapeEntities(title, TRUE, FALSE); +#endif /* !EXP_CHARTRANS */ LYTrimHead(title); LYTrimTail(title); if (*title == '\0') { @@ -4076,12 +4215,13 @@ PRIVATE void HTML_start_element ARGS5( } } - HText_beginForm(action, method, enctype, title); + HText_beginForm(action, method, enctype, title, accept_cs); FREE(action); FREE(method); FREE(enctype); FREE(title); + FREE(title); } CHECK_ID(HTML_FORM_ID); break; @@ -4121,6 +4261,9 @@ PRIVATE void HTML_start_element ARGS5( I.lang=NULL; I.max=NULL; I.maxlength=NULL; I.md=NULL; I.min=NULL; I.name=NULL; I.size=NULL; I.src=NULL; I.type=NULL; I.value=NULL; I.width=NULL; + I.accept_cs = NULL; + I.name_cs = ATTR_CS_IN; + I.value_cs = ATTR_CS_IN; UPDATE_STYLE; if ((present && present[HTML_BUTTON_TYPE] && @@ -4144,7 +4287,7 @@ PRIVATE void HTML_start_element ARGS5( */ if (!me->inFORM) { if (TRACE) { - fprintf(stderr, "HTML: BUTTON tag not within FORM tag\n"); + fprintf(stderr, "HTML: ***** BUTTON tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4177,10 +4320,16 @@ PRIVATE void HTML_start_element ARGS5( } HTML_put_character(me, '('); - if (present && present[HTML_BUTTON_NAME] && value[HTML_BUTTON_NAME]) - I.name = value[HTML_BUTTON_NAME]; - else + if (!(present && present[HTML_BUTTON_NAME] && + value[HTML_BUTTON_NAME])) { I.name = ""; + } else if (strchr(value[HTML_BUTTON_NAME], '&') == NULL) { + I.name = value[HTML_BUTTON_NAME]; + } else { + StrAllocCopy(I_name, 
value[HTML_BUTTON_NAME]); + UNESCAPE_FIELDNAME_TO_STD(&I_name); + I.name = I_name; + } if (present && present[HTML_BUTTON_VALUE] && value[HTML_BUTTON_VALUE] && *value[HTML_BUTTON_VALUE]) { @@ -4189,13 +4338,19 @@ PRIVATE void HTML_start_element ARGS5( */ int len; + StrAllocCopy(I_value, value[HTML_BUTTON_VALUE]); me->UsePlainSpace = TRUE; +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES(&I_value, TRUE, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set) { - LYExpandString((char **)&value[HTML_BUTTON_VALUE]); + LYExpandString(&I_value); } - LYUnEscapeEntities((char *)value[HTML_BUTTON_VALUE], + LYUnEscapeEntities(I_value, me->UsePlainSpace, me->HiddenValue); - I.value = (char *)value[HTML_BUTTON_VALUE]; +#endif /* EXP_CHARTRANS */ + me->UsePlainSpace = FALSE; + I.value = I_value; /* * Convert any newlines or tabs to spaces, * and trim any lead or trailing spaces. - FM @@ -4206,7 +4361,6 @@ PRIVATE void HTML_start_element ARGS5( len = strlen(I.value) - 1; while (len > 0 && I.value[len] == ' ') I.value[len--] = '\0'; - me->UsePlainSpace = FALSE; } if (present && present[HTML_BUTTON_DISABLED]) @@ -4284,6 +4438,8 @@ PRIVATE void HTML_start_element ARGS5( HTML_put_character(me, ' '); me->in_word = NO; } + FREE(I_value); + FREE(I_name); } break; @@ -4301,6 +4457,9 @@ PRIVATE void HTML_start_element ARGS5( I.lang=NULL; I.max=NULL; I.maxlength=NULL; I.md=NULL; I.min=NULL; I.name=NULL; I.size=NULL; I.src=NULL; I.type=NULL; I.value=NULL; I.width=NULL; + I.accept_cs = NULL; + I.name_cs = ATTR_CS_IN; + I.value_cs = ATTR_CS_IN; UPDATE_STYLE; @@ -4379,7 +4538,7 @@ PRIVATE void HTML_start_element ARGS5( */ if (!me->inFORM) { if (TRACE) { - fprintf(stderr, "HTML: INPUT tag not within FORM tag\n"); + fprintf(stderr, "HTML: ***** INPUT tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4398,7 +4557,7 @@ PRIVATE void HTML_start_element ARGS5( */ if (me->inTEXTAREA) { if (TRACE) { - 
fprintf(stderr, "HTML: Missing TEXTAREA end tag.\n"); + fprintf(stderr, "HTML: ***** Missing TEXTAREA end tag. *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4409,10 +4568,16 @@ PRIVATE void HTML_start_element ARGS5( /* * Handle the INPUT as for a FORM. - FM */ - if (present && present[HTML_INPUT_NAME] && value[HTML_INPUT_NAME]) - I.name = value[HTML_INPUT_NAME]; - else + if (!(present && present[HTML_INPUT_NAME] && + value[HTML_INPUT_NAME])) { I.name = ""; + } else if (strchr(value[HTML_INPUT_NAME], '&') == NULL) { + I.name = value[HTML_INPUT_NAME]; + } else { + StrAllocCopy(I_name, value[HTML_INPUT_NAME]); + UNESCAPE_FIELDNAME_TO_STD(&I_name); + I.name = I_name; + } if ((present && present[HTML_INPUT_ALT] && value[HTML_INPUT_ALT] && *value[HTML_INPUT_ALT] && I.type && !strcasecomp(I.type, "image")) && @@ -4422,7 +4587,7 @@ PRIVATE void HTML_start_element ARGS5( * This is a TYPE="image" using an ALT rather than * VALUE attribute to indicate the link string for * text clients or GUIs with image loading off, so - * set the flag to use that at if it were a VALUE + * set the flag to use that as if it were a VALUE * attribute. - FM */ UseALTasVALUE = TRUE; @@ -4508,17 +4673,27 @@ PRIVATE void HTML_start_element ARGS5( !strcasecomp(I.type, "image") || !strcasecomp(I.type, "reset")) me->UsePlainSpace = TRUE; + StrAllocCopy(I_value, + ((UseALTasVALUE == TRUE) ? + value[HTML_INPUT_ALT] : + value[HTML_INPUT_VALUE])); +#ifdef EXP_CHARTRANS + if (me->UsePlainSpace && !me->HiddenValue) { + I.value_cs = current_char_set; + } + TRANSLATE_AND_UNESCAPE_ENTITIES6( + &I_value, + ATTR_CS_IN, + I.value_cs, + (me->UsePlainSpace && !me->HiddenValue), + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ if (current_char_set && me->UsePlainSpace) - LYExpandString(((UseALTasVALUE == TRUE) ? - (char **)&value[HTML_INPUT_ALT] : - (char **)&value[HTML_INPUT_VALUE])); - LYUnEscapeEntities(((UseALTasVALUE == TRUE) ? 
- (char *)value[HTML_INPUT_ALT] : - (char *)value[HTML_INPUT_VALUE]), + LYExpandString(&I_value); + LYUnEscapeEntities(I_value, me->UsePlainSpace, me->HiddenValue); - I.value = ((UseALTasVALUE == TRUE) ? - (char *)value[HTML_INPUT_ALT] : - (char *)value[HTML_INPUT_VALUE]); +#endif /* !EXP_CHARTRANS */ + I.value = I_value; if (me->UsePlainSpace == TRUE) { /* * Convert any newlines or tabs to spaces, @@ -4549,7 +4724,8 @@ PRIVATE void HTML_start_element ARGS5( * "Submit" value. If we didn't put up a link, then * HText_beginInput() will use "[IMAGE]-Submit". - FM */ - I.value = "Submit"; + StrAllocCopy(I_value, "Submit"); + I.value = I_value; } if (present && present[HTML_INPUT_CHECKED]) I.checked = YES; @@ -4562,6 +4738,10 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_INPUT_DISABLED]) I.disabled = YES; + if (present && present[HTML_INPUT_ACCEPT_CHARSET]) { /* Not yet used. */ + I.accept_cs = value[HTML_INPUT_ACCEPT_CHARSET] ? + value[HTML_INPUT_ACCEPT_CHARSET] : "UNKNOWN"; + } if (present && present[HTML_INPUT_ALIGN] && /* Not yet used. 
*/ value[HTML_INPUT_ALIGN] && *value[HTML_INPUT_ALIGN]) I.align = value[HTML_INPUT_ALIGN]; @@ -4712,6 +4892,8 @@ PRIVATE void HTML_start_element ARGS5( } } HText_setIgnoreExcess(me->text, FALSE); + FREE(I_value); + FREE(I_name); } break; @@ -4722,7 +4904,7 @@ PRIVATE void HTML_start_element ARGS5( if (!me->inFORM) { if (TRACE) { fprintf(stderr, - "HTML: TEXTAREA start tag not within FORM tag\n"); + "HTML: ***** TEXTAREA start tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4744,10 +4926,23 @@ PRIVATE void HTML_start_element ARGS5( */ HTChunkClear(&me->textarea); if (present && present[HTML_TEXTAREA_NAME] && - value[HTML_TEXTAREA_NAME]) + value[HTML_TEXTAREA_NAME]) { StrAllocCopy(me->textarea_name, value[HTML_TEXTAREA_NAME]); - else + me->textarea_name_cs = ATTR_CS_IN; + if (strchr(value[HTML_TEXTAREA_NAME], '&') != NULL) { + UNESCAPE_FIELDNAME_TO_STD(&me->textarea_name); + } + } else { StrAllocCopy(me->textarea_name, ""); + } + + if (present && present[HTML_TEXTAREA_ACCEPT_CHARSET] && + value[HTML_TEXTAREA_ACCEPT_CHARSET]) { + StrAllocCopy(me->textarea_accept_cs, value[HTML_TEXTAREA_ACCEPT_CHARSET]); + TRANSLATE_AND_UNESCAPE_TO_STD(&me->textarea_accept_cs); + } else { + FREE(me->textarea_accept_cs); + } if (present && present[HTML_TEXTAREA_COLS] && value[HTML_TEXTAREA_COLS] && @@ -4771,7 +4966,7 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_TEXTAREA_ID] && value[HTML_TEXTAREA_ID] && *value[HTML_TEXTAREA_ID]) { StrAllocCopy(id_string, value[HTML_TEXTAREA_ID]); - LYUnEscapeToLatinOne(&id_string, TRUE); + TRANSLATE_AND_UNESCAPE_TO_STD(&id_string); if ((id_string != '\0') && (ID_A = HTAnchor_findChildAndLink( me->node_anchor, /* Parent */ @@ -4799,7 +4994,7 @@ PRIVATE void HTML_start_element ARGS5( if (me->inSELECT) { if (TRACE) { fprintf(stderr, - "HTML: Embedded SELECT start end. Faking SELECT end tag.\n"); + "HTML: ***** SELECT start tag in SELECT element. 
Faking SELECT end tag. *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4824,7 +5019,7 @@ PRIVATE void HTML_start_element ARGS5( if (!me->inFORM) { if (TRACE) { fprintf(stderr, - "HTML: SELECT start tag not within FORM tag\n"); + "HTML: ***** SELECT start tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4842,7 +5037,7 @@ PRIVATE void HTML_start_element ARGS5( */ if (me->inTEXTAREA) { if (TRACE) { - fprintf(stderr, "HTML: Missing TEXTAREA end tag\n"); + fprintf(stderr, "HTML: ***** Missing TEXTAREA end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4857,11 +5052,15 @@ PRIVATE void HTML_start_element ARGS5( if (!me->text) UPDATE_STYLE; - if (present && present[HTML_SELECT_NAME] && - value[HTML_SELECT_NAME] && *value[HTML_SELECT_NAME]) - StrAllocCopy(name, value[HTML_SELECT_NAME]); - else + if (!(present && present[HTML_SELECT_NAME] && + value[HTML_SELECT_NAME] && *value[HTML_SELECT_NAME])) { StrAllocCopy(name, ""); + } else if (strchr(value[HTML_SELECT_NAME], '&') == NULL) { + StrAllocCopy(name, value[HTML_SELECT_NAME]); + } else { + StrAllocCopy(name, value[HTML_SELECT_NAME]); + UNESCAPE_FIELDNAME_TO_STD(&name); + } if (present && present[HTML_SELECT_MULTIPLE]) multiple=YES; if (present && present[HTML_SELECT_DISABLED]) @@ -4911,7 +5110,7 @@ PRIVATE void HTML_start_element ARGS5( CHECK_ID(HTML_SELECT_ID); - HText_beginSelect(name, multiple, size); + HText_beginSelect(name, ATTR_CS_IN, multiple, size); FREE(name); FREE(size); @@ -4932,7 +5131,7 @@ PRIVATE void HTML_start_element ARGS5( if (!me->inSELECT) { if (TRACE) { fprintf(stderr, - "HTML: OPTION tag not within SELECT tag\n"); + "HTML: ***** OPTION tag not within SELECT element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -4960,7 +5159,9 @@ PRIVATE void HTML_start_element ARGS5( 
me->option.data, me->LastOptionValue, MIDDLE_ORDER, - me->LastOptionChecked); + me->LastOptionChecked, + me->UCLYhndl, + ATTR_CS_IN); } /* @@ -4993,6 +5194,9 @@ PRIVATE void HTML_start_element ARGS5( I.lang=NULL; I.max=NULL; I.maxlength=NULL; I.md=NULL; I.min=NULL; I.name=NULL; I.size=NULL; I.src=NULL; I.type=NULL; I.value=NULL; I.width=NULL; + I.accept_cs = NULL; + I.name_cs = -1; + I.value_cs = current_char_set; I.type = "OPTION"; @@ -5011,12 +5215,22 @@ PRIVATE void HTML_start_element ARGS5( BOOLEAN CurrentUseDefaultRawMode = LYUseDefaultRawMode; HTCJKlang CurrentHTCJK = HTCJK; + StrAllocCopy(I_value, value[HTML_OPTION_VALUE]); + me->HiddenValue = TRUE; +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES6(&I_value, + ATTR_CS_IN, + ATTR_CS_IN, + NO, + me->UsePlainSpace, me->HiddenValue); + I.value_cs = ATTR_CS_IN; +#else /* !EXP_CHARTRANS */ if (CurrentCharSet) { current_char_set = 0; /* Default ISO-Latin1 */ LYUseDefaultRawMode = TRUE; HTMLSetCharacterHandling(current_char_set); } - LYUnEscapeEntities((char *)value[HTML_OPTION_VALUE], + LYUnEscapeEntities(I_value, me->UsePlainSpace, me->HiddenValue); if (CurrentCharSet) { current_char_set = CurrentCharSet; @@ -5025,8 +5239,10 @@ PRIVATE void HTML_start_element ARGS5( HTPassEightBitRaw = CurrentEightBitRaw; HTCJK = CurrentHTCJK; } +#endif /* !EXP_CHARTRANS */ + me->HiddenValue = FALSE; - I.value = (char *)value[HTML_OPTION_VALUE]; + I.value = I_value; } if (me->select_disabled || @@ -5089,10 +5305,45 @@ PRIVATE void HTML_start_element ARGS5( if (present && present[HTML_OPTION_VALUE] && - value[HTML_OPTION_VALUE]) - StrAllocCopy(me->LastOptionValue, value[HTML_OPTION_VALUE]); - else + value[HTML_OPTION_VALUE]) { + if (!I_value) { + /* + * Convert any HTML entities or decimal escaping. 
- FM + */ + int CurrentCharSet = current_char_set; + BOOL CurrentEightBitRaw = HTPassEightBitRaw; + BOOLEAN CurrentUseDefaultRawMode = LYUseDefaultRawMode; + HTCJKlang CurrentHTCJK = HTCJK; + + StrAllocCopy(I_value, value[HTML_OPTION_VALUE]); + me->HiddenValue = TRUE; +#ifdef EXP_CHARTRANS + TRANSLATE_AND_UNESCAPE_ENTITIES6(&I_value, + ATTR_CS_IN, + ATTR_CS_IN, + NO, + me->UsePlainSpace, me->HiddenValue); +#else /* !EXP_CHARTRANS */ + if (CurrentCharSet) { + current_char_set = 0; /* Default ISO-Latin1 */ + LYUseDefaultRawMode = TRUE; + HTMLSetCharacterHandling(current_char_set); + } + LYUnEscapeEntities(I_value, me->UsePlainSpace, me->HiddenValue); + if (CurrentCharSet) { + current_char_set = CurrentCharSet; + LYUseDefaultRawMode = CurrentUseDefaultRawMode; + HTMLSetCharacterHandling(current_char_set); + HTPassEightBitRaw = CurrentEightBitRaw; + HTCJK = CurrentHTCJK; + } +#endif /* !EXP_CHARTRANS */ + me->HiddenValue = FALSE; + } + StrAllocCopy(me->LastOptionValue, I_value); + } else { StrAllocCopy(me->LastOptionValue, me->option.data); + } /* * If this is a popup option, print its option @@ -5112,6 +5363,7 @@ PRIVATE void HTML_start_element ARGS5( } } } + FREE(I_value); } break; @@ -5466,7 +5718,7 @@ PRIVATE void HTML_end_element ARGS3( */ if (me->inTEXTAREA && element_number != HTML_TEXTAREA) if (TRACE) { - fprintf(stderr, "HTML: Missing TEXTAREA end tag\n"); + fprintf(stderr, "HTML: ***** Missing TEXTAREA end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -5484,7 +5736,12 @@ PRIVATE void HTML_end_element ARGS3( if (me->inA || me->inSELECT || me->inTEXTAREA) if (TRACE) { fprintf(stderr, - "HTML: Something not closed before HTML close-tag\n"); + "HTML: ***** %s%s%s%s%s not closed before HTML end tag\n", + me->inSELECT ? "SELECT" : "", + (me->inSELECT && me->inTEXTAREA) ? ", " : "", + me->inTEXTAREA ? "TEXTAREA" : "", + ((me->inSELECT || me->inTEXTAREA) && me->inA) ? ", " : "", + me->inA ? 
"A" : ""); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -5584,7 +5841,12 @@ PRIVATE void HTML_end_element ARGS3( if (me->inA || me->inSELECT || me->inTEXTAREA) if (TRACE) { fprintf(stderr, - "HTML: Something not closed before BODY close-tag\n"); + "HTML: ***** %s%s%s%s%s not closed before BODY end tag *****\n", + me->inSELECT ? "SELECT" : "", + (me->inSELECT && me->inTEXTAREA) ? ", " : "", + me->inTEXTAREA ? "TEXTAREA" : "", + ((me->inSELECT || me->inTEXTAREA) && me->inA) ? ", " : "", + me->inA ? "A" : ""); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -5672,7 +5934,6 @@ PRIVATE void HTML_end_element ARGS3( } break; - case HTML_P: UPDATE_STYLE; /* @@ -6036,7 +6297,7 @@ PRIVATE void HTML_end_element ARGS3( */ if (TRACE) { fprintf(stderr, - "HTML: Unmatched OBJECT start and end tags. Discarding content:\n%s\n", + "HTML: ***** Unmatched OBJECT start and end tags. ***** Discarding content:\n%s\n", me->object.data); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); @@ -6114,7 +6375,7 @@ PRIVATE void HTML_end_element ARGS3( } else { if (TRACE) { fprintf(stderr, - "HTML: Unmatched OBJECT start and end tags. Discarding content.\n"); + "HTML: ***** Unmatched OBJECT start and end tags. ***** Discarding content.\n"); goto End_Object; } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); @@ -6318,7 +6579,7 @@ End_Object: */ if (!me->inFORM) { if (TRACE) { - fprintf(stderr, "HTML: Unmatched FORM end tag\n"); + fprintf(stderr, "HTML: ***** Unmatched FORM end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -6340,7 +6601,7 @@ End_Object: if (me->inSELECT) { if (TRACE) { fprintf(stderr, - "HTML: Open SELECT at FORM end. Faking SELECT end tag.\n"); + "HTML: ***** Open SELECT at FORM end. Faking SELECT end tag. 
*****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -6401,7 +6662,7 @@ End_Object: */ if (!me->inTEXTAREA) { if (TRACE) { - fprintf(stderr, "HTML: Unmatched TEXTAREA end tag\n"); + fprintf(stderr, "HTML: ***** Unmatched TEXTAREA end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -6423,6 +6684,7 @@ End_Object: I.lang=NULL; I.max=NULL; I.maxlength=NULL; I.md=NULL; I.min=NULL; I.name=NULL; I.size=NULL; I.src=NULL; I.type=NULL; I.value=NULL; I.width=NULL; + I.value_cs = current_char_set; UPDATE_STYLE; /* @@ -6443,21 +6705,40 @@ End_Object: I.type = "textarea"; I.size = me->textarea_cols; I.name = me->textarea_name; + I.name_cs = me->textarea_name_cs; + I.accept_cs = me->textarea_accept_cs; + me->textarea_accept_cs = NULL; I.disabled = me->textarea_disabled; I.id = me->textarea_id; me->UsePlainSpace = TRUE; + +#ifndef EXP_CHARTRANS if (current_char_set) LYExpandString(&me->textarea.data); +#else + TRANSLATE_AND_UNESCAPE_ENTITIES5(&me->textarea.data, + me->UCLYhndl, + current_char_set, + me->UsePlainSpace, me->HiddenValue); +#define CHUNK_TRANSLATED 1 +#endif if ((cp = strtok(me->textarea.data, "\n")) != NULL) { StrAllocCopy(temp, cp); +#if ! CHUNK_TRANSLATED LYUnEscapeEntities(temp, me->UsePlainSpace, me->HiddenValue); +#endif } else { FREE(temp); } for (i = 0; i < me->textarea_rows; i++) { + int j; + for (j = 0; temp && temp[j]; j++) { + if (temp[j] == '\r') + temp[j] = (temp[j+1] ? ' ' : '\0'); + } I.value = temp; chars = HText_beginInput(me->text, me->inUnderline, &I); for (; chars > 0; chars--) @@ -6466,9 +6747,11 @@ End_Object: if (cp) { if ((cp = strtok(NULL, "\n")) != NULL) { StrAllocCopy(temp, cp); +#if ! CHUNK_TRANSLATED LYUnEscapeEntities(temp, me->UsePlainSpace, me->HiddenValue); +#endif } else { FREE(temp); } @@ -6479,7 +6762,11 @@ End_Object: * Check for more data lines than the rows attribute. 
*/ while (cp) { - StrAllocCopy(temp, cp); + int j; + for (j = 0; temp && temp[j]; j++) { + if (temp[j] == '\r') + temp[j] = (temp[j+1] ? ' ' : '\0'); + } I.value = temp; chars = HText_beginInput(me->text, me->inUnderline, &I); for (chars = atoi(me->textarea_cols); chars > 0; chars--) @@ -6487,9 +6774,11 @@ End_Object: HText_appendCharacter(me->text, '\r'); if ((cp = strtok(NULL, "\n")) != NULL) { StrAllocCopy(temp, cp); +#if ! CHUNK_TRANSLATED LYUnEscapeEntities(temp, me->UsePlainSpace, me->HiddenValue); +#endif } else { FREE(temp); } @@ -6500,6 +6789,7 @@ End_Object: HTChunkClear(&me->textarea); FREE(me->textarea_name); + me->textarea_name_cs = -1; FREE(me->textarea_cols); FREE(me->textarea_id); break; @@ -6516,7 +6806,7 @@ End_Object: */ if (!me->inSELECT) { if (TRACE) { - fprintf(stderr, "HTML: Unmatched SELECT end tag\n"); + fprintf(stderr, "HTML: ***** Unmatched SELECT end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -6541,7 +6831,7 @@ End_Object: if (!me->inFORM) { if (TRACE) { fprintf(stderr, - "HTML: SELECT end tag not within FORM tag\n"); + "HTML: ***** SELECT end tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -6566,7 +6856,9 @@ End_Object: me->option.data, me->LastOptionValue, LAST_ORDER, - me->LastOptionChecked); + me->LastOptionChecked, + me->UCLYhndl, + ATTR_CS_IN); FREE(me->LastOptionValue); me->LastOptionChecked = FALSE; @@ -6856,9 +7148,9 @@ PRIVATE void HTML_free ARGS1(HTStructured *, me) if (!dump_output_immediately && HText_sourceAnchors(me->text) < 1 && HText_HiddenLinkCount(me->text) > 0) { - HTML_start_element(me, HTML_P, 0, 0, (char **)&include); + HTML_start_element(me, HTML_P, 0, 0, -1, (char **)&include); HTML_put_character(me, '['); - HTML_start_element(me, HTML_EM, 0, 0, (char **)&include); + HTML_start_element(me, HTML_EM, 0, 0, -1, (char **)&include); HTML_put_string(me, "Document has only hidden 
links. Use the 'l'ist command."); HTML_end_element(me, HTML_EM, (char **)&include); @@ -6946,6 +7238,7 @@ PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e) FREE(me->base_href); FREE(me->map_address); FREE(me->textarea_name); + FREE(me->textarea_accept_cs); FREE(me->textarea_cols); FREE(me->textarea_id); FREE(me->LastOptionValue); @@ -7130,6 +7423,8 @@ PUBLIC HTStructured* HTML_new ARGS3( me->textarea.allocated = 0; me->textarea.data = NULL; me->textarea_name = NULL; + me->textarea_name_cs = -1; + me->textarea_accept_cs = NULL; me->textarea_cols = NULL; me->textarea_rows = 4; me->textarea_disabled = NO; diff --git a/src/HTML.h b/src/HTML.h index a19dcf4e..c5a13c31 100644 --- a/src/HTML.h +++ b/src/HTML.h @@ -76,6 +76,8 @@ struct _HTStructured { BOOL select_disabled; HTChunk textarea; /* Grow by 128 */ char * textarea_name; + int textarea_name_cs; + char * textarea_accept_cs; char * textarea_cols; int textarea_rows; int textarea_disabled; @@ -148,6 +150,7 @@ struct _HTStructured { LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ int UCLYhndl; /* tells us what charset we are fed */ UCTransParams T; + int tag_charset; /* charset for attribute values etc. */ #endif }; diff --git a/src/LYCharSets.c b/src/LYCharSets.c index 44e9cbaa..41b5a138 100644 --- a/src/LYCharSets.c +++ b/src/LYCharSets.c @@ -2286,7 +2286,6 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) } #ifdef EXP_CHARTRANS - if (LYCharSet_UC[i].enc != UCT_ENC_CJK) { if (LYRawMode) { UCLYhndl_for_unspec = i; @@ -2294,12 +2293,14 @@ PUBLIC void HTMLSetCharacterHandling ARGS1(int,i) int chndl = 0; if (UCAssume_MIMEcharset) chndl = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); - if (chndl != i) - UCLYhndl_for_unspec = chndl < 0 ? 
0 : chndl; - else + if (chndl != i && chndl >= 0 && + (LYCharSet_UC[i].enc != UCT_ENC_CJK || + LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) { + UCLYhndl_for_unspec = chndl; + } else { UCLYhndl_for_unspec = 0; + } } - } #endif /* EXP_CHARTRANS */ #ifdef USE_SLANG diff --git a/src/LYCharSets.h b/src/LYCharSets.h index 48a0c53a..ca53bb79 100644 --- a/src/LYCharSets.h +++ b/src/LYCharSets.h @@ -4,7 +4,7 @@ /* LYchar_set_name[current_char_set] points to the currently active set */ extern int current_char_set; -extern CONST char *LYchar_set_names[]; +extern CONST char * LYchar_set_names[]; extern char ** LYCharSets[]; diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c index 3f55f204..b25b30ac 100644 --- a/src/LYCharUtils.c +++ b/src/LYCharUtils.c @@ -829,7 +829,7 @@ PUBLIC void LYGetChartransInfo ARGS1( ** to their HTML entity names and then translated for ** the current character set. - FM */ -PUBLIC void LYExpandString ARGS1( +PUBLIC void LYExpandString_old ARGS1( char **, str) { char *p = *str; @@ -904,6 +904,122 @@ PUBLIC void LYExpandString ARGS1( } /* +** Get UCS character code for one character from UTF-8 encoded string. +** +** On entry: +** *ppuni should point to beginning of UTF-8 encoding character +** On exit: +** *ppuni is advanced to point to the last byte of UTF-8 sequence, +** if there was a valid one; otherwise unchanged. 
+** returns the UCS value +** returns negative value on error (invalid UTF-8 sequence) +*/ +PRIVATE UCode_t UCGetUniFromUtf8String ARGS1(char **, ppuni) +{ + UCode_t uc_out = 0; + char * p = *ppuni; + int utf_count, i; + if (!(**ppuni&0x80)) + return (UCode_t) **ppuni; /* ASCII range character */ + else if (!(**ppuni&0x40)) + return (-1); /* not a valid UTF-8 start */ + if ((*p & 0xe0) == 0xc0) { + utf_count = 1; + } else if ((*p & 0xf0) == 0xe0) { + utf_count = 2; + } else if ((*p & 0xf8) == 0xf0) { + utf_count = 3; + } else if ((*p & 0xfc) == 0xf8) { + utf_count = 4; + } else if ((*p & 0xfe) == 0xfc) { + utf_count = 5; + } else { /* garbage */ + return (-1); + } + for (p = *ppuni, i = 0; i < utf_count ; i++) { + if ((*(++p) & 0xc0) != 0x80) + return (-1); + } + p = *ppuni; + switch (utf_count) { + case 1: + uc_out = (((*p&0x1f) << 6) | (*(p+1)&0x3f)); + break; + case 2: + uc_out = (((((*p&0x0f) << 6) | (*(p+1)&0x3f)) << 6) | (*(p+2)&0x3f)); + break; + case 3: + uc_out = (((((((*p&0x07) << 6) | (*(p+1)&0x3f)) << 6) | (*(p+2)&0x3f)) << 6) + | (*(p+3)&0x3f)); + break; + case 4: + uc_out = (((((((((*p&0x03) << 6) | (*(p+1)&0x3f)) << 6) | (*(p+2)&0x3f)) << 6) + | (*(p+3)&0x3f)) << 6) | (*(p+4)&0x3f)); + break; + case 5: + uc_out = (((((((((((*p&0x01) << 6) | (*(p+1)&0x3f)) << 6) | (*(p+2)&0x3f)) << 6) + | (*(p+3)&0x3f)) << 6) | (*(p+4)&0x3f)) << 6) | (*(p+5)&0x3f)); + break; + } + *ppuni = p + utf_count; + return uc_out; +} + +/* + * Given an UCS character code, will fill buffer passed in as q with + * the code's UTF-8 encoding. + * If terminate = YES, terminates string on success and returns pointer + * to beginning. + * If terminate = NO, does not terminate string, and returns pointer + * next char after the UTF-8 put into buffer. + * On failure, including invalid code or 7-bit code, returns NULL. 
+ */ +PRIVATE char * UCPutUtf8ToBuffer ARGS3(char *, q, UCode_t, code, BOOL, terminate) +{ + char *q_in = q; + if (!q) + return NULL; + if (code > 127 && code < 0x7fffffffL) { + if (code < 0x800L) { + *q++ = (char)(0xc0 | (code>>6)); + *q++ = (char)(0x80 | (0x3f & (code))); + } else if (code < 0x10000L) { + *q++ = (char)(0xe0 | (code>>12)); + *q++ = (char)(0x80 | (0x3f & (code>>6))); + *q++ = (char)(0x80 | (0x3f & (code))); + } else if (code < 0x200000L) { + *q++ = (char)(0xf0 | (code>>18)); + *q++ = (char)(0x80 | (0x3f & (code>>12))); + *q++ = (char)(0x80 | (0x3f & (code>>6))); + *q++ = (char)(0x80 | (0x3f & (code))); + } else if (code < 0x4000000L) { + *q++ = (char)(0xf8 | (code>>24)); + *q++ = (char)(0x80 | (0x3f & (code>>18))); + *q++ = (char)(0x80 | (0x3f & (code>>12))); + *q++ = (char)(0x80 | (0x3f & (code>>6))); + *q++ = (char)(0x80 | (0x3f & (code))); + } else { + *q++ = (char)(0xfc | (code>>30)); + *q++ = (char)(0x80 | (0x3f & (code>>24))); + *q++ = (char)(0x80 | (0x3f & (code>>18))); + *q++ = (char)(0x80 | (0x3f & (code>>12))); + *q++ = (char)(0x80 | (0x3f & (code>>6))); + *q++ = (char)(0x80 | (0x3f & (code))); + } + } else { + return NULL; + } + if (terminate) { + *q = '\0'; + return q_in; + } else { + return q; + } +} + +PRIVATE char *hex = "0123456789ABCDEF"; + +/* ** This function converts HTML named entities within a string ** to their translations in the active LYCharSets.c array. ** It also converts numeric entities to their HTML entity names @@ -916,32 +1032,704 @@ PUBLIC void LYExpandString ARGS1( ** space (32). If hidden is TRUE, entities will be translated ** but escape sequences will be passed unaltered. 
- FM */ -PUBLIC char * LYUnEscapeEntities ARGS3( - char *, str, - BOOLEAN, plain_space, - BOOLEAN, hidden) +PRIVATE char ** LYUnEscapeEntities ARGS5( + char **, str, + int, cs_to, + BOOLEAN, plain_space, + BOOLEAN, hidden, + CharUtil_st, stype) { - char * p = str; - char * q = str; + char * p; + char *q, *Str; char * cp; char cpe; - int len, value; + char *esc = NULL; + char buf[2]; + char replace_buf[61]; + int uck; + int lowest_8; + UCode_t code; + long int lcode; + BOOL output_utf8, repl_translated_C0; + size_t len; int high, low, diff = 0, i; + CONST char ** entities = HTML_dtd.entity_names; + CONST UC_entity_info * extra_entities = HTML_dtd.extra_entity_info; + CONST char * name; enum _state - { S_text, S_esc, S_dollar, S_paren, - S_nonascii_text, S_dollar_paren } state = S_text; + { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren, + S_trans_byte, S_check_ent, S_ncr, S_check_uni, S_check_name, S_named, + S_recover, + S_got_oututf8, S_got_outstring, S_put_urlstring, + S_got_outchar, S_put_urlchar, S_next_char, S_done} state = S_text; + enum _parsing_what + { P_text, P_utf8, P_hex, P_decimal, P_named + } what = P_text; /* ** Make sure we have a non-empty string. - FM */ - if (!str || *str == '\0') + if (!str || *str == NULL || **str == '\0') return str; + /* + ** Save malloc/calloc overhead in simple case - kw + */ + if (hidden && (stype != st_URL) && (strchr(*str, '&') == NULL)) + return str; + p = *str; + + if (cs_to < 0) + return NULL; + output_utf8 = (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 || + HText_hasUTF8OutputSet(HTMainText)); +#if 0 + cs_to = current_char_set; /* for now */ + lowest_8 = LYlowest_eightbit[HTAnchor_getUCLYhndl( + me->node_anchor, + UCT_STAGE_STRUCTURED)]; + repl_translated_C0 = me->T.repl_translated_C0; +#endif + lowest_8 = LYlowest_eightbit[cs_to]; + repl_translated_C0 = (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0); /* - ** Loop through string, making conversions as needed. 
- FM + ** Create a buffer string seven times the length of the original, + ** so we have plenty of room for expansions. - FM + */ + len = (strlen(p) * 7) + 1; + if (len < 16) + len = 16; + if ((Str = (char *)calloc(1, len)) == NULL) { + fprintf(stderr, + "LYUnEscapeEntities: calloc(1, %lu) failed for '%s'\r\n", + (unsigned long)len, *str); + outofmem(__FILE__, "LYUnEscapeEntities"); + } + q = Str; + + /* + * Loop through string, making conversions as needed. + * + * The while() checks for a non-'\0' char only for the normal + * text states since other states may temporarily modify p or *p + * (which should be restored before S_done!) - kw */ + + while (*p || (state != S_text && state != S_nonascii_text)) { + switch(state) { + case S_text: + code = (unsigned char)(*p); + if (*p == '\033') { + if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) { + state = S_esc; + if (stype == st_URL) { + *q++ = '%'; *q++ = '1'; *q++ = 'B'; + p++; + continue; + } else if (stype != st_HTML) { + p++; + continue; + } else { + *q++ = *p++; + continue; + } + } else if (!hidden) { + /* + ** CJK handling not on, and not a hidden INPUT, + ** so block escape. 
- FM + */ + state = S_next_char; + } else { + state = S_check_ent; + } + } else { + state = S_check_ent; + } + break; + + case S_esc: + if (*p == '$') { + state = S_dollar; + *q++ = *p++; + continue; + } else if (*p == '(') { + state = S_paren; + *q++ = *p++; + continue; + } else { + state = S_text; + } + + case S_dollar: + if (*p == '@' || *p == 'B' || *p == 'A') { + state = S_nonascii_text; + *q++ = *p++; + continue; + } else if (*p == '(') { + state = S_dollar_paren; + *q++ = *p++; + continue; + } else { + state = S_text; + } + break; + + case S_dollar_paren: + if (*p == 'C') { + state = S_nonascii_text; + *q++ = *p++; + continue; + } else { + state = S_text; + } + break; + + case S_paren: + if (*p == 'B' || *p == 'J' || *p == 'T') { + state = S_text; + *q++ = *p++; + continue; + } else if (*p == 'I') { + state = S_nonascii_text; + *q++ = *p++; + continue; + } else { + state = S_text; + } + break; + + case S_nonascii_text: + if (*p == '\033') { + if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) { + state = S_esc; + if (stype == st_URL) { + *q++ = '%'; *q++ = '1'; *q++ = 'B'; + p++; + continue; + } else if (stype != st_HTML) { + p++; + continue; + } + } + } + *q++ = *p++; + continue; + + case S_trans_byte: + /* character translation could go here */ + state = S_got_outchar; + break; + + case S_check_ent: + if (*p == '&') { + char * pp = p + 1; + len = strlen(pp); + /* + ** Check for a numeric entity. 
- FM + */ + if (*pp == '#' && len > 2 && + (unsigned char)*(pp+1) == 'x' && + (unsigned char)*(pp+2) < 127 && + isxdigit((unsigned char)*(pp+2))) { + what = P_hex; + state = S_ncr; + } else if (*pp == '#' && len > 2 && + (unsigned char)*(pp+1) < 127 && + isdigit((unsigned char)*(pp+1))) { + what = P_decimal; + state = S_ncr; + } else if ((unsigned char)*pp < 127 && + isalpha((unsigned char)*pp)) { + what = P_named; + state = S_named; + } else { + state = S_trans_byte; + } + } else { + state = S_trans_byte; + } + break; + + case S_ncr: + if (what == P_hex) { + p += 3; + } else { /* P_decimal */ + p += 2; + } + cp = p; + while (*p && (unsigned char)*p < 127 && + (what == P_hex ? isxdigit((unsigned char)*p) : + isdigit((unsigned char)*p))) { + p++; + } + /* + ** Save the terminator and isolate the digit(s). - FM + */ + cpe = *p; + if (*p) + *p++ = '\0'; + /* + ** Show the numeric entity if the value: + ** (1) Is greater than 255 and unhandled Unicode. + ** (2) Is less than 32, and not valid and we don't + ** have HTCJK set. + ** (3) Is 127 and we don't have HTPassHighCtrlRaw + ** or HTCJK set. + ** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set. + */ + if ((((what == P_hex) ? sscanf(cp, "%lx", &lcode) : + sscanf(cp, "%ld", &lcode)) != 1) || + lcode > 0x7fffffffL || lcode < 0) { + state = S_recover; + } else { + code = lcode; + state = S_check_uni; + } + break; + case S_check_uni: + /* + ** Show the numeric entity if the value: + ** (2) Is less than 32, and not valid and we don't + ** have HTCJK set. + ** (3) Is 127 and we don't have HTPassHighCtrlRaw + ** or HTCJK set. + ** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set. 
+ */ + if ((code < 32 && + code != 9 && code != 10 && code != 13 && + HTCJK == NOCJK) || + (code == 127 && + !(HTPassHighCtrlRaw || HTCJK != NOCJK)) || + (code > 127 && code < 160 && + !HTPassHighCtrlNum)) { + state = S_recover; + break; + } + /* + ** Convert the value as an unsigned char, + ** hex escaped if isURL is set and it's + ** 8-bit, and then recycle the terminator + ** if it is not a semicolon. - FM + */ + if (code > 159 && stype == st_URL) { + state = S_got_oututf8; + break; + } + /* + ** For 160 (nbsp), use that value if it's + ** a hidden INPUT, otherwise use an ASCII + ** space (32) if plain_space is TRUE, + ** otherwise use the Lynx special character. - FM + */ + if (code == 160) { + if (hidden) { + ; + } else if (plain_space) { + code = ' '; + } else { + code = HT_NON_BREAK_SPACE; + } + state = S_got_outchar; + break; + } + /* + ** For 173 (shy), use that value if it's + ** a hidden INPUT, otherwise ignore it + ** if plain_space is TRUE, otherwise use + ** the Lynx special character. - FM + */ + if (code == 173) { + if (hidden) { + ; + } else if (plain_space) { + replace_buf[0] = '\0'; + state = S_got_outstring; + break; + } else { + code = LY_SOFT_HYPHEN; + } + state = S_got_outchar; + break; + } + /* + ** Seek a translation from the chartrans tables. + */ + if ((uck = UCTransUniChar(code, + cs_to)) >= 32 && + uck < 256 && + (uck < 127 || uck >= lowest_8)) { + if (uck == 160 && cs_to == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 160. + */ + if (hidden) { + ; + } else if (plain_space) { + code = ' '; + } else { + code = HT_NON_BREAK_SPACE; + } + } else if (uck == 173 && cs_to == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 173. 
+ */ + if (hidden) { + ; + } else if (plain_space) { + replace_buf[0] = '\0'; + state = S_got_outstring; + break; + } else { + code = LY_SOFT_HYPHEN; + } + } else { + code = uck; + } + state = S_got_outchar; + break; + } else if ((uck == -4 || + (repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + ** Not found; look for replacement string. + */ + (uck = UCTransUniCharStr(replace_buf, + 60, code, + current_char_set, + 0) >= 0)) { + state = S_got_outstring; + break; + } + if (output_utf8 && + code > 127 && code < 0x7fffffffL) { + state = S_got_oututf8; + break; + } + /* + ** For 8482 (trade) use the character reference if it's + ** a hidden INPUT, otherwise use whatever the tables have + ** for ™. - FM & KW + */ + if (code == 8482 && hidden) { + state = S_recover; + break; + /* + ** For 8194 (ensp), 8195 (emsp), or 8201 (thinsp), + ** use the character reference if it's a hidden INPUT, + ** otherwise use an ASCII space (32) if plain_space is + ** TRUE, otherwise use the Lynx special character. - FM + */ + } else if (code == 8194 || code == 8195 || code == 8201) { + if (hidden) { + state = S_recover; + } else if (plain_space) { + code = ' '; + state = S_got_outchar; + } else { + code = HT_EM_SPACE; + state = S_got_outchar; + } + break; + /* + ** For 8211 (ndash) or 8212 (mdash), use the character + ** reference if it's a hidden INPUT, otherwise use an + ** ASCII dash. - FM + */ + } else if (code == 8211 || code == 8212) { + if (hidden) { + state = S_recover; + } else { + code = '-'; + state = S_got_outchar; + } + break; + /* + ** Show the numeric entity if the value: + ** (1) Is greater than 255 and unhandled Unicode. + */ + } else if (code > 255 && code != 8482) { + /* + ** Illegal or not yet handled value. + ** Recover the "&#" and continue + ** from there. - FM + */ + state = S_recover; + break; + /* + ** If it's ASCII, or is 8-bit but HTPassEightBitNum + ** is set or the character set is "ISO Latin 1", + ** use it's value. 
- FM + */ + } else if (code < 161 || + (code < 256 && + (HTPassEightBitNum || + !strncmp(LYchar_set_names[current_char_set], + "ISO Latin 1", 11)))) { + /* + ** No conversion needed. + */ + state = S_got_outchar; + break; + /* + ** If we get to here, convert and handle + ** the character as a named entity. - FM + */ + } else { + if (code == 8482) { + /* + ** Trade mark sign falls through to here. - KW + */ + name = "trade"; + } else { + name = HTMLGetEntityName(code - 160); + } + state = S_check_name; + break; + } + + case S_recover: + if (what == P_decimal || what == P_hex) { + /* + ** Illegal or not yet handled value. + ** Recover the "&#" and continue + ** from there. - FM + */ + *q++ = '&'; + *q++ = '#'; + if (what == P_hex) + *q++ = 'x'; + if (cpe != '\0') + *(p-1) = cpe; + p = cp; + } else if (what == P_named) { + *cp = cpe; + *q++ = '&'; + } + state = S_done; + break; + + case S_named: + cp = ++p; + while (*cp && (unsigned char)*cp < 127 && + isalnum((unsigned char)*cp)) + cp++; + cpe = *cp; + *cp = '\0'; +/* ppuni = cp - 1; */ + name = p; + state = S_check_name; + break; + + case S_check_name: + for (low = 0, high = HTML_dtd.number_of_entities; + high > low; + diff < 0 ? (low = i+1) : (high = i)) { + /* + ** Binary search. + */ + i = (low + (high-low)/2); + diff = strcmp(entities[i], name); + if (diff == 0) { + strncpy(replace_buf, + (cs_to >= 0 && LYCharSets[cs_to]) ? + LYCharSets[cs_to][i] : LYCharSets[0][i], + sizeof(replace_buf)); + replace_buf[sizeof(replace_buf) - 1] = '\0'; + if (hidden) { + /* + ** If it's hidden, use 160 for nbsp. - FM + */ + if (!strcmp("nbsp", entities[i]) || + (replace_buf[1] == '\0' && + replace_buf[0] == HT_NON_BREAK_SPACE)) { + replace_buf[0] = 160; + replace_buf[1] = '\0'; + state = S_got_outstring; + break; + /* + ** If it's hidden, use 173 for shy. 
- FM + */ + } else if (!strcmp("shy", entities[i]) || + (replace_buf[1] == '\0' && + replace_buf[0] == LY_SOFT_HYPHEN)) { + replace_buf[0] = 173; + replace_buf[1] = '\0'; + state = S_got_outstring; + break; + } + /* + ** Check whether we want a plain space for nbsp, + ** ensp, emsp or thinsp. - FM + */ + } else if (plain_space) { + if (!strcmp("nbsp", entities[i]) || + !strcmp("emsp", entities[i]) || + !strcmp("ensp", entities[i]) || + !strcmp("thinsp", entities[i]) || + (replace_buf[1] == '\0' && + replace_buf[0] == HT_EM_SPACE)) { + code = ' '; + state = S_got_outchar; + break; + /* + ** If plain_space is set, ignore shy. - FM + */ + } else if (!strcmp("shy", entities[i]) || + (replace_buf[1] == '\0' && + replace_buf[0] == LY_SOFT_HYPHEN)) { + replace_buf[0] = '\0'; + state = S_got_outstring; + break; + } + } + /* + ** Found the entity. If the length + ** of the value exceeds the length of + ** replace_buf it is cut off. + */ + state = S_got_outstring; + break; + } + } + if (diff == 0) { + break; + } + /* + ** Entity name lookup failed (diff != 0). + ** No point in repeating for extra entities. - kw + */ + if (what != P_named) { + /* + ** Didn't find the entity. + ** Recover the "&#" and continue + ** from there. - FM + */ + state = S_recover; + break; + } + /* + ** Not found, repeat for extra entities. - FM + */ + for (low = 0, high = HTML_dtd.number_of_extra_entities; + high > low; + diff < 0 ? (low = i+1) : (high = i)) { + /* + ** Binary search. + */ + i = (low + (high - low)/2); + diff = strcmp(extra_entities[i].name, p); + if (diff == 0) { + /* + ** Found the entity. + */ + code = extra_entities[i].code; + if (code <= 0x7fffffffL && code > 0) { + state = S_check_uni; + } else { + state = S_recover; + } + break; + } + } + if (diff == 0) + break; + /* + ** Didn't find the entity. + ** Recover. 
+ */ + state = S_recover; + break; + + /* * * O U T P U T S T A T E S * * */ + + case S_got_oututf8: + if (code > 255 || + (code >= 128 && LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8)) { + UCPutUtf8ToBuffer(replace_buf, code, YES); + state = S_got_outstring; + } else { + state = S_got_outchar; + } + break; + case S_got_outstring: + if (what == P_decimal || what == P_hex) { + if (cpe != ';' && cpe != '\0') + *(--p) = cpe; + p--; + } else if (what == P_named) { + *cp = cpe; + p = (*cp != ';') ? (cp - 1) : cp; + } +#if 0 + if (what == P_decimal || what == P_hex) { + if (cpe != ';' && cpe != '\0') + *(--p) = cpe; + } else if (what == P_named) { + *cp = cpe; + p = (*cp != ';') ? cp : (cp + 1); + } +#endif + if (replace_buf[0] == '\0') { + state = S_next_char; + break; + } + if (stype == st_URL) { + code = replace_buf[0]; /* assume string OK if first char is */ + if (code >= 127 || + (code < 32 && (code != 9 && code != 10 && code != 0))) { + state = S_put_urlstring; + } + } + for (i = 0; replace_buf[i]; i++) + *q++ = replace_buf[i]; + state = S_next_char; + break; + case S_put_urlstring: + esc = HTEscape(replace_buf, URL_XALPHAS); + for (i = 0; esc[i]; i++) + *q++ = esc[i]; + FREE(esc); + state = S_next_char; + break; + case S_got_outchar: + if (what == P_decimal || what == P_hex) { + if (cpe != ';' && cpe != '\0') + *(--p) = cpe; + p--; + } else if (what == P_named) { + *cp = cpe; + p = (*cp != ';') ? 
(cp - 1) : cp; + } + if (stype == st_URL) { + /* Not a full HTEscape, only for 8bit and ctrl chars */ + if (code >= 127 || + (code < 32 && (code != 9 && code != 10))) { + state = S_put_urlchar; + break; + } + } + *q++ = (char)code; + state = S_next_char; + break; + case S_put_urlchar: + *q++ = '%'; + *q++ = hex[(code >> 4) & 15]; + *q++ = hex[(code & 15)]; + /* fall through */ + case S_next_char: + p++; /* fall through */ + case S_done: + state = S_text; + what = P_text; + /* for next round */ + } + } + +#if 0 while (*p) { - if (HTCJK != NOCJK && !hidden) { + if ((HTCJK != NOCJK && !hidden) || stype != st_HTML) { /* ** Handle CJK escape sequences, based on patch ** from Takuya ASADA (asada@three-a.co.jp). - FM @@ -950,7 +1738,12 @@ PUBLIC char * LYUnEscapeEntities ARGS3( case S_text: if (*p == '\033') { state = S_esc; - *q++ = *p++; + if (stype == st_URL) { + *q++ = '%'; *q++ = '1'; *q++ = 'B'; + p++; + } else { + *q++ = *p++; + } continue; } break; @@ -1007,15 +1800,17 @@ PUBLIC char * LYUnEscapeEntities ARGS3( break; case S_nonascii_text: - if (*p == '\033') + if (*p == '\033') { state = S_esc; + if (stype == st_URL) { + *q++ = '%'; *q++ = '1'; *q++ = 'B'; + p++; + continue; + } + } *q++ = *p++; continue; - break; - default: - p++; - continue; } } else if (*p == '\033' && !hidden) { @@ -1027,6 +1822,7 @@ PUBLIC char * LYUnEscapeEntities ARGS3( continue; } + code = *p; /* ** Check for a numeric or named entity. - FM */ @@ -1069,23 +1865,21 @@ PUBLIC char * LYUnEscapeEntities ARGS3( /* ** Show the numeric entity if the value: ** (1) Is greater than 255 and unhandled Unicode. - ** (2) Is less than 32, and not valid or we don't + ** (2) Is less than 32, and not valid and we don't ** have HTCJK set. ** (3) Is 127 and we don't have HTPassHighCtrlRaw ** or HTCJK set. ** (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set. */ - if (((isHex ? 
sscanf(cp, "%x", &value) : - sscanf(cp, "%d", &value)) != 1) || - (value > 255 && - value != 8194 && value != 8195 && value != 8201 && - value != 8211 && value != 8212 && value != 8482) || - (value < 32 && - value != 9 && value != 10 && value != 13 && + if (((isHex ? sscanf(cp, "%lx", &lcode) : + sscanf(cp, "%ld", &lcode)) != 1) || + lcode > 0x7fffffffL || lcode < 0 || + ((code =lcode) < 32 && + code != 9 && code != 10 && code != 13 && HTCJK == NOCJK) || - (value == 127 && + (code == 127 && !(HTPassHighCtrlRaw || HTCJK != NOCJK)) || - (value > 127 && value < 160 && + (code > 127 && code < 160 && !HTPassHighCtrlNum)) { /* ** Illegal or not yet handled value. @@ -1095,59 +1889,170 @@ PUBLIC char * LYUnEscapeEntities ARGS3( *q++ = '&'; *q++ = '#'; if (isHex) - *q++ = 'x'; + *q++ = 'x'; if (cpe != '\0') - *(p-1) = cpe; + *(p-1) = cpe; p = cp; continue; + } /* - ** For 160 (nbsp), use that value if it's - ** a hidden INPUT, otherwise use an ASCII - ** space (32) if plain_space is TRUE, - ** otherwise use the Lynx special character. - FM + ** Convert the value as an unsigned char, + ** hex escaped if isURL is set and it's + ** 8-bit, and then recycle the terminator + ** if it is not a semicolon. - FM */ - } else if (value == 160) { + if (code > 159 && stype == st_URL) { + int e; + if (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8) { + UCPutUtf8ToBuffer(replace_buf, code, YES); + esc = HTEscape(replace_buf, URL_XALPHAS); + } else { + buf[0] = code; + esc = HTEscape(buf, URL_XALPHAS); + } + for (e = 0; esc[e]; e++) + *q++ = esc[e]; + FREE(esc); + if (cpe != ';' && cpe != '\0') { + p--; + *p = cpe; + } + continue; + } + /* + ** For 160 (nbsp), use that value if it's + ** a hidden INPUT, otherwise use an ASCII + ** space (32) if plain_space is TRUE, + ** otherwise use the Lynx special character. 
- FM + */ + if (code == 160) { if (hidden) { - *q++ = 160; + *q++ = 160; } else if (plain_space) { - *q++ = ' '; + *q++ = ' '; } else { - *q++ = HT_NON_BREAK_SPACE; + *q++ = HT_NON_BREAK_SPACE; } if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; + } /* - ** For 173 (shy), use that value if it's - ** a hidden INPUT, otherwise ignore it if - ** plain space is TRUE, otherwise use the - ** Lynx special character. - FM - */ - } else if (value == 173) { + ** For 173 (shy), use that value if it's + ** a hidden INPUT, otherwise ignore it + ** if plain_space is TRUE, otherwise use + ** the Lynx special character. - FM + */ + if (code == 173) { if (hidden) { - *q++ = 173; + *q++ = 173; } else if (plain_space) { - ; + ; } else { - *q++ = LY_SOFT_HYPHEN; + *q++ = LY_SOFT_HYPHEN; } if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; + } + /* + ** Seek a translation from the chartrans tables. + */ + if ((uck = UCTransUniChar(code, + current_char_set)) >= 32 && + uck < 256 && + (uck < 127 || uck >= lowest_8)) { + if (uck == 160 && current_char_set == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 160. + */ + if (hidden) { + *q++ = 160; + } else if (plain_space) { + *q++ = ' '; + } else { + *q++ = HT_NON_BREAK_SPACE; + } + if (cpe != ';' && cpe != '\0') { + p--; + *p = cpe; + } + continue; + } else if (uck == 173 && current_char_set == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 173. + */ + if (hidden) { + *q++ = 173; + } else if (plain_space) { + ; + } else { + *q++ = LY_SOFT_HYPHEN; + } + if (cpe != ';' && cpe != '\0') { + p--; + *p = cpe; + } + continue; + } else { + *q++ = (char)uck; + } + } else if ((uck == -4 || + (repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + ** Not found; look for replacement string. 
+ */ + (uck = UCTransUniCharStr(replace_buf, + 60, code, + current_char_set, + 0) >= 0)) { + for (i = 0; replace_buf[i]; i++) { + *q++ = replace_buf[i]; + } + if (cpe != ';' && cpe != '\0') { + p--; + *p = cpe; + } + continue; + } else if (output_utf8 && + code > 127 && code < 0x7fffffffL) { + UCPutUtf8ToBuffer(q, code, NO); + if (cpe != ';' && cpe != '\0') { + p--; + *p = cpe; + } + continue; + /* + ** For 8482 (trade) use the character reference if it's + ** a hidden INPUT, otherwise use whatever the tables have + ** for ™. - FM & KW + */ + } else if (code == 8482 && hidden) { + *q++ = '&'; + *q++ = '#'; + if (isHex) + *q++ = 'x'; + if (cpe != '\0') + *(p-1) = cpe; + p = cp; + continue; /* ** For 8194 (ensp), 8195 (emsp), or 8201 (thinsp), ** use the character reference if it's a hidden INPUT, ** otherwise use an ASCII space (32) if plain_space is ** TRUE, otherwise use the Lynx special character. - FM */ - } else if (value == 8194 || value == 8195 || value == 8201) { + } else if (code == 8194 || code == 8195 || code == 8201) { if (hidden) { - *q++ = '&'; - *q++ = '#'; + *q++ = '&'; + *q++ = '#'; if (isHex) *q++ = 'x'; if (cpe != '\0') @@ -1155,24 +2060,24 @@ PUBLIC char * LYUnEscapeEntities ARGS3( p = cp; continue; } else if (plain_space) { - *q++ = ' '; + *q++ = ' '; } else { - *q++ = HT_EM_SPACE; + *q++ = HT_EM_SPACE; } if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; - /* - ** For 8211 (ndash) or 8212 (mdash), use the character - ** reference if it's a hidden INPUT, otherwise use an - ** ASCII dash. - FM - */ - } else if (value == 8211 || value == 8212) { + /* + ** For 8211 (ndash) or 8212 (mdash), use the character + ** reference if it's a hidden INPUT, otherwise use an + ** ASCII dash. 
- FM + */ + } else if (code == 8211 || code == 8212) { if (hidden) { - *q++ = '&'; - *q++ = '#'; + *q++ = '&'; + *q++ = '#'; if (isHex) *q++ = 'x'; if (cpe != '\0') @@ -1180,60 +2085,64 @@ PUBLIC char * LYUnEscapeEntities ARGS3( p = cp; continue; } else { - *q++ = '-'; + *q++ = '-'; } if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; - /* - ** For 8482 (trade) use the character reference if it's - ** a hidden INPUT, otherwise use whatever the tables have - ** for ™. - FM & KW - */ - } else if (value == 8482 && hidden) { - *q++ = '&'; - *q++ = '#'; - if (isHex) - *q++ = 'x'; - if (cpe != '\0') - *(p-1) = cpe; - p = cp; - continue; + /* + ** Show the numeric entity if the value: + ** (1) Is greater than 255 and unhandled Unicode. + */ + } else if (code > 255 && code != 8482) { + /* + ** Illegal or not yet handled value. + ** Recover the "&#" and continue + ** from there. - FM + */ + *q++ = '&'; + *q++ = '#'; + if (isHex) + *q++ = 'x'; + if (cpe != '\0') + *(p-1) = cpe; + p = cp; + continue; /* ** If it's ASCII, or is 8-bit but HTPassEightBitNum ** is set or the character set is "ISO Latin 1", ** use it's value. - FM */ - } else if (value < 161 || - (value < 256 && + } else if (code < 161 || + (code < 256 && (HTPassEightBitNum || !strncmp(LYchar_set_names[current_char_set], "ISO Latin 1", 11)))) { /* ** No conversion needed. */ - *q++ = (unsigned char)value; + *q++ = (unsigned char)code; if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; - /* - ** If we get to here, convert and handle - ** the character as a named entity. - FM - */ + /* + ** If we get to here, convert and handle + ** the character as a named entity. - FM + */ } else { CONST char * name; - if (value == 8482) { + if (code == 8482) { /* ** Trade mark sign falls through to here. 
- KW */ name = "trade"; } else { - value -= 160; - name = HTMLGetEntityName(value); + code -= 160; + name = HTMLGetEntityName(code); } for (low = 0, high = HTML_dtd.number_of_entities; high > low; @@ -1242,7 +2151,7 @@ PUBLIC char * LYUnEscapeEntities ARGS3( ** Binary search. */ i = (low + (high-low)/2); - diff = strcmp(HTML_dtd.entity_names[i], name); + diff = strcmp(entities[i], name); if (diff == 0) { /* ** Found the entity. Assume that the length @@ -1257,16 +2166,35 @@ PUBLIC char * LYUnEscapeEntities ARGS3( break; } } + /* + ** No point in repeating for extra entities. - kw + */ + if (diff != 0) { + /* + ** Didn't find the entity. + ** Recover the "&#" and continue + ** from there. - FM + */ + *q++ = '&'; + *q++ = '#'; + if (isHex) + *q++ = 'x'; + if (cpe != '\0') + *(p-1) = cpe; + p = cp; + continue; + } /* ** Recycle the terminator if it isn't the ** standard ';' for HTML. - FM */ if (cpe != ';' && cpe != '\0') { - p--; + p--; *p = cpe; } continue; } + /* ** Check for a named entity. - FM */ @@ -1285,7 +2213,7 @@ PUBLIC char * LYUnEscapeEntities ARGS3( ** Binary search. */ i = (low + (high-low)/2); - diff = strcmp(HTML_dtd.entity_names[i], p); + diff = strcmp(entities[i], p); if (diff == 0) { /* ** Found the entity. Assume that the length @@ -1295,36 +2223,91 @@ PUBLIC char * LYUnEscapeEntities ARGS3( ** true in the LYCharSets arrays. - FM */ int j; + /* + ** Found the entity. Convert it to + ** an ISO-8859-1 character, or our + ** substitute for any non-ISO-8859-1 + ** character, hex escaped if isURL + ** is set and it's 8-bit. - FM + */ + if (stype != st_HTML) { + int e; + buf[0] = HTMLGetLatinOneValue(i); + if (buf[0] == '\0') { + /* + ** The entity does not have an 8859-1 + ** representation of exactly one char length. + ** Try to deal with it anyway - either HTEscape + ** the whole mess, or pass through raw. So + ** make sure the ISO_Latin1 table, which is the + ** first table in LYCharSets, has reasonable + ** substitution strings! 
(if it really must + ** have any longer than one char) - KW + */ + if (!LYCharSets[0][i][0]) { + /* + ** Totally empty, skip. - KW + */ + ; /* do nothing */ + } else if (stype == st_URL) { + /* + ** All will be HTEscape'd. - KW + */ + esc = HTEscape(LYCharSets[0][i], URL_XALPHAS); + for (e = 0; esc[e]; e++) + *q++ = esc[e]; + FREE(esc); + } else { + /* + ** Nothing will be HTEscape'd. - KW + */ + for (e = 0; LYCharSets[0][i][e]; e++) { + *q++ = + (unsigned char)(LYCharSets[0][i][e]); + } + } + } else if ((unsigned char)buf[0] > 159 && + stype == st_URL) { + if (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8) { + UCPutUtf8ToBuffer(replace_buf, code, YES); + esc = HTEscape(replace_buf, URL_XALPHAS); + } else { + buf[0] = code; + esc = HTEscape(buf, URL_XALPHAS); + } + for (e = 0; esc[e]; e++) + *q++ = esc[e]; + FREE(esc); + } else { + *q++ = buf[0]; + } /* ** If it's hidden, use 160 for nbsp. - FM */ - if (hidden && - !strcmp("nbsp", HTML_dtd.entity_names[i])) { + } else if (hidden && + !strcmp("nbsp", entities[i])) { *q++ = 160; /* ** If it's hidden, use 173 for shy. - FM */ } else if (hidden && - !strcmp("shy", HTML_dtd.entity_names[i])) { + !strcmp("shy", entities[i])) { *q++ = 173; /* ** Check whether we want a plain space for nbsp, - ** ensp or emsp. - FM + ** ensp, emsp or thinsp. - FM */ } else if (plain_space && - (!strcmp("nbsp", - HTML_dtd.entity_names[i]) || - !strcmp("emsp", - HTML_dtd.entity_names[i]) || - !strcmp("ensp", - HTML_dtd.entity_names[i]))) { + (!strcmp("nbsp", entities[i]) || + !strcmp("emsp", entities[i]) || + !strcmp("ensp", entities[i]) || + !strcmp("thinsp", entities[i]))) { *q++ = ' '; /* ** If plain_space is set, ignore shy. - FM */ } else if (plain_space && - !strcmp("shy", - HTML_dtd.entity_names[i])) { + !strcmp("shy", entities[i])) { ; /* ** If we haven't used something else, use the @@ -1347,6 +2330,268 @@ PUBLIC char * LYUnEscapeEntities ARGS3( break; } } + if (diff != 0) { + /* + ** Not found, repeat for extra entities. 
- FM + */ + for (low = 0, high = HTML_dtd.number_of_extra_entities; + high > low; + diff < 0 ? (low = i+1) : (high = i)) { + /* + ** Binary search. + */ + i = (low + (high - low)/2); + diff = strcmp(extra_entities[i].name, p); + if (diff == 0) { + /* + ** Found the entity. + */ + code = extra_entities[i].code; + if ((stype == st_URL && code > 127) || + (stype == st_other && + (code > 255 || + LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8))) { + int e; + if (stype == st_URL) { + if (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 || + code > 255) { + UCPutUtf8ToBuffer(replace_buf, code, YES); + esc = HTEscape(replace_buf, URL_XALPHAS); + } else { + buf[0] = code; + esc = HTEscape(buf, URL_XALPHAS); + } + for (e = 0; esc[e]; e++) + *q++ = esc[e]; + FREE(esc); + } else if (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 || + code > 255) { + UCPutUtf8ToBuffer(q, code, NO); + } else { + *q++ = buf[0]; + } + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + /* + ** If it's hidden, use 160 for nbsp. - FM + */ + } + if (code == 160) { + /* + ** nbsp. + */ + if (hidden) { + *q++ = 160; + } else if (plain_space) { + *q++ = ' '; + } else { + *q++ = HT_NON_BREAK_SPACE; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (code == 173) { + /* + ** shy. + */ + if (hidden) { + *q++ = 173; + } else if (plain_space) { + ; + } else { + *q++ = LY_SOFT_HYPHEN; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (code == 8194 || + code == 8195 || + code == 8201) { + /* + ** ensp, emsp or thinsp. + */ + if (hidden) { + *q++ = '&'; + *cp = cpe; + break; + } else if (plain_space) { + *q++ = ' '; + } else { + *q++ = HT_EM_SPACE; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. 
- FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (code == 8211 || + code == 8212) { + /* + ** ndash or mdash. + */ + if (hidden) { + *q++ = '&'; + *cp = cpe; + break; + } else { + *q++ = '-'; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (output_utf8 && + code > 127 && + code < 0x7fffffffL) { + UCPutUtf8ToBuffer(q, code, NO); + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } + /* + ** Seek a translation from the chartrans tables. + */ + if (((uck = UCTransUniChar(code, + current_char_set)) >= 32 || + uck == 9 || uck == 10 || uck == 13) && + uck < 256 && + (uck < 127 || + uck >= lowest_8)) { + if (uck == 160 && current_char_set == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 160. + */ + if (hidden) { + *q++ = 160; + } else if (plain_space) { + *q++ = ' '; + } else { + *q++ = HT_NON_BREAK_SPACE; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (uck == 173 && + current_char_set == 0) { + /* + ** Would only happen if some other unicode + ** is mapped to Latin-1 173. + */ + if (hidden) { + *q++ = 173; + } else if (plain_space) { + ; + } else { + *q++ = LY_SOFT_HYPHEN; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if (!hidden && uck == 10 && + q != Str && *(q-1) == 13) { + /* + ** If this is not a hidden string, and we + ** have an encoded encoded LF (
) of a + ** CRLF pair, drop the CR. - kw + */ + *(q-1) = (char)uck; + } else { + *q++ = (char)uck; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } else if ((uck == -4 || + (repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + ** Not found. Look for + ** replacement string. + */ + (uck = + UCTransUniCharStr(replace_buf, + 60, + code, + current_char_set, + 0) >= 0)) { + for (i = 0; replace_buf[i]; i++) { + *q++ = replace_buf[i]; + } + /* + ** Recycle the terminator if it isn't the + ** standard ';' for HTML. - FM + */ + *cp = cpe; + if (*cp != ';') + p = cp; + else + p = (cp+1); + break; + } + *cp = cpe; + *q++ = '&'; + break; + } + } + } *cp = cpe; if (diff != 0) { /* @@ -1377,7 +2622,17 @@ PUBLIC char * LYUnEscapeEntities ARGS3( HTCJK != NOCJK)))) { *q++ = ' '; p++; - } else if (!hidden && *p == 10 && q != str && *(q-1) == 13) { + } else if (stype == st_URL && + (code >= 127 || + (code < 32 && (code != 9 && code != 10)))) { + *q++ = '%'; + *q++ = hex[(code >> 4) & 15]; + *q++ = hex[(code & 15)]; + p++; + /* + ** If it's hidden, use 160 for nbsp. - FM + */ + } else if (!hidden && *p == 10 && q != Str && *(q-1) == 13) { /* ** If this is not a hidden string, and the current char is ** the LF ('\n') of a CRLF pair, drop the CR ('\r'). - KW @@ -1388,8 +2643,15 @@ PUBLIC char * LYUnEscapeEntities ARGS3( } } } +#endif /* 0 */ *q = '\0'; + if (stype == st_URL) { + LYTrimHead(Str); + LYTrimTail(Str); + } + StrAllocCopy(*str, Str); + FREE(Str); return str; } @@ -1403,9 +2665,9 @@ PUBLIC char * LYUnEscapeEntities ARGS3( ** it strips out ESC, as would be done when the ** "ISO Latin 1" Character Set is selected. 
- FM */ -PUBLIC void LYUnEscapeToLatinOne ARGS2( +PRIVATE void LYUnEscapeToLatinOne_unused ARGS2( char **, str, - BOOLEAN, isURL) + CharUtil_st, stype) { char *p = *str; char *q = NULL; @@ -1431,7 +2693,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( /* ** If the isURL flag is TRUE, set up for hex escaping. - FM */ - if (isURL == TRUE) { + if (stype == st_URL) { if ((url = (char *)calloc(1, ((strlen(p) * 3) + 1))) == NULL) { outofmem(__FILE__, "LYUnEscapeToLatinOne"); } @@ -1451,7 +2713,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( case S_text: if (*p == '\033') { state = S_esc; - if (isURL == TRUE) { + if (stype == st_URL) { buf[0] = *p; esc = HTEscape(buf, URL_XALPHAS); for (e = 0; esc[e]; e++) @@ -1517,7 +2779,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( case S_nonascii_text: if (*p == '\033') { state = S_esc; - if (isURL == TRUE) { + if (stype == st_URL) { buf[0] = *p; esc = HTEscape(buf, URL_XALPHAS); for (e = 0; esc[e]; e++) @@ -1586,7 +2848,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( ** 8-bit, and then recycle the terminator ** if it is not a semicolon. - FM */ - if (value > 159 && isURL == TRUE) { + if (value > 159 && stype == st_URL) { buf[0] = value; esc = HTEscape(buf, URL_XALPHAS); for (e = 0; esc[e]; e++) @@ -1644,7 +2906,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( ** Totally empty, skip. - KW */ ; /* do nothing */ - } else if (isURL) { + } else if (stype == st_URL) { /* ** All will be HTEscape'd. - KW */ @@ -1662,7 +2924,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( } } } else if ((unsigned char)buf[0] > 159 && - isURL == TRUE) { + stype == st_URL) { esc = HTEscape(buf, URL_XALPHAS); for (e = 0; esc[e]; e++) *q++ = esc[e]; @@ -1710,7 +2972,7 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( ** Clean up and return. - FM */ *q = '\0'; - if (isURL == TRUE) { + if (stype == st_URL) { LYTrimHead(url); LYTrimTail(url); StrAllocCopy(*str, url); @@ -1719,6 +2981,286 @@ PUBLIC void LYUnEscapeToLatinOne ARGS2( } /* +** This is a generalized version of LYExpandString. 
+** +** This function translates a string from charset +** cs_from to charset cs_to, reallocating it if necessary. +** +** If use_lynx_specials is YES, translate 160 and 173 +** (U+00A0 and U+00AD) to HT_NON_BREAK_SPACE and +** LY_SOFT_HYPHEN, respectively (unless input and output +** charset are both iso-8859-1, for compatibility with +** usage in HTML.c). +** +** Returns YES if string translated or translation +** unnecessary, +** NO otherwise. +** +*/ +#define REPLACE_STRING(s) \ + p[i] = '\0'; \ + StrAllocCat(*str, q); \ + StrAllocCat(*str, s); \ + q = (puni > p+i ? puni+1 : &p[i+1]) + +#define REPLACE_CHAR(c) if (puni > &p[i]) { \ + p[i] = c; \ + p[i+1] = '\0'; \ + StrAllocCat(*str, q); \ + q = puni + 1; \ + } else \ + p[i] = c + +/* + * Back: try 'backward' translation + * PlainText: only used with Back (?) + */ +PRIVATE BOOL LYUCTranslateString ARGS7( + char **, str, + int, cs_from, + int, cs_to, + BOOL, use_lynx_specials, + BOOLEAN, PlainText, + BOOL, Back, + CharUtil_st, stype) /* stype unused */ +{ + char *p = *str; + char *q = *str; + CONST char *name; + char replace_buf[21]; + UCode_t unsign_c, uck; + UCTransParams T; + BOOL from_is_utf8, done; + char * puni; + int i, j, value, high, low, diff = 0; + + /* + ** Don't do anything if we have no string, + ** or if original AND target character sets + ** are both iso-8859-1, + ** or if we are in CJK mode. + */ + if (!p || *p == '\0' || + (cs_to == 0 && cs_from == cs_to) || + HTCJK != NOCJK) + return YES; + + /* No need to translate or examine the string any further */ + else if (!use_lynx_specials && !Back && + UCNeedNotTranslate(cs_from, cs_to)) + return YES; + + /* Can't do, caller should figure out what to do... */ + else if (!UCCanTranslateFromTo(cs_from, cs_to)) + return NO; + /* + ** Start a clean copy of the string, without + ** invalidating our pointer to the original. 
- FM + */ + *str = NULL; + StrAllocCopy(*str, ""); + + UCTransParams_clear(&T); + UCSetTransParams(&T, cs_from, &LYCharSet_UC[cs_from], + cs_to, &LYCharSet_UC[cs_to]); + from_is_utf8 = (LYCharSet_UC[cs_from].enc == UCT_ENC_UTF8); + puni = p; + /* + ** Check each character in the original string, + ** and add the characters or substitutions to + ** our clean copy. - FM + */ + for (i = 0; p[i]; i++) { + unsign_c = (unsigned char)p[i]; + done = NO; + if (Back) { + int rev_c; + if (p[i] == HT_NON_BREAK_SPACE || + p[i] == HT_EM_SPACE) { + if (PlainText) { + unsign_c = p[i] = ' '; + done = YES; + } else { + p[i] = 160; + unsign_c = 160; + if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) { + done = YES; + } + } + } else if (p[i] == LY_SOFT_HYPHEN) { + p[i] = 173; + unsign_c = 173; + if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) { + done = YES; + } + } else if (unsign_c < 127 || T.transp) { + done = YES; + } + if (!done) { + rev_c = UCReverseTransChar(p[i], cs_to, cs_from); + if (rev_c > 127) { + p[i] = rev_c; + done = YES; + } + } + } else if (unsign_c < 127) + done = YES; + + if (!done) { + if (from_is_utf8) { + if ((p[i]&0xc0)==0xc0) { + puni = p+i; + unsign_c = UCGetUniFromUtf8String(&puni); + if (unsign_c <= 0) { + unsign_c = (unsigned char)p[i]; + puni = p+i; + } + } + } else if (use_lynx_specials && !Back && + (unsign_c == 160 || unsign_c == 173) && + (LYCharSet_UC[cs_from].enc == UCT_ENC_8859 || + (LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) { + if (unsign_c == 160) + p[i] = HT_NON_BREAK_SPACE; + else if (unsign_c == 173) + p[i] = LY_SOFT_HYPHEN; + done = YES; + } else if (T.trans_to_uni) { + unsign_c = UCTransToUni(p[i], cs_from); + if (unsign_c <= 0) { + /* What else can we do? 
*/ + unsign_c = (unsigned char)p[i]; + } + } else if (T.strip_raw_char_in && + (unsigned char)p[i] >= 0xc0 && + (unsigned char)p[i] < 255) { + REPLACE_CHAR((p[i] & 0x7f)); + done = YES; + } else if (!T.trans_from_uni) { + done = YES; + } + /* + ** Substitute Lynx special character for + ** 160 (nbsp) if use_lynx_specials is set. + */ + if (!done && use_lynx_specials && !Back && + (unsign_c == 160 || unsign_c == 173)) { + REPLACE_CHAR((unsign_c==160 ? HT_NON_BREAK_SPACE : LY_SOFT_HYPHEN)); + done = YES; + } + } + /* At this point we should have the UCS value in unsign_c */ + if (!done) { + if (T.output_utf8 && UCPutUtf8ToBuffer(replace_buf, unsign_c, YES)) { + REPLACE_STRING(replace_buf); + } else if ((uck = UCTransUniChar(unsign_c, cs_to)) >= 32 && + uck < 256) { + REPLACE_CHAR((char)uck) ; + } else if (uck == UCTRANS_NOTFOUND && + (uck = UCTransUniCharStr(replace_buf,21, unsign_c, + cs_to, 0)) >= 0) { + REPLACE_STRING(replace_buf); + } + /* + ** fall through to old method: + ** + ** Substitute other 8-bit characters based on + ** the LYCharsets.c tables if HTPassEightBitRaw + ** is not set. - FM + */ + else if (unsign_c > 160 && unsign_c <= 255 && + !HTPassEightBitRaw) { + value = (int)(unsign_c - 160); + name = HTMLGetEntityName(value); + for (low = 0, high = HTML_dtd.number_of_entities; + high > low; + diff < 0 ? 
(low = j+1) : (high = j)) { + /* Binary search */ + j = (low + (high-low)/2); + diff = strcmp(HTML_dtd.entity_names[j], name); + if (diff == 0) { + REPLACE_STRING(LYCharSets[cs_to][j]); + break; + } + } + if (diff != 0) { + sprintf(replace_buf, "U%.2lX", unsign_c); + REPLACE_STRING(replace_buf); + } + } else if (unsign_c > 255) { + if (T.strip_raw_char_in && + (unsigned char)p[i] >= 0xc0 && + (unsigned char)p[i] < 255) { + REPLACE_CHAR((p[i] & 0x7f)); + } else { + sprintf(replace_buf, "U%.2lX", unsign_c); + REPLACE_STRING(replace_buf); + } + } + } + if ((puni-p) > i) + i = (puni-p); /* point to last byte of UTF sequence */ + } + StrAllocCat(*str, q); + free_and_clear(&p); + return YES; +} + +PUBLIC BOOL LYUCFullyTranslateString ARGS7( + char **, str, + int, cs_from, + int, cs_to, + BOOL, use_lynx_specials, + BOOLEAN, plain_space, + BOOLEAN, hidden, + CharUtil_st, stype) +{ + BOOL ret = YES; + /* May reallocate *str even if cs_to == 0 */ + if (!LYUCTranslateString(str, cs_from, cs_to, use_lynx_specials, FALSE, NO, stype)) { + LYExpandString_old(str); + ret = NO; + } + + if (!LYUnEscapeEntities(str, cs_to, plain_space, hidden, stype)) { + ret = NO; + } + return ret; +} + +PUBLIC BOOL LYUCTranslateBackFormData ARGS4( + char **, str, + int, cs_from, + int, cs_to, + BOOLEAN, plain_space) +{ + /* May reallocate *str even if cs_to == 0 */ + return (LYUCTranslateString(str, cs_from, cs_to, NO, plain_space, YES, st_HTML)); + +} + +#ifdef NOTUSED +PUBLIC BOOL LYUCFullyTranslateString ARGS6( + char **, str, + int, cs_from, + int, cs_to, + BOOL, use_lynx_specials, + BOOLEAN, plain_space, + BOOLEAN, hidden) +{ + if (cs_to) { + if (!LYUCTranslateString(str, cs_from, cs_to, use_lynx_specials)) + LYExpandString_old(str); + } + /* Note that it is guaranteed that *str is not reallocated + if cs_to == 0 */ + LYUnEscapeEntities(*str, plain_space, hidden); +} +#endif /* NOTUSED */ + +/* ** This function processes META tags in HTML streams. 
- FM */ PUBLIC void LYHandleMETA ARGS4( @@ -1742,7 +3284,12 @@ PUBLIC void LYHandleMETA ARGS4( value[HTML_META_HTTP_EQUIV] && *value[HTML_META_HTTP_EQUIV]) { StrAllocCopy(http_equiv, value[HTML_META_HTTP_EQUIV]); convert_to_spaces(http_equiv, TRUE); +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&http_equiv, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); +#else LYUnEscapeToLatinOne(&http_equiv, FALSE); +#endif LYTrimHead(http_equiv); LYTrimTail(http_equiv); if (*http_equiv == '\0') { @@ -1753,7 +3300,12 @@ PUBLIC void LYHandleMETA ARGS4( value[HTML_META_NAME] && *value[HTML_META_NAME]) { StrAllocCopy(name, value[HTML_META_NAME]); convert_to_spaces(name, TRUE); +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&name, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); +#else LYUnEscapeToLatinOne(&name, FALSE); +#endif LYTrimHead(name); LYTrimTail(name); if (*name == '\0') { @@ -1801,7 +3353,12 @@ PUBLIC void LYHandleMETA ARGS4( */ if (!strcasecomp((http_equiv ? http_equiv : ""), "Pragma") || !strcasecomp((http_equiv ? http_equiv : ""), "Cache-Control")) { +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); +#else LYUnEscapeToLatinOne(&content, FALSE); +#endif LYTrimHead(content); LYTrimTail(content); if (!strcasecomp(content, "no-cache")) { @@ -1875,7 +3432,12 @@ PUBLIC void LYHandleMETA ARGS4( * Date header from a server when making the * comparsion. - FM */ +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); +#else LYUnEscapeToLatinOne(&content, FALSE); +#endif LYTrimHead(content); LYTrimTail(content); StrAllocCopy(me->node_anchor->expires, content); @@ -1915,7 +3477,12 @@ PUBLIC void LYHandleMETA ARGS4( */ } else if (!(me->node_anchor->charset && *me->node_anchor->charset) && !strcasecomp((http_equiv ? 
http_equiv : ""), "Content-Type")) { +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&content, me->tag_charset, me->tag_charset, + NO, NO, YES, st_other); +#else LYUnEscapeToLatinOne(&content, FALSE); +#endif LYTrimHead(content); LYTrimTail(content); /* @@ -2434,7 +4001,7 @@ PUBLIC void LYHandleSELECT ARGS5( if (!me->inFORM) { if (TRACE) { fprintf(stderr, - "HTML: SELECT start tag not within FORM tag\n"); + "HTML: ***** SELECT start tag not within FORM element *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -2452,7 +4019,7 @@ PUBLIC void LYHandleSELECT ARGS5( */ if (me->inTEXTAREA) { if (TRACE) { - fprintf(stderr, "HTML: Missing TEXTAREA end tag\n"); + fprintf(stderr, "HTML: ***** Missing TEXTAREA end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -2538,7 +4105,7 @@ PUBLIC void LYHandleSELECT ARGS5( */ if (!me->inSELECT) { if (TRACE) { - fprintf(stderr, "HTML: Unmatched SELECT end tag\n"); + fprintf(stderr, "HTML: ***** Unmatched SELECT end tag *****\n"); } else if (!me->inBadHTML) { _statusline(BAD_HTML_USE_TRACE); me->inBadHTML = TRUE; @@ -2682,7 +4249,12 @@ PUBLIC int LYLegitimizeHREF ARGS4( } if (*(*href) == '\0') return(url_type); +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(href, me->tag_charset, me->tag_charset, + NO, NO, YES, st_URL); +#else LYUnEscapeToLatinOne(&(*href), TRUE); +#endif url_type = is_url(*href); if (!url_type && force_slash && (!strcmp(*href, ".") || !strcmp(*href, "..")) && @@ -2832,7 +4404,8 @@ PUBLIC void LYCheckForContentBase ARGS1( present[i] = NO; present[HTML_BASE_HREF] = YES; value[HTML_BASE_HREF] = (CONST char *)cp; - (*me->isa->start_element)(me, HTML_BASE, present, value, 0); + (*me->isa->start_element)(me, HTML_BASE, present, value, + 0, 0); FREE(cp); } @@ -2858,7 +4431,12 @@ PUBLIC void LYCheckForID ARGS4( * Translate any named or numeric character references. 
- FM */ StrAllocCopy(temp, value[attribute]); +#ifdef EXP_CHARTRANS + LYUCFullyTranslateString(&temp, me->tag_charset, me->tag_charset, + NO, NO, YES, st_URL); +#else LYUnEscapeToLatinOne(&temp, TRUE); +#endif /* * Create the link if we still have a non-zero-length string. - FM diff --git a/src/LYCharUtils.h b/src/LYCharUtils.h index 51c33962..c1c58f40 100644 --- a/src/LYCharUtils.h +++ b/src/LYCharUtils.h @@ -6,6 +6,13 @@ #include "HTUtils.h" #endif /* HTUTILS_H */ +typedef enum { + st_HTML = 0, /* attributes and content found in HTML, probably meant for display */ + st_URL, /* URLs, fragments, NAME and ID */ + st_other +} CharUtil_st; + +#ifndef EXP_CHARTRANS extern char * LYUnEscapeEntities PARAMS(( char * str, BOOLEAN plain_space, @@ -15,6 +22,23 @@ extern void LYUnEscapeToLatinOne PARAMS(( BOOLEAN isURL)); extern void LYExpandString PARAMS(( char ** str)); +#endif /* !EXP_CHARTRANS */ + +#ifdef EXP_CHARTRANS +extern BOOL LYUCFullyTranslateString PARAMS(( + char ** str, + int cs_from, + int cs_to, + BOOL use_lynx_specials, + BOOLEAN plain_space, + BOOLEAN hidden, + CharUtil_st stype)); +extern BOOL LYUCTranslateBackFormData PARAMS(( + char ** str, + int cs_from, + int cs_to, + BOOLEAN plain_space)); +#endif extern void LYEntify PARAMS(( char ** str, BOOLEAN isTITLE)); diff --git a/src/LYForms.c b/src/LYForms.c index 1a2c14b1..a92b0918 100644 --- a/src/LYForms.c +++ b/src/LYForms.c @@ -111,6 +111,10 @@ PUBLIC int change_form_link ARGS6( * Set the value. */ form->cp_submit_value = opt_ptr->cp_submit_value; + /* + * Set charset in which we have the submit value. 
- kw + */ + form->value_cs = opt_ptr->value_cs; } #if defined(FANCY_CURSES) || defined(USE_SLANG) if (!enable_scrollback && form->num_value == OrigNumValue) diff --git a/src/LYGetFile.c b/src/LYGetFile.c index 17c3e31f..b29584aa 100644 --- a/src/LYGetFile.c +++ b/src/LYGetFile.c @@ -654,7 +654,8 @@ Try_Redirected_URL: cp1 += 2; StrAllocCopy(temp, doc->address); #ifdef DOSPATH - StrAllocCat(temp, HTDOS_wwwName((char *)Home_Dir())); + StrAllocCat(temp, "/"); + StrAllocCat(temp, HTDOS_wwwName((char *)Home_Dir())); #else #ifdef VMS StrAllocCat(temp, @@ -1252,10 +1253,12 @@ PUBLIC BOOLEAN exec_ok ARGS3( * Security: reject on strange character. */ for (cp = link; *cp != '\0'; cp++) { - if (!isalnum(*cp) && *cp != '_' && *cp != '-' && - *cp != ' ' && *cp != ':' && *cp != '.' && - *cp != '/' && *cp != '@' && *cp != '~' && - *cp != '$' && *cp != '\t') { + if (!isalnum(*cp) && + *cp != '_' && *cp != '-' && *cp != ' ' && + *cp != ':' && *cp != '.' && *cp != '/' && + *cp != '@' && *cp != '~' && *cp != '$' && + *cp != '&' && *cp != '+' && *cp != '=' && + *cp != '\t') { char buf[128]; sprintf(buf, diff --git a/src/LYGlobalDefs.h b/src/LYGlobalDefs.h index 6ecc9211..fad330cc 100644 --- a/src/LYGlobalDefs.h +++ b/src/LYGlobalDefs.h @@ -152,6 +152,7 @@ extern BOOLEAN LYresubmit_posts; extern BOOLEAN user_mode; /* novice or advanced */ extern BOOLEAN is_www_index; extern BOOLEAN dump_output_immediately; +extern int dump_output_width; extern BOOLEAN lynx_mode; extern BOOLEAN bold_headers; extern BOOLEAN bold_H1; diff --git a/src/LYKeymap.c b/src/LYKeymap.c index 1ffddd5e..c47c3611 100644 --- a/src/LYKeymap.c +++ b/src/LYKeymap.c @@ -488,7 +488,7 @@ PRIVATE struct rmap revmap[] = { }; PRIVATE char *funckey[] = { - "Up Arrow", + "Up Arrow", "Down Arrow", "Right Arrow", "Left Arrow", @@ -588,7 +588,9 @@ PRIVATE int LYLoadKeymap ARGS4 ( } } for (i = 1; i < sizeof(keymap); i++) { - /* LYK_PIPE not implemented yet */ + /* + * LYK_PIPE not implemented yet. 
+ */ if ((i > 127 || i <= ' ' || !isalpha(i-1)) && strcmp(revmap[(unsigned char)keymap[i]].name, "PIPE")) { print_binding(target, buf, i); diff --git a/src/LYLocal.c b/src/LYLocal.c index c7b914d5..83ae0edb 100644 --- a/src/LYLocal.c +++ b/src/LYLocal.c @@ -1449,7 +1449,7 @@ PUBLIC int local_dired ARGS1( StrAllocCopy(line, line_url); HTUnEscape(line); /* _file_ (not URL) syntax, for those functions - that need it. DOn't forget to FREE it. */ + that need it. Don't forget to FREE it. */ tp = NULL; if (!strncmp(line, "LYNXDIRED://NEW_FILE", 20)) { diff --git a/src/LYMail.h b/src/LYMail.h index 52e49ff6..22ce9063 100644 --- a/src/LYMail.h +++ b/src/LYMail.h @@ -9,15 +9,15 @@ extern BOOLEAN term_letter; extern void mailform PARAMS(( - char * mailto_address, - char * mailto_subject, - char * mailto_content, - char * mailto_type)); + char * mailto_address, + char * mailto_subject, + char * mailto_content, + char * mailto_type)); extern void mailmsg PARAMS(( - int cur, - char * owner_address, - char * filename, - char * linkname)); + int cur, + char * owner_address, + char * filename, + char * linkname)); extern void reply_by_mail PARAMS(( char * mail_address, char * filename, diff --git a/src/LYMain.c b/src/LYMain.c index 39acab19..7f9c8800 100644 --- a/src/LYMain.c +++ b/src/LYMain.c @@ -320,6 +320,7 @@ PUBLIC char *UCAssume_unrecMIMEcharset = NULL; #endif /* EXP_CHARTRANS */ PUBLIC int LYlines = 24; PUBLIC int LYcols = 80; +PUBLIC int dump_output_width = 0; PUBLIC linkstruct links[MAXLINKS]; PUBLIC histstruct history[MAXHIST]; PUBLIC int nlinks = 0; /* number of links in memory */ @@ -1360,6 +1361,9 @@ PUBLIC int main ARGS2( if (LYPreparsedSource) { HTPreparsedFormatInit(); } + if (dump_output_width > 0) { + LYcols = dump_output_width; + } #if defined(EXEC_LINKS) || defined(EXEC_SCRIPTS) #ifdef NEVER_ALLOW_REMOTE_EXEC @@ -1604,7 +1608,8 @@ PUBLIC int main ARGS2( * are all allocated and synchronized. 
- FM */ if (!bookmark_page || *bookmark_page == '\0') { - StrAllocCopy(bookmark_page, "lynx_bookmarks.html"); + StrAllocCopy(bookmark_page, "lynx_bookmarks"); + StrAllocCat(bookmark_page, HTML_SUFFIX); StrAllocCopy(BookmarkPage, bookmark_page); StrAllocCopy(MBM_A_subbookmark[0], bookmark_page); StrAllocCopy(MBM_A_subdescript[0], "Default"); @@ -2140,13 +2145,14 @@ PRIVATE void parse_arg ARGS3( ccount = atoi(cp); #if defined(USEHASH) } else if (strncmp(argv[0], "-lss", 4) == 0) { - fprintf(stderr, "***********************\n"); if ((cp=strchr(argv[0],'=')) != NULL) StrAllocCopy(lynx_lss_file, cp+1); else { StrAllocCopy(lynx_lss_file, argv[1]); i++; } + fprintf(stderr, "LYMain found -lss flag, lss file is %s\n", + lynx_lss_file ? lynx_lss_file : "<NONE>"); #endif } else if (strncmp(argv[0], "-localhost", 10) == 0) { @@ -2596,6 +2602,21 @@ PRIVATE void parse_arg ARGS3( } break; + case 'w': + if (strncmp(argv[0], "-width", 2) == 0) { + if (nextarg) { + int w = atoi(cp); + if (w > 0) { + dump_output_width = w; + break; + } + } + dump_output_width = 80; + } else { + goto Output_Error_and_Help_List; + } + break; + default: Output_Error_and_Help_List: #ifdef VMS diff --git a/src/LYMainLoop.c b/src/LYMainLoop.c index 1e7dfdc5..95166360 100644 --- a/src/LYMainLoop.c +++ b/src/LYMainLoop.c @@ -993,7 +993,7 @@ try_again: */ HTAtom *encoding; - if (HTFileFormat(temp, &encoding) != WWW_HTML) { + if (HTFileFormat(temp, &encoding, NULL) != WWW_HTML) { HTSetSuffix(temp, "text/html", "8bit", 1.0); } } @@ -3649,12 +3649,12 @@ check_goto_URL: if (nlinks > 0) { cp = links[curdoc.link].lname; if (is_url(cp) == FILE_URL_TYPE) { - if (!strncmp(tp, "file://localhost", 16)) { + if (!strncmp(cp, "file://localhost", 16)) { /* * This is the only case that should occur. 
- kw */ StrAllocCopy(tp, cp + 16); - } else if (!strncmp(tp, "file:", 5)) { + } else if (!strncmp(cp, "file:", 5)) { StrAllocCopy(tp, cp + 5); } else { StrAllocCopy(tp, cp); diff --git a/src/LYOptions.c b/src/LYOptions.c index 80c840fa..8a03ec78 100644 --- a/src/LYOptions.c +++ b/src/LYOptions.c @@ -11,6 +11,7 @@ #include "LYClean.h" #include "LYCharSets.h" #include "LYCharUtils.h" +#include "UCMap.h" #include "LYKeymap.h" #include "LYrcFile.h" #include "HTAlert.h" @@ -46,6 +47,18 @@ PRIVATE int popup_choice PARAMS(( #define MAXCHOICES 10 +#define L_Bool_A (use_assume_charset ? L_BOOL_A + 1 : L_BOOL_A) +#define L_Bool_B (use_assume_charset ? L_BOOL_B + 1 : L_BOOL_B) +#define L_Exec (use_assume_charset ? L_EXEC + 1 : L_EXEC) +#define L_Rawmode (use_assume_charset ? L_RAWMODE + 1 : L_RAWMODE) +#define L_Charset (use_assume_charset ? L_CHARSET + 1 : L_CHARSET) +#define L_Color (use_assume_charset ? L_COLOR + 1 : L_COLOR) +#define L_Keypad (use_assume_charset ? L_KEYPAD + 1 : L_KEYPAD) +#define L_Lineed (use_assume_charset ? L_LINEED + 1 : L_LINEED) +#define L_Dired (use_assume_charset ? L_DIRED + 1 : L_DIRED) +#define L_User_Mode (use_assume_charset ? L_USER_MODE + 1 : L_USER_MODE) +#define L_User_Agent (use_assume_charset ? 
L_USER_AGENT + 1 : L_USER_AGENT) + PRIVATE void option_statusline ARGS1( CONST char *, text) { @@ -109,8 +122,10 @@ PUBLIC void options NOARGS char *choices[MAXCHOICES]; int CurrentCharSet = current_char_set; int CurrentShowColor = LYShowColor; + int CurrentAssumeCharSet = UCLYhndl_for_unspec; BOOLEAN CurrentRawMode = LYRawMode; BOOLEAN AddValueAccepted = FALSE; + BOOL use_assume_charset, old_use_assume_charset; #ifdef DIRED_SUPPORT #ifdef ALLOW_USERS_TO_CHANGE_EXEC_WITHIN_OPTIONS @@ -178,7 +193,12 @@ PUBLIC void options NOARGS #endif /* USE_SLANG || COLOR_CURSES */ } + old_use_assume_charset = + use_assume_charset = (user_mode == ADVANCED_MODE); + draw_options: + + old_use_assume_charset = use_assume_charset; /* * NOTE that printw() should be avoided for strings that * might have non-ASCII or multibyte/CJK characters. - FM @@ -232,9 +252,9 @@ draw_options: addstr("S)earching type : "); addstr(case_sensitive ? "CASE SENSITIVE " : "CASE INSENSITIVE"); - move(L_CHARSET, 5); + move(L_Charset, 5); addstr("display (C)haracter set : "); - addstr(LYchar_set_names[current_char_set]); + addstr((char *)LYchar_set_names[current_char_set]); move(L_LANGUAGE, 5); addstr("preferred document lan(G)uage: "); @@ -244,12 +264,22 @@ draw_options: addstr("preferred document c(H)arset : "); addstr((pref_charset && *pref_charset) ? pref_charset : "NONE"); - move(L_RAWMODE, 5); + if (use_assume_charset) { + move(L_ASSUME_CHARSET, 5); + addstr("^A)ssume charset if unknown : "); + if (UCAssume_MIMEcharset) + addstr(UCAssume_MIMEcharset); + else + addstr((UCLYhndl_for_unspec >= 0) ? + LYCharSet_UC[UCLYhndl_for_unspec].MIMEname : "NONE"); + } + + move(L_Rawmode, 5); addstr("Raw 8-bit or CJK m(O)de : "); addstr(LYRawMode ? "ON " : "OFF"); #if defined(USE_SLANG) || defined(COLOR_CURSES) - move(L_COLOR, B_COLOR); + move(L_Color, B_COLOR); addstr("show color (&) : "); if (no_option_save) { addstr((LYShowColor == SHOW_COLOR_OFF ? 
"OFF" : @@ -276,27 +306,27 @@ draw_options: } #endif /* USE_SLANG || COLOR_CURSES */ - move(L_BOOL_A, B_VIKEYS); + move(L_Bool_A, B_VIKEYS); addstr("V)I keys: "); addstr(vi_keys ? "ON " : "OFF"); - move(L_BOOL_A, B_EMACSKEYS); + move(L_Bool_A, B_EMACSKEYS); addstr("e(M)acs keys: "); addstr(emacs_keys ? "ON " : "OFF"); - move(L_BOOL_A, B_SHOW_DOTFILES); + move(L_Bool_A, B_SHOW_DOTFILES); addstr("sho(W) dot files: "); addstr((!no_dotfiles && show_dotfiles) ? "ON " : "OFF"); - move(L_BOOL_B, B_SELECT_POPUPS); + move(L_Bool_B, B_SELECT_POPUPS); addstr("popups for selec(T) fields : "); addstr(LYSelectPopups ? "ON " : "OFF"); - move(L_BOOL_B, B_SHOW_CURSOR); + move(L_Bool_B, B_SHOW_CURSOR); addstr("show cursor (@) : "); addstr(LYShowCursor ? "ON " : "OFF"); - move(L_KEYPAD, 5); + move(L_Keypad, 5); addstr("K)eypad mode : "); addstr((keypad_mode == NUMBERS_AS_ARROWS) ? "Numbers act as arrows " : @@ -304,30 +334,30 @@ draw_options: "Links are numbered " : "Links and form fields are numbered")); - move(L_LINEED, 5); + move(L_Lineed, 5); addstr("li(N)e edit style : "); addstr(LYLineeditNames[current_lineedit]); #ifdef DIRED_SUPPORT - move(L_DIRED, 5); + move(L_Dired, 5); addstr("l(I)st directory style : "); addstr((dir_list_style == FILES_FIRST) ? "Files first " : ((dir_list_style == MIXED_STYLE) ? "Mixed style " : "Directories first")); #endif /* DIRED_SUPPORT */ - move(L_USER_MODE, 5); + move(L_User_Mode, 5); addstr("U)ser mode : "); addstr( (user_mode == NOVICE_MODE) ? "Novice " : ((user_mode == INTERMEDIATE_MODE) ? "Intermediate" : "Advanced ")); - move(L_USER_AGENT, 5); + move(L_User_Agent, 5); addstr("user (A)gent : "); addstr((LYUserAgent && *LYUserAgent) ? LYUserAgent : "NONE"); #ifdef ALLOW_USERS_TO_CHANGE_EXEC_WITHIN_OPTIONS - move(L_EXEC, 5); + move(L_Exec, 5); addstr("local e(X)ecution links : "); #ifndef NEVER_ALLOW_REMOTE_EXEC addstr( local_exec ? 
"ALWAYS ON " : @@ -753,21 +783,99 @@ draw_options: response = ' '; break; + case '\001': /* Change assume_charset setting. */ + if (use_assume_charset) { + int i, curval; + char ** assume_list; + assume_list = calloc(LYNumCharsets + 1, sizeof(char *)); + if (!assume_list) { + outofmem(__FILE__, "options"); + } + for (i = 0; i < LYNumCharsets; i++) { + assume_list[i] = (char *)LYCharSet_UC[i].MIMEname; + } + curval = UCLYhndl_for_unspec; + if (curval == current_char_set && UCAssume_MIMEcharset) { + curval = UCGetLYhndl_byMIME(UCAssume_MIMEcharset); + } + if (curval < 0) + curval = LYRawMode ? current_char_set : 0; + if (!LYSelectPopups) { + UCLYhndl_for_unspec = boolean_choice(curval, + L_ASSUME_CHARSET, -1, + assume_list); + } else { + UCLYhndl_for_unspec = popup_choice(curval, + L_ASSUME_CHARSET, -1, + assume_list, + 0, FALSE); +#if defined(VMS) || defined(USE_SLANG) + move(L_ASSUME_CHARSET, COL_OPTION_VALUES); + clrtoeol(); + if (UCLYhndl_for_unspec >= 0) + addstr(LYCharSet_UC[UCLYhndl_for_unspec].MIMEname); +#endif /* VMS || USE_SLANG */ + } + + /* + * Set the raw 8-bit or CJK mode defaults and + * character set if changed. - FM + */ + if (CurrentAssumeCharSet != UCLYhndl_for_unspec || + UCLYhndl_for_unspec != curval) { + if (UCLYhndl_for_unspec != CurrentAssumeCharSet) { + StrAllocCopy(UCAssume_MIMEcharset, + LYCharSet_UC[UCLYhndl_for_unspec].MIMEname); + } + LYRawMode = (UCLYhndl_for_unspec == current_char_set); + HTMLSetUseDefaultRawMode(current_char_set, LYRawMode); + HTMLUseCharacterSet(current_char_set); + CurrentAssumeCharSet = UCLYhndl_for_unspec; + CurrentRawMode = LYRawMode; +#if !defined(VMS) && !defined(USE_SLANG) + if (!LYSelectPopups) +#endif /* !VMS && !USE_SLANG */ + { + move(L_Rawmode, COL_OPTION_VALUES); + clrtoeol(); + addstr(LYRawMode ? 
"ON " : "OFF"); + } + } + FREE(assume_list); + response = ' '; + if (LYSelectPopups) { +#if !defined(VMS) || defined(USE_SLANG) + if (term_options) { + term_options = FALSE; + } else { + AddValueAccepted = TRUE; + } + goto draw_options; +#else + term_options = FALSE; +#endif /* !VMS || USE_SLANG */ + } + } else { + option_statusline(NEED_ADVANCED_USER_MODE); + AddValueAccepted = FALSE; + } + break; + case 'c': /* Change charset setting. */ case 'C': if (!LYSelectPopups) { current_char_set = boolean_choice(current_char_set, - L_CHARSET, -1, + L_Charset, -1, (char **)LYchar_set_names); } else { current_char_set = popup_choice(current_char_set, - L_CHARSET, -1, + L_Charset, -1, (char **)LYchar_set_names, 0, FALSE); #if defined(VMS) || defined(USE_SLANG) - move(L_CHARSET, COL_OPTION_VALUES); + move(L_Charset, COL_OPTION_VALUES); clrtoeol(); - addstr(LYchar_set_names[current_char_set]); + addstr((char *)LYchar_set_names[current_char_set]); #endif /* VMS || USE_SLANG */ } /* @@ -784,7 +892,7 @@ draw_options: if (!LYSelectPopups) #endif /* !VMS && !USE_SLANG */ { - move(L_RAWMODE, COL_OPTION_VALUES); + move(L_Rawmode, COL_OPTION_VALUES); clrtoeol(); addstr(LYRawMode ? "ON " : "OFF"); } @@ -814,7 +922,7 @@ draw_options: choices[1] = NULL; StrAllocCopy(choices[1], "ON "); choices[2] = NULL; - LYRawMode = boolean_choice(LYRawMode, L_RAWMODE, -1, choices); + LYRawMode = boolean_choice(LYRawMode, L_Rawmode, -1, choices); /* * Set the LYUseDefaultRawMode value and character * handling if LYRawMode was changed. 
- FM @@ -914,7 +1022,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; vi_keys = boolean_choice(vi_keys, - L_BOOL_A, C_VIKEYS, + L_Bool_A, C_VIKEYS, choices); if (vi_keys) { set_vi_keys(); @@ -937,7 +1045,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; emacs_keys = boolean_choice(emacs_keys, - L_BOOL_A, C_EMACSKEYS, + L_Bool_A, C_EMACSKEYS, choices); if (emacs_keys) { set_emacs_keys(); @@ -963,7 +1071,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; show_dotfiles = boolean_choice(show_dotfiles, - L_BOOL_A, + L_Bool_A, C_SHOW_DOTFILES, choices); FREE(choices[0]); @@ -983,7 +1091,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; LYSelectPopups = boolean_choice(LYSelectPopups, - L_BOOL_B, + L_Bool_B, C_SELECT_POPUPS, choices); FREE(choices[0]); @@ -1015,7 +1123,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; LYShowColor = boolean_choice((LYShowColor - 1), - L_COLOR, + L_Color, C_COLOR, choices); if (LYShowColor == 0) { @@ -1046,12 +1154,12 @@ draw_options: do { if (!LYSelectPopups) { chosen = boolean_choice(LYChosenShowColor, - L_COLOR, + L_Color, C_COLOR, choices); } else { chosen = popup_choice(LYChosenShowColor, - L_COLOR, + L_Color, C_COLOR, choices, 4, FALSE); } @@ -1072,7 +1180,7 @@ draw_options: LYChosenShowColor = chosen; #if defined(VMS) if (LYSelectPopups) { - move(L_COLOR, C_COLOR); + move(L_Color, C_COLOR); clrtoeol(); addstr(choices[LYChosenShowColor]); } @@ -1119,7 +1227,7 @@ draw_options: StrAllocCopy(choices[1], "ON "); choices[2] = NULL; LYShowCursor = boolean_choice(LYShowCursor, - L_BOOL_B, + L_Bool_B, C_SHOW_CURSOR, choices); FREE(choices[0]); @@ -1144,15 +1252,15 @@ draw_options: choices[3] = NULL; if (!LYSelectPopups) { keypad_mode = boolean_choice(keypad_mode, - L_KEYPAD, -1, + L_Keypad, -1, choices); } else { keypad_mode = popup_choice(keypad_mode, - L_KEYPAD, -1, + L_Keypad, -1, choices, 3, FALSE); #if defined(VMS) || 
defined(USE_SLANG) - move(L_KEYPAD, COL_OPTION_VALUES); + move(L_Keypad, COL_OPTION_VALUES); clrtoeol(); addstr(choices[keypad_mode]); #endif /* VMS || USE_SLANG */ @@ -1184,15 +1292,15 @@ draw_options: case 'N': if (!LYSelectPopups) { current_lineedit = boolean_choice(current_lineedit, - L_LINEED, -1, + L_Lineed, -1, LYLineeditNames); } else { current_lineedit = popup_choice(current_lineedit, - L_LINEED, -1, + L_Lineed, -1, LYLineeditNames, 0, FALSE); #if defined(VMS) || defined(USE_SLANG) - move(L_LINEED, COL_OPTION_VALUES); + move(L_Lineed, COL_OPTION_VALUES); clrtoeol(); addstr(LYLineeditNames[current_lineedit]); #endif /* VMS || USE_SLANG */ @@ -1227,15 +1335,15 @@ draw_options: choices[3] = NULL; if (!LYSelectPopups) { dir_list_style = boolean_choice(dir_list_style, - L_DIRED, -1, + L_Dired, -1, choices); } else { dir_list_style = popup_choice(dir_list_style, - L_DIRED, -1, + L_Dired, -1, choices, 3, FALSE); #if defined(VMS) || defined(USE_SLANG) - move(L_DIRED, COL_OPTION_VALUES); + move(L_Dired, COL_OPTION_VALUES); clrtoeol(); addstr(choices[dir_list_style]); #endif /* VMS || USE_SLANG */ @@ -1273,17 +1381,21 @@ draw_options: choices[3] = NULL; if (!LYSelectPopups) { user_mode = boolean_choice(user_mode, - L_USER_MODE, -1, + L_User_Mode, -1, choices); + use_assume_charset = (user_mode >= 2); } else { user_mode = popup_choice(user_mode, - L_USER_MODE, -1, + L_User_Mode, -1, choices, 3, FALSE); + use_assume_charset = (user_mode >= 2); #if defined(VMS) || defined(USE_SLANG) - move(L_USER_MODE, COL_OPTION_VALUES); - clrtoeol(); - addstr(choices[user_mode]); + if (use_assume_charset == old_use_assume_charset) { + move(L_User_Mode, COL_OPTION_VALUES); + clrtoeol(); + addstr(choices[user_mode]); + } #endif /* VMS || USE_SLANG */ } FREE(choices[0]); @@ -1305,6 +1417,8 @@ draw_options: goto draw_options; #else term_options = FALSE; + if (use_assume_charset != old_use_assume_charset) + goto draw_options; #endif /* !VMS || USE_SLANG */ } break; @@ -1320,12 +1434,12 @@ 
draw_options: *display_option = '\0'; } option_statusline(ACCEPT_DATA_OR_DEFAULT); - move(L_USER_AGENT, COL_OPTION_VALUES); + move(L_User_Agent, COL_OPTION_VALUES); start_bold(); ch = LYgetstr(display_option, VISIBLE, sizeof(display_option), NORECALL); stop_bold(); - move(L_USER_AGENT, COL_OPTION_VALUES); + move(L_User_Agent, COL_OPTION_VALUES); if (term_options || ch == -1) { addstr((LYUserAgent && *LYUserAgent) ? @@ -1391,15 +1505,15 @@ draw_options: #endif /* !NEVER_ALLOW_REMOTE_EXEC */ if (!LYSelectPopups) { itmp = boolean_choice(itmp, - L_EXEC, -1, + L_Exec, -1, choices); } else { itmp = popup_choice(itmp, - L_EXEC, -1, + L_Exec, -1, choices, 0, (exec_frozen ? TRUE : FALSE)); #if defined(VMS) || defined(USE_SLANG) - move(L_EXEC, COL_OPTION_VALUES); + move(L_Exec, COL_OPTION_VALUES); clrtoeol(); addstr(choices[itmp]); #endif /* VMS || USE_SLANG */ diff --git a/src/LYOptions.h b/src/LYOptions.h index 8855563f..ea9e914a 100644 --- a/src/LYOptions.h +++ b/src/LYOptions.h @@ -26,6 +26,7 @@ extern void edit_bookmarks NOPARAMS; #define L_SSEARCH 7 #define L_LANGUAGE 8 #define L_PREF_CHARSET 9 +#define L_ASSUME_CHARSET (L_PREF_CHARSET + 1) #define L_CHARSET 10 #define L_RAWMODE 11 diff --git a/src/LYPrint.c b/src/LYPrint.c index d6d444ce..5ded8fc1 100644 --- a/src/LYPrint.c +++ b/src/LYPrint.c @@ -227,8 +227,9 @@ PUBLIC int printfile ARGS1( change_sug_filename(filename); if (!(HTisDocumentSource()) && (cp = strrchr(filename, '.')) != NULL) { - format = HTFileFormat(filename, &encoding); - if (!strcasecomp(format->name, "text/html")) { + format = HTFileFormat(filename, &encoding, NULL); + if (!strcasecomp(format->name, "text/html") || + !IsUnityEnc(encoding)) { *cp = '\0'; strcat(filename, ".txt"); } @@ -817,9 +818,9 @@ PUBLIC int printfile ARGS1( #endif /* VMS */ NULL == strchr(cp, '/')) { if (HTisDocumentSource() && - strcasecomp(cp, ".html")) { + strcasecomp(cp, HTML_SUFFIX)) { *cp = '\0'; - strcat(tempfile, ".html"); + strcat(tempfile, HTML_SUFFIX); } else if 
(!HTisDocumentSource() && strcasecomp(cp, ".txt")) { *cp = '\0'; @@ -876,8 +877,9 @@ PUBLIC int printfile ARGS1( change_sug_filename(filename); if (!(HTisDocumentSource()) && (cp = strrchr(filename, '.')) != NULL) { - format = HTFileFormat(filename, &encoding); - if (!strcasecomp(format->name, "text/html")) { + format = HTFileFormat(filename, &encoding, NULL); + if (!strcasecomp(format->name, "text/html") || + !IsUnityEnc(encoding)) { *cp = '\0'; strcat(filename, ".txt"); } diff --git a/src/LYStrings.h b/src/LYStrings.h index 6432f475..9d26458c 100644 --- a/src/LYStrings.h +++ b/src/LYStrings.h @@ -7,13 +7,19 @@ extern char *strstr(); #endif /* __STRICT_BSD__ */ extern int get_mouse_link NOPARAMS; -extern char * LYstrncpy PARAMS((char *dst, CONST char *src, int n)); +extern char * LYstrncpy PARAMS(( + char * dst, + CONST char * src, + int n)); extern int LYgetch NOPARAMS; -extern int LYgetstr PARAMS((char *inputline, int hidden, - int bufsize, int recall)); -extern char * LYstrstr PARAMS((char *chptr, char *tarptr)); -extern char * LYno_attr_char_strstr PARAMS((char *chptr, char *tarptr)); -extern char * LYno_attr_char_case_strstr PARAMS((char *chptr, char *tarptr)); +extern int LYgetstr PARAMS(( + char * inputline, + int hidden, + int bufsize, + int recall)); +extern char * LYstrstr PARAMS(( + char * chptr, + char * tarptr)); extern char * LYno_attr_mbcs_strstr PARAMS(( char * chptr, @@ -42,9 +48,21 @@ extern char * LYmbcsstrncpy PARAMS(( #define LYmbcsstrncpy(dest,src,n,n_glyphs,enc) LYstrncpy(dest, src, n) #endif -extern char * SNACopy PARAMS((char **dest, CONST char *src, int n)); -extern char * SNACat PARAMS((char **dest, CONST char *src, int n)); +extern char * LYno_attr_char_strstr PARAMS(( + char * chptr, + char * tarptr)); +extern char * LYno_attr_char_case_strstr PARAMS(( + char * chptr, + char * tarptr)); +extern char * SNACopy PARAMS(( + char ** dest, + CONST char * src, + int n)); +extern char * SNACat PARAMS(( + char ** dest, + CONST char * src, + int 
n)); #define StrnAllocCopy(dest, src, n) SNACopy (&(dest), src, n) #define StrnAllocCat(dest, src, n) SNACat (&(dest), src, n) @@ -123,11 +141,18 @@ typedef struct _EditFieldData { #define LYE_AIX (LYE_UPPER +1) /* Hex 97 */ -extern void LYSetupEdit PARAMS((EditFieldData *edit, char *old, int maxstr, - int maxdsp)); -extern void LYRefreshEdit PARAMS((EditFieldData *edit)); -extern int LYEdit1 PARAMS((EditFieldData *edit, int ch, - int action, BOOL maxMessage)); +extern void LYSetupEdit PARAMS(( + EditFieldData * edit, + char * old, + int maxstr, + int maxdsp)); +extern void LYRefreshEdit PARAMS(( + EditFieldData * edit)); +extern int LYEdit1 PARAMS(( + EditFieldData * edit, + int ch, + int action, + BOOL maxMessage)); extern int current_lineedit; diff --git a/src/LYUtils.c b/src/LYUtils.c index c2d6a13a..58005020 100644 --- a/src/LYUtils.c +++ b/src/LYUtils.c @@ -1882,6 +1882,11 @@ PUBLIC void statusline ARGS1( } clrtoeol(); if (text != NULL) { +#ifdef EXP_CHARTRANS + if (LYCharSet_UC[current_char_set].enc == UCT_ENC_UTF8) { + refresh(); + } +#endif #ifndef USE_COLOR_STYLE lynx_start_status_color (); addstr (buffer); @@ -3184,8 +3189,8 @@ PUBLIC void tempname ARGS2( lynx_temp_space, (int)getpid(), counter-1); remove(namebuffer); sprintf(namebuffer, - "%sL%d%uTMP.html", - lynx_temp_space, (int)getpid(), counter-1); + "%sL%d%uTMP%s", + lynx_temp_space, (int)getpid(), counter-1, HTML_SUFFIX); remove(namebuffer); } } else { @@ -3201,6 +3206,10 @@ PUBLIC void tempname ARGS2( * the count and try again. Otherwise, return * with the name which has the .html suffix * loaded in namebuffer. - FM + * + * Some systems may use .htm instead of .html. This + * should be done consistently by always using HTML_SUFFIX + * where filenames are generated for new local files. 
- kw */ sprintf(namebuffer, "%sL%d%uTMP.txt", @@ -3227,8 +3236,8 @@ PUBLIC void tempname ARGS2( continue; } sprintf(namebuffer, - "%sL%d%uTMP.html", - lynx_temp_space, (int)getpid(), counter++); + "%sL%d%uTMP%s", + lynx_temp_space, (int)getpid(), counter++, HTML_SUFFIX); if ((fp = fopen(namebuffer, "r")) != NULL) { fclose(fp); if (TRACE) @@ -5380,7 +5389,7 @@ int remove ARGS1(char *, name) * first, before opening it. If the chmod fails because of some reason other * than a non-existent file, there's no point in trying to open it. */ -static FILE *OpenHiddenFile ARGS2(char *, name, char *, mode) +PRIVATE FILE *OpenHiddenFile ARGS2(char *, name, char *, mode) { int save = umask(HIDE_UMASK); FILE *fp = 0; @@ -5395,7 +5404,7 @@ static FILE *OpenHiddenFile ARGS2(char *, name, char *, mode) # endif #endif -FILE *LYNewBinFile ARGS1(char *, name) +PUBLIC FILE *LYNewBinFile ARGS1(char *, name) { #ifdef VMS FILE *fp = fopen (name, "wb", "mbc=32"); @@ -5406,7 +5415,7 @@ FILE *LYNewBinFile ARGS1(char *, name) return fp; } -FILE *LYNewTxtFile ARGS1(char *, name) +PUBLIC FILE *LYNewTxtFile ARGS1(char *, name) { #ifdef VMS FILE *fp = fopen (name, "w", "shr=get"); @@ -5417,7 +5426,7 @@ FILE *LYNewTxtFile ARGS1(char *, name) return fp; } -FILE *LYAppendToTxtFile ARGS1(char *, name) +PUBLIC FILE *LYAppendToTxtFile ARGS1(char *, name) { #ifdef VMS FILE *fp = fopen (name, "a+", "shr=get"); diff --git a/src/LYrcFile.c b/src/LYrcFile.c index e269f7ad..e83d72cb 100644 --- a/src/LYrcFile.c +++ b/src/LYrcFile.c @@ -536,7 +536,15 @@ PUBLIC int save_rc NOPARAMS /* * Header. */ - fprintf(fp, "# Lynx User Defaults File\n\n"); + fprintf(fp, "# Lynx User Defaults File\n#\n\ +# This file contains options saved from the Lynx Options Screen (normally\n\ +# with the '>' key). 
There is normally no need to edit this file manually,\n\ +# since the defaults here can be controlled from the Options Screen, and the\n\ +# next time options are saved from the Options Screen this file will be\n\ +# completely rewritten. You have been warned...\n\ +# If you are looking for the general configuration file - it is normally\n\ +# called lynx.cfg, and it has different content and a different format.\n\ +# It is not this file.\n\n"); /* * File editor diff --git a/src/UCAux.c b/src/UCAux.c index bb95e915..3ee93929 100644 --- a/src/UCAux.c +++ b/src/UCAux.c @@ -82,6 +82,56 @@ PUBLIC BOOL UCCanTranslateFromTo ARGS2( return (LYCharSet_UC[from].UChndl >= 0); } +/* Returns YES if no translation necessary (because charsets +** are equal, are equivalent, etc.) +*/ +PUBLIC BOOL UCNeedNotTranslate ARGS2(int, from, int, to) +{ + CONST char *fromname; + CONST char *toname; + if (from==to) + return YES; + if (from < 0) + return NO; /* ??? */ + if (LYCharSet_UC[from].enc == UCT_ENC_7BIT) { + return YES; /* only 7bit chars */ + } + fromname = LYCharSet_UC[from].MIMEname; + if (0==strcmp(fromname,"x-transparent") || + 0==strcmp(fromname,"us-ascii")) { + return YES; + } + if (to < 0) + return NO; /* ??? */ + if (to==0) { + if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1)) + return YES; + } + toname = LYCharSet_UC[to].MIMEname; + if (0==strcmp(toname,"x-transparent")) { + return YES; + } + if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) { + return NO; + } + if (from==0) { + if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1)) + return YES; + } + if (LYCharSet_UC[from].enc == UCT_ENC_CJK) { + if (HTCJK == NOCJK) /* use that global flag, for now */ + return NO; + if (HTCJK == JAPANESE && ( + 0==strcmp(fromname,"euc-jp") || + 0==strncmp(fromname,"iso-2022-jp",11) || + 0==strcmp(fromname,"shift_jis") + )) + return YES; /* ???
*/ + return NO; /* if not handled by (from==to) above */ + } + return NO; +} + /* * The idea here is that any stage of the stream pipe which is interested * in some charset dependent processing will call this function. @@ -93,8 +143,8 @@ PUBLIC BOOL UCCanTranslateFromTo ARGS2( * output charsets has changed (e.g. by SGML.c stage after HTML.c stage * has processed a META tag). * The global flags (LYRawMode, HTPassEightBitRaw etc.) are currently - * not taken into account here, it's still up to the caller to do something - * about them. + * not taken into account here (except for HTCJK, somewhat), it's still + * up to the caller to do something about them. */ PUBLIC void UCSetTransParams ARGS5( UCTransParams *, pT, diff --git a/src/UCdomap.c b/src/UCdomap.c index 548096d5..61bdfd70 100644 --- a/src/UCdomap.c +++ b/src/UCdomap.c @@ -30,12 +30,21 @@ #include "LYCharSets.h" /* - * Include hash tables & parameters. + * Include tables & parameters. */ #ifdef VMS #include "[.chrtrans]def7_uni.h" #include "[.chrtrans]iso01_uni.h" #include "[.chrtrans]iso02_uni.h" +#include "[.chrtrans]cp437_uni.h" +#include "[.chrtrans]cp850_uni.h" +#include "[.chrtrans]koi8r_uni.h" +#include "[.chrtrans]cp852_uni.h" +#include "[.chrtrans]cp866_uni.h" +#include "[.chrtrans]cp1252_uni.h" +#include "[.chrtrans]cp1250_uni.h" +#include "[.chrtrans]cp1251_uni.h" +#include "[.chrtrans]viscii_uni.h" #include "[.chrtrans]iso03_uni.h" #include "[.chrtrans]iso04_uni.h" #include "[.chrtrans]iso05_uni.h" @@ -44,15 +53,6 @@ #include "[.chrtrans]iso08_uni.h" #include "[.chrtrans]iso09_uni.h" #include "[.chrtrans]iso10_uni.h" -#include "[.chrtrans]koi8r_uni.h" -#include "[.chrtrans]cp437_uni.h" -#include "[.chrtrans]cp850_uni.h" -#include "[.chrtrans]cp852_uni.h" -#include "[.chrtrans]cp866_uni.h" -#include "[.chrtrans]cp1250_uni.h" -#include "[.chrtrans]cp1251_uni.h" -#include "[.chrtrans]cp1252_uni.h" -#include "[.chrtrans]viscii_uni.h" #include "[.chrtrans]utf8_uni.h" #include "[.chrtrans]rfc_suni.h" 
#include "[.chrtrans]mnemonic_suni.h" @@ -63,23 +63,23 @@ #include "chrtrans/def7_uni.h" #include "chrtrans/iso01_uni.h" #include "chrtrans/iso02_uni.h" -#include "chrtrans/iso03_uni.h" -#include "chrtrans/iso04_uni.h" -#include "chrtrans/iso05_uni.h" -#include "chrtrans/iso06_uni.h" -#include "chrtrans/iso07_uni.h" -#include "chrtrans/iso08_uni.h" -#include "chrtrans/iso09_uni.h" -#include "chrtrans/iso10_uni.h" -#include "chrtrans/koi8r_uni.h" #include "chrtrans/cp437_uni.h" #include "chrtrans/cp850_uni.h" +#include "chrtrans/koi8r_uni.h" #include "chrtrans/cp852_uni.h" #include "chrtrans/cp866_uni.h" #include "chrtrans/cp1250_uni.h" #include "chrtrans/cp1251_uni.h" #include "chrtrans/cp1252_uni.h" #include "chrtrans/viscii_uni.h" +#include "chrtrans/iso03_uni.h" +#include "chrtrans/iso04_uni.h" +#include "chrtrans/iso05_uni.h" +#include "chrtrans/iso06_uni.h" +#include "chrtrans/iso07_uni.h" +#include "chrtrans/iso08_uni.h" +#include "chrtrans/iso09_uni.h" +#include "chrtrans/iso10_uni.h" #include "chrtrans/utf8_uni.h" #include "chrtrans/rfc_suni.h" #include "chrtrans/mnemonic_suni.h" @@ -251,9 +251,12 @@ PRIVATE u16 translations[][256] = { }; static u16 *UC_translate = NULL; -/* The standard kernel character-to-font mappings are not invertible - -- this is just a best effort. */ +PRIVATE struct UC_charset UCInfo[MAXCHARSETS]; +/* + * The standard kernel character-to-font mappings are not invertible + * -- this is just a best effort. 
+ */ #define MAX_GLYPH 512 /* Max possible glyph value */ PRIVATE unsigned char * inv_translate = NULL; @@ -880,8 +883,8 @@ UCconsole_map_init NOARGS * OK now, finally, some stuff that is more specifically for Lynx: - KW */ #ifdef NOTDEFINED -PUBLIC int UCGetcharset_byMIMEname PARAMS((char * UC_MIMEcharset)); -PUBLIC int UCGetcharset_byLYNXname PARAMS((char * UC_LYNXcharset)); +PUBLIC int UCGetcharset_byMIMEname PARAMS((CONST char * UC_MIMEcharset)); +PUBLIC int UCGetcharset_byLYNXname PARAMS((CONST char * UC_LYNXcharset)); #endif /* NOTDEFINED */ PUBLIC int UCTransUniChar ARGS2( @@ -1001,6 +1004,9 @@ PRIVATE int UC_MapGN ARGS2( UCInfo[UChndl].GN = Gn; UC_GNhandles[Gn] = UChndl; } + if (TRACE) + fprintf(stderr,"UC_Map...... Using %i <- %i (%s)\n", + Gn, UChndl, UCInfo[UChndl].MIMEname); UC_con_set_trans(UChndl,Gn,update_flag); return Gn; } @@ -1091,49 +1097,48 @@ PUBLIC long int UCTransToUni ARGS2( return unicode; } -#if 0 /* UNUSED */ PUBLIC int UCReverseTransChar ARGS3(char, ch_out, int, charset_in, int, charset_out) { - int Gn; - int rc; - int UChndl_in, UChndl_out; - u16 * ut; + int Gn; + int rc; + int UChndl_in, UChndl_out; + int i_ch = (unsigned char)ch_out; + u16 * ut; #ifndef UC_NO_SHORTCUTS - if (charset_in == charset_out) - return ch_out; + if (charset_in == charset_out) + return ch_out; #endif /* UC_NO_SHORTCUTS */ if (charset_in < 0) return -11; - if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) - return -11; + if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) + return -11; if (charset_out < 0) return -12; - if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) - return -12; - if (!UCInfo[UChndl_in].num_uni) - return -11; + if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) + return -12; + if (!UCInfo[UChndl_in].num_uni) + return -11; - ut = UCInfo[UChndl_out].unitable; - if (ut == UC_current_unitable) { - if ((Gn = UCInfo[UChndl_in].GN) >= 0) { - UC_translate = set_translate(Gn); - rc = inv_translate[(unsigned int)ch_out]; + ut = 
UCInfo[UChndl_out].unitable; + if (ut == UC_current_unitable) { + if ((Gn = UCInfo[UChndl_in].GN) >= 0) { + UC_translate = set_translate(Gn); + rc = inv_translate[i_ch]; if (rc >= 32) { return rc; - } + } } else { - Gn = UC_MapGN(UChndl_in,1); - UC_translate = set_translate(Gn); - rc = inv_translate[(unsigned int)ch_out]; + Gn = UC_MapGN(UChndl_in,1); + UC_translate = set_translate(Gn); + rc = inv_translate[i_ch]; if (rc >= 32) { return rc; } + } } - } - return UCTransChar(ch_out, charset_out, charset_in); + return UCTransChar(ch_out, charset_out, charset_in); } -#endif /* UNUSED */ /* * Returns string length, or negative value for error. @@ -1221,7 +1226,7 @@ if (buflen<2) } PRIVATE int UC_FindGN_byMIME ARGS1( - CONST char *, UC_MIMEcharset) + CONST char *, UC_MIMEcharset) { int i; @@ -1272,7 +1277,8 @@ PUBLIC int UCGetLYhndl_byMIME ARGS1( if (!strcmp(UC_MIMEcharset, "utf-8")) { return UCGetLYhndl_byMIME("unicode-1-1-utf-8"); } - if (!strncmp(UC_MIMEcharset, "iso-2022-jp", 11)) { + if (!strncmp(UC_MIMEcharset, "iso-2022-jp", 11) || + !strcmp(UC_MIMEcharset, "x-euc-jp")) { return UCGetLYhndl_byMIME("euc-jp"); } else if (!strcmp(UC_MIMEcharset, "iso-2022-kr")) { return UCGetLYhndl_byMIME("euc-kr"); @@ -1466,8 +1472,8 @@ PRIVATE char ** UC_setup_LYCharSets_repl ARGS2( (*p)[1] = '\0'; #else /* - * Use this instead... make those buggers - * int HTAtoms, so they will be cleaned up + * Use this instead... make those 1-char strings + * into HTAtoms, so they will be cleaned up * at exit... all for the sake of preventing * memory leaks, sigh. 
*/ @@ -1502,9 +1508,9 @@ PRIVATE char ** UC_setup_LYCharSets_repl ARGS2( */ PRIVATE int UC_Register_with_LYCharSets ARGS4( int, s, - CONST char *, UC_MIMEcharset, - CONST char *, UC_LYNXcharset, - int, lowest_eightbit) + CONST char *, UC_MIMEcharset, + CONST char *, UC_LYNXcharset, + int, lowest_eightbit) { int i, LYhndl,found; char ** repl; @@ -1525,14 +1531,14 @@ PRIVATE int UC_Register_with_LYCharSets ARGS4( * after all, this is experimental... */ for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) { - if (!strcmp(UC_LYNXcharset,LYchar_set_names[i])) { - LYhndl = i; + if (!strcmp(UC_LYNXcharset, LYchar_set_names[i])) { + LYhndl = i; } } for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) { - if (LYCharSet_UC[i].MIMEname && - !strcmp(UC_MIMEcharset,LYCharSet_UC[i].MIMEname)) { - LYhndl = i; + if (LYCharSet_UC[i].MIMEname && + !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) { + LYhndl = i; } } @@ -1542,7 +1548,7 @@ PRIVATE int UC_Register_with_LYCharSets ARGS4( if (TRACE) { fprintf(stderr, "UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.", - UC_MIMEcharset,UC_LYNXcharset); + UC_MIMEcharset, UC_LYNXcharset); } return -1; } @@ -1556,8 +1562,8 @@ PRIVATE int UC_Register_with_LYCharSets ARGS4( /* * Hmm, try to be conservative here. */ - LYchar_set_names[LYhndl] = UC_LYNXcharset; - LYchar_set_names[LYhndl+1] = NULL; + LYchar_set_names[LYhndl] = UC_LYNXcharset; + LYchar_set_names[LYhndl+1] = NULL; /* * Terminating NULL may be looked for by Lynx code. */ @@ -1605,7 +1611,7 @@ PUBLIC void UC_Charset_Setup ARGS8( int, nnuni, struct unimapdesc_str, replacedesc, int, lowest_eight, - int, UC_rawuni) + int, UC_rawuni) { int s, Gn; int i, status = 0, found; @@ -1613,19 +1619,19 @@ PUBLIC void UC_Charset_Setup ARGS8( /* * Get (new?) slot. 
*/ - found = -1; - for (i=0; i<UCNumCharsets && found<0; i++) { - if (!strcmp(UCInfo[i].MIMEname,UC_MIMEcharset)) { - found = i; - } + found = -1; + for (i = 0; i < UCNumCharsets && found < 0; i++) { + if (!strcmp(UCInfo[i].MIMEname, UC_MIMEcharset)) { + found = i; + } } if (found >= 0) { s = found; } else { if (UCNumCharsets >= MAXCHARSETS) { if (TRACE) { - fprintf(stderr,"UC_Charset_Setup: Too many. Ignoring %s/%s.", - UC_MIMEcharset,UC_LYNXcharset); + fprintf(stderr, "UC_Charset_Setup: Too many. Ignoring %s/%s.", + UC_MIMEcharset, UC_LYNXcharset); } return; } @@ -1674,6 +1680,16 @@ PUBLIC void UCInit NOARGS UC_CHARSET_SETUP; UC_CHARSET_SETUP_iso_8859_1; UC_CHARSET_SETUP_iso_8859_2; + UC_CHARSET_SETUP_cp437; + UC_CHARSET_SETUP_cp850; + UC_CHARSET_SETUP_koi8_r; + + UC_CHARSET_SETUP_cp852; + UC_CHARSET_SETUP_cp866; + UC_CHARSET_SETUP_iso_8859_1_windows_; + UC_CHARSET_SETUP_windows_1250; + UC_CHARSET_SETUP_windows_1251; + UC_CHARSET_SETUP_viscii; UC_CHARSET_SETUP_iso_8859_3; UC_CHARSET_SETUP_iso_8859_4; UC_CHARSET_SETUP_iso_8859_5; @@ -1682,16 +1698,7 @@ PUBLIC void UCInit NOARGS UC_CHARSET_SETUP_iso_8859_8; UC_CHARSET_SETUP_iso_8859_9; UC_CHARSET_SETUP_iso_8859_10; - UC_CHARSET_SETUP_koi8_r; - UC_CHARSET_SETUP_cp437; - UC_CHARSET_SETUP_cp850; - UC_CHARSET_SETUP_cp852; - UC_CHARSET_SETUP_cp866; - UC_CHARSET_SETUP_windows_1250; - UC_CHARSET_SETUP_windows_1251; - UC_CHARSET_SETUP_iso_8859_1_windows_; - UC_CHARSET_SETUP_viscii; UC_CHARSET_SETUP_unicode_1_1_utf_8; UC_CHARSET_SETUP_mnemonic_ascii_0; UC_CHARSET_SETUP_mnemonic; diff --git a/src/UCdomap.h b/src/UCdomap.h index 8c941464..fc5b40c4 100644 --- a/src/UCdomap.h +++ b/src/UCdomap.h @@ -30,29 +30,27 @@ extern void UC_Charset_Setup PARAMS(( int nnuni, struct unimapdesc_str replacedesc, int lowest_eight, - int UC_rawuni)); + int UC_rawuni)); char *UC_GNsetMIMEnames[4] = {"iso-8859-1","x-dec-graphics","cp437","x-transparent"}; int UC_GNhandles[4] = {-1, -1, -1, -1}; struct UC_charset { - CONST char *MIMEname; - CONST 
char *LYNXname; - u8* unicount; - u16* unitable; - int num_uni; - struct unimapdesc_str replacedesc; - int uc_status; - int LYhndl; - int GN; - int lowest_eight; - int enc; + CONST char *MIMEname; + CONST char *LYNXname; + u8* unicount; + u16* unitable; + int num_uni; + struct unimapdesc_str replacedesc; + int uc_status; + int LYhndl; + int GN; + int lowest_eight; + int enc; }; -PUBLIC struct UC_charset UCInfo[MAXCHARSETS]; - -PUBLIC int UCNumCharsets; +extern int UCNumCharsets; extern void UCInit NOARGS; diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index a2b5a4b2..0f6b6030 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -1234,7 +1234,8 @@ U+1fdf:?; U+1fed:!: U+1fef:!* U+1ffe:;; -0x20 U+2002 U+2004-U+2006 U+2009 # spaces +0x20 U+2000 U+2002 U+2004-U+2009 # spaces +U+2001: U+2003: U+200e:(->) U+200f:(<-) @@ -1270,6 +1271,7 @@ U+203a:> U+203b::X U+203c:!! U+203e:'- +0x2d U+2043 # HYPHEN BULLET ? U+2044:/ U+2070:^0 U+2074:^4 @@ -1312,6 +1314,7 @@ U+211e:Rx U+2120:(SM) U+2122:(TM) U+2126:Ohm +0x4b U+212A # Kelvin sign - K U+212b:Ang. U+2153: 1/3 U+2154: 2/3 diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index f981f777..62ad102d 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -87,6 +87,9 @@ struct unimapdesc_str themap_str = {0, NULL}; char *tblname; +PRIVATE int RawOrEnc = 0; +PRIVATE int Raw_found = 0; /* whether explicit R directive found */ + PRIVATE void addpair_str ARGS2( char *, str, int, un) @@ -138,6 +141,22 @@ PRIVATE void addpair ARGS2( { int i; + if (!Raw_found) { /* enc not (yet) explicitly given with 'R' */ + if (fp >= 128) { + if (RawOrEnc != UCT_ENC_8BIT && RawOrEnc <= UCT_ENC_8859) { + if (fp < 160) { /* cannot be 8859 */ + RawOrEnc = UCT_ENC_8BIT; + } else if (fp != 160 && fp != 173) { + RawOrEnc = UCT_ENC_8859; /* hmmm.. more tests needed? 
*/ + } else if (unicount[fp] == 0 && fp != un) { + /* first unicode for fp doesn't map to itself */ + RawOrEnc = UCT_ENC_8BIT; + } else { + RawOrEnc = UCT_ENC_8859; /* hmmm.. more tests needed? */ + } + } + } + } if (un <= 0xfffe) { /* * Check that it isn't a duplicate. @@ -165,7 +184,6 @@ char this_MIMEcharset[UC_MAXLEN_MIMECSNAME +1]; char this_LYNXcharset[UC_MAXLEN_LYNXCSNAME +1]; char id_append[UC_MAXLEN_ID_APPEND +1] = "_"; int this_isDefaultMap = -1; -int RawUni = 0; int lowest_eight = 999; PUBLIC int main ARGS2( @@ -253,7 +271,8 @@ PUBLIC int main ARGS2( while (*p == ' ' || *p == '\t') { p++; } - RawUni = strtol(p,0,10); + RawOrEnc = strtol(p,0,10); + Raw_found = 1; continue; /* @@ -348,7 +367,11 @@ PUBLIC int main ARGS2( } continue; } - + +/* Input line (after skipping spaces) doesn't start with one + of the specially recognized characters, so try to interpret + it as starting with a fontpos. +*/ fp0 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); @@ -473,7 +496,7 @@ PUBLIC int main ARGS2( } /* - * Okay, we hit EOF, now output hash table. + * Okay, we hit EOF, now output tables. */ fclose(ctbl); @@ -561,8 +584,8 @@ static u8 dfont_unicount%s[%d] = \n\ * If lowest_eightbit is anything else but 999, * this can't be 7-bit only. */ - if (lowest_eight != 999 && !RawUni) { - RawUni = UCT_ENC_8BIT; + if (lowest_eight != 999 && !RawOrEnc) { + RawOrEnc = UCT_ENC_8BIT; } if (nuni) { @@ -621,10 +644,10 @@ static struct unimapdesc_str dfont_replacedesc%s = {0,NULL};\n",id_append); printf("#define UC_CHARSET_SETUP%s UC_Charset_Setup(\ \"%s\",\\\n\"%s\",\\\n\ -dfont_unicount%s,dfont_unitable%s,%i,\\\n\ -dfont_replacedesc%s,%i,%i)\n", +dfont_unicount%s,dfont_unitable%s,%d,\\\n\ +dfont_replacedesc%s,%d,%d)\n", id_append, this_MIMEcharset, this_LYNXcharset, -id_append, id_append, nuni, id_append, lowest_eight, RawUni); +id_append, id_append, nuni, id_append, lowest_eight, RawOrEnc); exit(EX_OK); } |