/* * $LynxId: LYCharUtils.c,v 1.109 2010/11/07 21:21:01 tom Exp $ * * Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM * ========================================================================== */ #include #include #define Lynx_HTML_Handler #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Used for nested lists. - FM */ int OL_CONTINUE = -29999; /* flag for whether CONTINUE is set */ int OL_VOID = -29998; /* flag for whether a count is set */ /* * This function converts any ampersands in allocated * strings to "&". If isTITLE is TRUE, it also * converts any angle-brackets to "<" or ">". - FM */ void LYEntify(char **str, int isTITLE) { char *p = *str; char *q = NULL, *cp = NULL; int amps = 0, lts = 0, gts = 0; #ifdef CJK_EX enum _state { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren } state = S_text; int in_sjis = 0; #endif if (isEmpty(p)) return; /* * Count the ampersands. - FM */ while ((*p != '\0') && (q = strchr(p, '&')) != NULL) { amps++; p = (q + 1); } /* * Count the left-angle-brackets, if needed. - FM */ if (isTITLE == TRUE) { p = *str; while ((*p != '\0') && (q = strchr(p, '<')) != NULL) { lts++; p = (q + 1); } } /* * Count the right-angle-brackets, if needed. - FM */ if (isTITLE == TRUE) { p = *str; while ((*p != '\0') && (q = strchr(p, '>')) != NULL) { gts++; p = (q + 1); } } /* * Check whether we need to convert anything. - FM */ if (amps == 0 && lts == 0 && gts == 0) return; /* * Allocate space and convert. - FM */ q = typecallocn(char, (strlen(*str) + (unsigned)(4 * amps) + (unsigned)(3 * lts) + (unsigned)(3 * gts) + 1)); if ((cp = q) == NULL) outofmem(__FILE__, "LYEntify"); assert(cp != NULL); assert(q != NULL); for (p = *str; *p; p++) { #ifdef CJK_EX if (IS_CJK_TTY) { switch (state) { case S_text: if (*p == '\033') { state = S_esc; *q++ = *p; continue; } break; case S_esc: if (*p == '$') { state = S_dollar; *q++ = *p; continue; } else if (*p == '(') { state = S_paren; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_dollar: if (*p == '@' || *p == 'B' || *p == 'A') { state = S_nonascii_text; *q++ = *p; continue; } else if (*p == '(') { state = S_dollar_paren; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_dollar_paren: if (*p == 'C') { state = S_nonascii_text; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_paren: if (*p == 'B' || *p == 'J' || *p == 'T') { state = S_text; *q++ = *p; continue; } else if (*p == 'I') { state = S_nonascii_text; *q++ = *p; continue; } /* FALLTHRU */ case S_nonascii_text: if (*p == '\033') state = S_esc; *q++ = *p; continue; default: break; } if (*(p + 1) != '\0' && (IS_EUC(UCH(*p), UCH(*(p + 1))) || IS_SJIS(UCH(*p), UCH(*(p + 1)), in_sjis) || IS_BIG5(UCH(*p), UCH(*(p + 1))))) { *q++ = *p++; *q++ = *p; continue; } } #endif if (*p == '&') { *q++ = '&'; *q++ = 'a'; *q++ = 'm'; *q++ = 'p'; *q++ = ';'; } else if (isTITLE && *p == '<') { *q++ = '&'; *q++ = 'l'; *q++ = 't'; *q++ = ';'; } else if (isTITLE && *p == '>') { *q++ = '&'; *q++ = 'g'; *q++ = 't'; *q++ = ';'; } else { *q++ = *p; } } *q = '\0'; FREE(*str); *str = cp; } /* * Callers to LYEntifyTitle/LYEntifyValue do not look at the 'target' param. * Optimize things a little by avoiding the memory allocation if not needed, * as is usually the case. */ static BOOL MustEntify(const char *source) { BOOL result; #ifdef CJK_EX if (IS_CJK_TTY && strchr(source, '\033') != 0) { result = TRUE; } else #endif { size_t length = strlen(source); size_t reject = strcspn(source, "<&>"); result = (BOOL) (length != reject); } return result; } /* * Wrappers for LYEntify() which do not assume that the source was allocated, * e.g., output from gettext(). */ const char *LYEntifyTitle(char **target, const char *source) { const char *result = 0; if (MustEntify(source)) { StrAllocCopy(*target, source); LYEntify(target, TRUE); result = *target; } else { result = source; } return result; } const char *LYEntifyValue(char **target, const char *source) { const char *result = 0; if (MustEntify(source)) { StrAllocCopy(*target, source); LYEntify(target, FALSE); result = *target; } else { result = source; } return result; } /* * This function trims characters <= that of a space (32), * including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2), * but not ESC, from the heads of strings. - FM */ void LYTrimHead(char *str) { const char *s = str; if (isEmpty(s)) return; while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC)) /* S/390 -- gil -- 1669 */ s++; if (s > str) { char *ns = str; while (*s) { *ns++ = *s++; } *ns = '\0'; } } /* * This function trims characters <= that of a space (32), * including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and * ESC from the tails of strings. - FM */ void LYTrimTail(char *str) { int i; if (isEmpty(str)) return; i = (int) strlen(str) - 1; while (i >= 0) { if (WHITE(str[i])) str[i] = '\0'; else break; i--; } } /* * This function should receive a pointer to the start * of a comment. It returns a pointer to the end ('>') * character of comment, or it's best guess if the comment * is invalid. - FM */ char *LYFindEndOfComment(char *str) { char *cp, *cp1; enum comment_state { start1, start2, end1, end2 } state; if (str == NULL) /* * We got NULL, so return NULL. - FM */ return NULL; if (StrNCmp(str, "