/* Plain text object HTWrite.c
** =================
**
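** NOTE: the wording of this header predates the code below and mentions a
** socket.  The stream object defined in this file does not write to a
** socket: it appends the text it is given to an HText object that is
** created in HTPlainPresent().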
**
** Bugs:
** strings written must be less than buffer size.
*/
#include "HTUtils.h"
#include "HTPlain.h"
/*
** Buffer size constant (a tradeoff).  Deliberately defined WITHOUT a
** trailing semicolon so that it can be used inside expressions and
** array declarators, e.g. "char buf[BUFFER_SIZE];".
*/
#define BUFFER_SIZE 4096
#include "HText.h"
#include "HTStyle.h"
#include "HTMLDTD.h"
#include "HTCJK.h"
#ifdef EXP_CHARTRANS
#include "UCMap.h"
#include "UCDefs.h"
#include "UCAux.h"
#endif /* EXP_CHARTRANS */
#include "LYLeaks.h"
/*
** FREE(x): release the storage x points to (if x is non-NULL) and reset
** x to NULL so that a later FREE(x) is harmless.
**
** The body is wrapped in do { } while (0) so that "FREE(p);" behaves as
** exactly one statement, including in an unbraced if/else.  x must be a
** modifiable pointer lvalue; it is evaluated more than once, so do not
** pass an expression with side effects.
*/
#define FREE(x) do { if (x) { free(x); (x) = NULL; } } while (0)
/* Style sheet searched by name ("Example") in HTPlainPresent(). */
extern HTStyleSheet * styleSheet;
/*
** Handle of the currently selected character set.  Used in this file as
** the index into LYchar_set_names[] and LYCharSets[], and as the default
** handle for the HText stage in HTPlain_getChartransInfo().
*/
extern int current_char_set;
/* Character set names, indexed by handle; compared against "ISO Latin 1". */
extern char * LYchar_set_names[];
/*
** Per character set replacement strings.  Indexed first by character set
** handle and then by the position of an entity name in
** HTML_dtd.entity_names[].
*/
extern CONST char **LYCharSets[];
#ifdef EXP_CHARTRANS
/*
** Indexed by character set handle; character codes are compared against
** the entry to decide whether they may be passed through unchanged.
*/
extern int LYlowest_eightbit[];
#endif /* EXP_CHARTRANS */
/*
** Returns an entity name for the given index.  This file calls it with
** (character code - 160) and looks the result up in
** HTML_dtd.entity_names[].
*/
extern CONST char * HTMLGetEntityName PARAMS((int i));
/* When set, characters above 160 are appended without table translation. */
extern BOOL HTPassEightBitRaw;
/* When set, characters in the range 127-160 are appended unchanged. */
extern BOOL HTPassHighCtrlRaw;
/* When not NOCJK, characters are appended without any 8-bit handling. */
extern HTCJKlang HTCJK;
/*
** Last raw character seen by HTPlain_put_character()/HTPlain_write(), or
** -1 for "none".  It lets a '\n' that directly follows a '\r' be skipped
** so that CRLF yields a single newline.  Reset to -1 by HTPlainPresent().
*/
PUBLIC int HTPlain_lastraw = -1;
/*		Plain text stream object
**		------------------------
**
**	State of one stream created by HTPlainPresent().  The members inside
**	the EXP_CHARTRANS section exist only when character translation
**	support is compiled in.
*/
struct _HTStream {
CONST HTStreamClass * isa;	/* set to &HTPlain by HTPlainPresent() */
HText * text;			/* HText object all output is appended to */
#ifdef EXP_CHARTRANS
LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */
int in_char_set; /* tells us what charset we are fed */
int htext_char_set; /* what charset feed to HText */
char utf_count;			/* UTF-8 continuation bytes still expected */
long utf_char;			/* code value accumulated so far */
char utf_buf[7];		/* raw bytes of the current UTF-8 sequence:
				** lead byte, up to 5 continuation bytes
				** and a terminating NUL */
char * utf_buf_p;		/* next free position in utf_buf */
UCTransParams T;		/* translation parameters, filled in by
				** UCSetTransParams() in HTPlainPresent() */
#endif /* EXP_CHARTRANS */
};
#ifdef EXP_CHARTRANS
/*
** Scratch buffer used by HTPlain_write(): it receives replacement strings
** from UCTransUniCharStr() (called with a limit of 60) and the "U<hex>"
** fallback text produced with sprintf().  It is a single file-level
** buffer, so its contents are only valid until the next use.
*/
PRIVATE char replace_buf [61]; /* buffer for replacement strings */
/*
**	Fill in the character set handles of a stream from its anchor.
**
**	me	stream whose in_char_set, htext_char_set and UCI are set
**	anchor	anchor that is queried and, where a stage has no usable
**		handle yet, updated
**
**	Only handles that are still negative in the stream are (re)computed.
**	me->UCI is always refreshed from the parser stage.
*/
PRIVATE void HTPlain_getChartransInfo ARGS2(
	HTStream *,		me,
	HTParentAnchor *,	anchor)
{
    int display_hndl;

    /*
    **  Input side: copy the MIME stage info to the parser stage, then
    **  take whatever handle the parser stage reports.
    */
    if (me->in_char_set < 0) {
        HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME,
                                 UCT_SETBY_PARSER);
        me->in_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    }

    /*
    **  Output side: use the anchor's HText stage handle, falling back to
    **  the current display character set when the anchor has none.
    */
    if (me->htext_char_set < 0) {
        display_hndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
        if (display_hndl < 0) {
            display_hndl = current_char_set;
            HTAnchor_setUCInfoStage(anchor, display_hndl,
                                    UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
        }
        /*
        **  NOTE(review): after the fallback above this repeats the same
        **  call with identical arguments; the call sequence is kept as is.
        */
        HTAnchor_setUCInfoStage(anchor, display_hndl,
                                UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
        me->htext_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    }

    me->UCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
}
#endif /* EXP_CHARTRANS */
/* Write the buffer out to the socket
** ----------------------------------
*/
/*_________________________________________________________________________
**
** A C T I O N R O U T I N E S
*/
#ifdef EXP_CHARTRANS
/*
** Forward declaration of HTPlain_write(), which is defined further down
** but is called from HTPlain_put_character() when EXP_CHARTRANS is
** defined. - kw
** The _WINDOWS variant spells the prototype out directly instead of going
** through the PARAMS() macro.
*/
#ifdef _WINDOWS
PRIVATE void HTPlain_write (HTStream * me, CONST char* s, int l);
#else
PRIVATE void HTPlain_write PARAMS((
HTStream * me,
CONST char * s,
int l));
#endif /* _WINDOWS */
#endif /* EXP_CHARTRANS */
/*	Character handling
**	------------------
**
**	Append one character to the stream's HText.
**
**	me	the stream
**	c	the character to process
**
**	With REMOVE_CR_ONLY defined, '\r' is discarded and every other
**	character is appended unchanged.  Otherwise the character goes
**	through the same newline and 8-bit handling that HTPlain_write()
**	applies to each character of a block (this routine is what
**	HTPlain_put_string() calls for every character of a terminated
**	string). - FM
*/
PRIVATE void HTPlain_put_character ARGS2(
	HTStream *,	me,
	char,		c)
{
#ifdef REMOVE_CR_ONLY
    /*
    **  Throw away \r's.
    */
    if (c == '\r')
        return;
    HText_appendCharacter(me->text, c);
#else
    unsigned char uc = (unsigned char)c;

    /*
    **  The LF of a CRLF pair: the CR already produced the newline.
    */
    if (c == '\n' && HTPlain_lastraw == '\r') {
        HTPlain_lastraw = -1;
        return;
    }
    HTPlain_lastraw = c;

    /*
    **  A CR (lone, or first half of CRLF) becomes a newline.
    */
    if (c == '\r') {
        HText_appendCharacter(me->text, '\n');
        return;
    }

#ifdef EXP_CHARTRANS
    /*
    **  For now don't repeat everything here that is done in
    **  HTPlain_write(); hand the character over instead. - kw
    */
    if (uc >= 127) {
        HTPlain_write(me, &c, 1);
        return;
    }
#endif

    /*
    **  CJK mode: pass everything through unchanged.
    */
    if (HTCJK != NOCJK) {
        HText_appendCharacter(me->text, c);
        return;
    }

    /*
    **  127-160 are passed unchanged when HTPassHighCtrlRaw is set.
    */
    if (uc >= 127 && uc < 161 && HTPassHighCtrlRaw) {
        HText_appendCharacter(me->text, c);
        return;
    }

    /*
    **  160 (nbsp) becomes an ASCII space.
    */
    if (uc == 160) {
        HText_appendCharacter(me->text, ' ');
        return;
    }

    /*
    **  173 (shy) is ignored.
    */
    if (uc == 173)
        return;

    /*
    **  Displayable ASCII, newline and tab are passed as they are.
    */
    if ((uc >= 32 && uc < 127) || c == '\n' || c == '\t') {
        HText_appendCharacter(me->text, c);
        return;
    }

    /*
    **  Anything else at or below 160 is dropped.
    */
    if (uc <= 160)
        return;

    /*
    **  161-255: pass raw if asked to, or if the display character set
    **  name starts with "ISO Latin 1".
    */
    if (HTPassEightBitRaw ||
        !strncmp(LYchar_set_names[current_char_set], "ISO Latin 1", 11)) {
        HText_appendCharacter(me->text, c);
        return;
    }

    /*
    **  Otherwise look the entity name for this character up in the DTD's
    **  entity name table and append the display character set's
    **  replacement string found at the same index.
    */
    {
        CONST char * entity = HTMLGetEntityName((int)(uc - 160));
        int entity_len = strlen(entity);
        int lo = 0;
        int hi = HTML_dtd.number_of_entities;
        int cmp = 1;
        int mid;

        /* Binary search */
        while (hi > lo) {
            mid = (lo + (hi-lo)/2);
            cmp = strncmp(HTML_dtd.entity_names[mid], entity, entity_len);
            if (cmp == 0) {
                HText_appendText(me->text,
                                 LYCharSets[current_char_set][mid]);
                break;
            }
            if (cmp < 0)
                lo = mid+1;
            else
                hi = mid;
        }

        /*
        **  No match: append the raw character.
        */
        if (cmp != 0) {
            HText_appendCharacter(me->text, c);
        }
    }
#endif /* REMOVE_CR_ONLY */
}
/*	String handling
**	---------------
**
**	Append a NUL-terminated string to the stream.
**
**	me	the stream
**	s	the string
**
**	With REMOVE_CR_ONLY defined the string is handed to
**	HText_appendText() unchanged and is not checked for NULL.
**	Otherwise a NULL string is ignored and every character is passed
**	through HTPlain_put_character().
*/
PRIVATE void HTPlain_put_string ARGS2(HTStream *, me, CONST char*, s)
{
#ifdef REMOVE_CR_ONLY
    HText_appendText(me->text, s);
#else
    if (s != NULL) {
        CONST char * cursor = s;

        while (*cursor != '\0') {
            HTPlain_put_character(me, *cursor);
            cursor++;
        }
    }
#endif /* REMOVE_CR_ONLY */
}
/*
**	Entry function for displayed text/plain and WWW_SOURCE strings. - FM
**	---------------------------------------------------------------
**
**	me	the stream
**	s	start of the block of characters to process
**	l	number of characters in the block (the block need not be
**		NUL-terminated)
**
**	Every character of the block is processed in turn:
**	 - with REMOVE_CR_ONLY, '\r' is discarded and everything else is
**	   appended unchanged;
**	 - otherwise CRLF, lone CR and lone LF each yield one newline
**	   (HTPlain_lastraw remembers the previous raw character across
**	   calls), and 8-bit characters are passed, replaced or dropped
**	   according to HTCJK, HTPassHighCtrlRaw, HTPassEightBitRaw and,
**	   with EXP_CHARTRANS, the translation parameters in me->T.
**
**	With EXP_CHARTRANS, UTF-8 decoding state is kept in the stream
**	(utf_count, utf_char, utf_buf), so a multi-byte sequence may be
**	split across calls.
**
**	Note that the helper macros defined inside the body (unsign_c, c_p,
**	PASSHICTRL, PASS8859SPECL, PASSHI8BIT) stay defined for the rest of
**	the translation unit.
*/
PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
{
    CONST char * p;
    CONST char * e = s+l;
#ifdef EXP_CHARTRANS
    BOOL chk;		/* TRUE when a Unicode -> display translation applies */
    long unsign_c, uck;	/* character code, and translation result */
    char c_p;		/* the character as a single byte, where possible */
#endif /* EXP_CHARTRANS */

    for (p = s; p < e; p++) {
#ifdef REMOVE_CR_ONLY
        /*
        **  Append the whole string, but remove any \r's. - FM
        */
        if (*p != '\r') {
            HText_appendCharacter(me->text, *p);
        }
#else
        /*
        **  Try to handle lone LFs, CRLFs and lone CRs
        **  as newline, and to deal with control, ASCII,
        **  and 8-bit characters based on best guesses
        **  of what's appropriate. - FM
        */
        if ((HTPlain_lastraw == '\r') && *p == '\n') {
            HTPlain_lastraw = -1;
            continue;
        }
        HTPlain_lastraw = *p;
        if (*p == '\r') {
            HText_appendCharacter(me->text, '\n');
            continue;
        }
#ifdef EXP_CHARTRANS
        unsign_c = (unsigned char)(*p);
        c_p = *p;

        if (me->T.decode_utf8) {
            /*
            **  Combine UTF-8 into Unicode.
            **  Incomplete characters silently ignored.
            **  from Linux kernel's console.c
            */
            if ((unsigned char)(*p) > 0x7f) {
                if (me->utf_count > 0 && (*p & 0xc0) == 0x80) {
                    /* Continuation byte of a sequence in progress. */
                    me->utf_char = (me->utf_char << 6) | (*p & 0x3f);
                    me->utf_count--;
                    *(me->utf_buf_p++) = *p;
                    if (me->utf_count == 0) {
                        /* Sequence complete: use the decoded value. */
                        *(me->utf_buf_p) = '\0';
                        unsign_c = me->utf_char;
                        if (unsign_c<256) c_p = (char)unsign_c;
                    }
                    else continue;  /* iterate for more */
                } else {
                    /*
                    **  Lead byte (or garbage): restart the buffer and
                    **  work out how many continuation bytes follow.
                    */
                    me->utf_buf_p = me->utf_buf;
                    *(me->utf_buf_p++) = *p;
                    if ((*p & 0xe0) == 0xc0) {
                        me->utf_count = 1;
                        me->utf_char = (*p & 0x1f);
                    } else if ((*p & 0xf0) == 0xe0) {
                        me->utf_count = 2;
                        me->utf_char = (*p & 0x0f);
                    } else if ((*p & 0xf8) == 0xf0) {
                        me->utf_count = 3;
                        me->utf_char = (*p & 0x07);
                    } else if ((*p & 0xfc) == 0xf8) {
                        me->utf_count = 4;
                        me->utf_char = (*p & 0x03);
                    } else if ((*p & 0xfe) == 0xfc) {
                        me->utf_count = 5;
                        me->utf_char = (*p & 0x01);
                    } else { /* garbage */
                        me->utf_count = 0;
                        me->utf_buf_p = me->utf_buf;
                        *(me->utf_buf_p) = '\0';
                    }
                    continue;  /* iterate for more */
                }
            } else { /* got an ASCII char */
                me->utf_count = 0;
                me->utf_buf_p = me->utf_buf;
                *(me->utf_buf_p) = '\0';
            }
        }

        if (me->T.trans_to_uni && unsign_c >= 127) {
            unsign_c = UCTransToUni(c_p, me->in_char_set);
            if (unsign_c > 0) {
                if (unsign_c < 256) {
                    c_p = (char)unsign_c;
                }
            }
        }
        /*
        **  At this point we have either unsign_c in Unicode
        **  (and c in latin1 if unsign_c is in the latin1 range),
        **  or unsign_c and c will have to be passed raw.
        */

#else
#define unsign_c (unsigned char)*p
#define c_p *p
#endif /* EXP_CHARTRANS */
        /*
        **  If CJK mode is on, we'll assume the document matches
        **  the user's selected character set, and if not, the
        **  user should toggle off raw/CJK mode to reload. - FM
        */
        if (HTCJK != NOCJK) {
            HText_appendCharacter(me->text, c_p);

#ifndef EXP_CHARTRANS
#define PASSHICTRL HTPassHighCtrlRaw
#define PASS8859SPECL HTPassHighCtrlRaw
#define PASSHI8BIT HTPassEightBitRaw
#else
#define PASSHICTRL (me->T.transp || \
                    unsign_c >= LYlowest_eightbit[me->in_char_set])
#define PASS8859SPECL me->T.pass_160_173_raw
#define PASSHI8BIT (HTPassEightBitRaw || \
                    (me->T.do_8bitraw && !me->T.trans_from_uni))
#endif /* EXP_CHARTRANS */

        /*
        **  If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the
        **  document matches and pass 127-160 8-bit characters.  If it
        **  doesn't match, the user should toggle raw/CJK mode off. - FM
        */
        } else if (unsign_c >= 127 && unsign_c < 161 &&
                   PASSHICTRL && PASS8859SPECL) {
            HText_appendCharacter(me->text, *p);
        } else if (unsign_c == 173 && PASS8859SPECL) {
            HText_appendCharacter(me->text, *p);
        /*
        **  If neither HTPassHighCtrlRaw nor CJK is set, play it safe
        **  and treat 160 (nbsp) as an ASCII space (32). - FM
        */
        } else if (unsign_c == 160) {
            HText_appendCharacter(me->text, ' ');
        /*
        **  If neither HTPassHighCtrlRaw nor CJK is set, play it safe
        **  and ignore 173 (shy). - FM
        */
        } else if (unsign_c == 173) {
            continue;
#ifdef EXP_CHARTRANS
        } else if (me->T.strip_raw_char_in &&
                   (unsigned char)*p >= 0xc0 &&
                   (unsigned char)*p < 255) {
            /*
            **  KOI special: strip high bit, gives
            **  (somewhat) readable ASCII.
            */
            HText_appendCharacter(me->text, (char)(*p & 0x7f));
#endif /* EXP_CHARTRANS */
        /*
        **  If we get to here, pass the displayable ASCII characters. - FM
        **
        **  With EXP_CHARTRANS, 8-bit characters at or above the display
        **  character set's lowest 8-bit code are passed too when
        **  PASSHI8BIT holds.  c_p is a plain char, which may be signed:
        **  it must be compared as unsigned char, otherwise bytes with
        **  the high bit set are negative and the test can never succeed.
        */
        } else if ((unsign_c >= 32 && unsign_c < 127) ||
#ifdef EXP_CHARTRANS
                   (PASSHI8BIT &&
                    (unsigned char)c_p >=
                        LYlowest_eightbit[me->htext_char_set]) ||
#endif
                   *p == '\n' || *p == '\t') {
            HText_appendCharacter(me->text, c_p);

#ifdef EXP_CHARTRANS
        } else if (me->T.use_raw_char_in) {
            HText_appendCharacter(me->text, *p);
        /******************************************************************
         *   I. LATIN-1 OR UCS2  TO  DISPLAY CHARSET
         ******************************************************************/
        } else if ((chk = (me->T.trans_from_uni && unsign_c >= 160)) &&
                   (uck = UCTransUniChar(unsign_c,
                                         me->htext_char_set)) >= 32 &&
                   uck < 256) {
            if (TRACE) {
                fprintf(stderr,
                        "UCTransUniChar returned 0x%lx:'%c'.\n",
                        uck, (char)uck);
            }
            HText_appendCharacter(me->text, (char)(uck & 0xff));
        } else if (chk && (uck == -4) &&
                   /*
                   **  Not found; look for replacement string.
                   **  The assignment is parenthesized so that uck holds
                   **  the function's return value rather than the result
                   **  of the ">= 0" comparison; the branch is taken under
                   **  exactly the same condition either way.
                   */
                   (uck = UCTransUniCharStr(replace_buf,60, unsign_c,
                                            me->htext_char_set, 0)) >= 0) {
            /*
            **  No further tests for validity - assume that whoever
            **  defined replacement strings knew what she was doing.
            */
            HText_appendText(me->text, replace_buf);
        /*
        **  If we get to here, and should have translated,
        **  translation has failed so far.
        */
        } else if (chk && unsign_c > 127 && me->T.output_utf8 &&
                   *me->utf_buf) {
            /* Emit the buffered raw UTF-8 sequence and clear the buffer. */
            HText_appendText(me->text, me->utf_buf);
            me->utf_buf_p = me->utf_buf;
            *(me->utf_buf_p) = '\0';
        } else if (me->T.trans_from_uni && unsign_c > 255) {
            /* Last resort: show the code as "U<hex>". */
            sprintf(replace_buf, "U%.2lx", unsign_c);
            HText_appendText(me->text, replace_buf);
#endif /* EXP_CHARTRANS */

        /*
        **  If we get to here, and HTPassEightBitRaw is not set and
        **  the selected character set is not "ISO Latin 1",
        **  use the translation tables for 161-255 8-bit
        **  characters (173 was handled above). - FM
        */
        } else if (unsign_c > 160) {
            if (!HTPassEightBitRaw && unsign_c <= 255 &&
                strncmp(LYchar_set_names[current_char_set],
                        "ISO Latin 1", 11)) {
                /*
                **  Attempt to translate. - FM
                */
                int len, high, low, i, diff=1;
                CONST char * name;
                int value = (int)(unsign_c - 160);

                name = HTMLGetEntityName(value);
                len = strlen(name);
                for(low = 0, high = HTML_dtd.number_of_entities;
                    high > low;
                    diff < 0 ? (low = i+1) : (high = i)) {
                    /* Binary search */
                    i = (low + (high-low)/2);
                    diff = strncmp(HTML_dtd.entity_names[i], name, len);
                    if (diff == 0) {
                        HText_appendText(me->text,
                                         LYCharSets[current_char_set][i]);
                        break;
                    }
                }
                if (diff) {
                    /*
                    **  Something went wrong in the translation, so
                    **  either output as UTF8 or a hex representation or
                    **  pass the raw character and hope it's OK.
                    */
#ifdef EXP_CHARTRANS
                    if (!PASSHI8BIT)
                        c_p = (char)unsign_c;
                    if (me->T.output_utf8 &&
                        *me->utf_buf) {
                        HText_appendText(me->text, me->utf_buf);
                        me->utf_buf_p = me->utf_buf;
                        *(me->utf_buf_p) = '\0';

                    } else if (me->T.trans_from_uni) {
                        sprintf(replace_buf,"U%.2lx",unsign_c);
                        HText_appendText(me->text, replace_buf);
                    } else
#endif /* EXP_CHARTRANS */
                    HText_appendCharacter(me->text, c_p);
                }
            } else {
                /*
                **  Didn't attempt a translation. - FM
                */
#ifdef EXP_CHARTRANS
                /*  either output as UTF8 or a hex representation or
                **  pass the raw character and hope it's OK.
                */
                if (unsign_c <= 255 && !PASSHI8BIT)
                    c_p = (char)unsign_c;
                if (unsign_c > 127 &&
                    me->T.output_utf8 &&
                    *me->utf_buf) {
                    HText_appendText(me->text, me->utf_buf);
                    me->utf_buf_p = me->utf_buf;
                    *(me->utf_buf_p) = '\0';

                } else if (me->T.trans_from_uni && unsign_c >= 127) {
                    sprintf(replace_buf,"U%.2lx",unsign_c);
                    HText_appendText(me->text, replace_buf);
                } else
#endif /* EXP_CHARTRANS */
                HText_appendCharacter(me->text, c_p);
            }
        }
        /*
        **  Characters that matched none of the branches above (the
        **  remaining control characters) are dropped.
        */
#endif /* REMOVE_CR_ONLY */
    }
}
/*	Free a plain text stream object
**	-------------------------------
**
**	me	the stream to release (may be NULL)
**
**	Only the HTStream structure itself is released.  The HText object
**	referenced by me->text is not touched here; it takes on an existence
**	of its own unless explicitly freed.
*/
PRIVATE void HTPlain_free ARGS1(
	HTStream *,	me)
{
    if (me) {
        free(me);
        me = NULL;
    }
}
/*	End writing
**	-----------
**
**	Abort handler of the stream class.
**
**	me	the stream to release
**	e	the error that caused the abort; not examined here
**
**	Aborting is handled exactly like a normal end of stream: the
**	stream object is released through HTPlain_free().
*/
PRIVATE void HTPlain_abort ARGS2(
HTStream *, me,
HTError, e)
{
HTPlain_free(me);
}
/*		Structured Object Class
**		-----------------------
**
**	Class record shared by every stream created by HTPlainPresent()
**	(each stream's isa member points at it).  The initializers are
**	positional: a name string followed by the free, abort,
**	put-character, put-string and write handlers defined above.
**	NOTE(review): the member names and order of HTStreamClass are not
**	visible in this file - confirm against its declaration.
**	The name string is historical; nothing in this file reads it.
*/
PUBLIC CONST HTStreamClass HTPlain =
{
"SocketWriter",
HTPlain_free,
HTPlain_abort,
HTPlain_put_character, HTPlain_put_string, HTPlain_write,
};
/*		New object
**		----------
**
**	Create a plain text stream whose output is appended to a new HText
**	object for the given anchor.
**
**	pres	not used by this function
**	anchor	anchor the HText is created for; with EXP_CHARTRANS it is
**		also the source of the character set information
**	sink	not used by this function
**
**	Returns the newly allocated stream.  If the allocation fails,
**	outofmem() is called.
**
**	Side effects: HTPlain_lastraw is reset to -1, and the new HText is
**	given the "Example" style and put into append mode.
*/
PUBLIC HTStream* HTPlainPresent ARGS3(
	HTPresentation *,	pres,
	HTParentAnchor *,	anchor,
	HTStream *,		sink)
{
    HTStream * stream;

    stream = (HTStream *) malloc(sizeof(*stream));
    if (!stream)
        outofmem(__FILE__, "HTPlain_new");

    /*
    **  No raw character has been seen yet on this stream.
    */
    HTPlain_lastraw = -1;
    stream->isa = &HTPlain;

#ifdef EXP_CHARTRANS
    /*
    **  Start with an empty UTF-8 decoding state.
    */
    stream->utf_char = 0;
    stream->utf_count = 0;
    stream->utf_buf[6] = '\0';
    stream->utf_buf[0] = '\0';
    stream->utf_buf_p = stream->utf_buf;

    /*
    **  Take the character set handles from the anchor, let
    **  HTPlain_getChartransInfo() fill in whatever is still unset, and
    **  derive the translation parameters from the result.
    */
    stream->htext_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    stream->in_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    HTPlain_getChartransInfo(stream, anchor);
    UCSetTransParams(&stream->T,
                     stream->in_char_set, stream->UCI,
                     stream->htext_char_set,
                     HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT));
#endif /* EXP_CHARTRANS */

    /*
    **  Create the text object and get it ready to receive characters.
    */
    stream->text = HText_new(anchor);
    HText_setStyle(stream->text, HTStyleNamed(styleSheet, "Example"));
    HText_beginAppend(stream->text);

    return stream;
}