about summary refs log tree commit diff stats
path: root/src/HTML.h
blob: 81cbfbcc08f8559e8d79d2b14e2f5a7667c6ebff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/*					HTML to rich text converter for libwww
**
**			THE HTML TO RTF OBJECT CONVERTER
**
**  This interprets the HTML semantics.
*/
#ifndef HTML_H
#define HTML_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif /* HTUTILS_H */

#include <UCDefs.h>
#include <UCAux.h>
#include <HTAnchor.h>
#include <HTMLDTD.h>

/*
 *  ATTR_CS_IN is the charset handle the macros below pass as the source
 *  charset of attribute strings.  It expands to a field of `me', so it (and
 *  every macro built on it) can only be expanded where an HTStructured
 *  pointer named `me' is in scope.  A previous definition is kept here for
 *  reference:
 */
/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
#define ATTR_CS_IN me->tag_charset

/*
 *  Wrappers around LYUCTranslateHTMLString() that always pass st_HTML as the
 *  last argument.  Going by the parameter names of the ...6 variant, the
 *  arguments after the string `s' are: source charset, target charset, a
 *  `spcls' flag, then `p' and `h', which are handed through unchanged.
 *  NOTE(review): the exact meaning of spcls/p/h is defined by
 *  LYUCTranslateHTMLString, which is not visible from this header.
 *
 *  This 3-argument form fixes source = ATTR_CS_IN, target = current_char_set
 *  and spcls = YES.
 */
#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
	LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)

/*  As above, but the caller supplies both charsets; spcls is still YES. */
#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
	LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML)

/*  Fully parameterized form: the caller supplies charsets and spcls. */
#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \
	LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML)

/*
 *  Wrappers around LYUCFullyTranslateString() that always pass NO as the
 *  fourth argument and st_HTML as the last one.  Going by the parameter
 *  names of the ...7 variant, the remaining arguments are: source charset,
 *  target charset, `spcls', `p', `h' and `Back'.
 *
 *  This 3-argument form fixes source = me->UCLYhndl (so it needs `me' in
 *  scope), target = current_char_set, spcls = YES and Back = NO.
 */
#define TRANSLATE_HTML(s,p,h) \
	LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML)

/*  As above, but the caller supplies both charsets; spcls = YES, Back = NO. */
#define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \
	LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML)

/*  Fully parameterized form: the caller supplies charsets, spcls and Back. */
#define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \
	LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML)

/*
 *  Strings from attributes which should be converted to some kind
 *  of "standard" representation (character encoding), was Latin-1,
 *  esp. URLs (incl. #fragments) and HTML NAME and ID stuff.
 *
 *  Both macros call LYUCTranslateHTMLString() with ATTR_CS_IN as source
 *  AND target charset (so `me' must be in scope), followed by the fixed
 *  flags NO, NO, YES.  They differ only in the final argument: st_URL for
 *  TRANSLATE_AND_UNESCAPE_TO_STD, st_HTML for UNESCAPE_FIELDNAME_TO_STD.
 */
#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \
	LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL)
#define UNESCAPE_FIELDNAME_TO_STD(s) \
	LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML)

/*
 *  Constant HTStructuredClass object defined outside this header.
 *  NOTE(review): presumably the class record that HTML.c installs as the
 *  `isa' member of the HTStructured objects it creates -- confirm in HTML.c.
 */
extern CONST HTStructuredClass HTMLPresentation;

#ifdef Lynx_HTML_Handler
/*
**	This section is semi-private to HTML.c and its helper modules. - FM
**	--------------------------------------------------------------------
*/

/*
 *  One entry of the style stack held in struct _HTStructured
 *  (its `stack' array and `sp' pointer).
 */
typedef struct _stack_element {
	HTStyle *	style;		/* style recorded in this entry */
	int		tag_number;	/* NOTE(review): presumably the element number of the tag that made the entry -- confirm */
} stack_element;

/*		HTML Object
**		-----------
*/
/*
 *  Number of elements in the `DivisionAlignments' and `stack' arrays of
 *  struct _HTStructured.
 */
#define MAX_NESTING 800		/* Should be checked by parser */

/*
 *  Per-document state of the HTML structured-stream object.  The macros in
 *  this header and the semi-private functions declared after this struct
 *  reach it through a pointer named `me'.
 *
 *  The layout is shared between HTML.c and its helper modules (this whole
 *  section is only visible when Lynx_HTML_Handler is defined), so members
 *  must not be reordered or retyped in one place only.
 */
struct _HTStructured {
    CONST HTStructuredClass * 	isa;
    HTParentAnchor * 		node_anchor;
    HText * 			text;

    HTStream*			target;			/* Output stream */
    HTStreamClass		targetClass;		/* Output routines */

    HTChildAnchor *		CurrentA;	/* current HTML_A anchor */
    int				CurrentANum;	/* current HTML_A number */
    char *			base_href;	/* current HTML_BASE href */
    char *			map_address;	/* current HTML_MAP address */

    HTChunk 			title;		/* Grow by 128 */
    HTChunk			object;		/* Grow by 128 */
    /*
     *  State collected for the OBJECT currently being handled
     *  (NOTE(review): inferred from the object_ prefix -- confirm in HTML.c).
     */
    BOOL			object_started;
    BOOL			object_declare;
    BOOL			object_shapes;
    BOOL			object_ismap;
    char *			object_usemap;
    char *			object_id;
    char *			object_title;
    char *			object_data;
    char *			object_type;
    char *			object_classid;
    char *			object_codebase;
    char *			object_codetype;
    char *			object_name;
    int				objects_mixed_open,
    				objects_figged_open;
    HTChunk			option;		/* Grow by 128 */
    BOOL			first_option;	/* First OPTION in SELECT? */
    char *			LastOptionValue;
    BOOL			LastOptionChecked;
    BOOL			select_disabled;
    HTChunk			textarea;	/* Grow by 128 */
    char *			textarea_name;
    int				textarea_name_cs;
    char *			textarea_accept_cs;
    char *			textarea_cols;
    int 			textarea_rows;
    int				textarea_disabled;
    char *			textarea_id;
    HTChunk			math;		/* Grow by 128 */
    HTChunk			style_block;	/* Grow by 128 */
    HTChunk			script;		/* Grow by 128 */

    /*
     *  Used for nested lists. - FM
     *  Note that OL_Counter and OL_Type have a fixed 12 elements each.
     */
    int		List_Nesting_Level;	/* counter for list nesting level */
    int 	OL_Counter[12];		/* counter for ordered lists */
    char 	OL_Type[12];		/* types for ordered lists */
    int 	Last_OL_Count;		/* last count in ordered lists */
    char 	Last_OL_Type;		/* last type in ordered lists */

    int				Division_Level;
    short			DivisionAlignments[MAX_NESTING];
    int				Underline_Level;
    int				Quote_Level;

    BOOL			UsePlainSpace;
    BOOL			HiddenValue;
    int				lastraw;

    char *			comment_start;	/* for literate programming */
    char *			comment_end;

    HTTag *			current_tag;
    BOOL			style_change;	/* tested by UPDATE_STYLE before calling actually_set_style() */
    HTStyle *			new_style;
    HTStyle *			old_style;
    int				current_default_alignment;
    BOOL			in_word;  /* Have just had a non-white char */
    stack_element 	stack[MAX_NESTING];
    stack_element 	*sp;		/* Style stack pointer */
    BOOL		stack_overrun;	/* Was MAX_NESTING exceeded? */
    int			skip_stack; /* flag to skip next style stack operation */

    /*
    **  Track if we are in an anchor, paragraph, address, base, etc.
    */
    BOOL		inA;
    BOOL		inAPPLET;
    BOOL		inAPPLETwithP;
    BOOL		inBadBASE;
    BOOL		inBadHREF;
    BOOL		inBadHTML;
    BOOL		inBASE;
    BOOL		inBoldA;
    BOOL		inBoldH;
    BOOL		inCAPTION;
    BOOL		inCREDIT;
    BOOL		inFIG;
    BOOL		inFIGwithP;
    BOOL		inFONT;
    BOOL		inFORM;
    BOOL		inLABEL;
    BOOL		inP;
    BOOL		inPRE;
    BOOL		inSELECT;
    BOOL		inTABLE;
    BOOL		inTEXTAREA;
    BOOL		inUnderline;

    BOOL		needBoldH;

    char *		xinclude; /* if no include string address passed */
    /*
    **  UCI and UCLYhndl give the UCInfo and charset registered for
    **  the HTML parser in the node_anchor's UCStages structure.  It
    **  indicates what is fed to the HTML parser as the stream of character
    **  data (not necessarily tags and attributes).  It should currently
    **  always be set to be the same as UCI and UCLYhndl for the HTEXT stage
    **  in the node_anchor's UCStages structure, since the HTML parser sends
    **  its input character data to the output without further charset
    **  translation.
    */
    LYUCcharset	*	UCI;
    int			UCLYhndl;
    /*
    **  inUCI and inUCLYhndl indicate the UCInfo and charset which the
    **  HTML parser treats as the input charset.  It is normally set
    **  to the UCI and UCLYhndl for the SGML parser in the node_anchor's
    **  UCStages structure (which may be a dummy, based on the MIME
    **  parser's UCI and UCLYhndl in that structure, when we are handling
    **  a local file or non-http(s) gateway).  It could be changed
    **  temporarily by the HTML parser, for conversions of attribute
    **  strings, but should be reset once done. - FM
    */
    LYUCcharset	*	inUCI;
    int			inUCLYhndl;
    /*
    **  outUCI and outUCLYhndl indicate the UCInfo and charset which
    **  the HTML parser treats as the output charset.  It is normally
    **  set to its own UCI and UCLYhndl.  It could be changed for
    **  conversions of attribute strings, but should be reset once
    **  done. - FM
    */
    LYUCcharset	*	outUCI;
    int			outUCLYhndl;
    /*
    **  T holds the transformation rules for conversions of strings
    **  between the input and output charsets by the HTML parser. - FM
    */
    UCTransParams	T;

    /*  Read by the ATTR_CS_IN macro. */
    int 		tag_charset; /* charset for attribute values etc. */
};

/*
 *  LYstyles:   returns an HTStyle pointer for the given style number.
 *              NOTE(review): numbering scheme and NULL behaviour are not
 *              visible from this header -- check the definition.
 *  LYBadHTML:  takes the HTML object and returns a BOOL.
 *              NOTE(review): presumably reports/records bad-HTML handling
 *              for `me' -- confirm against the definition.
 */
extern  HTStyle *LYstyles PARAMS((int style_number));
extern	BOOL LYBadHTML PARAMS((HTStructured *me)); 

/*
 *	Semi-Private functions. - FM
 *
 *	All of them operate on the HTML object passed as `me'.
 *	  HTML_put_character  takes a single character `c'.
 *	  HTML_put_string     takes a string `s'.
 *	  HTML_write          takes a buffer `s' and an int `l'
 *	                      (NOTE(review): presumably the byte count -- confirm).
 *	  HTML_put_entity     takes an entity number and returns an int
 *	                      (NOTE(review): return convention not visible here).
 *	  actually_set_style  is what the UPDATE_STYLE macro calls when
 *	                      me->style_change is set.
 */
extern void HTML_put_character PARAMS((HTStructured *me, char c));
extern void HTML_put_string PARAMS((HTStructured *me, CONST char *s));
extern void HTML_write PARAMS((HTStructured *me, CONST char *s, int l));
extern int HTML_put_entity PARAMS((HTStructured *me, int entity_number));
extern void actually_set_style PARAMS((HTStructured * me));

/*	Style buffering avoids dummy paragraph begin/ends.
**
**	UPDATE_STYLE calls actually_set_style(me) only when me->style_change
**	is set, so it must be expanded where an HTStructured pointer named
**	`me' is in scope.
**
**	NOTE(review): the expansion is a bare `if' statement rather than a
**	do { } while (0) wrapper, so take care when using it as the body of
**	an un-braced if/else.
*/
#define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
#endif /* Lynx_HTML_Handler */

/*
 *  Declared outside the Lynx_HTML_Handler section, so visible to every
 *  includer of this header.  Takes a writable string and returns nothing.
 *  NOTE(review): presumably lower-cases `i' in place -- confirm against
 *  the definition.
 */
extern void strtolower PARAMS((char* i));

/*				P U B L I C
*/

/*
**  HTConverter to present HTML
**
**  The four entry points below share one signature: each takes a
**  presentation `pres', the document's parent anchor `anchor' and an
**  output stream `sink', and returns an HTStream pointer.
**  NOTE(review): how the four differ (plain text, parsed/source
**  presentation, C output, normal presentation -- going by their names)
**  is defined in HTML.c and should be confirmed there.
*/
extern HTStream* HTMLToPlain PARAMS((
	HTPresentation *	pres,
	HTParentAnchor *	anchor,
	HTStream *		sink));

extern HTStream* HTMLParsedPresent PARAMS((
	HTPresentation *	pres,
	HTParentAnchor *	anchor,
	HTStream *		sink));

extern HTStream* HTMLToC PARAMS((
	HTPresentation *	pres,
	HTParentAnchor *	anchor,
	HTStream *		sink));

extern HTStream* HTMLPresent PARAMS((
	HTPresentation *	pres,
	HTParentAnchor *	anchor,
	HTStream *		sink));

/*
**  Returns a pointer to an HTML structured-stream object (HTStructured).
**
**  On entry,
**      anchor      is the parent anchor of the document
**      format_out  is the requested output format
**      target      is a stream
**                  (NOTE(review): presumably stored as the object's
**                  `target' output stream -- confirm in HTML.c)
*/
extern HTStructured* HTML_new PARAMS((
	HTParentAnchor * anchor,
	HTFormat	format_out,
	HTStream *	target));

/*
**  Record error message as a hypertext object.
**
**  The error message should be marked as an error so that it can be
**  reloaded later.  This implementation just throws up an error message
**  and leaves the document unloaded.
**
**  On entry,
**      sink    is a stream to the output device if any
**      number  is the HTTP error number
**      message is the human readable message.
**  On exit,
**      a return code like HT_LOADED if object exists else < 0
*/
extern int HTLoadError PARAMS((
	HTStream *	sink,
	int		number,
	CONST char *	message));

#endif /* HTML_H */