1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
|
/* HTML to rich text converter for libwww
**
** THE HTML TO RTF OBJECT CONVERTER
**
** This interprets the HTML semantics.
*/
#ifndef HTML_H
#define HTML_H
#ifdef EXP_CHARTRANS
#include "UCDefs.h"
#include "UCAux.h"
#endif
#ifndef HTUTILS_H
#include "HTUtils.h"
#endif /* HTUTILS_H */
#include "HTAnchor.h"
#include "HTMLDTD.h"
/*
** When SHORT_NAMES is defined, the two long public identifiers below are
** remapped to abbreviated spellings.
** NOTE(review): presumably for linkers with a small significant-name
** limit -- confirm against the build documentation.
*/
#if defined(SHORT_NAMES)
#  define HTMLPresentation	HTMLPren
#  define HTMLPresent		HTMLPres
#endif /* SHORT_NAMES */
/*
** Structured-stream class object for the HTML presentation.  Only declared
** here; the definition lives outside this header.
*/
extern CONST HTStructuredClass HTMLPresentation;
#ifdef Lynx_HTML_Handler
/*
** This section is semi-private to HTML.c and its helper modules. - FM
** --------------------------------------------------------------------
*/
/*
** One slot of the style stack held in struct _HTStructured (see its
** stack[] array and sp pointer).
*/
typedef struct _stack_element {
    HTStyle    *style;		/* style recorded in this slot */
    int		tag_number;	/* presumably the number of the tag that */
				/* pushed the slot -- confirm in HTML.c  */
} stack_element;
/* HTML Object
** -----------
*/
/*
** Number of elements in the fixed-size nesting arrays of
** struct _HTStructured (DivisionAlignments[] and stack[]).
** Should be checked by parser.
*/
#define MAX_NESTING	800
/*
** Per-document state of the HTML structured object.  Member order and
** types are part of the layout shared with HTML.c and its helper modules;
** do not reorder.
*/
struct _HTStructured {
    CONST HTStructuredClass    *isa;
    HTParentAnchor	       *node_anchor;
    HText		       *text;

    HTStream		       *target;		/* Output stream */
    HTStreamClass		targetClass;	/* Output routines */

    HTChildAnchor	       *CurrentA;	/* current HTML_A anchor */
    int				CurrentANum;	/* current HTML_A number */
    char		       *base_href;	/* current HTML_BASE href */
    char		       *map_address;	/* current HTML_MAP address */

    HTChunk			title;		/* Grow by 128 */

    /*
    ** OBJECT handling.  NOTE(review): the object_* members look like
    ** they mirror the attributes of the OBJECT element being collected;
    ** confirm against HTML.c.
    */
    HTChunk			object;		/* Grow by 128 */
    BOOL			object_started;
    BOOL			object_declare;
    BOOL			object_shapes;
    BOOL			object_ismap;
    char		       *object_usemap;
    char		       *object_id;
    char		       *object_title;
    char		       *object_data;
    char		       *object_type;
    char		       *object_classid;
    char		       *object_codebase;
    char		       *object_codetype;
    char		       *object_name;

    /*
    ** SELECT / OPTION handling.
    */
    HTChunk			option;		/* Grow by 128 */
    BOOL			first_option;	/* First OPTION in SELECT? */
    char		       *LastOptionValue;
    BOOL			LastOptionChecked;
    BOOL			select_disabled;

    /*
    ** TEXTAREA handling.
    */
    HTChunk			textarea;	/* Grow by 128 */
    char		       *textarea_name;
    int				textarea_name_cs;
    char		       *textarea_accept_cs;
    char		       *textarea_cols;
    int				textarea_rows;
    int				textarea_disabled;
    char		       *textarea_id;

    HTChunk			math;		/* Grow by 128 */
    HTChunk			style_block;	/* Grow by 128 */
    HTChunk			script;		/* Grow by 128 */

    /*
    **  Bookkeeping for nested lists. - FM
    */
    int		List_Nesting_Level;	/* counter for list nesting level */
    int		OL_Counter[12];		/* counter for ordered lists	  */
    char	OL_Type[12];		/* types for ordered lists	  */
    int		Last_OL_Count;		/* last count in ordered lists	  */
    char	Last_OL_Type;		/* last type in ordered lists	  */

    int		Division_Level;
    short	DivisionAlignments[MAX_NESTING];
    int		Underline_Level;
    int		Quote_Level;

    BOOL	UsePlainSpace;
    BOOL	HiddenValue;
    int		lastraw;

    char       *comment_start;		/* for literate programming */
    char       *comment_end;

    HTTag      *current_tag;
    BOOL	style_change;
    HTStyle    *new_style;
    HTStyle    *old_style;
    int		current_default_alignment;
    BOOL	in_word;		/* Have just had a non-white char */

    /*
    **  Style stack: MAX_NESTING slots plus the pointer into them.
    */
    stack_element	stack[MAX_NESTING];
    stack_element      *sp;		/* Style stack pointer */
    BOOL		stack_overrun;	/* Was MAX_NESTING exceeded? */
    int			skip_stack;	/* flag to skip next style stack operation */

    /*
    **  Flags tracking whether we are currently inside an anchor,
    **  paragraph, address, base, etc.
    */
    BOOL	inA;
    BOOL	inAPPLET;
    BOOL	inAPPLETwithP;
    BOOL	inBadBASE;
    BOOL	inBadHREF;
    BOOL	inBadHTML;
    BOOL	inBASE;
    BOOL	inBoldA;
    BOOL	inBoldH;
    BOOL	inCAPTION;
    BOOL	inCREDIT;
    BOOL	inFIG;
    BOOL	inFIGwithP;
    BOOL	inFONT;
    BOOL	inFORM;
    BOOL	inLABEL;
    BOOL	inP;
    BOOL	inPRE;
    BOOL	inSELECT;
    BOOL	inTABLE;
    BOOL	inTEXTAREA;
    BOOL	inUnderline;
    BOOL	needBoldH;

    /*
    **  UCI and UCLYhndl give the UCInfo and charset registered for the
    **  HTML parser in the node_anchor's UCStages structure.  They describe
    **  what is fed to the HTML parser as the stream of character data
    **  (not necessarily tags and attributes).  Currently they should
    **  always be the same as UCI and UCLhndl for the HTEXT stage in the
    **  node_anchor's UCStages structure, because the HTML parser passes
    **  its input character data to the output without further charset
    **  translation.
    */
    LYUCcharset	       *UCI;
    int			UCLYhndl;

    /*
    **  inUCI and inUCLYhndl indicate the UCInfo and charset which the
    **  HTML parser treats as the input charset.  Normally they are set to
    **  the UCI and UCLhndl for the SGML parser in the node_anchor's
    **  UCStages structure (which may be a dummy, based on the MIME
    **  parser's UCI and UCLhndl in that structure, when we are handling
    **  a local file or non-http(s) gateway).  The HTML parser could
    **  change them temporarily for conversions of attribute strings, but
    **  should reset them once done. - FM
    */
    LYUCcharset	       *inUCI;
    int			inUCLYhndl;

    /*
    **  outUCI and outUCLYhndl indicate the UCInfo and charset which the
    **  HTML parser treats as the output charset.  Normally they are set
    **  to the parser's own UCI and UCLYhndl.  They could be changed for
    **  conversions of attribute strings, but should be reset once
    **  done. - FM
    */
    LYUCcharset	       *outUCI;
    int			outUCLYhndl;

    /*
    **  T holds the transformation rules used by the HTML parser when
    **  converting strings between the input and output charsets. - FM
    */
    UCTransParams	T;

    int			tag_charset;	/* charset for attribute values etc. */
};
/*
** View of a stream object as seen from this header: only the leading
** class pointer is spelled out; anything after it is elided here.
*/
struct _HTStream {
    CONST HTStreamClass	       *isa;	/* stream class of this object */
    /* .... */
};
/*
* Semi-Private functions. - FM
*/
/*
** Output entry points operating on an HTStructured object `me':
** a single character, a NUL-terminated string, a buffer `s' with an
** explicit length `l', and an entity given by its number.
** NOTE(review): semantics are implemented in HTML.c and are not visible
** from this header -- confirm there before relying on details.
*/
extern void HTML_put_character PARAMS((
	HTStructured *	me,
	char		c));

extern void HTML_put_string PARAMS((
	HTStructured *	me,
	CONST char *	s));

extern void HTML_write PARAMS((
	HTStructured *	me,
	CONST char *	s,
	int		l));

extern int HTML_put_entity PARAMS((
	HTStructured *	me,
	int		entity_number));
#endif /* Lynx_HTML_Handler */
/* P U B L I C
*/
/*
** HTConverter to present HTML
*/
/*
** Converter constructor: takes a presentation, the parent anchor of the
** document and a sink stream, and returns a stream.
** NOTE(review): by its name this targets plain-text output -- confirm in
** HTML.c.
*/
extern HTStream *HTMLToPlain PARAMS((HTPresentation *pres,
				     HTParentAnchor *anchor,
				     HTStream	    *sink));
/*
** Converter constructor with the same signature as HTMLToPlain: takes a
** presentation, the parent anchor and a sink stream; returns a stream.
** NOTE(review): the exact output form is defined in HTML.c -- not
** visible from this header.
*/
extern HTStream *HTMLToC PARAMS((HTPresentation *pres,
				 HTParentAnchor *anchor,
				 HTStream	*sink));
/*
** Converter constructor used to present HTML: takes a presentation, the
** parent anchor and a sink stream; returns a stream.
** (The identifier is remapped to HTMLPres when SHORT_NAMES is defined.)
*/
extern HTStream *HTMLPresent PARAMS((HTPresentation *pres,
				     HTParentAnchor *anchor,
				     HTStream	    *sink));
/*
** Constructor for the HTML structured object.
**
**	anchor		parent anchor of the document
**	format_out	requested output format
**	target		target stream
**
** Returns a pointer to an HTStructured.
** NOTE(review): ownership and failure behaviour are not visible from
** this header -- confirm in HTML.c.
*/
extern HTStructured *HTML_new PARAMS((HTParentAnchor *anchor,
				      HTFormat	      format_out,
				      HTStream	     *target));
/*
** Names for selected internal representations.
*/
/*
** Identifiers for the selectable internal character representations.
** The numeric values are spelled out; they equal the values the
** enumerators receive implicitly (0, 1, 2).
*/
typedef enum _HTMLCharacterSet {
    HTML_ISO_LATIN1 = 0,
    HTML_NEXT_CHARS = 1,
    HTML_PC_CP950   = 2
} HTMLCharacterSet;
/*
** Record error message as a hypertext object.
**
** The error message should be marked as an error so that it can be
** reloaded later. This implementation just throws up an error message
** and leaves the document unloaded.
**
** On entry,
** sink is a stream to the output device if any
** number is the HTTP error number
** message is the human readable message.
** On exit,
** a return code like HT_LOADED if object exists else < 0
*/
extern int HTLoadError PARAMS((
	HTStream *	sink,		/* stream to the output device, if any */
	int		number,		/* HTTP error number */
	CONST char *	message));	/* human readable message */
#endif /* HTML_H */
|