about summary refs log tree commit diff stats
path: root/WWW/Library/Implementation/HTMLDTD.c
blob: 382c141af555079540228c276d44414bffae2e2e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
/*
 * $LynxId: HTMLDTD.c,v 1.58 2021/07/23 00:00:03 tom Exp $
 *
 *		Our Static DTD for HTML
 *		-----------------------
 */

/* Implements:
*/

#include <HTUtils.h>
#include <HTMLDTD.h>
#include <LYLeaks.h>
#include <LYJustify.h>

/*
 * Character entities like &nbsp; are now excluded from our DTD tables.  They
 * are mapped to Unicode and handled directly by the chartrans code, in much
 * the same way as numeric entities like &#123; are.  See
 * src/chrtrans/entities.h for the real mapping.
 */

/*	Entity Names
 *	------------
 *
 *	Names of the character entities known to this DTD, written without
 *	the leading '&' or trailing ';'.  The table and its size are handed
 *	to HTML_dtd below.
 *
 *	The entries are listed in ascending, case-sensitive byte order (all
 *	upper-case initials sort before lower-case ones).
 *	NOTE(review): presumably lookups rely on this ordering -- confirm
 *	before inserting or reordering entries.
 *
 *	This table must be matched exactly with ALL the translation tables
 *		(this is an obsolete translation mechanism, probably unused,
 *		currently replaced with Unicode chartrans in most cases...)
 */
static const char *entities[] =
{
    "AElig",			/* capital AE diphthong (ligature) */
    "Aacute",			/* capital A, acute accent */
    "Acirc",			/* capital A, circumflex accent */
    "Agrave",			/* capital A, grave accent */
    "Aring",			/* capital A, ring */
    "Atilde",			/* capital A, tilde */
    "Auml",			/* capital A, dieresis or umlaut mark */
    "Ccedil",			/* capital C, cedilla */
    "Dstrok",			/* capital Eth, Icelandic */
    "ETH",			/* capital Eth, Icelandic */
    "Eacute",			/* capital E, acute accent */
    "Ecirc",			/* capital E, circumflex accent */
    "Egrave",			/* capital E, grave accent */
    "Euml",			/* capital E, dieresis or umlaut mark */
    "Iacute",			/* capital I, acute accent */
    "Icirc",			/* capital I, circumflex accent */
    "Igrave",			/* capital I, grave accent */
    "Iuml",			/* capital I, dieresis or umlaut mark */
    "Ntilde",			/* capital N, tilde */
    "Oacute",			/* capital O, acute accent */
    "Ocirc",			/* capital O, circumflex accent */
    "Ograve",			/* capital O, grave accent */
    "Oslash",			/* capital O, slash */
    "Otilde",			/* capital O, tilde */
    "Ouml",			/* capital O, dieresis or umlaut mark */
    "THORN",			/* capital THORN, Icelandic */
    "Uacute",			/* capital U, acute accent */
    "Ucirc",			/* capital U, circumflex accent */
    "Ugrave",			/* capital U, grave accent */
    "Uuml",			/* capital U, dieresis or umlaut mark */
    "Yacute",			/* capital Y, acute accent */
    "aacute",			/* small a, acute accent */
    "acirc",			/* small a, circumflex accent */
    "acute",			/* spacing acute */
    "aelig",			/* small ae diphthong (ligature) */
    "agrave",			/* small a, grave accent */
    "amp",			/* ampersand */
    "aring",			/* small a, ring */
    "atilde",			/* small a, tilde */
    "auml",			/* small a, dieresis or umlaut mark */
    "brkbar",			/* broken vertical bar */
    "brvbar",			/* broken vertical bar */
    "ccedil",			/* small c, cedilla */
    "cedil",			/* spacing cedilla */
    "cent",			/* cent sign */
    "copy",			/* copyright sign */
    "curren",			/* currency sign */
    "deg",			/* degree sign */
    "die",			/* spacing dieresis */
    "divide",			/* division sign */
    "eacute",			/* small e, acute accent */
    "ecirc",			/* small e, circumflex accent */
    "egrave",			/* small e, grave accent */
    "emdash",			/* dash the width of emsp */
    "emsp",			/* em space - not collapsed */
    "endash",			/* dash the width of ensp */
    "ensp",			/* en space - not collapsed */
    "eth",			/* small eth, Icelandic */
    "euml",			/* small e, dieresis or umlaut mark */
    "frac12",			/* fraction 1/2 */
    "frac14",			/* fraction 1/4 */
    "frac34",			/* fraction 3/4 */
    "gt",			/* greater than */
    "hibar",			/* spacing macron */
    "iacute",			/* small i, acute accent */
    "icirc",			/* small i, circumflex accent */
    "iexcl",			/* inverted exclamation mark */
    "igrave",			/* small i, grave accent */
    "iquest",			/* inverted question mark */
    "iuml",			/* small i, dieresis or umlaut mark */
    "laquo",			/* angle quotation mark, left */
    "lt",			/* less than */
    "macr",			/* spacing macron */
    "mdash",			/* dash the width of emsp */
    "micro",			/* micro sign */
    "middot",			/* middle dot */
    "nbsp",			/* non breaking space */
    "ndash",			/* dash the width of ensp */
    "not",			/* negation sign */
    "ntilde",			/* small n, tilde */
    "oacute",			/* small o, acute accent */
    "ocirc",			/* small o, circumflex accent */
    "ograve",			/* small o, grave accent */
    "ordf",			/* feminine ordinal indicator */
    "ordm",			/* masculine ordinal indicator */
    "oslash",			/* small o, slash */
    "otilde",			/* small o, tilde */
    "ouml",			/* small o, dieresis or umlaut mark */
    "para",			/* paragraph sign */
    "plusmn",			/* plus-or-minus sign */
    "pound",			/* pound sign */
    "quot",			/* quote '"' */
    "raquo",			/* angle quotation mark, right */
    "reg",			/* circled R registered sign */
    "sect",			/* section sign */
    "shy",			/* soft hyphen */
    "sup1",			/* superscript 1 */
    "sup2",			/* superscript 2 */
    "sup3",			/* superscript 3 */
    "szlig",			/* small sharp s, German (sz ligature) */
    "thinsp",			/* thin space (not collapsed) */
    "thorn",			/* small thorn, Icelandic */
    "times",			/* multiplication sign */
    "trade",			/* trade mark sign (U+2122) */
    "uacute",			/* small u, acute accent */
    "ucirc",			/* small u, circumflex accent */
    "ugrave",			/* small u, grave accent */
    "uml",			/* spacing dieresis */
    "uuml",			/* small u, dieresis or umlaut mark */
    "yacute",			/* small y, acute accent */
    "yen",			/* yen sign */
    "yuml",			/* small y, dieresis or umlaut mark */
};

/*		Attribute Lists
 *		---------------
 *
 *	Lists must be in alphabetical order by attribute name
 *	The tag elements contain the number of attributes
 */

/* From Peter Flynn's intro to the HTML Pro DTD:

   %structure;

   DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ,
   2	1	2     2   1  8	 8   8	 8    8     8	 8    8        4	   4
   MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH
   8 n	    ?1 n   8	 8	2	 2    2    2	     2	    ?1 nE  4	   4

   %insertions;

   Elements which usually contain special-purpose material, or no text material at all.

   BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG,
   1 e?      2	     2 l     1 e    2 l     8	 4	  4 E 1? E     1 E	! E ?1 E
   IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area
   1 n	  1 E	     n	      n	      n	      n		      n	      8 E

   %text;

   Elements within the %structure; which directly contain running text.

   Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU,
				   2   2       2    2	  2	2    2	  2	2  2 n	 2
   AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN
   2	   2 n	   2	    2	    2	 2	  2
   Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB,
		 1  1	    1  1  1   1  ?1 n ?1nE? 1 E  1   1	    1	  1 l	 1	1 E?
   BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW
   1 n	      1 n	 1 n		    1 n
   Hypertext and graphics: A and?IMG
			   8	?8 E
   Mathematical: SUB, SUP, and MATH
		 4    4        4 l
   Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB
		4	 4 n	 4 n	      4 n
   %formula;
 */

/*	Elements
 *	--------
 *
 *	Must match definitions in HTMLDTD.html!
 *	Must be in alphabetical order.
 *
 *  The T_* extra info is listed here, even though most fields are not used
 *  in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags).
 *  This simplifies comparison of the tags_table0[] table (otherwise unchanged
 *  from original Lynx treatment) with the tags_table1[] table below. - kw
 *
 *    Name*,	Attributes,	No. of attributes,     content,   extra info...
 */

#include <src0_HTMLDTD.h>
#include <src1_HTMLDTD.h>

/* Working storage for the element table that HTML_dtd points at.  It has
   no initializer here; it is filled with the contents of either
   tags_table1 or tags_table0 on calling HTSwitchDTD - kw */

static HTTag tags[HTML_ALL_ELEMENTS];

/*
 * The HTML DTD descriptor: the element table and its count, followed by
 * the entity-name table and its count.  The element table is the
 * file-local tags[] array, so its contents are only meaningful after
 * HTSwitchDTD() has been called.
 */
const SGML_dtd HTML_dtd =
{
    tags,			/* element table, filled in by HTSwitchDTD() */
    HTML_ELEMENTS,		/* NOTE(review): tags[] is sized HTML_ALL_ELEMENTS -- confirm the difference is intended */
    entities,			/* probably unused */
    TABLESIZE(entities),	/* size of entities[] as computed by TABLESIZE */
};

/* This function fills the "tags" part of the HTML_dtd structure with
   what we want to use, either tags_table0 or tags_table1.  Note that it
   has to be called at least once before HTML_dtd is used, otherwise
   the HTML_dtd contents will be invalid!  This could be coded in a way
   that would make an initialisation call unnecessary, but my C knowledge
   is limited and I didn't want to list the whole tags_table1 table
   twice... - kw */
void HTSwitchDTD(int new_flag)
{
    if (TRACE)
	CTRACE((tfp,
		"HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n",
		new_flag ? "strict" : "tagsoup",
		(int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)),
		HTML_ALL_ELEMENTS,
		(int) sizeof(HTTag)));
    if (new_flag)
	MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag));
    else
	MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag));
}

/*
 * A stand-alone HTTag that is not part of tags[]: it has no attribute list
 * (NULL, with zero counts), SGML_EMPTY content and the T__UNREC_ extra info.
 * NOTE(review): presumably used to represent tags that are not found in the
 * element table -- confirm against the callers.
 */
HTTag HTTag_unrecognized =

{NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_, 0, 0};

/*
 *	Utility Routine:  Useful for people building HTML objects.
 */

/*	Start anchor element
 *	--------------------
 *
 *	It is kinda convenient to have a particular routine for
 *	starting an anchor element, as everything else for HTML is
 *	simple anyway.
 */
/*
 * Partial view of the structured-stream object.  Only the leading class
 * pointer is spelled out, which is the one member the helpers in this file
 * dereference (obj->isa->start_element).
 * NOTE(review): the remaining members are elided here; presumably each
 * implementation defines the full layout -- confirm that isa stays first.
 */
struct _HTStructured {
    HTStructuredClass *isa;
    /* ... */
};

/*
 *	Start an anchor (A) element on a structured object.
 *
 *	obj	target object; its class supplies the start_element method
 *	name	value for the NAME attribute, skipped when NULL or empty
 *	href	value for the HREF attribute, skipped only when NULL (an
 *		empty string is still passed on as a present attribute)
 *
 *	The element is started with -1 in the tag_charset position and 0 as
 *	the final argument.
 *	NOTE(review): -1 presumably means "charset unknown" -- confirm
 *	against the start_element implementations.
 */
void HTStartAnchor(HTStructured * obj, const char *name,
		   const char *href)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays.  value[] is handed to the callback as a whole, so
     * unused slots are set to NULL rather than left indeterminate.
     */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0);
}

/*
 *	Start an anchor (A) element with an optional TYPE attribute and an
 *	explicit tag charset.
 *
 *	obj		target object; its class supplies start_element
 *	name		value for NAME, skipped when NULL or empty
 *	href		value for HREF, skipped when NULL or empty
 *	linktype	value for TYPE, skipped when NULL or empty
 *	tag_charset	passed through unchanged to start_element
 */
void HTStartAnchor5(HTStructured * obj, const char *name,
		    const char *href,
		    const char *linktype,
		    int tag_charset)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays.  value[] is handed to the callback as a whole, so
     * unused slots are set to NULL rather than left indeterminate.
     */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href && *href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }
    if (linktype && *linktype) {
	present[HTML_A_TYPE] = YES;
	value[HTML_A_TYPE] = linktype;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0);
}

/*
 *	Start an ISINDEX element on a structured object.
 *
 *	obj	target object; its class supplies the start_element method
 *	prompt	value for the PROMPT attribute, skipped when NULL or empty
 *	href	value for the HREF attribute, skipped only when NULL (an
 *		empty string is still passed on as a present attribute)
 *
 *	The element is started with -1 in the tag_charset position and 0 as
 *	the final argument.
 *	NOTE(review): -1 presumably means "charset unknown" -- confirm
 *	against the start_element implementations.
 */
void HTStartIsIndex(HTStructured * obj, const char *prompt,
		    const char *href)
{
    BOOL present[HTML_ISINDEX_ATTRIBUTES];
    const char *value[HTML_ISINDEX_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays.  value[] is handed to the callback as a whole, so
     * unused slots are set to NULL rather than left indeterminate.
     */
    for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (prompt && *prompt) {
	present[HTML_ISINDEX_PROMPT] = YES;
	value[HTML_ISINDEX_PROMPT] = prompt;
    }
    if (href) {
	present[HTML_ISINDEX_HREF] = YES;
	value[HTML_ISINDEX_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0);
}