summary refs log tree commit diff stats
BranchCommit messageAuthorAge
develstricter set type match, implicit conversion for literals (#24176)metagn8 months
 
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
/*
 * $LynxId: SGML.h,v 1.50 2023/01/05 22:23:44 tom Exp $
 *			       SGML parse and stream definition for libwww
 *                             SGML AND STRUCTURED STREAMS
 *
 * The SGML parser is a state machine.	It is called for every character
 * of the input stream.	 The DTD data structure contains pointers
 * to functions which are called to implement the actual effect of the
 * text read. When these functions are called, the attribute structures pointed to by the
 * DTD are valid, and the function is passed a pointer to the current tag structure, and an
 * "element stack" which represents the state of nesting within SGML elements.
 *
 * The following aspects are from Dan Connolly's suggestions:  Binary search,
 * Structured object scheme basically, SGML content enum type.
 *
 * (c) Copyright CERN 1991 - See Copyright.html
 *
 */
#ifndef SGML_H
#define SGML_H

#include <HTStream.h>
#include <HTAnchor.h>
#include <LYJustify.h>

#ifdef __cplusplus
extern "C" {
#endif
/*
 * SGML content types
 *
 * One of these is stored in the `contents' member of every tag
 * description and says how the text inside the element is scanned.
 */
    typedef enum {
	/* No content. */
	SGML_EMPTY,

	/* Literal character data.  Only the exact close tag is recognized.
	   Kept for old www server compatibility only; this is not SGML. */
	SGML_LITTERAL,

	/* Character data.  Only "</" is recognized.
	   (But it is treated just as SGML_LITTERAL.) */
	SGML_CDATA,

	/* Like CDATA, but the content is allowed to be a comment. */
	SGML_SCRIPT,

	/* Replaceable character data.  "</" and &ref; should be recognized
	   (but it is treated like SGML_MIXED for old times' sake). */
	SGML_RCDATA,

	/* Elements and parsed character data.  All markup is recognized. */
	SGML_MIXED,

	/* Any data found should be regarded as an error.
	   (But it is treated just like SGML_MIXED.) */
	SGML_ELEMENT,

	/* Should contain no elements, but &ref; is parsed.
	   (With respect to contained tags it is treated like SGML_CDATA,
	   i.e. they are passed on literally, which is how SGML_RCDATA
	   ought to be treated) (added by KW). */
	SGML_PCDATA
    } SGMLContent;

    /*
     * Description of a single attribute.  Tag descriptions refer to
     * arrays of these through the AttrList type.
     */
    typedef struct {
	const char *name;	/* The name of the attribute */
#ifdef USE_PRETTYSRC
	/* Only present in builds that define USE_PRETTYSRC. */
	char type;		/* code of the type of the attribute. Code
				   values are in HTMLDTD.h */
#endif
    } attr;

    /* Pointer to read-only attribute description(s). */
    typedef const attr *AttrList;

    /*
     * A named attribute list.
     * NOTE(review): presumably a group of attributes that several tags
     * share (tag descriptions carry a pointer to AttrType) -- confirm
     * against the DTD tables.
     */
    typedef struct {
	const char *name;	/* name given to this list */
	AttrList list;		/* the attribute descriptions themselves */
    } AttrType;

    /* A set of element classes: an OR of the Tgc_* bits defined below. */
    typedef int TagClass;

    /* classes for text flow */
#define Tgc_FONTlike	0x000001	/* S,STRIKE,I,B,TT,U,BIG,SMALL,STYLE,BLINK;BR,TAB */
#define Tgc_EMlike	0x000002	/* EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,Q,INS,DEL,SPAN,.. */
#define Tgc_MATHlike	0x000004	/* SUB,SUP,MATH,COMMENT */
#define Tgc_Alike	0x000008	/* A */
#define Tgc_formula	0x000010	/* not used until math is supported better... */
    /* classes used for special structures: forms, tables,... */
#define Tgc_TRlike	0x000020	/* TR and similar */
#define Tgc_SELECTlike	0x000040	/* SELECT,INPUT,TEXTAREA(,...) */
    /* structural classes */
#define Tgc_FORMlike	0x000080	/* FORM itself */
#define Tgc_Plike	0x000100	/* P,H1..H6,... structures which contain text
					   or insertion but no other structures */
#define Tgc_DIVlike	0x000200	/* ADDRESS,FIG,BDO,NOTE,FN,DIV,CENTER;FIG
					   structures which can contain other
					   structures */
#define Tgc_LIlike	0x000400	/* LH,LI,DT,DD;TH,TD structure-like, only
					   valid within certain other structures */
#define Tgc_ULlike	0x000800	/* UL,OL,DL,DIR,MENU;TABLE;XMP,LISTING
					   special in some way, cannot contain
					   (parsed) text directly */
    /* classes for insertions */
#define Tgc_BRlike	0x001000	/* BR,IMG,TAB allowed in any text */
#define Tgc_APPLETlike	0x002000	/* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */
#define Tgc_HRlike	0x004000	/* HR,MARQUEE can contain all kinds of things
					   and/or are not allowed (?) in running
					   text */
#define Tgc_MAPlike	0x008000	/* MAP,AREA some specials that never contain
					   (directly or indirectly) other things
					   than special insertions */
#define Tgc_outer	0x010000	/* HTML,FRAMESET,FRAME,PLAINTEXT; */
#define Tgc_BODYlike	0x020000	/* BODY,BODYTEXT,NOFRAMES,TEXTFLOW; */
#define Tgc_HEADstuff	0x040000	/* HEAD,BASE,STYLE,TITLE; */
    /* classes for special relations */
#define Tgc_same	0x080000
#define Tgc_DELlike	0x100000	/* a class of aliases for inline DEL/INS */

    /* Small unsigned type used for the alias members of a tag description. */
    typedef unsigned char TagAlias;

/*
 * Groups for contains-data: each one is the union of several of the
 * single-bit Tgc_* classes.
 */
#define Tgc_INLINElike	(Tgc_FONTlike | Tgc_EMlike | Tgc_Alike | Tgc_SELECTlike | Tgc_BRlike | Tgc_APPLETlike)
#define Tgc_LISTlike	(Tgc_ULlike | Tgc_LIlike)
#define Tgc_BLOCKlike	(Tgc_LISTlike | Tgc_DIVlike)

/*
 * Some more properties of tags (or rather, elements), and rules for how
 * to deal with them. - kw
 *
 * A TagFlags value is an OR of the Tgf_* bits below.
 */
    typedef int TagFlags;

#define Tgf_endO	0x01	/* end tag can be Omitted */
#define Tgf_startO	0x02	/* start tag can be Omitted */
#define Tgf_mafse	0x04	/* Make Attribute-Free Start-tag End instead
				   (if found invalid) */
#define Tgf_strict	0x08	/* Ignore contained invalid elements,
				   don't pass them on; or other variant
				   handling for some content types */
#define Tgf_nreie	0x10	/* Not Really Empty If Empty,
				   used by color style code */
#define Tgf_frecyc	0x20	/* Pass element content on in a form that
				   allows recycling, i.e. don't translate
				   to output (display) character set yet
				   (treat content similar to attribute
				   values) */
#define Tgf_nolyspcl	0x40	/* Don't generate lynx special characters
				   for soft hyphen and various spaces
				   (nbsp, ensp,..) */

/*		A tag structure describes an SGML element.
 *		-----------------------------------------
 *
 *
 *	name		is the string which comes after the tag opener "<".
 *
 *	attributes	points to a zero-terminated array
 *			of attribute names.
 *
 *	The TagClass members hold sets of Tgc_* bits and `flags' holds a
 *	set of Tgf_* bits.
 *
 *	Note that the layout depends on the build: name_len exists only
 *	with USE_COLOR_STYLE and can_justify only with USE_JUSTIFY_ELTS.
 */
    typedef struct _tag HTTag;
    struct _tag {
	const char *name;	/* The name of the tag */
#ifdef USE_COLOR_STYLE
	unsigned name_len;	/* The length of the name */
#endif
#ifdef USE_JUSTIFY_ELTS
	BOOL can_justify;	/* justification allowed? */
#endif
	AttrList attributes;	/* The list of acceptable attributes */
	int number_of_attributes;	/* Number of possible attributes */
	/* NOTE(review): presumably the named attribute lists that
	   `attributes' was assembled from -- confirm in the DTD tables */
	const AttrType *attr_types;
	SGMLContent contents;	/* Content type of the element.
				   End only on end tag @@ */
	/* NOTE(review): presumably the class(es) this element itself
	   belongs to -- confirm against the DTD tables */
	TagClass tagclass;
	TagClass contains;	/* which classes of elements this one can contain directly */
	TagClass icontains;	/* which classes of elements this one can contain indirectly */
	TagClass contained;	/* in which classes can this tag be contained ? */
	TagClass icontained;	/* in which classes can this tag be indirectly contained ? */
	TagClass canclose;	/* which classes of elements can this one close
				   if something looks wrong ? */
	TagFlags flags;		/* set of Tgf_* properties */
	TagAlias alias;		/* extra levels, e.g., DEL/INS */
	TagAlias aliases;	/* number of extra levels, e.g., DEL/INS */
    };

/*		DTD Information
 *		---------------
 *
 *  Not the whole DTD, but all this parser uses of it.
 *
 *  Both tables are required to be sorted by name in strcmp order.
 *  NOTE(review): presumably so that they can be searched by binary
 *  search, which the notes at the top of this file mention -- confirm.
 */
    typedef struct {
	HTTag *tags;		/* Must be in strcmp order by name */
	int number_of_tags;	/* presumably the entry count of tags */
	STRING2PTR entity_names;	/* Must be in strcmp order by name */
	size_t number_of_entities;	/* presumably the entry count of
					   entity_names */
	/*  "entity_names" table probably unused,
	 *  see comments in HTMLDTD.c near the top
	 */
    } SGML_dtd;

/*	SGML context passed to parsers
 *
 *	Only the pointer type is declared here; the members of
 *	struct _HTSGMLContext are not given in this header.
 */
    typedef struct _HTSGMLContext *HTSGMLContext;	/* Hidden */

/*__________________________________________________________________________
*/

/*

Structured Object definition

   A structured object is something which can reasonably be represented
   in SGML.  I'll rephrase that.  A structured object is an ordered
   tree-structured arrangement of data which is representable as text.
   The SGML parser outputs to a Structured object.  A Structured object
   can output its contents to another Structured Object.  It's a kind of
   typed stream.  The architecture is largely Dan Connolly's.  Elements and
   entities are passed to the sob by number, implying a knowledge of the
   DTD.	 Knowledge of the SGML syntax is not here, though.

   Superclass: HTStream

   The creation methods will vary on the type of Structured Object.
   Maybe the callerData is enough info to pass along.

 */
    /* A structured object; its members are not declared in this header. */
    typedef struct _HTStructured HTStructured;

    /*
     * Table of methods for a structured object.
     *
     * The members before the "HTStreamClass ends here" marker are the
     * stream-class part.  The members after it handle elements and
     * entities, which are identified by number.
     *
     * NOTE(review): the meaning of the int results of start_element,
     * end_element and put_entity is not stated here -- confirm with the
     * implementations.
     */
    typedef struct _HTStructuredClass {

	const char *name;	/* Just for diagnostics */

	/* the following five all take the object itself as `me' */

	void (*_free) (HTStructured * me);

	void (*_abort) (HTStructured * me, HTError e);

	void (*put_character) (HTStructured * me, int ch);

	void (*put_string) (HTStructured * me, const char *str);

	void (*put_block) (HTStructured * me, const char *str, int len);

	/* HTStreamClass ends here */

	/* NOTE(review): attribute_present and attribute_value are
	   presumably parallel arrays indexed by attribute number, and
	   `include' an optional string handed back to the caller --
	   confirm with the implementations */
	int (*start_element) (HTStructured * me, int element_number,
			      const BOOL *attribute_present,
			      STRING2PTR attribute_value,
			      int charset,
			      char **include);

	int (*end_element) (HTStructured * me, int element_number,
			    char **include);

	int (*put_entity) (HTStructured * me, int entity_number);

    } HTStructuredClass;

/*
 * Equivalents to the following functions possibly could be generalised
 * into additional HTStructuredClass members.  For now they don't do
 * anything target-specific. - kw
 *
 * Only the prototypes are given in this header.
 * NOTE(review): `url' (LYCheckForCSI) and `csi' (LYDoCSI) are passed as
 * char **, so they are presumably outputs written by the callee --
 * confirm with the definitions.
 */
    extern BOOLEAN LYCheckForCSI(HTParentAnchor *anchor, char **url);
    extern void LYDoCSI(char *url, const char *comment, char **csi);
    extern BOOLEAN LYCommentHacks(HTParentAnchor *anchor, const char *comment);

/*

Find a Tag by Name

   Returns a pointer to the tag within the DTD.

   dtd		the DTD whose tag table is consulted
   string	the tag name to look for

   NOTE(review): what is returned for a name that is not in the DTD is
   not stated here (presumably NULL) -- confirm with the definition.

 */
    extern HTTag *SGMLFindTag(const SGML_dtd * dtd,
			      const char *string);

/*
 * Return the current offset within the file that SGML is parsing
 *
 * The function takes no arguments, so this interface does not select
 * a particular parser stream.
 * NOTE(review): the unit of the offset (bytes or characters) is not
 * stated here -- confirm with the definition.
 */
    extern int SGML_offset(void);

/*

Create an SGML parser

 */
/*
 * On entry,
 *	dtd		must point to a DTD structure as defined above
 *	anchor		NOTE(review): presumably the anchor of the document
 *			to be parsed -- confirm with the definition
 *	target		the structured object the parser outputs to
 *	extended_html	NOTE(review): meaning is not documented in this
 *			header -- confirm with the definition
 * On exit,
 *		The default tag starter has been processed.
 *		The new parser is returned as an HTStream.
 *
 * NOTE(review): this comment used to describe `callbacks' and `callData'
 * arguments ("callbacks must point to user routines", "callData is
 * returned in callbacks transparently").  The prototype has no such
 * parameters, so that text was stale.
 */
    extern HTStream *SGML_new(const SGML_dtd * dtd,
			      HTParentAnchor *anchor,
			      HTStructured * target,
			      int extended_html);

    /*
     * Read-only stream class object; it is only declared here.
     * NOTE(review): presumably the class of the stream that SGML_new()
     * returns -- confirm with the definition.
     */
    extern const HTStreamClass SGMLParser;

#ifdef __cplusplus
}
#endif
#endif				/* SGML_H */