diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 2012-02-20 01:32:18 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 2012-02-20 01:32:18 -0500 |
commit | bb5fd6e44e480f571bcb713788cc50eea44095e5 (patch) | |
tree | dc3b9975b9bf9e18ce454348ab31ae232a372107 /WWW/Library/Implementation/SGML.h | |
parent | 3e8c172cd64e8a34029b60208c0d3016d3609505 (diff) | |
download | lynx-snapshots-bb5fd6e44e480f571bcb713788cc50eea44095e5.tar.gz |
snapshot of project "lynx", label v2-8-8dev_10b
Diffstat (limited to 'WWW/Library/Implementation/SGML.h')
-rw-r--r-- | WWW/Library/Implementation/SGML.h | 286 |
1 files changed, 0 insertions, 286 deletions
diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h deleted file mode 100644 index 9fccdda2..00000000 --- a/WWW/Library/Implementation/SGML.h +++ /dev/null @@ -1,286 +0,0 @@ -/* - * $LynxId: SGML.h,v 1.46 2012/02/10 18:32:26 tom Exp $ - * SGML parse and stream definition for libwww - * SGML AND STRUCTURED STREAMS - * - * The SGML parser is a state machine. It is called for every character - * of the input stream. The DTD data structure contains pointers - * to functions which are called to implement the actual effect of the - * text read. When these functions are called, the attribute structures pointed to by the - * DTD are valid, and the function is passed a pointer to the current tag structure, and an - * "element stack" which represents the state of nesting within SGML elements. - * - * The following aspects are from Dan Connolly's suggestions: Binary search, - * Structured object scheme basically, SGML content enum type. - * - * (c) Copyright CERN 1991 - See Copyright.html - * - */ -#ifndef SGML_H -#define SGML_H - -#include <HTStream.h> -#include <HTAnchor.h> -#include <LYJustify.h> - -#ifdef __cplusplus -extern "C" { -#endif -/* - * - * SGML content types - * - */ typedef enum { - SGML_EMPTY, /* No content. */ - SGML_LITTERAL, /* Literal character data. Recognize exact close tag only. - Old www server compatibility only! Not SGML */ - SGML_CDATA, /* Character data. Recognize </ only. - (But we treat it just as SGML_LITTERAL.) */ - SGML_SCRIPT, /* Like CDATA, but allow it to be a comment */ - SGML_RCDATA, /* Replaceable character data. Should recognize </ and &ref; - (but we treat it like SGML_MIXED for old times' sake). */ - SGML_MIXED, /* Elements and parsed character data. - Recognize all markup. */ - SGML_ELEMENT, /* Any data found should be regarded as an error. - (But we treat it just like SGML_MIXED.) */ - SGML_PCDATA /* Should contain no elements but &ref; is parsed. - (We treat it like SGML_CDATA wrt. contained tags - i.e. pass them on literally, i.e. like we should - treat SGML_RCDATA) (added by KW). */ - } SGMLContent; - - typedef struct { - const char *name; /* The name of the attribute */ -#ifdef USE_PRETTYSRC - char type; /* code of the type of the attribute. Code - values are in HTMLDTD.h */ -#endif - } attr; - - typedef const attr *AttrList; - - typedef struct { - const char *name; - AttrList list; - } AttrType; - - typedef int TagClass; - - /* textflow */ -#define Tgc_FONTlike 0x00001 /* S,STRIKE,I,B,TT,U,BIG,SMALL,STYLE,BLINK;BR,TAB */ -#define Tgc_EMlike 0x00002 /* EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,Q,INS,DEL,SPAN,.. */ -#define Tgc_MATHlike 0x00004 /* SUB,SUP,MATH,COMMENT */ -#define Tgc_Alike 0x00008 /* A */ -#define Tgc_formula 0x00010 /* not used until math is supported better... */ - /* used for special structures: forms, tables,... */ -#define Tgc_TRlike 0x00020 /* TR and similar */ -#define Tgc_SELECTlike 0x00040 /* SELECT,INPUT,TEXTAREA(,...) */ - /* structure */ -#define Tgc_FORMlike 0x00080 /* FORM itself */ -#define Tgc_Plike 0x00100 /* P,H1..H6,... structures containing text or - insertion but not other structures */ -#define Tgc_DIVlike 0x00200 /* ADDRESS,FIG,BDO,NOTE,FN,DIV,CENTER;FIG - structures which can contain other structures */ -#define Tgc_LIlike 0x00400 /* LH,LI,DT,DD;TH,TD structure-like, only valid - within certain other structures */ -#define Tgc_ULlike 0x00800 /* UL,OL,DL,DIR,MENU;TABLE;XMP,LISTING - special in some way, cannot contain (parsed) - text directly */ - /* insertions */ -#define Tgc_BRlike 0x01000 /* BR,IMG,TAB allowed in any text */ -#define Tgc_APPLETlike 0x02000 /* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */ -#define Tgc_HRlike 0x04000 /* HR,MARQUEE can contain all kinds of things - and/or are not allowed (?) in running text */ -#define Tgc_MAPlike 0x08000 /* MAP,AREA some specials that never contain - (directly or indirectly) other things than - special insertions */ -#define Tgc_outer 0x10000 /* HTML,FRAMESET,FRAME,PLAINTEXT; */ -#define Tgc_BODYlike 0x20000 /* BODY,BODYTEXT,NOFRAMES,TEXTFLOW; */ -#define Tgc_HEADstuff 0x40000 /* HEAD,BASE,STYLE,TITLE; */ - /* special relations */ -#define Tgc_same 0x80000 - -/* - * Groups for contains-data. - */ -#define Tgc_INLINElike (Tgc_Alike | Tgc_APPLETlike | Tgc_BRlike | Tgc_EMlike | Tgc_FONTlike | Tgc_SELECTlike) -#define Tgc_LISTlike (Tgc_LIlike | Tgc_ULlike) -#define Tgc_BLOCKlike (Tgc_DIVlike | Tgc_LISTlike) - -/* Some more properties of tags (or rather, elements) and rules how - to deal with them. - kw */ - typedef int TagFlags; - -#define Tgf_endO 0x00001 /* end tag can be Omitted */ -#define Tgf_startO 0x00002 /* start tag can be Omitted */ -#define Tgf_mafse 0x00004 /* Make Attribute-Free Start-tag End instead - (if found invalid) */ -#define Tgf_strict 0x00008 /* Ignore contained invalid elements, - don't pass them on; or other variant - handling for some content types */ -#define Tgf_nreie 0x00010 /* Not Really Empty If Empty, - used by color style code */ -#define Tgf_frecyc 0x00020 /* Pass element content on in a form that - allows recycling, i.e. don't translate to - output (display) character set yet (treat - content similar to attribute values) */ -#define Tgf_nolyspcl 0x00040 /* Don't generate lynx special characters - for soft hyphen and various spaces (nbsp, - ensp,..) */ - -/* A tag structure describes an SGML element. - * ----------------------------------------- - * - * - * name is the string which comes after the tag opener "<". - * - * attributes points to a zero-terminated array - * of attribute names. - * - * litteral determines how the SGML engine parses the characters - * within the element. If set, tag openers are ignored - * except for that which opens a matching closing tag. - * - */ - typedef struct _tag HTTag; - struct _tag { - const char *name; /* The name of the tag */ -#ifdef USE_COLOR_STYLE - unsigned name_len; /* The length of the name */ -#endif -#ifdef USE_JUSTIFY_ELTS - BOOL can_justify; /* justification allowed? */ -#endif - AttrList attributes; /* The list of acceptable attributes */ - int number_of_attributes; /* Number of possible attributes */ - const AttrType *attr_types; - SGMLContent contents; /* End only on end tag @@ */ - TagClass tagclass; - TagClass contains; /* which classes of elements this one can contain directly */ - TagClass icontains; /* which classes of elements this one can contain indirectly */ - TagClass contained; /* in which classes can this tag be contained ? */ - TagClass icontained; /* in which classes can this tag be indirectly contained ? */ - TagClass canclose; /* which classes of elements can this one close - if something looks wrong ? */ - TagFlags flags; - }; - -/* DTD Information - * --------------- - * - * Not the whole DTD, but all this parser uses of it. - */ - typedef struct { - HTTag *tags; /* Must be in strcmp order by name */ - int number_of_tags; - STRING2PTR entity_names; /* Must be in strcmp order by name */ - size_t number_of_entities; - /* "entity_names" table probably unused, - * see comments in HTMLDTD.c near the top - */ - } SGML_dtd; - -/* SGML context passed to parsers -*/ - typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden */ - -/*__________________________________________________________________________ -*/ - -/* - -Structured Object definition - - A structured object is something which can reasonably be represented - in SGML. I'll rephrase that. A structured object is an ordered - tree-structured arrangement of data which is representable as text. - The SGML parser outputs to a Structured object. A Structured object - can output its contents to another Structured Object. It's a kind of - typed stream. The architecture is largely Dan Conolly's. Elements and - entities are passed to the sob by number, implying a knowledge of the - DTD. Knowledge of the SGML syntax is not here, though. - - Superclass: HTStream - - The creation methods will vary on the type of Structured Object. - Maybe the callerData is enough info to pass along. - - */ - typedef struct _HTStructured HTStructured; - - typedef struct _HTStructuredClass { - - const char *name; /* Just for diagnostics */ - - void (*_free) (HTStructured * me); - - void (*_abort) (HTStructured * me, HTError e); - - void (*put_character) (HTStructured * me, int ch); - - void (*put_string) (HTStructured * me, const char *str); - - void (*put_block) (HTStructured * me, const char *str, int len); - - /* HTStreamClass ends here */ - - int (*start_element) (HTStructured * me, int element_number, - const BOOL *attribute_present, - STRING2PTR attribute_value, - int charset, - char **include); - - int (*end_element) (HTStructured * me, int element_number, - char **include); - - int (*put_entity) (HTStructured * me, int entity_number); - - } HTStructuredClass; - -/* - Equivalents to the following functions possibly could be generalised - into additional HTStructuredClass members. For now they don't do - anything target-specific. - kw - */ - extern BOOLEAN LYCheckForCSI(HTParentAnchor *anchor, char **url); - extern void LYDoCSI(char *url, const char *comment, char **csi); - extern BOOLEAN LYCommentHacks(HTParentAnchor *anchor, const char *comment); - -/* - -Find a Tag by Name - - Returns a pointer to the tag within the DTD. - - */ - extern HTTag *SGMLFindTag(const SGML_dtd * dtd, - const char *string); - -/* - * Return the current offset within the file that SGML is parsing - */ - extern int SGML_offset(void); - -/* - -Create an SGML parser - - */ -/* - * On entry, - * dtd must point to a DTD structure as defined above - * callbacks must point to user routines. - * callData is returned in callbacks transparently. - * On exit, - * The default tag starter has been processed. - */ - extern HTStream *SGML_new(const SGML_dtd * dtd, - HTParentAnchor *anchor, - HTStructured * target); - - extern const HTStreamClass SGMLParser; - -#ifdef __cplusplus -} -#endif -#endif /* SGML_H */ |