/*
 * about summary refs log blame commit diff stats
 * path: root/src/HTML.h
 * blob: be2e98f0058e9950d642aa04eee11d996ac561e1 (plain) (tree)
 */























































































































































































































































































                                                                                               
/*
 * $LynxId: HTML.h,v 1.33 2011/05/19 09:57:53 tom Exp $
 *
 *					HTML to rich text converter for libwww
 *
 *			THE HTML TO RTF OBJECT CONVERTER
 *
 *  This interprets the HTML semantics.
 */
#ifndef HTML_H
#define HTML_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif /* HTUTILS_H */

#include <UCDefs.h>
#include <UCAux.h>
#include <HTAnchor.h>
#include <HTMLDTD.h>

#ifdef __cplusplus
extern "C" {
#endif
/*
 * Charset-translation shorthands.
 *
 * ATTR_CS_IN reads the tag_charset field through a variable named `me`,
 * which must be in scope wherever these macros are used.  Every macro below
 * forwards its arguments, in order, to LYUCTranslateHTMLString() or
 * LYUCFullyTranslateString() and always passes st_HTML last.  The numeric
 * suffix (5, 6, 7) is the number of arguments the macro accepts; the
 * unsuffixed forms supply the charsets themselves and use current_char_set
 * as the destination.
 */
/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
#define ATTR_CS_IN (me->tag_charset)

/* LYUCTranslateHTMLString() wrappers. */
#define TRANSLATE_AND_UNESCAPE_ENTITIES(str, p, h) \
	LYUCTranslateHTMLString((str), \
				ATTR_CS_IN, current_char_set, \
				YES, (p), (h), st_HTML)
#define TRANSLATE_AND_UNESCAPE_ENTITIES5(str, from_cs, to_cs, p, h) \
	LYUCTranslateHTMLString((str), \
				(from_cs), (to_cs), \
				YES, (p), (h), st_HTML)
#define TRANSLATE_AND_UNESCAPE_ENTITIES6(str, from_cs, to_cs, specials, p, h) \
	LYUCTranslateHTMLString((str), \
				(from_cs), (to_cs), \
				(specials), (p), (h), st_HTML)

/* LYUCFullyTranslateString() wrappers. */
#define TRANSLATE_HTML(str, p, h) \
	LYUCFullyTranslateString((str), \
				 me->UCLYhndl, current_char_set, \
				 NO, YES, (p), (h), NO, st_HTML)
#define TRANSLATE_HTML5(str, from_cs, to_cs, p, h) \
	LYUCFullyTranslateString((str), \
				 (from_cs), (to_cs), \
				 NO, YES, (p), (h), NO, st_HTML)
#define TRANSLATE_HTML7(str, from_cs, to_cs, specials, p, h, back) \
	LYUCFullyTranslateString((str), \
				 (from_cs), (to_cs), \
				 NO, (specials), (p), (h), (back), st_HTML)
/*
 * For attribute strings that need some "standard" representation
 * (character encoding; historically Latin-1) -- URLs in particular,
 * including #fragments, and HTML NAME and ID values.
 *
 * Both macros hand ATTR_CS_IN to LYUCTranslateHTMLString() as the source
 * and as the destination charset, followed by NO, NO, YES; they differ only
 * in the final argument (st_URL versus st_HTML).
 */
#define TRANSLATE_AND_UNESCAPE_TO_STD(str) \
	LYUCTranslateHTMLString((str), \
				ATTR_CS_IN, ATTR_CS_IN, \
				NO, NO, YES, st_URL)
#define UNESCAPE_FIELDNAME_TO_STD(str) \
	LYUCTranslateHTMLString((str), \
				ATTR_CS_IN, ATTR_CS_IN, \
				NO, NO, YES, st_HTML)
    /*
     * HTStructuredClass object exported under the name HTMLPresentation.
     * This is a declaration only; the definition is not in this header.
     */
    extern const HTStructuredClass HTMLPresentation;

#ifdef Lynx_HTML_Handler
/*
 *	This section is semi-private to HTML.c and its helper modules. - FM
 *	--------------------------------------------------------------------
 */

    /*
     * One entry of the style stack held in struct _HTStructured (its
     * `stack` array and `sp` pointer): a style paired with a tag number.
     */
    typedef struct _stack_element {
	HTStyle *style;
	int tag_number;
    } stack_element;

/*		HTML Object
 *		-----------
 */
/*
 * Number of elements in the DivisionAlignments[] and stack[] arrays of
 * struct _HTStructured.
 */
#define MAX_NESTING 800		/* Should be checked by parser */

    /*
     * State carried by the HTML structured-stream object.  The macros and
     * semi-private functions in this header reach it through a pointer named
     * `me`.  The layout is only visible to translation units that define
     * Lynx_HTML_Handler before including this file.
     */
    struct _HTStructured {
	const HTStructuredClass *isa;
	HTParentAnchor *node_anchor;
	HText *text;

	HTStream *target;	/* Output stream */
	HTStreamClass targetClass;	/* Output routines */

	HTChildAnchor *CurrentA;	/* current HTML_A anchor */
	int CurrentANum;	/* current HTML_A number */
	char *base_href;	/* current HTML_BASE href */
	char *map_address;	/* current HTML_MAP address */

	HTChunk title;		/* Grow by 128 */
	HTChunk object;		/* Grow by 128 */
	/* NOTE(review): the object_* fields presumably track the OBJECT element being processed -- confirm in HTML.c */
	BOOL object_started;
	BOOL object_declare;
	BOOL object_shapes;
	BOOL object_ismap;
	char *object_usemap;
	char *object_id;
	char *object_title;
	char *object_data;
	char *object_type;
	char *object_classid;
	char *object_codebase;
	char *object_codetype;
	char *object_name;
	int objects_mixed_open, objects_figged_open;
	HTChunk option;		/* Grow by 128 */
	BOOL first_option;	/* First OPTION in SELECT? */
	char *LastOptionValue;
	BOOL LastOptionChecked;
	BOOL select_disabled;
	HTChunk textarea;	/* Grow by 128 */
	char *textarea_name;
	int textarea_name_cs;
	char *textarea_accept_cs;
	int textarea_cols;
	int textarea_rows;
	int textarea_disabled;
	int textarea_readonly;
	char *textarea_id;
	HTChunk math;		/* Grow by 128 */
	HTChunk style_block;	/* Grow by 128 */
	HTChunk script;		/* Grow by 128 */

	/*
	 *  Used for nested lists. - FM
	 *  NOTE(review): the two arrays have 12 entries each; presumably that
	 *  is the deepest list nesting tracked -- confirm where
	 *  List_Nesting_Level is used as an index.
	 */
	int List_Nesting_Level;	/* counter for list nesting level */
	int OL_Counter[12];	/* counter for ordered lists */
	char OL_Type[12];	/* types for ordered lists */
	int Last_OL_Count;	/* last count in ordered lists */
	char Last_OL_Type;	/* last type in ordered lists */

	int Division_Level;
	short DivisionAlignments[MAX_NESTING];
	int Underline_Level;
	int Quote_Level;

	BOOL UsePlainSpace;
	BOOL HiddenValue;
	int lastraw;

	const char *comment_start;	/* for literate programming */
	const char *comment_end;

	HTTag *current_tag;
	BOOL style_change;	/* tested by UPDATE_STYLE before it calls actually_set_style() */
	HTStyle *new_style;
	HTStyle *old_style;
	int current_default_alignment;
	BOOL in_word;		/* Have just had a non-white char */
	stack_element stack[MAX_NESTING];
	stack_element *sp;	/* Style stack pointer */
	BOOL stack_overrun;	/* Was MAX_NESTING exceeded? */
	int skip_stack;		/* flag to skip next style stack operation */

	/*
	 *  Track if we are in an anchor, paragraph, address, base, etc.
	 */
	BOOL inA;
	BOOL inAPPLET;
	BOOL inAPPLETwithP;
	BOOL inBadBASE;
	BOOL inBadHREF;
	BOOL inBadHTML;
	BOOL inBASE;
	BOOL inBoldA;
	BOOL inBoldH;
	BOOL inCAPTION;
	BOOL inCREDIT;
	BOOL inFIG;
	BOOL inFIGwithP;
	BOOL inFONT;
	BOOL inFORM;
	BOOL inLABEL;
	BOOL inP;
	BOOL inPRE;
	BOOL inSELECT;
	BOOL inTABLE;
	BOOL inTEXTAREA;
	BOOL inUnderline;

	BOOL needBoldH;

	char *xinclude;		/* if no include string address passed */
	/*
	 * UCI and UCLYhndl give the UCInfo and charset registered for the HTML
	 * parser in the node_anchor's UCStages structure.  It indicates what is
	 * fed to the HTML parser as the stream of character data (not necessarily
	 * tags and attributes).  It should currently always be set to be the same
	 * as UCI and UCLhndl for the HTEXT stage in the node_anchor's UCStages
	 * structure, since the HTML parser sends its input character data to the
	 * output without further charset translation.
	 */
	LYUCcharset *UCI;
	int UCLYhndl;
	/*
	 * inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML
	 * parser treats as the input charset.  It is normally set to the UCI and
	 * UCLhndl for the SGML parser in the node_anchor's UCStages structure
	 * (which may be a dummy, based on the MIME parser's UCI and UCLhndl in
	 * that structure, when we are handling a local file or non-http(s)
	 * gateway).  It could be changed temporarily by the HTML parser, for
	 * conversions of attribute strings, but should be reset once done.  - FM
	 */
	LYUCcharset *inUCI;
	int inUCLYhndl;
	/*
	 * outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML
	 * parser treats as the output charset.  It is normally set to its own UCI
	 * and UCLhndl.  It could be changed for conversions of attribute strings,
	 * but should be reset once done.  - FM
	 */
	LYUCcharset *outUCI;
	int outUCLYhndl;
	/*
	 * T holds the transformation rules for conversions of strings between the
	 * input and output charsets by the HTML parser.  - FM
	 */
	UCTransParams T;

	int tag_charset;	/* charset for attribute values etc.; read by ATTR_CS_IN */
    };

    /*
     * Helpers declared only when Lynx_HTML_Handler is defined; their
     * definitions are not in this header.
     * NOTE(review): behaviour is not visible here.  From the signatures,
     * LYstyles() presumably maps a style number to its HTStyle, LYBadHTML()
     * presumably reports a yes/no result for the given object, and
     * LYShowBadHTML() takes a message string -- confirm against the
     * implementations.
     */
    extern HTStyle *LYstyles(int style_number);
    extern BOOL LYBadHTML(HTStructured * me);
    extern void LYShowBadHTML(const char *s);

/*
 *	Semi-Private functions. - FM
 *
 *	Each takes the HTStructured object it operates on as `me`.  They are
 *	declared only when Lynx_HTML_Handler is defined.
 *	NOTE(review): in HTML_write() `l` is presumably the number of bytes
 *	at `s` -- confirm in the implementation.
 */
    extern void HTML_put_character(HTStructured * me, int c);
    extern void HTML_put_string(HTStructured * me, const char *s);
    extern void HTML_write(HTStructured * me, const char *s, int l);
    extern int HTML_put_entity(HTStructured * me, int entity_number);
    /* Called by the UPDATE_STYLE macro when me->style_change is set. */
    extern void actually_set_style(HTStructured * me);

/*	Style buffering avoids dummy paragraph begin/ends.
 *
 *	UPDATE_STYLE applies a pending style change: when me->style_change
 *	is set it calls actually_set_style(me).  A variable named `me` must be
 *	in scope at the point of use.
 *
 *	The expansion is wrapped in do { ... } while (0) so that it forms
 *	exactly one statement; write it as `UPDATE_STYLE;`.  A bare `if`
 *	expansion cannot be used as the unbraced body of an if/else: with the
 *	trailing semicolon it does not compile, and without it the following
 *	`else` silently attaches to the macro's own `if`.
*/
#define UPDATE_STYLE \
	do { \
	    if (me->style_change) { \
		actually_set_style(me); \
	    } \
	} while (0)
#endif				/* Lynx_HTML_Handler */

    /*
     * Declared outside the Lynx_HTML_Handler section, so every includer of
     * this header sees it.
     * NOTE(review): presumably lower-cases the string at `i` in place
     * (non-const pointer, no return value) -- confirm in the implementation.
     */
    extern void strtolower(char *i);

/*				P U B L I C
*/

/*
 *  HTConverter to present HTML
 *
 *  The four converter entry points below share one signature: an
 *  HTPresentation, an HTParentAnchor and a sink HTStream go in, and an
 *  HTStream comes back.
 *  NOTE(review): how the four converters differ from one another is not
 *  visible in this header -- see their implementations.
 */
    extern HTStream *HTMLToPlain(HTPresentation *pres,
				 HTParentAnchor *anchor,
				 HTStream *sink);

    extern HTStream *HTMLParsedPresent(HTPresentation *pres,
				       HTParentAnchor *anchor,
				       HTStream *sink);

    extern HTStream *HTMLToC(HTPresentation *pres,
			     HTParentAnchor *anchor,
			     HTStream *sink);

    extern HTStream *HTMLPresent(HTPresentation *pres,
				 HTParentAnchor *anchor,
				 HTStream *sink);

    /*
     * NOTE(review): presumably creates the HTStructured object for `anchor`
     * that emits `format_out` to `target` -- confirm in the implementation,
     * including who owns the returned object.
     */
    extern HTStructured *HTML_new(HTParentAnchor *anchor,
				  HTFormat format_out,
				  HTStream *target);

/*
 * Record error message as a hypertext object.
 *
 * The error message should be marked as an error so that it can be reloaded
 * later.  This implementation just throws up an error message and leaves the
 * document unloaded.
 *
 * On entry,
 *      sink    is a stream to the output device if any
 *      number  is the HTTP error number
 *      message is the human readable message.
 * On exit,
 *      a return code like HT_LOADED if object exists else < 0
 */
    extern int HTLoadError(HTStream *sink,
			   int number,
			   const char *message);

#ifdef __cplusplus
}
#endif
#endif				/* HTML_H */