diff options
Diffstat (limited to 'WWW/Library/Implementation/HTMLDTD.c')
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 278 |
1 files changed, 143 insertions, 135 deletions
diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index 7eb1d86b..cef1eaef 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -135,9 +135,8 @@ static CONST char* entities[] = { /* Extra Entity Names ** ------------------ ** -** This table contains Unicodes in addition to the Names. +** This table contains Unicodes in addition to the Names. - kw ** -** Just an idea how it could be done. -kw * * I think in the future the whole entities[] thing above could migrate * to this kind of structure. The structured streams to which @@ -160,137 +159,137 @@ static CONST char* entities[] = { - lrm, rlm, zwnj and zwj */ static CONST UC_entity_info extra_entities[] = { - {"Aacute", 0x00c1}, /* A with acute */ - {"Abreve", 0x0102}, /* A with breve */ - {"Acirc", 0x00c2}, /* A with circumflex */ - {"Aogon", 0x0104}, /* A with ogonek */ - {"Auml", 0x00c4}, /* A with diaeresis */ - {"Cacute", 0x0106}, /* C with acute */ - {"Ccaron", 0x010c}, /* C with caron */ - {"Ccedil", 0x00c7}, /* C with cedilla */ - {"Dcaron", 0x010e}, /* D with caron */ - {"Dstrok", 0x0110}, /* D with stroke */ - {"Eacute", 0x00c9}, /* E with acute */ - {"Ecaron", 0x011a}, /* E with caron */ - {"Eogon", 0x0118}, /* E with ogonek */ - {"Euml", 0x00cb}, /* E with diaeresis */ - {"Iacute", 0x00cd}, /* I with acute */ - {"Icirc", 0x00ce}, /* I with circumflex */ - {"Lacute", 0x0139}, /* L with acute */ - {"Lcaron", 0x013d}, /* L with caron */ - {"Lstrok", 0x0141}, /* L with stroke */ - {"Nacute", 0x0143}, /* N with acute */ - {"Ncaron", 0x0147}, /* N with caron */ - {"Oacute", 0x00d3}, /* O with acute */ - {"Ocirc", 0x00d4}, /* O with circumflex */ - {"Odblac", 0x0150}, /* O with double acute */ - {"Ouml", 0x00d6}, /* O with diaeresis */ - {"Racute", 0x0154}, /* R with acute */ - {"Rcaron", 0x0158}, /* R with caron */ - {"Sacute", 0x015a}, /* S with acute */ - {"Scaron", 0x0160}, /* S with caron */ - {"Scedil", 0x015e}, /* S with cedilla */ - {"Tcaron", 0x0164}, /* T with caron */ - {"Tcedil", 0x0162}, /* T with cedilla */ - {"Uacute", 0x00da}, /* U with acute */ - {"Udblac", 0x0170}, /* U with double acute */ - {"Uring", 0x016e}, /* U with ring above */ - {"Uuml", 0x00dc}, /* U with diaeresis */ - {"Yacute", 0x00dd}, /* Y with acute */ - {"Zacute", 0x0179}, /* Z with acute */ - {"Zcaron", 0x017d}, /* Z with caron */ - {"Zdot", 0x017b}, /* Z with dot above */ - {"aacute", 0x00e1}, /* a with acute */ - {"abreve", 0x0103}, /* a with breve */ - {"acirc", 0x00e2}, /* a with circumflex */ - {"acute", 0x00b4}, /* acuteaccent */ - {"amp", 0x0026}, /* ampersand */ - {"aogon", 0x0105}, /* a with ogonek */ - {"apos", 0x0027}, /* apostrophe */ - {"ast", 0x002a}, /* asterisk */ - {"auml", 0x00e4}, /* a with diaeresis */ - {"breve", 0x02d8}, /* breve */ - {"bsol", 0x005c}, /* reversesolidus */ - {"cacute", 0x0107}, /* c with acute */ - {"caron", 0x02c7}, /* caron */ - {"ccaron", 0x010d}, /* c with caron */ - {"ccedil", 0x00e7}, /* c with cedilla */ - {"cedil", 0x00b8}, /* cedilla */ - {"circ", 0x005e}, /* circumflexaccent */ - {"colon", 0x003a}, /* colon */ - {"comma", 0x002c}, /* comma */ - {"commat", 0x0040}, /* commercialat */ - {"curren", 0x00a4}, /* currencysign */ - {"dblac", 0x02dd}, /* doubleacuteaccent */ - {"dcaron", 0x010f}, /* d with caron */ - {"deg", 0x00b0}, /* degreesign */ - {"divide", 0x00f7}, /* divisionsign */ - {"dollar", 0x0024}, /* dollarsign */ - {"dot", 0x02d9}, /* dotabove */ - {"dstrok", 0x0111}, /* d with stroke */ - {"eacute", 0x00e9}, /* e with acute */ - {"ecaron", 0x011b}, /* e with caron */ - {"eogon", 0x0119}, /* e with ogonek */ - {"equals", 0x003d}, /* equalssign */ - {"euml", 0x00eb}, /* e with diaeresis */ - {"excl", 0x0021}, /* exclamationmark */ - {"grave", 0x0060}, /* graveaccent */ - {"gt", 0x003e}, /* greater-thansign */ - {"hyphen", 0x002d}, /* hyphen-minus */ - {"iacute", 0x00ed}, /* i with acute */ - {"icirc", 0x00ee}, /* i with circumflex */ - {"lacute", 0x013a}, /* l with acute */ - {"lcaron", 0x013e}, /* l with caron */ - {"lcub", 0x007b}, /* leftcurlybracket */ - {"lowbar", 0x005f}, /* lowline */ - {"lpar", 0x0028}, /* leftparenthesis */ - {"lrm", 8206}, /* left-to-right mark */ - {"lsqb", 0x005b}, /* leftsquarebracket */ - {"lstrok", 0x0142}, /* l with stroke */ - {"lt", 0x003c}, /* less-thansign */ - {"nacute", 0x0144}, /* n with acute */ - {"nbsp", 0x00a0}, /* no-breakspace */ - {"ncaron", 0x0148}, /* n with caron */ - {"num", 0x0023}, /* numbersign */ - {"oacute", 0x00f3}, /* o with acute */ - {"ocirc", 0x00f4}, /* o with circumflex */ - {"odblac", 0x0151}, /* o with double acute */ - {"ogon", 0x02db}, /* ogonek */ - {"ouml", 0x00f6}, /* o with diaeresis */ - {"percnt", 0x0025}, /* percentsign */ - {"period", 0x002e}, /* fullstop */ - {"plus", 0x002b}, /* plussign */ - {"quest", 0x003f}, /* questionmark */ - {"quot", 0x0022}, /* quotationmark */ - {"racute", 0x0155}, /* r with acute */ - {"rcaron", 0x0159}, /* r with caron */ - {"rcub", 0x007d}, /* rightcurlybracket */ - {"rlm", 8207}, /* right-to-left mark */ - {"rpar", 0x0029}, /* rightparenthesis */ - {"rsqb", 0x005d}, /* rightsquarebracket */ - {"sacute", 0x015b}, /* s with acute */ - {"scaron", 0x0161}, /* s with caron */ - {"scedil", 0x015f}, /* s with cedilla */ - {"sect", 0x00a7}, /* sectionsign */ - {"semi", 0x003b}, /* semicolon */ - {"shy", 0x00ad}, /* softhyphen */ - {"sol", 0x002f}, /* solidus */ - {"tcaron", 0x0165}, /* t with caron */ - {"tcedil", 0x0163}, /* t with cedilla */ - {"tilde", 0x007e}, /* tilde */ - {"times", 0x00d7}, /* multiplicationsign */ - {"uacute", 0x00fa}, /* u with acute */ - {"udblac", 0x0171}, /* u with double acute */ - {"uml", 0x00a8}, /* diaeresis */ - {"uring", 0x016f}, /* u with ring above */ - {"uuml", 0x00fc}, /* u with diaeresis */ - {"verbar", 0x007c}, /* verticalline */ - {"yacute", 0x00fd}, /* y with acute */ - {"zacute", 0x017a}, /* z with acute */ - {"zcaron", 0x017e}, /* z with caron */ - {"zdot", 0x017c}, /* z with dot above */ - {"zwj", 8205}, /* zero width joiner */ - {"zwnj", 8204}, /* zero width non-joiner */ + {"Aacute", 0x00c1}, /* A with acute */ + {"Abreve", 0x0102}, /* A with breve */ + {"Acirc", 0x00c2}, /* A with circumflex */ + {"Aogon", 0x0104}, /* A with ogonek */ + {"Auml", 0x00c4}, /* A with diaeresis */ + {"Cacute", 0x0106}, /* C with acute */ + {"Ccaron", 0x010c}, /* C with caron */ + {"Ccedil", 0x00c7}, /* C with cedilla */ + {"Dcaron", 0x010e}, /* D with caron */ + {"Dstrok", 0x0110}, /* D with stroke */ + {"Eacute", 0x00c9}, /* E with acute */ + {"Ecaron", 0x011a}, /* E with caron */ + {"Eogon", 0x0118}, /* E with ogonek */ + {"Euml", 0x00cb}, /* E with diaeresis */ + {"Iacute", 0x00cd}, /* I with acute */ + {"Icirc", 0x00ce}, /* I with circumflex */ + {"Lacute", 0x0139}, /* L with acute */ + {"Lcaron", 0x013d}, /* L with caron */ + {"Lstrok", 0x0141}, /* L with stroke */ + {"Nacute", 0x0143}, /* N with acute */ + {"Ncaron", 0x0147}, /* N with caron */ + {"Oacute", 0x00d3}, /* O with acute */ + {"Ocirc", 0x00d4}, /* O with circumflex */ + {"Odblac", 0x0150}, /* O with double acute */ + {"Ouml", 0x00d6}, /* O with diaeresis */ + {"Racute", 0x0154}, /* R with acute */ + {"Rcaron", 0x0158}, /* R with caron */ + {"Sacute", 0x015a}, /* S with acute */ + {"Scaron", 0x0160}, /* S with caron */ + {"Scedil", 0x015e}, /* S with cedilla */ + {"Tcaron", 0x0164}, /* T with caron */ + {"Tcedil", 0x0162}, /* T with cedilla */ + {"Uacute", 0x00da}, /* U with acute */ + {"Udblac", 0x0170}, /* U with double acute */ + {"Uring", 0x016e}, /* U with ring above */ + {"Uuml", 0x00dc}, /* U with diaeresis */ + {"Yacute", 0x00dd}, /* Y with acute */ + {"Zacute", 0x0179}, /* Z with acute */ + {"Zcaron", 0x017d}, /* Z with caron */ + {"Zdot", 0x017b}, /* Z with dot above */ + {"aacute", 0x00e1}, /* a with acute */ + {"abreve", 0x0103}, /* a with breve */ + {"acirc", 0x00e2}, /* a with circumflex */ + {"acute", 0x00b4}, /* acuteaccent */ + {"amp", 0x0026}, /* ampersand */ + {"aogon", 0x0105}, /* a with ogonek */ + {"apos", 0x0027}, /* apostrophe */ + {"ast", 0x002a}, /* asterisk */ + {"auml", 0x00e4}, /* a with diaeresis */ + {"breve", 0x02d8}, /* breve */ + {"bsol", 0x005c}, /* reversesolidus */ + {"cacute", 0x0107}, /* c with acute */ + {"caron", 0x02c7}, /* caron */ + {"ccaron", 0x010d}, /* c with caron */ + {"ccedil", 0x00e7}, /* c with cedilla */ + {"cedil", 0x00b8}, /* cedilla */ + {"circ", 0x005e}, /* circumflexaccent */ + {"colon", 0x003a}, /* colon */ + {"comma", 0x002c}, /* comma */ + {"commat", 0x0040}, /* commercialat */ + {"curren", 0x00a4}, /* currencysign */ + {"dblac", 0x02dd}, /* doubleacuteaccent */ + {"dcaron", 0x010f}, /* d with caron */ + {"deg", 0x00b0}, /* degreesign */ + {"divide", 0x00f7}, /* divisionsign */ + {"dollar", 0x0024}, /* dollarsign */ + {"dot", 0x02d9}, /* dotabove */ + {"dstrok", 0x0111}, /* d with stroke */ + {"eacute", 0x00e9}, /* e with acute */ + {"ecaron", 0x011b}, /* e with caron */ + {"eogon", 0x0119}, /* e with ogonek */ + {"equals", 0x003d}, /* equalssign */ + {"euml", 0x00eb}, /* e with diaeresis */ + {"excl", 0x0021}, /* exclamationmark */ + {"grave", 0x0060}, /* graveaccent */ + {"gt", 0x003e}, /* greater-thansign */ + {"hyphen", 0x002d}, /* hyphen-minus */ + {"iacute", 0x00ed}, /* i with acute */ + {"icirc", 0x00ee}, /* i with circumflex */ + {"lacute", 0x013a}, /* l with acute */ + {"lcaron", 0x013e}, /* l with caron */ + {"lcub", 0x007b}, /* leftcurlybracket */ + {"lowbar", 0x005f}, /* lowline */ + {"lpar", 0x0028}, /* leftparenthesis */ + {"lrm", 8206}, /* left-to-right mark */ + {"lsqb", 0x005b}, /* leftsquarebracket */ + {"lstrok", 0x0142}, /* l with stroke */ + {"lt", 0x003c}, /* less-thansign */ + {"nacute", 0x0144}, /* n with acute */ + {"nbsp", 0x00a0}, /* no-breakspace */ + {"ncaron", 0x0148}, /* n with caron */ + {"num", 0x0023}, /* numbersign */ + {"oacute", 0x00f3}, /* o with acute */ + {"ocirc", 0x00f4}, /* o with circumflex */ + {"odblac", 0x0151}, /* o with double acute */ + {"ogon", 0x02db}, /* ogonek */ + {"ouml", 0x00f6}, /* o with diaeresis */ + {"percnt", 0x0025}, /* percentsign */ + {"period", 0x002e}, /* fullstop */ + {"plus", 0x002b}, /* plussign */ + {"quest", 0x003f}, /* questionmark */ + {"quot", 0x0022}, /* quotationmark */ + {"racute", 0x0155}, /* r with acute */ + {"rcaron", 0x0159}, /* r with caron */ + {"rcub", 0x007d}, /* rightcurlybracket */ + {"rlm", 8207}, /* right-to-left mark */ + {"rpar", 0x0029}, /* rightparenthesis */ + {"rsqb", 0x005d}, /* rightsquarebracket */ + {"sacute", 0x015b}, /* s with acute */ + {"scaron", 0x0161}, /* s with caron */ + {"scedil", 0x015f}, /* s with cedilla */ + {"sect", 0x00a7}, /* sectionsign */ + {"semi", 0x003b}, /* semicolon */ + {"shy", 0x00ad}, /* softhyphen */ + {"sol", 0x002f}, /* solidus */ + {"tcaron", 0x0165}, /* t with caron */ + {"tcedil", 0x0163}, /* t with cedilla */ + {"tilde", 0x007e}, /* tilde */ + {"times", 0x00d7}, /* multiplicationsign */ + {"uacute", 0x00fa}, /* u with acute */ + {"udblac", 0x0171}, /* u with double acute */ + {"uml", 0x00a8}, /* diaeresis */ + {"uring", 0x016f}, /* u with ring above */ + {"uuml", 0x00fc}, /* u with diaeresis */ + {"verbar", 0x007c}, /* verticalline */ + {"yacute", 0x00fd}, /* y with acute */ + {"zacute", 0x017a}, /* z with acute */ + {"zcaron", 0x017e}, /* z with caron */ + {"zdot", 0x017c}, /* z with dot above */ + {"zwj", 8205}, /* zero width joiner */ + {"zwnj", 8204}, /* zero width non-joiner */ }; #endif /* EXP_CHARTRANS */ @@ -608,6 +607,7 @@ static attr font_attr[] = { /* FONT attributes */ }; static attr form_attr[] = { /* FORM attributes */ + { "ACCEPT-CHARSET"}, /* HTML 4.0 draft - kw */ { "ACTION"}, { "CLASS" }, { "CLEAR" }, @@ -738,6 +738,7 @@ static attr img_attr[] = { /* IMG attributes */ static attr input_attr[] = { /* INPUT attributes */ { "ACCEPT" }, + { "ACCEPT-CHARSET" }, /* RFC 2070 HTML i18n - kw */ { "ALIGN" }, { "ALT" }, { "CHECKED" }, @@ -822,6 +823,7 @@ static attr legend_attr[] = { /* LEGEND attributes */ }; static attr link_attr[] = { /* LINK attributes */ + { "CHARSET" }, /* RFC 2070 HTML i18n -- hint for UA -- - kw */ { "CLASS" }, { "HREF" }, { "ID" }, @@ -1126,6 +1128,7 @@ static attr td_attr[] = { /* TD and TH attributes */ }; static attr textarea_attr[] = { /* TEXTAREA attributes */ + { "ACCEPT-CHARSET" }, /* RFC 2070 HTML i18n - kw */ { "ALIGN" }, { "CLASS" }, { "CLEAR" }, @@ -1471,6 +1474,8 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "XMP" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL }, */ #define T_XMP 0x0800, 0x00000,0x00000,0x367E0,0x36FFF,0x0875F,0x00001 +#define T__UNREC_ 0x0000, 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000 + /* Elements ** -------- ** @@ -1762,6 +1767,9 @@ PUBLIC void HTSwitchDTD ARGS1( memcpy(tags, tags_old, HTML_ELEMENTS * sizeof(HTTag)); } +PUBLIC CONST HTTag HTTag_unrecognized = + { NULL, NULL, 0, SGML_EMPTY,T__UNREC_}; + /* ** Utility Routine: Useful for people building HTML objects. */ @@ -1799,7 +1807,7 @@ PUBLIC void HTStartAnchor ARGS3( value[HTML_A_HREF] = (CONST char *)href; } - (*obj->isa->start_element)(obj, HTML_A, present, value, 0); + (*obj->isa->start_element)(obj, HTML_A, present, value, -1, 0); } PUBLIC void HTStartIsIndex ARGS3( @@ -1823,5 +1831,5 @@ PUBLIC void HTStartIsIndex ARGS3( value[HTML_ISINDEX_HREF] = (CONST char *)href; } - (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, 0); + (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, -1, 0); } |