about summary refs log tree commit diff stats
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/ALT88592.html171
-rw-r--r--test/ISO_LATIN1_test.html83
-rw-r--r--test/README.txt8
-rw-r--r--test/TestComment.html50
-rw-r--r--test/bad-html.html46
-rw-r--r--test/c1.html63
-rw-r--r--test/cp-1252.html178
-rw-r--r--test/cp-1252a.html183
-rw-r--r--test/iso-8859-1.html241
-rw-r--r--test/iso-8859-1a.html275
-rw-r--r--test/iso-8859-2.html174
-rw-r--r--test/iso-8859-2a.html208
-rw-r--r--test/koi8-r.html321
-rw-r--r--test/quickbrown.html103
-rw-r--r--test/raw8bit.html38
-rw-r--r--test/sgml.html1081
-rw-r--r--test/spaces.html37
-rw-r--r--test/special_urls.html22
-rw-r--r--test/tabtest.html39
-rw-r--r--test/tags.html219
-rw-r--r--test/test-styles.html106
-rw-r--r--test/unicode.html915
-rw-r--r--test/utf-8-demo.html216
23 files changed, 4777 insertions, 0 deletions
diff --git a/test/ALT88592.html b/test/ALT88592.html
new file mode 100644
index 00000000..1924f281
--- /dev/null
+++ b/test/ALT88592.html
@@ -0,0 +1,171 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table modified and enhanced for iso8859-2 - ALT test</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<!-- A BASE tag for the SRC attributes of dummy images.
+     They should be inaccessible so that the ALT text will be shown in graphical browsers.
+     Use file: to save network resources. -->
+<BASE HREF="file://localhost/this.path.intentionally.invalid/">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>iso8859-2 plus table - ALT test</H1> 
+
+<PRE>
+Description                               Code            Entity name   
+===================================       ============    ==============
+quotation mark                           <IMG SRC=X ALT=" &amp;#34;  --> &#34;     &amp;quot;   --> &quot;">
+ampersand                                <IMG SRC=X ALT=" &amp;#38;  --> &#38;     &amp;amp;    --> &amp;">
+less-than sign                           <IMG SRC=X ALT=" &amp;#60;  --> &#60;     &amp;lt;     --> &lt;">
+greater-than sign                        <IMG SRC=X ALT=" &amp;#62;  --> &#62;     &amp;gt;     --> &gt;">
+
+Description                          Char Code            Entity name   
+===================================  ==== ============    ==============
+non-breaking space                  <IMG SRC=X ALT="      &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;">
+capital A, ogonek                   <IMG SRC=X ALT=" ¡    &amp;#260; --> &#260;    &amp;Aogon;  --> &Aogon;">
+breve                              <IMG SRC=X ALT=" {¢}  {&amp;#728;}-->{&#728;}  {&amp;breve;} -->{&breve;}">
+capital L, stroke                   <IMG SRC=X ALT=" £    &amp;#321; --> &#321;    &amp;Lstrok; --> &Lstrok;">
+general currency sign               <IMG SRC=X ALT=" ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;">
+capital L, caron                    <IMG SRC=X ALT=" ¥    &amp;#317; --> &#317;    &amp;Lcaron; --> &Lcaron;">
+capital S, acute accent             <IMG SRC=X ALT=" ¦    &amp;#346; --> &#346;    &amp;Sacute; --> &Sacute;">
+section sign                        <IMG SRC=X ALT=" §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;">
+umlaut (dieresis)                   <IMG SRC=X ALT=" ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;">
+                                                         <IMG SRC=X ALT=" &amp;die;    --> &die;">
+capital S, caron                    <IMG SRC=X ALT=" ©    &amp;#352; --> &#352;    &amp;Scaron; --> &Scaron;">
+capital S, cedilla                  <IMG SRC=X ALT=" ª    &amp;#350; --> &#350;    &amp;Scedil; --> &Scedil;">
+capital T, caron                    <IMG SRC=X ALT=" «    &amp;#356; --> &#356;    &amp;Tcaron; --> &Tcaron;">
+capital Z, acute accent             <IMG SRC=X ALT=" ¬    &amp;#377; --> &#377;    &amp;Zacute; --> &Zacute;">
+soft hyphen                        <IMG SRC=X ALT=" [­]  [&amp;#173;]-->[&#173;]  [&amp;shy;]   -->[&shy;]">
+capital Z, caron                    <IMG SRC=X ALT=" ®    &amp;#381; --> &#381;    &amp;Zcaron; --> &Zcaron;">
+capital Z, dot above                <IMG SRC=X ALT=" ¯    &amp;#379; --> &#379;    &amp;Zdot;   --> &Zdot;">
+degree sign                         <IMG SRC=X ALT=" °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;">
+small a, ogonek                     <IMG SRC=X ALT=" ±    &amp;#261; --> &#261;    &amp;aogon;  --> &aogon;">
+ogonek                             <IMG SRC=X ALT=" {²}  {&amp;#731;}-->{&#731;}  {&amp;ogon;}  -->{&ogon;}">
+small l, stroke                     <IMG SRC=X ALT=" ³    &amp;#322; --> &#322;    &amp;lstrok; --> &lstrok;">
+acute accent                        <IMG SRC=X ALT=" ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;">
+small l, caron                      <IMG SRC=X ALT=" µ    &amp;#318; --> &#318;    &amp;lcaron; --> &lcaron;">
+small s, acute accent               <IMG SRC=X ALT=" ¶    &amp;#347; --> &#347;    &amp;sacute; --> &sacute;">
+caron                              <IMG SRC=X ALT=" {·}  {&amp;#711;}-->{&#711;}  {&amp;caron;} -->{&caron;}">
+cedilla                             <IMG SRC=X ALT=" ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;">
+small s, caron                      <IMG SRC=X ALT=" ¹    &amp;#353; --> &#353;    &amp;scaron; --> &scaron;">
+small s, cedilla                    <IMG SRC=X ALT=" º    &amp;#351; --> &#351;    &amp;scedil; --> &scedil;">
+small t, caron                      <IMG SRC=X ALT=" »    &amp;#357; --> &#357;    &amp;tcaron; --> &tcaron;">
+small z, acute accent               <IMG SRC=X ALT=" ¼    &amp;#378; --> &#378;    &amp;zacute; --> &zacute;">
+double acute accent                <IMG SRC=X ALT=" {½}  {&amp;#733;}-->{&#733;}  {&amp;dblac;} -->{&dblac;}">
+small z, caron                      <IMG SRC=X ALT=" ¾    &amp;#382; --> &#382;    &amp;zcaron; --> &zcaron;">
+small z, dot above                  <IMG SRC=X ALT=" ¿    &amp;#380; --> &#380;    &amp;zdot;   --> &zdot;  ">
+capital R, acute accent             <IMG SRC=X ALT=" À    &amp;#340; --> &#340;    &amp;Racute; --> &Racute;">
+capital A, acute accent             <IMG SRC=X ALT=" Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;">
+capital A, circumflex accent        <IMG SRC=X ALT=" Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;">
+capital A, breve                    <IMG SRC=X ALT=" Ã    &amp;#258; --> &#258;    &amp;Abreve; --> &Abreve;">
+capital A, dieresis or umlaut mark  <IMG SRC=X ALT=" Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;">
+capital L, acute accent             <IMG SRC=X ALT=" Å    &amp;#313; --> &#313;    &amp;Lacute; --> &Lacute;">
+capital C, acute accent             <IMG SRC=X ALT=" Æ    &amp;#262; --> &#262;    &amp;Cacute; --> &Cacute;">
+capital C, cedilla                  <IMG SRC=X ALT=" Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;">
+capital C, caron                    <IMG SRC=X ALT=" È    &amp;#268; --> &#268;    &amp;Ccaron; --> &Ccaron;">
+capital E, acute accent             <IMG SRC=X ALT=" É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;">
+capital E, ogonek                   <IMG SRC=X ALT=" Ê    &amp;#280; --> &#280;    &amp;Eogon;  --> &Eogon;">
+capital E, dieresis or umlaut mark  <IMG SRC=X ALT=" Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;">
+capital E, caron                    <IMG SRC=X ALT=" Ì    &amp;#282; --> &#282;    &amp;Ecaron; --> &Ecaron;">
+capital I, acute accent             <IMG SRC=X ALT=" Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;">
+capital I, circumflex accent        <IMG SRC=X ALT=" Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;">
+capital D, caron                    <IMG SRC=X ALT=" Ï    &amp;#270; --> &#270;    &amp;Dcaron; --> &Dcaron;">
+capital D, stroke                   <IMG SRC=X ALT=" Ð    &amp;#272; --> &#272;    &amp;Dstrok; --> &Dstrok;">
+capital Eth, Icelandic              <IMG SRC=X ALT=" N/A  &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;">
+capital N, acute accent             <IMG SRC=X ALT=" Ñ    &amp;#323; --> &#323;    &amp;Nacute; --> &Nacute;">
+capital N, caron                    <IMG SRC=X ALT=" Ò    &amp;#327; --> &#327;    &amp;Ncaron; --> &Ncaron;">
+capital O, acute accent             <IMG SRC=X ALT=" Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;">
+capital O, circumflex accent        <IMG SRC=X ALT=" Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;">
+capital O, double acute accent      <IMG SRC=X ALT=" Õ    &amp;#336; --> &#336;    &amp;Odblac; --> &Odblac;">
+capital O, dieresis or umlaut mark  <IMG SRC=X ALT=" Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;">
+multiply sign                       <IMG SRC=X ALT=" ×    &amp;#215; --> &#215;    &amp;times;  --> &times;">
+capital R, caron                    <IMG SRC=X ALT=" Ø    &amp;#344; --> &#344;    &amp;Rcaron; --> &Rcaron;">
+capital U, ring                     <IMG SRC=X ALT=" Ù    &amp;#366; --> &#366;    &amp;Uring;  --> &Uring;">
+capital U, acute accent             <IMG SRC=X ALT=" Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;">
+capital U, double acute accent      <IMG SRC=X ALT=" Û    &amp;#368; --> &#368;    &amp;Udblac; --> &Udblac;">
+capital U, dieresis or umlaut mark  <IMG SRC=X ALT=" Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;">
+capital Y, acute accent             <IMG SRC=X ALT=" Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;">
+capital T, cedilla                  <IMG SRC=X ALT=" Þ    &amp;#354; --> &#354;    &amp;Tcedil; --> &Tcedil;">
+small sharp s, German (sz ligature) <IMG SRC=X ALT=" ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;">
+small r, acute accent               <IMG SRC=X ALT=" à    &amp;#341; --> &#341;    &amp;racute; --> &racute;">
+small a, acute accent               <IMG SRC=X ALT=" á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;">
+small a, circumflex accent          <IMG SRC=X ALT=" â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;">
+small a, breve                      <IMG SRC=X ALT=" ã    &amp;#259; --> &#259;    &amp;abreve; --> &abreve;">
+small a, dieresis or umlaut mark    <IMG SRC=X ALT=" ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;">
+small l, acute accent               <IMG SRC=X ALT=" å    &amp;#314; --> &#314;    &amp;lacute; --> &lacute;">
+small c, acute accent               <IMG SRC=X ALT=" æ    &amp;#263; --> &#263;    &amp;cacute; --> &cacute;">
+small c, cedilla                    <IMG SRC=X ALT=" ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;">
+small c, caron                      <IMG SRC=X ALT=" è    &amp;#269; --> &#269;    &amp;ccaron; --> &ccaron;">
+small e, acute accent               <IMG SRC=X ALT=" é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;">
+small e, ogonek                     <IMG SRC=X ALT=" ê    &amp;#281; --> &#281;    &amp;eogon;  --> &eogon;">
+small e, dieresis or umlaut mark    <IMG SRC=X ALT=" ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;">
+small e, caron                      <IMG SRC=X ALT=" ì    &amp;#283; --> &#283;    &amp;ecaron; --> &ecaron;">
+small i, acute accent               <IMG SRC=X ALT=" í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;">
+small i, circumflex accent          <IMG SRC=X ALT=" î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;">
+small d, caron                      <IMG SRC=X ALT=" ï    &amp;#271; --> &#271;    &amp;dcaron; --> &dcaron;">
+small d, stroke                     <IMG SRC=X ALT=" ð    &amp;#273; --> &#273;    &amp;dstrok; --> &dstrok;">
+small eth, Icelandic                <IMG SRC=X ALT=" N/A  &amp;#240; --> &#240;    &amp;eth;    --> &eth;">
+small n, acute accent               <IMG SRC=X ALT=" ñ    &amp;#324; --> &#324;    &amp;nacute; --> &nacute;">
+small n, caron                      <IMG SRC=X ALT=" ò    &amp;#328; --> &#328;    &amp;ncaron; --> &ncaron;">
+small o, acute accent               <IMG SRC=X ALT=" ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;">
+small o, circumflex accent          <IMG SRC=X ALT=" ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;">
+small o, double acute accent        <IMG SRC=X ALT=" õ    &amp;#337; --> &#337;    &amp;odblac; --> &odblac;">
+small o, dieresis or umlaut mark    <IMG SRC=X ALT=" ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;">
+division sign                       <IMG SRC=X ALT=" ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;">
+small r, caron                      <IMG SRC=X ALT=" ø    &amp;#345; --> &#345;    &amp;rcaron; --> &rcaron;">
+small u, ring                       <IMG SRC=X ALT=" ù    &amp;#367; --> &#367;    &amp;uring;  --> &uring;">
+small u, acute accent               <IMG SRC=X ALT=" ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;">
+small u, double acute accent        <IMG SRC=X ALT=" û    &amp;#369; --> &#369;    &amp;udblac; --> &udblac;">
+small u, dieresis or umlaut mark    <IMG SRC=X ALT=" ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;">
+small y, acute accent               <IMG SRC=X ALT=" ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;">
+small t, cedilla                    <IMG SRC=X ALT=" þ    &amp;#355; --> &#355;    &amp;tcedil; --> &tcedil;">
+dot above                          <IMG SRC=X ALT=" {ÿ}  {&amp;#729;}-->{&#729;}  {&amp;dot;}   -->{&dot;}">
+
+Some other characters of interest    Char Code            Entity name   
+===================================  ==== ============    ==============
+capital AE diphthong (ligature)     <IMG SRC=X ALT=" N/A  &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;">
+small ae diphthong (ligature)       <IMG SRC=X ALT=" N/A  &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;">
+capital OE ligature                 <IMG SRC=X ALT=" N/A {&amp;#338;}-->{&#338;}  {&amp;OElig;} -->{&OElig;}">
+small oe ligature                   <IMG SRC=X ALT=" N/A {&amp;#339;}-->{&#339;}  {&amp;oelig;} -->{&oelig;}">
+copyright                           <IMG SRC=X ALT=" N/A  &amp;#169; --> &#169;    &amp;copy;   --> &copy;">
+registered trademark                <IMG SRC=X ALT=" N/A  &amp;#174; --> &#174;    &amp;reg;    --> &reg;">
+trademark sign                      <IMG SRC=X ALT=" N/A  &amp;#8482;--> &#8482;   &amp;trade;  --> &trade;">
+em space                            <IMG SRC=X ALT=" N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;]  -->[&emsp;]">
+en space                            <IMG SRC=X ALT=" N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;]  -->[&ensp;]">
+1/3-em space                        <IMG SRC=X ALT=" N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]">
+1/4-em space                        <IMG SRC=X ALT=" N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]">
+thin space                          <IMG SRC=X ALT=" N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]">
+hair space                          <IMG SRC=X ALT=" N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]">
+em dash                             <IMG SRC=X ALT=" N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]">
+en dash                             <IMG SRC=X ALT=" N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]">
+
+</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
+<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either 
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the 
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> 
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.  
+This file is mostly just an adaptation of his table 
+to the ISO-8859-2 character set.
+
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/ISO_LATIN1_test.html b/test/ISO_LATIN1_test.html
new file mode 100644
index 00000000..d767978e
--- /dev/null
+++ b/test/ISO_LATIN1_test.html
@@ -0,0 +1,83 @@
+<!DOCTYPE html public "-//IETF//DTD HTML 3.0//EN">
+<html>
+<head>
+<title>Test of minimal ISO LATIN1 character set</title>
+<link rev="made" href="mailto:lynx-dev@nongnu.org">
+</head>
+
+<body>
+<h1>minimal ISO LATIN1 text entities</h1>
+<ul>
+  <li>"&AElig;",	/* capital AE diphthong (ligature) */ 
+  <li>"&Aacute;",	/* capital A, acute accent */ 
+  <li>"&Acirc;",	/* capital A, circumflex accent */ 
+  <li>"&Agrave;",	/* capital A, grave accent */ 
+  <li>"&Aring;",	/* capital A, ring */ 
+  <li>"&Atilde;",	/* capital A, tilde */ 
+  <li>"&Auml;", 	/* capital A, dieresis or umlaut mark */ 
+  <li>"&Ccedil;",	/* capital C, cedilla */ 
+  <li>"&ETH;", 		/* capital Eth, Icelandic */ 
+  <li>"&Eacute;",	/* capital E, acute accent */ 
+  <li>"&Ecirc;",	/* capital E, circumflex accent */ 
+  <li>"&Egrave;",	/* capital E, grave accent */ 
+  <li>"&Euml;",		/* capital E, dieresis or umlaut mark */ 
+  <li>"&Iacute;",	/* capital I, acute accent */ 
+  <li>"&Icirc;",	/* capital I, circumflex accent */ 
+  <li>"&Igrave;",	/* capital I, grave accent */ 
+  <li>"&Iuml;",		/* capital I, dieresis or umlaut mark */ 
+  <li>"&Ntilde;",	/* capital N, tilde */ 
+  <li>"&Oacute;",	/* capital O, acute accent */ 
+  <li>"&Ocirc;",	/* capital O, circumflex accent */ 
+  <li>"&Ograve;",	/* capital O, grave accent */ 
+  <li>"&Oslash;",	/* capital O, slash */ 
+  <li>"&Otilde;",	/* capital O, tilde */ 
+  <li>"&Ouml;",		/* capital O, dieresis or umlaut mark */ 
+  <li>"&THORN;",	/* capital THORN, Icelandic */ 
+  <li>"&Uacute;",	/* capital U, acute accent */ 
+  <li>"&Ucirc;",	/* capital U, circumflex accent */ 
+  <li>"&Ugrave;",	/* capital U, grave accent */ 
+  <li>"&Uuml;",		/* capital U, dieresis or umlaut mark */ 
+  <li>"&Yacute;",	/* capital Y, acute accent */ 
+  <li>"&aacute;",	/* small a, acute accent */ 
+  <li>"&acirc;",	/* small a, circumflex accent */ 
+  <li>"&aelig;",	/* small ae diphthong (ligature) */ 
+  <li>"&agrave;",	/* small a, grave accent */ 
+  <li>"&amp;",		/* ampersand */ 
+  <li>"&aring;",	/* small a, ring */ 
+  <li>"&atilde;",	/* small a, tilde */ 
+  <li>"&auml;",		/* small a, dieresis or umlaut mark */ 
+  <li>"&ccedil;",	/* small c, cedilla */ 
+  <li>"&eacute;",	/* small e, acute accent */ 
+  <li>"&ecirc;",	/* small e, circumflex accent */ 
+  <li>"&egrave;",	/* small e, grave accent */ 
+  <li>"&emsp;",		/* emsp, em space - not collapsed */
+  <li>"&ensp;",		/* ensp, en space - not collapsed */
+  <li>"&eth;",		/* small eth, Icelandic */ 
+  <li>"&euml;",		/* small e, dieresis or umlaut mark */ 
+  <li>"&gt;",		/* greater than */ 
+  <li>"&iacute;",	/* small i, acute accent */ 
+  <li>"&icirc;",	/* small i, circumflex accent */ 
+  <li>"&igrave;",	/* small i, grave accent */ 
+  <li>"&iuml;",		/* small i, dieresis or umlaut mark */ 
+  <li>"&lt;",		/* less than */ 
+  <li>"&nbsp;",		/* nbsp, non breaking space */
+  <li>"&ntilde;",	/* small n, tilde */ 
+  <li>"&oacute;",	/* small o, acute accent */ 
+  <li>"&ocirc;",	/* small o, circumflex accent */ 
+  <li>"&ograve;",	/* small o, grave accent */ 
+  <li>"&oslash;",	/* small o, slash */ 
+  <li>"&otilde;",	/* small o, tilde */ 
+  <li>"&ouml;",		/* small o, dieresis or umlaut mark */ 
+  <li>"&quot;",		/* quote, '"' */
+  <li>"&szlig;",	/* small sharp s, German (sz ligature) */ 
+  <li>"&thorn;",	/* small thorn, Icelandic */ 
+  <li>"&uacute;",	/* small u, acute accent */ 
+  <li>"&ucirc;",	/* small u, circumflex accent */ 
+  <li>"&ugrave;",	/* small u, grave accent */ 
+  <li>"&uuml;",		/* small u, dieresis or umlaut mark */ 
+  <li>"&yacute;",	/* small y, acute accent */ 
+  <li>"&yuml;",		/* small y, dieresis or umlaut mark */ 
+</ul>
+
+</body>
+</html>
diff --git a/test/README.txt b/test/README.txt
new file mode 100644
index 00000000..b681e3f3
--- /dev/null
+++ b/test/README.txt
@@ -0,0 +1,8 @@
+ISO_LATIN1_test.html and iso-8859-1.html are for testing the translation of
+HTML entities with the character sets that are selectable via the 'o'ptions
+menu.
+
+TestComment.html and tabtest.html are for testing comment and TAB handling.
+
+Any other files in this directory do not represent a test suite.  They
+are used during program testing to track down odd and mysterious bugs.
diff --git a/test/TestComment.html b/test/TestComment.html
new file mode 100644
index 00000000..873169f2
--- /dev/null
+++ b/test/TestComment.html
@@ -0,0 +1,50 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN">
+<html>
+<head>
+<title>HTML Comment Parser Test</title>
+<link rev="made" href="mailto:pg@sweng.stortek.com">
+<base href="http://nyx10.cs.du.edu:8001/~pgilmart/TestComment.html">
+</head>
+
+<body>
+<P> Test of the HTML/SGML comment syntax, as given in the W3 HTML Spec:
+<a
+href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_3.html#SEC15">
+Comments</a>
+
+<P>See especially, the footnote:
+<a
+href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_foot.html#FOOT10"
+>(10)</a>
+
+<P>
+Co-vary the LYK_MINIMAL and LYK_HISTORICAL command key toggles (use the
+'k'eymap command to see their key bindings) to establish Valid, Minimal
+or Historical comment parsing, and toggle trace mode on (Ctrl-T), to see
+how comment parsing is affected.
+
+<P>Case 01 through Case 14 should appear
+as short separate paragraphs with the case numbers aligned vertically.
+Some noise characters may appear to the right as a byproduct of code present
+for error recovery, but there should be no noise before each case number.
+
+<P> Case <!-- trivial --> | 01 | Trivial
+<P> Case <!-- extra hyphens and spaces -- -- --  > | 02 | Hyphens and Spaces
+<P> Case <!-- extra < < < --> | 03 | Extra LT --> --> -->
+<P> Case <!-- balanced < < < > > > --> | 04 | Balanced
+<P> Case <!-- extra > -- --> > still in comment --> | 05 | Extra GT
+<P> Case <!-- stuff between -- and > -- still in comment --> | 06 | Stuff Inside
+<P> Case <!-- Extra <!-- -- Second Comment --> | 07 | Extra Open --> -->
+<P> Case <!-- New Line between -- 
+           -- Second Comment   --
+                           > | 08 | New line
+<P> Case <!---> degenerate --> | 09 | Degenerate <P> Case <!----> | 10 | Empty
+<P> Case <!-- perverse <!--> | 11 | Perverse --> --> -->
+<P> Case <!-- Comment -- -- and a half > this is still in comment -- > | 12 |  Multiple Comments --> --> -->
+<P> Case <!> | 13 | Zero Comments
+<P> Case <!-- < >
+< > Still in comment --> | 14 | Last
+
+<P>&lt;<STRONG>Tests completed!</STRONG>&gt;
+</body>
+</html>
diff --git a/test/bad-html.html b/test/bad-html.html
new file mode 100644
index 00000000..8c0b9dc1
--- /dev/null
+++ b/test/bad-html.html
@@ -0,0 +1,46 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Examples of "Bad HTML" per Lynx</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+</HEAD>
+
+<BODY> 
+<h2>Unterminated TEXTAREA</h2>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<textarea name="50cols" cols="50" rows=3>
+This is not empty.
+</textarea>
+<br>
+<textarea name="50percent" cols="50%" rows=3>
+This seems to have a button.
+<button>Button 1</button>
+</textarea>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+
+<h2>Unterminated SELECT</h2>
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</notselect>
+<br>
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</select>
+
+<h2>OPTION not within SELECT</h2>
+<option>third option</option>
+
+<h2>TEXTAREA ending without starting</h2>
+</textarea>
+
+</BODY> 
diff --git a/test/c1.html b/test/c1.html
new file mode 100644
index 00000000..6ec70aa5
--- /dev/null
+++ b/test/c1.html
@@ -0,0 +1,63 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML> 
+<HEAD> 
+<TITLE>Test of invalid NCRs 128-159</TITLE> 
+</HEAD> 
+<BODY><H2>Test of invalid NCRs 128-159</H2> 
+<P> 
+Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor), 
+generate invalid <DFN>Numerical Character References</DFN> for characters 
+commonly found in positions 128...159 (0x80...0x9f) in Windows fonts.  Although 
+these are valid codepoints for <em>windows-1252</em> (and other 
+windows-xxxx) charsets, valid NCRs always refer to the document character set 
+in the SGML sense, not to the character encoding scheme (or charset).  For HTML, 
+the SGML document character set is fixed, it is always a subset of Unicode 
+(or ISO 10646).  In Unicode and its iso-8859-1 subset, values 128...159 are 
+C1 control characters, they must not appear in HTML.  Valid NCRs for the 
+intended characters use Unicode values greater than 256. 
+<p> 
+Lynx tries to interpret some of the invalid codes, by assuming that they are 
+windows-1252 codepoints. 
+<PRE> 
+ 
+You may want to press '\' to view the source of this test. 
+ 
+<em>Code      invalid NCR    <!--    --> <tab id=c>valid NCR, description</em> 
+<em>        normal   in ALT  <a id=table></a>				</em> 
+                             
+0x80    &#x80;	<IMG SRC=X ALT="&#x80;"> <tab to=c>&#x20AC;	#EURO SIGN 
+0x81    &#x81;	<IMG SRC=X ALT="&#x81;"> <!--&#x0081;-->	#NOT USED 
+0x82    &#x82;	<IMG SRC=X ALT="&#x82;"> <tab to=c>&#x201a;	#SINGLE LOW-9 QUOTATION MARK 
+0x83    &#x83;	<IMG SRC=X ALT="&#x83;"> <tab to=c>&#x0192;	#LATIN SMALL LETTER F WITH HOOK 
+0x84    &#x84;	<IMG SRC=X ALT="&#x84;"> <tab to=c>&#x201e;	#DOUBLE LOW-9 QUOTATION MARK 
+0x85    &#x85;	<IMG SRC=X ALT="&#x85;"> <tab to=c>&#x2026;	#HORIZONTAL ELLIPSIS 
+0x86    &#x86;	<IMG SRC=X ALT="&#x86;"> <tab to=c>&#x2020;	#DAGGER 
+0x87    &#x87;	<IMG SRC=X ALT="&#x87;"> <tab to=c>&#x2021;	#DOUBLE DAGGER 
+0x88    &#x88;	<IMG SRC=X ALT="&#x88;"> <tab to=c>&#x02c6;	#MODIFIER LETTER CIRCUMFLEX ACCENT 
+0x89    &#x89;	<IMG SRC=X ALT="&#x89;"> <tab to=c>&#x2030;	#PER MILLE SIGN 
+0x8a    &#x8a;	<IMG SRC=X ALT="&#x8a;"> <tab to=c>&#x0160;	#LATIN CAPITAL LETTER S WITH CARON 
+0x8b    &#x8b;	<IMG SRC=X ALT="&#x8b;"> <tab to=c>&#x2039;	#SINGLE LEFT-POINTING ANGLE QUOTATION MARK 
+0x8c    &#x8c;	<IMG SRC=X ALT="&#x8c;"> <tab to=c>&#x0152;	#LATIN CAPITAL LIGATURE OE 
+0x8d    &#x8d;	<IMG SRC=X ALT="&#x8d;"> <!--&#x008d;-->	#NOT USED 
+0x8e    &#x8e;	<IMG SRC=X ALT="&#x8e;"> <tab to=c>&#x017d;	#LATIN CAPITAL LETTER Z WITH CARON 
+0x8f    &#x8f;	<IMG SRC=X ALT="&#x8f;"> <!--&#x008f;-->	#NOT USED 
+0x90    &#x90;	<IMG SRC=X ALT="&#x90;"> <!--&#x0090;-->	#NOT USED 
+0x91    &#x91;	<IMG SRC=X ALT="&#x91;"> <tab to=c>&#x2018;	#LEFT SINGLE QUOTATION MARK 
+0x92    &#x92;	<IMG SRC=X ALT="&#x92;"> <tab to=c>&#x2019;	#RIGHT SINGLE QUOTATION MARK 
+0x93    &#x93;	<IMG SRC=X ALT="&#x93;"> <tab to=c>&#x201c;	#LEFT DOUBLE QUOTATION MARK 
+0x94    &#x94;	<IMG SRC=X ALT="&#x94;"> <tab to=c>&#x201d;	#RIGHT DOUBLE QUOTATION MARK 
+0x95    &#x95;	<IMG SRC=X ALT="&#x95;"> <tab to=c>&#x2022;	#BULLET 
+0x96    &#x96;	<IMG SRC=X ALT="&#x96;"> <tab to=c>&#x2013;	#EN DASH 
+0x97    &#x97;	<IMG SRC=X ALT="&#x97;"> <tab to=c>&#x2014;	#EM DASH 
+0x98    &#x98;	<IMG SRC=X ALT="&#x98;"> <tab to=c>&#x02dc;	#SMALL TILDE 
+0x99    &#x99;	<IMG SRC=X ALT="&#x99;"> <tab to=c>&#x2122;	#TRADE MARK SIGN 
+0x9a    &#x9a;	<IMG SRC=X ALT="&#x9a;"> <tab to=c>&#x0161;	#LATIN SMALL LETTER S WITH CARON 
+0x9b    &#x9b;	<IMG SRC=X ALT="&#x9b;"> <tab to=c>&#x203a;	#SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 
+0x9c    &#x9c;	<IMG SRC=X ALT="&#x9c;"> <tab to=c>&#x0153;	#LATIN SMALL LIGATURE OE 
+0x9d    &#x9d;	<IMG SRC=X ALT="&#x9d;"> <!--&#x009d;-->	#NOT USED 
+0x9e    &#x9e;	<IMG SRC=X ALT="&#x9e;"> <tab to=c>&#x017e;	#LATIN SMALL LETTER Z WITH CARON 
+0x9f    &#x9f;	<IMG SRC=X ALT="&#x9f;"> <tab to=c>&#x0178;	#LATIN CAPITAL LETTER Y WITH DIAERESIS 
+ 
+</PRE> 
+</BODY> 
+</HTML> 
diff --git a/test/cp-1252.html b/test/cp-1252.html
new file mode 100644
index 00000000..d46f341a
--- /dev/null
+++ b/test/cp-1252.html
@@ -0,0 +1,178 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table for cp-1252</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html"  TITLE="iso-8859-2 ALT test">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>cp-1252 table</H1> 
+
+<PRE>
+Description                                 Code            Entity name   
+===================================         ============    ==============
+quotation mark                              &amp;#34;  --> &#34;     &amp;quot;   --> &quot;
+ampersand                                   &amp;#38;  --> &#38;     &amp;amp;    --> &amp;
+less-than sign                              &amp;#60;  --> &#60;     &amp;lt;     --> &lt;
+greater-than sign                           &amp;#62;  --> &#62;     &amp;gt;     --> &gt;
+
+Description                            Char Code            Entity name   
+===================================    ==== ============    ==============
+euro sign                              €    &amp;#128; --> &#128;
+single low-9 quotation mark            ‚    &amp;#130; --> &#130;
+latin small letter f with hook         ƒ    &amp;#131; --> &#131;
+double low-9 quotation mark            „    &amp;#132; --> &#132;
+horizontal ellipsis                    …    &amp;#133; --> &#133;
+dagger                                 †    &amp;#134; --> &#134;
+double dagger                          ‡    &amp;#135; --> &#135;
+modifier letter circumflex accent      ˆ    &amp;#136; --> &#136;
+per mille sign                         ‰    &amp;#137; --> &#137;
+latin capital letter s with caron      Š    &amp;#138; --> &#138;
+single left-pointing angle quote mark  ‹    &amp;#139; --> &#139;
+latin capital ligature oe              Œ    &amp;#140; --> &#140;
+latin capital letter z with caron      Ž    &amp;#142; --> &#142;
+
+left single quotation mark             ‘    &amp;#145; --> &#145;
+right single quotation mark            ’    &amp;#146; --> &#146;
+left double quotation mark             “    &amp;#147; --> &#147;
+right double quotation mark            ”    &amp;#148; --> &#148;
+bullet                                 •    &amp;#149; --> &#149;
+en dash                                –    &amp;#150; --> &#150;
+em dash                                —    &amp;#151; --> &#151;
+small tilde                            ˜    &amp;#152; --> &#152;
+trade mark sign                        ™    &amp;#153; --> &#153;
+latin small letter s with caron        š    &amp;#154; --> &#154;
+single right-pointing angle quote mark ›    &amp;#155; --> &#155;
+latin small ligature oe                œ    &amp;#156; --> &#156;
+latin small letter z with caron        ž    &amp;#158; --> &#158;
+latin capital letter y with diaeresis  Ÿ    &amp;#159; --> &#159;
+
+non-breaking space                          &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+inverted exclamation                   ¡    &amp;#161; --> &#161;    &amp;iexcl;  --> &iexcl;
+cent sign                              ¢    &amp;#162; --> &#162;    &amp;cent;   --> &cent;
+pound sterling                         £    &amp;#163; --> &#163;    &amp;pound;  --> &pound;
+general currency sign                  ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+yen sign                               ¥    &amp;#165; --> &#165;    &amp;yen;    --> &yen;
+broken vertical bar                    ¦    &amp;#166; --> &#166;    &amp;brvbar; --> &brvbar;
+section sign                           §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                      ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+copyright                              ©    &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+feminine ordinal                       ª    &amp;#170; --> &#170;    &amp;ordf;   --> &ordf;
+left angle quote, guillemotleft        «    &amp;#171; --> &#171;    &amp;laquo;  --> &laquo;
+not sign                               ¬    &amp;#172; --> &#172;    &amp;not;    --> &not;
+soft hyphen                            ­    &amp;#173; --> &#173;    &amp;shy;    --> &shy;
+registered trademark                   ®    &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+macron accent                          ¯    &amp;#175; --> &#175;    &amp;macr;   --> &macr;
+
+degree sign                            °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+plus or minus                          ±    &amp;#177; --> &#177;    &amp;plusmn; --> &plusmn;
+superscript two                        ²    &amp;#178; --> &#178;    &amp;sup2;   --> &sup2;
+superscript three                      ³    &amp;#179; --> &#179;    &amp;sup3;   --> &sup3;
+acute accent                           ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+micro sign                             µ    &amp;#181; --> &#181;    &amp;micro;  --> &micro;
+paragraph sign                         ¶    &amp;#182; --> &#182;    &amp;para;   --> &para;
+middle dot                             ·    &amp;#183; --> &#183;    &amp;middot; --> &middot;
+cedilla                                ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+superscript one                        ¹    &amp;#185; --> &#185;    &amp;sup1;   --> &sup1;
+masculine ordinal                      º    &amp;#186; --> &#186;    &amp;ordm;   --> &ordm;
+right angle quote, guillemotright      »    &amp;#187; --> &#187;    &amp;raquo;  --> &raquo;
+vulgar fraction one-quarter            ¼    &amp;#188; --> &#188;    &amp;frac14; --> &frac14;
+vulgar fraction one-half               ½    &amp;#189; --> &#189;    &amp;frac12; --> &frac12;
+vulgar fraction three-fourths          ¾    &amp;#190; --> &#190;    &amp;frac34; --> &frac34;
+inverted question mark                 ¿    &amp;#191; --> &#191;    &amp;iquest; --> &iquest;
+
+latin capital letter a with grave      À    &amp;#192; --> &#192;    &amp;Agrave; --> &Agrave;
+latin capital letter a with acute      Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+latin capital letter a with circumflex Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+latin capital letter a with tilde      Ã    &amp;#195; --> &#195;    &amp;Atilde; --> &Atilde;
+latin capital letter a with diaeresis  Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+latin capital letter a with ring above Å    &amp;#197; --> &#197;    &amp;Aring;  --> &Aring;
+latin capital letter ae                Æ    &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+latin capital letter c with cedilla    Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+latin capital letter e with grave      È    &amp;#200; --> &#200;    &amp;Egrave; --> &Egrave;
+latin capital letter e with acute      É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+latin capital letter e with circumflex Ê    &amp;#202; --> &#202;    &amp;Ecirc;  --> &Ecirc;
+latin capital letter e with diaeresis  Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+latin capital letter i with grave      Ì    &amp;#204; --> &#204;    &amp;Igrave; --> &Igrave;
+latin capital letter i with acute      Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+latin capital letter i with circumflex Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+latin capital letter i with diaeresis  Ï    &amp;#207; --> &#207;    &amp;Iuml;   --> &Iuml;
+
+latin capital letter eth               Ð    &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+latin capital letter n with tilde      Ñ    &amp;#209; --> &#209;    &amp;Ntilde; --> &Ntilde;
+latin capital letter o with grave      Ò    &amp;#210; --> &#210;    &amp;Ograve; --> &Ograve;
+latin capital letter o with acute      Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+latin capital letter o with circumflex Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+latin capital letter o with tilde      Õ    &amp;#213; --> &#213;    &amp;Otilde; --> &Otilde;
+latin capital letter o with diaeresis  Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiplication sign                    ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+latin capital letter o with stroke     Ø    &amp;#216; --> &#216;    &amp;Oslash; --> &Oslash;
+latin capital letter u with grave      Ù    &amp;#217; --> &#217;    &amp;Ugrave; --> &Ugrave;
+latin capital letter u with acute      Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+latin capital letter u with circumflex Û    &amp;#219; --> &#219;    &amp;Ucirc;  --> &Ucirc;
+latin capital letter u with diaeresis  Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+latin capital letter y with acute      Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+latin capital letter thorn             Þ    &amp;#222; --> &#222;    &amp;THORN;  --> &THORN;
+latin small letter sharp s             ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+
+latin small letter a with grave        à    &amp;#224; --> &#224;    &amp;agrave; --> &agrave;
+latin small letter a with acute        á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+latin small letter a with circumflex   â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+latin small letter a with tilde        ã    &amp;#227; --> &#227;    &amp;atilde; --> &atilde;
+latin small letter a with diaeresis    ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+latin small letter a with ring above   å    &amp;#229; --> &#229;    &amp;aring;  --> &aring;
+latin small letter ae                  æ    &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+latin small letter c with cedilla      ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+latin small letter e with grave        è    &amp;#232; --> &#232;    &amp;egrave; --> &egrave;
+latin small letter e with acute        é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+latin small letter e with circumflex   ê    &amp;#234; --> &#234;    &amp;ecirc;  --> &ecirc;
+latin small letter e with diaeresis    ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+latin small letter i with grave        ì    &amp;#236; --> &#236;    &amp;igrave; --> &igrave;
+latin small letter i with acute        í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+latin small letter i with circumflex   î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+latin small letter i with diaeresis    ï    &amp;#239; --> &#239;    &amp;iuml;   --> &iuml;
+
+latin small letter eth                 ð    &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+latin small letter n with tilde        ñ    &amp;#241; --> &#241;    &amp;ntilde; --> &ntilde;
+latin small letter o with grave        ò    &amp;#242; --> &#242;    &amp;ograve; --> &ograve;
+latin small letter o with acute        ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+latin small letter o with circumflex   ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+latin small letter o with tilde        õ    &amp;#245; --> &#245;    &amp;otilde; --> &otilde;
+latin small letter o with diaeresis    ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                          ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+latin small letter o with stroke       ø    &amp;#248; --> &#248;    &amp;oslash; --> &oslash;
+latin small letter u with grave        ù    &amp;#249; --> &#249;    &amp;ugrave; --> &ugrave;
+latin small letter u with acute        ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+latin small letter u with circumflex   û    &amp;#251; --> &#251;    &amp;ucirc;  --> &ucirc;
+latin small letter u with diaeresis    ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+latin small letter y with acute        ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+latin small letter thorn               þ    &amp;#254; --> &#254;    &amp;thorn;  --> &thorn;
+latin small letter y with diaeresis   {ÿ}  {&amp;#255;}-->{&#255;}  {&amp;yuml;}  -->{&yuml;}
+
+Some other characters of interest      Char Code            Entity name   
+===================================    ==== ============    ==============
+capital AE diphthong (ligature)        N/A  &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+small ae diphthong (ligature)          N/A  &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+capital OE ligature                    N/A {&amp;#338;}-->{&#338;}  {&amp;OElig;} -->{&OElig;}
+small oe ligature                      N/A {&amp;#339;}-->{&#339;}  {&amp;oelig;} -->{&oelig;}
+copyright                              N/A  &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+registered trademark                   N/A  &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+trademark sign                         N/A  &amp;#8482;--> &#8482;   &amp;trade;  --> &trade;
+em space                               N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;]  -->[&emsp;]
+en space                               N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;]  -->[&ensp;]
+1/3-em space                           N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space                           N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space                             N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space                             N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash                                N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash                                N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/test/cp-1252a.html b/test/cp-1252a.html
new file mode 100644
index 00000000..3a532218
--- /dev/null
+++ b/test/cp-1252a.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table for cp-1252</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html"  TITLE="iso-8859-2 ALT test">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>cp-1252 table</H1> 
+
+<PRE>
+Description                                 Code            Entity name   
+===================================         ============    ==============
+quotation mark                              &amp;#34;  --> &#34;     &amp;quot;   --> &quot;
+ampersand                                   &amp;#38;  --> &#38;     &amp;amp;    --> &amp;
+less-than sign                              &amp;#60;  --> &#60;     &amp;lt;     --> &lt;
+greater-than sign                           &amp;#62;  --> &#62;     &amp;gt;     --> &gt;
+
+Description                            Char Code            Entity name   
+===================================    ==== ============    ==============
+euro sign                              €    &amp;#128; --> &#128;
+undefined                                  &amp;#129; --> &#129;
+single low-9 quotation mark            ‚    &amp;#130; --> &#130;
+latin small letter f with hook         ƒ    &amp;#131; --> &#131;
+double low-9 quotation mark            „    &amp;#132; --> &#132;
+horizontal ellipsis                    …    &amp;#133; --> &#133;
+dagger                                 †    &amp;#134; --> &#134;
+double dagger                          ‡    &amp;#135; --> &#135;
+modifier letter circumflex accent      ˆ    &amp;#136; --> &#136;
+per mille sign                         ‰    &amp;#137; --> &#137;
+latin capital letter s with caron      Š    &amp;#138; --> &#138;
+single left-pointing angle quote mark  ‹    &amp;#139; --> &#139;
+latin capital ligature oe              Œ    &amp;#140; --> &#140;
+undefined                                  &amp;#141; --> &#141;
+latin capital letter z with caron      Ž    &amp;#142; --> &#142;
+undefined                                  &amp;#143; --> &#143;
+
+undefined                                  &amp;#144; --> &#144;
+left single quotation mark             ‘    &amp;#145; --> &#145;
+right single quotation mark            ’    &amp;#146; --> &#146;
+left double quotation mark             “    &amp;#147; --> &#147;
+right double quotation mark            ”    &amp;#148; --> &#148;
+bullet                                 •    &amp;#149; --> &#149;
+en dash                                –    &amp;#150; --> &#150;
+em dash                                —    &amp;#151; --> &#151;
+small tilde                            ˜    &amp;#152; --> &#152;
+trade mark sign                        ™    &amp;#153; --> &#153;
+latin small letter s with caron        š    &amp;#154; --> &#154;
+single right-pointing angle quote mark ›    &amp;#155; --> &#155;
+latin small ligature oe                œ    &amp;#156; --> &#156;
+undefined                                  &amp;#157; --> &#157;
+latin small letter z with caron        ž    &amp;#158; --> &#158;
+latin capital letter y with diaeresis  Ÿ    &amp;#159; --> &#159;
+
+non-breaking space                          &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+inverted exclamation                   ¡    &amp;#161; --> &#161;    &amp;iexcl;  --> &iexcl;
+cent sign                              ¢    &amp;#162; --> &#162;    &amp;cent;   --> &cent;
+pound sterling                         £    &amp;#163; --> &#163;    &amp;pound;  --> &pound;
+general currency sign                  ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+yen sign                               ¥    &amp;#165; --> &#165;    &amp;yen;    --> &yen;
+broken vertical bar                    ¦    &amp;#166; --> &#166;    &amp;brvbar; --> &brvbar;
+section sign                           §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                      ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+copyright                              ©    &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+feminine ordinal                       ª    &amp;#170; --> &#170;    &amp;ordf;   --> &ordf;
+left angle quote, guillemotleft        «    &amp;#171; --> &#171;    &amp;laquo;  --> &laquo;
+not sign                               ¬    &amp;#172; --> &#172;    &amp;not;    --> &not;
+soft hyphen                            ­    &amp;#173; --> &#173;    &amp;shy;    --> &shy;
+registered trademark                   ®    &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+macron accent                          ¯    &amp;#175; --> &#175;    &amp;macr;   --> &macr;
+
+degree sign                            °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+plus or minus                          ±    &amp;#177; --> &#177;    &amp;plusmn; --> &plusmn;
+superscript two                        ²    &amp;#178; --> &#178;    &amp;sup2;   --> &sup2;
+superscript three                      ³    &amp;#179; --> &#179;    &amp;sup3;   --> &sup3;
+acute accent                           ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+micro sign                             µ    &amp;#181; --> &#181;    &amp;micro;  --> &micro;
+paragraph sign                         ¶    &amp;#182; --> &#182;    &amp;para;   --> &para;
+middle dot                             ·    &amp;#183; --> &#183;    &amp;middot; --> &middot;
+cedilla                                ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+superscript one                        ¹    &amp;#185; --> &#185;    &amp;sup1;   --> &sup1;
+masculine ordinal                      º    &amp;#186; --> &#186;    &amp;ordm;   --> &ordm;
+right angle quote, guillemotright      »    &amp;#187; --> &#187;    &amp;raquo;  --> &raquo;
+vulgar fraction one-quarter            ¼    &amp;#188; --> &#188;    &amp;frac14; --> &frac14;
+vulgar fraction one-half               ½    &amp;#189; --> &#189;    &amp;frac12; --> &frac12;
+vulgar fraction three-fourths          ¾    &amp;#190; --> &#190;    &amp;frac34; --> &frac34;
+inverted question mark                 ¿    &amp;#191; --> &#191;    &amp;iquest; --> &iquest;
+
+latin capital letter a with grave      À    &amp;#192; --> &#192;    &amp;Agrave; --> &Agrave;
+latin capital letter a with acute      Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+latin capital letter a with circumflex Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+latin capital letter a with tilde      Ã    &amp;#195; --> &#195;    &amp;Atilde; --> &Atilde;
+latin capital letter a with diaeresis  Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+latin capital letter a with ring above Å    &amp;#197; --> &#197;    &amp;Aring;  --> &Aring;
+latin capital letter ae                Æ    &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+latin capital letter c with cedilla    Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+latin capital letter e with grave      È    &amp;#200; --> &#200;    &amp;Egrave; --> &Egrave;
+latin capital letter e with acute      É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+latin capital letter e with circumflex Ê    &amp;#202; --> &#202;    &amp;Ecirc;  --> &Ecirc;
+latin capital letter e with diaeresis  Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+latin capital letter i with grave      Ì    &amp;#204; --> &#204;    &amp;Igrave; --> &Igrave;
+latin capital letter i with acute      Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+latin capital letter i with circumflex Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+latin capital letter i with diaeresis  Ï    &amp;#207; --> &#207;    &amp;Iuml;   --> &Iuml;
+
+latin capital letter eth               Ð    &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+latin capital letter n with tilde      Ñ    &amp;#209; --> &#209;    &amp;Ntilde; --> &Ntilde;
+latin capital letter o with grave      Ò    &amp;#210; --> &#210;    &amp;Ograve; --> &Ograve;
+latin capital letter o with acute      Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+latin capital letter o with circumflex Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+latin capital letter o with tilde      Õ    &amp;#213; --> &#213;    &amp;Otilde; --> &Otilde;
+latin capital letter o with diaeresis  Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiplication sign                    ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+latin capital letter o with stroke     Ø    &amp;#216; --> &#216;    &amp;Oslash; --> &Oslash;
+latin capital letter u with grave      Ù    &amp;#217; --> &#217;    &amp;Ugrave; --> &Ugrave;
+latin capital letter u with acute      Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+latin capital letter u with circumflex Û    &amp;#219; --> &#219;    &amp;Ucirc;  --> &Ucirc;
+latin capital letter u with diaeresis  Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+latin capital letter y with acute      Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+latin capital letter thorn             Þ    &amp;#222; --> &#222;    &amp;THORN;  --> &THORN;
+latin small letter sharp s             ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+
+latin small letter a with grave        à    &amp;#224; --> &#224;    &amp;agrave; --> &agrave;
+latin small letter a with acute        á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+latin small letter a with circumflex   â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+latin small letter a with tilde        ã    &amp;#227; --> &#227;    &amp;atilde; --> &atilde;
+latin small letter a with diaeresis    ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+latin small letter a with ring above   å    &amp;#229; --> &#229;    &amp;aring;  --> &aring;
+latin small letter ae                  æ    &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+latin small letter c with cedilla      ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+latin small letter e with grave        è    &amp;#232; --> &#232;    &amp;egrave; --> &egrave;
+latin small letter e with acute        é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+latin small letter e with circumflex   ê    &amp;#234; --> &#234;    &amp;ecirc;  --> &ecirc;
+latin small letter e with diaeresis    ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+latin small letter i with grave        ì    &amp;#236; --> &#236;    &amp;igrave; --> &igrave;
+latin small letter i with acute        í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+latin small letter i with circumflex   î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+latin small letter i with diaeresis    ï    &amp;#239; --> &#239;    &amp;iuml;   --> &iuml;
+
+latin small letter eth                 ð    &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+latin small letter n with tilde        ñ    &amp;#241; --> &#241;    &amp;ntilde; --> &ntilde;
+latin small letter o with grave        ò    &amp;#242; --> &#242;    &amp;ograve; --> &ograve;
+latin small letter o with acute        ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+latin small letter o with circumflex   ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+latin small letter o with tilde        õ    &amp;#245; --> &#245;    &amp;otilde; --> &otilde;
+latin small letter o with diaeresis    ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                          ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+latin small letter o with stroke       ø    &amp;#248; --> &#248;    &amp;oslash; --> &oslash;
+latin small letter u with grave        ù    &amp;#249; --> &#249;    &amp;ugrave; --> &ugrave;
+latin small letter u with acute        ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+latin small letter u with circumflex   û    &amp;#251; --> &#251;    &amp;ucirc;  --> &ucirc;
+latin small letter u with diaeresis    ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+latin small letter y with acute        ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+latin small letter thorn               þ    &amp;#254; --> &#254;    &amp;thorn;  --> &thorn;
+latin small letter y with diaeresis   {ÿ}  {&amp;#255;}-->{&#255;}  {&amp;yuml;}  -->{&yuml;}
+
+Some other characters of interest      Char Code            Entity name   
+===================================    ==== ============    ==============
+capital AE diphthong (ligature)        N/A  &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+small ae diphthong (ligature)          N/A  &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+capital OE ligature                    N/A {&amp;#338;}-->{&#338;}  {&amp;OElig;} -->{&OElig;}
+small oe ligature                      N/A {&amp;#339;}-->{&#339;}  {&amp;oelig;} -->{&oelig;}
+copyright                              N/A  &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+registered trademark                   N/A  &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+trademark sign                         N/A  &amp;#8482;--> &#8482;   &amp;trade;  --> &trade;
+em space                               N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;]  -->[&emsp;]
+en space                               N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;]  -->[&ensp;]
+1/3-em space                           N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space                           N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space                             N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space                             N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash                                N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash                                N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-1.html b/test/iso-8859-1.html
new file mode 100644
index 00000000..b9349fa2
--- /dev/null
+++ b/test/iso-8859-1.html
@@ -0,0 +1,241 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
+<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
+<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>iso8859-1 table</H1> 
+
+<PRE>
+Description                               Code            Entity name   
+===================================       ============    ==============
+quotation mark                            &amp;#34;  --> &#34;    &amp;quot;   --> &quot;
+ampersand                                 &amp;#38;  --> &#38;    &amp;amp;    --> &amp;
+less-than sign                            &amp;#60;  --> &#60;    &amp;lt;     --> &lt;
+greater-than sign                         &amp;#62;  --> &#62;    &amp;gt;     --> &gt;
+
+Description                          Char Code            Entity name   
+===================================  ==== ============    ==============
+non-breaking space                        &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+inverted exclamation                 ¡    &amp;#161; --> &#161;    &amp;iexcl;  --> &iexcl;
+cent sign                            ¢    &amp;#162; --> &#162;    &amp;cent;   --> &cent;
+pound sterling                       £    &amp;#163; --> &#163;    &amp;pound;  --> &pound;
+general currency sign                ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+yen sign                             ¥    &amp;#165; --> &#165;    &amp;yen;    --> &yen;
+broken vertical bar                  ¦    &amp;#166; --> &#166;    &amp;brvbar; --> &brvbar;
+                                             Non-standard &amp;brkbar; --> &brkbar;
+section sign                         §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                    ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+                                             Non-standard &amp;die;    --> &die;
+copyright                            ©    &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+feminine ordinal                     ª    &amp;#170; --> &#170;    &amp;ordf;   --> &ordf;
+left angle quote, guillemotleft      «    &amp;#171; --> &#171;    &amp;laquo;  --> &laquo;
+not sign                             ¬    &amp;#172; --> &#172;    &amp;not;    --> &not;
+soft hyphen                          ­    &amp;#173; --> &#173;    &amp;shy;    --> &shy;
+registered trademark                 ®    &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+macron accent                        ¯    &amp;#175; --> &#175;    &amp;macr;   --> &macr;
+                                             Non-standard &amp;hibar;  --> &hibar;
+degree sign                          °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+plus or minus                        ±    &amp;#177; --> &#177;    &amp;plusmn; --> &plusmn;
+superscript two                      ²    &amp;#178; --> &#178;    &amp;sup2;   --> &sup2;
+superscript three                    ³    &amp;#179; --> &#179;    &amp;sup3;   --> &sup3;
+acute accent                         ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+micro sign                           µ    &amp;#181; --> &#181;    &amp;micro;  --> &micro;
+paragraph sign                       ¶    &amp;#182; --> &#182;    &amp;para;   --> &para;
+middle dot                           ·    &amp;#183; --> &#183;    &amp;middot; --> &middot;
+cedilla                              ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+superscript one                      ¹    &amp;#185; --> &#185;    &amp;sup1;   --> &sup1;
+masculine ordinal                    º    &amp;#186; --> &#186;    &amp;ordm;   --> &ordm;
+right angle quote, guillemotright    »    &amp;#187; --> &#187;    &amp;raquo;  --> &raquo;
+fraction one-fourth                  ¼    &amp;#188; --> &#188;    &amp;frac14; --> &frac14;
+fraction one-half                    ½    &amp;#189; --> &#189;    &amp;frac12; --> &frac12;
+fraction three-fourths               ¾    &amp;#190; --> &#190;    &amp;frac34; --> &frac34;
+inverted question mark               ¿    &amp;#191; --> &#191;    &amp;iquest; --> &iquest;
+capital A, grave accent              À    &amp;#192; --> &#192;    &amp;Agrave; --> &Agrave;
+capital A, acute accent              Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+capital A, circumflex accent         Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+capital A, tilde                     Ã    &amp;#195; --> &#195;    &amp;Atilde; --> &Atilde;
+capital A, dieresis or umlaut mark   Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+capital A, ring                      Å    &amp;#197; --> &#197;    &amp;Aring;  --> &Aring;
+capital AE diphthong (ligature)      Æ    &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+capital C, cedilla                   Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+capital E, grave accent              È    &amp;#200; --> &#200;    &amp;Egrave; --> &Egrave;
+capital E, acute accent              É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+capital E, circumflex accent         Ê    &amp;#202; --> &#202;    &amp;Ecirc;  --> &Ecirc;
+capital E, dieresis or umlaut mark   Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+capital I, grave accent              Ì    &amp;#204; --> &#204;    &amp;Igrave; --> &Igrave;
+capital I, acute accent              Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+capital I, circumflex accent         Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+capital I, dieresis or umlaut mark   Ï    &amp;#207; --> &#207;    &amp;Iuml;   --> &Iuml;
+capital Eth, Icelandic               Ð    &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+                                             Non-standard &amp;Dstrok; --> &Dstrok;
+capital N, tilde                     Ñ    &amp;#209; --> &#209;    &amp;Ntilde; --> &Ntilde;
+capital O, grave accent              Ò    &amp;#210; --> &#210;    &amp;Ograve; --> &Ograve;
+capital O, acute accent              Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+capital O, circumflex accent         Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+capital O, tilde                     Õ    &amp;#213; --> &#213;    &amp;Otilde; --> &Otilde;
+capital O, dieresis or umlaut mark   Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiply sign                        ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+capital O, slash                     Ø    &amp;#216; --> &#216;    &amp;Oslash; --> &Oslash;
+capital U, grave accent              Ù    &amp;#217; --> &#217;    &amp;Ugrave; --> &Ugrave;
+capital U, acute accent              Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+capital U, circumflex accent         Û    &amp;#219; --> &#219;    &amp;Ucirc;  --> &Ucirc;
+capital U, dieresis or umlaut mark   Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+capital Y, acute accent              Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+capital THORN, Icelandic             Þ    &amp;#222; --> &#222;    &amp;THORN;  --> &THORN;
+small sharp s, German (sz ligature)  ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+small a, grave accent                à    &amp;#224; --> &#224;    &amp;agrave; --> &agrave;
+small a, acute accent                á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+small a, circumflex accent           â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+small a, tilde                       ã    &amp;#227; --> &#227;    &amp;atilde; --> &atilde;
+small a, dieresis or umlaut mark     ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+small a, ring                        å    &amp;#229; --> &#229;    &amp;aring;  --> &aring;
+small ae diphthong (ligature)        æ    &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+small c, cedilla                     ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+small e, grave accent                è    &amp;#232; --> &#232;    &amp;egrave; --> &egrave;
+small e, acute accent                é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+small e, circumflex accent           ê    &amp;#234; --> &#234;    &amp;ecirc;  --> &ecirc;
+small e, dieresis or umlaut mark     ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+small i, grave accent                ì    &amp;#236; --> &#236;    &amp;igrave; --> &igrave;
+small i, acute accent                í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+small i, circumflex accent           î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+small i, dieresis or umlaut mark     ï    &amp;#239; --> &#239;    &amp;iuml;   --> &iuml;
+small eth, Icelandic                 ð    &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+small n, tilde                       ñ    &amp;#241; --> &#241;    &amp;ntilde; --> &ntilde;
+small o, grave accent                ò    &amp;#242; --> &#242;    &amp;ograve; --> &ograve;
+small o, acute accent                ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+small o, circumflex accent           ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+small o, tilde                       õ    &amp;#245; --> &#245;    &amp;otilde; --> &otilde;
+small o, dieresis or umlaut mark     ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                        ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+small o, slash                       ø    &amp;#248; --> &#248;    &amp;oslash; --> &oslash;
+small u, grave accent                ù    &amp;#249; --> &#249;    &amp;ugrave; --> &ugrave;
+small u, acute accent                ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+small u, circumflex accent           û    &amp;#251; --> &#251;    &amp;ucirc;  --> &ucirc;
+small u, dieresis or umlaut mark     ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+small y, acute accent                ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+small thorn, Icelandic               þ    &amp;#254; --> &#254;    &amp;thorn;  --> &thorn;
+small y, dieresis or umlaut mark     ÿ    &amp;#255; --> &#255;    &amp;yuml;   --> &yuml;
+</PRE>
+<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+
+<STRONG>How to read</STRONG> this table.  The columns are
+<DL COMPACT>
+<DT>1st:<DD>textual <EM>description</EM> of the character
+<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
+            byte</EM>
+<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
+            format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+            "what your browser does with it"
+<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
+            format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+            "what your browser does with it"
+</DL>
+
+So for example, if you see something like "<CODE>&amp;divide; -->
+&amp;divide;</CODE>" in the 4th column, this means your browser
+doesn't know about the entity name "divide" and just puts it
+literally.
+
+<P>
+<STRONG>This table</STRONG> grew out of an overview of the "ISO
+Latin-1 Character Set" related to the Hyper-G Text Format
+(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
+
+The entity names <CODE>&amp;brkbar;</CODE> and <CODE>&amp;Dstrok;</CODE>
+seem to be unique to HTF.
+
+The entity name <CODE>&amp;hibar;</CODE> has been supported by X Mosaic
+but seems to be replaced with <CODE>&amp;macr;</CODE>.
+
+The entity names <CODE>&amp;uml;</CODE> and <CODE>&amp;die;</CODE> should
+be equivalent.
+
+<P><STRONG>The standards stuff:</STRONG>
+The 
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
+includes a section on
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
+and an overview on the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
+(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
+<BR>
+
+Or have a look at the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
+as listed in a draft for the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
+<BR>
+
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
+of CERN's
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
+contains a
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
+(in PostScript format) of the proposed character entities for HTML+ and their
+corresponding character codes for Unicode and the Adobe Latin-1 &amp; Symbol
+character sets.
+<P>
+
+<STRONG>Please note</STRONG> that there is nothing wrong with using
+characters of ISO Latin-1 above 127: the normal transmission protocol
+for the WWW,
+<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
+uses the 8bit ISO latin-1 as default encoding.
+(Thanks to Roman 
+Czyborra for pointing this out!)
+<P>
+
+<STRONG>Other information:</STRONG>
+<UL>
+
+<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
+  <UL>
+   <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
+   <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
+  </UL>
+
+<LI>The excellent overview on the series of
+    <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
+    character sets</A> compiled by Roman Czyborra.
+
+<LI>Also have a look at Alan Flavell's page of
+    <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
+    to information about ISO8859</A>. It's written very well!
+
+<LI>Maybe also of interest to you is the
+    <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO 
+     8859-1 FAQ</A> by Michael Gschwind
+    (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
+    part of his page on
+    <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
+
+<LI>For users of X11R5 on SunOS systems: the
+    <A HREF="Compose.txt">table of the compose combinations</A>
+    (also coded <A HREF="Compose.html">with entities</A> where possible).
+     It's taken from the MIT X sources in
+     <CODE>server/ddx/sun/Compose.list</CODE>.
+
+<LI>Finally you could have a look at
+    <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345: 
+     Character Mnemonics &amp; Character Sets</A>
+     by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
+
+</UL>
+
+
+<HR>
+
+<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-1a.html b/test/iso-8859-1a.html
new file mode 100644
index 00000000..972329d3
--- /dev/null
+++ b/test/iso-8859-1a.html
@@ -0,0 +1,275 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
+<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
+<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>iso8859-1 table, with cp-1252</H1> 
+
+<PRE>
+Description                               Code            Entity name   
+===================================       ============    ==============
+quotation mark                            &amp;#34;  --> &#34;    &amp;quot;   --> &quot;
+ampersand                                 &amp;#38;  --> &#38;    &amp;amp;    --> &amp;
+less-than sign                            &amp;#60;  --> &#60;    &amp;lt;     --> &lt;
+greater-than sign                         &amp;#62;  --> &#62;    &amp;gt;     --> &gt;
+
+Description                          Char Code            Entity name   
+===================================  ==== ============    ==============
+euro sign                              €    &amp;#128; --> &#128;
+undefined                                  &amp;129; --> &#129;
+single low-9 quotation mark            ‚    &amp;#130; --> &#130;
+latin small letter f with hook         ƒ    &amp;#131; --> &#131;
+double low-9 quotation mark            „    &amp;#132; --> &#132;
+horizontal ellipsis                    …    &amp;#133; --> &#133;
+dagger                                 †    &amp;#134; --> &#134;
+double dagger                          ‡    &amp;#135; --> &#135;
+modifier letter circumflex accent      ˆ    &amp;#136; --> &#136;
+per mille sign                         ‰    &amp;#137; --> &#137;
+latin capital letter s with caron      Š    &amp;#138; --> &#138;
+single left-pointing angle quote mark  ‹    &amp;#139; --> &#139;
+latin capital ligature oe              Œ    &amp;#140; --> &#140;
+undefined                                  &amp;141; --> &#141;
+latin capital letter z with caron      Ž    &amp;#142; --> &#142;
+undefined                                  &amp;143; --> &#143;
+
+undefined                                  &amp;144; --> &#144;
+left single quotation mark             ‘    &amp;#145; --> &#145;
+right single quotation mark            ’    &amp;#146; --> &#146;
+left double quotation mark             “    &amp;#147; --> &#147;
+right double quotation mark            ”    &amp;#148; --> &#148;
+bullet                                 •    &amp;#149; --> &#149;
+en dash                                –    &amp;#150; --> &#150;
+em dash                                —    &amp;#151; --> &#151;
+small tilde                            ˜    &amp;#152; --> &#152;
+trade mark sign                        ™    &amp;#153; --> &#153;
+latin small letter s with caron        š    &amp;#154; --> &#154;
+single right-pointing angle quote mark ›    &amp;#155; --> &#155;
+latin small ligature oe                œ    &amp;#156; --> &#156;
+undefined                                  &amp;157; --> &#157;
+latin small letter z with caron        ž    &amp;#158; --> &#158;
+latin capital letter y with diaeresis  Ÿ    &amp;#159; --> &#159;
+
+non-breaking space                        &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+inverted exclamation                 ¡    &amp;#161; --> &#161;    &amp;iexcl;  --> &iexcl;
+cent sign                            ¢    &amp;#162; --> &#162;    &amp;cent;   --> &cent;
+pound sterling                       £    &amp;#163; --> &#163;    &amp;pound;  --> &pound;
+general currency sign                ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+yen sign                             ¥    &amp;#165; --> &#165;    &amp;yen;    --> &yen;
+broken vertical bar                  ¦    &amp;#166; --> &#166;    &amp;brvbar; --> &brvbar;
+                                             Non-standard &amp;brkbar; --> &brkbar;
+section sign                         §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                    ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+                                             Non-standard &amp;die;    --> &die;
+copyright                            ©    &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+feminine ordinal                     ª    &amp;#170; --> &#170;    &amp;ordf;   --> &ordf;
+left angle quote, guillemotleft      «    &amp;#171; --> &#171;    &amp;laquo;  --> &laquo;
+not sign                             ¬    &amp;#172; --> &#172;    &amp;not;    --> &not;
+soft hyphen                          ­    &amp;#173; --> &#173;    &amp;shy;    --> &shy;
+registered trademark                 ®    &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+macron accent                        ¯    &amp;#175; --> &#175;    &amp;macr;   --> &macr;
+                                             Non-standard &amp;hibar;  --> &hibar;
+degree sign                          °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+plus or minus                        ±    &amp;#177; --> &#177;    &amp;plusmn; --> &plusmn;
+superscript two                      ²    &amp;#178; --> &#178;    &amp;sup2;   --> &sup2;
+superscript three                    ³    &amp;#179; --> &#179;    &amp;sup3;   --> &sup3;
+acute accent                         ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+micro sign                           µ    &amp;#181; --> &#181;    &amp;micro;  --> &micro;
+paragraph sign                       ¶    &amp;#182; --> &#182;    &amp;para;   --> &para;
+middle dot                           ·    &amp;#183; --> &#183;    &amp;middot; --> &middot;
+cedilla                              ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+superscript one                      ¹    &amp;#185; --> &#185;    &amp;sup1;   --> &sup1;
+masculine ordinal                    º    &amp;#186; --> &#186;    &amp;ordm;   --> &ordm;
+right angle quote, guillemotright    »    &amp;#187; --> &#187;    &amp;raquo;  --> &raquo;
+fraction one-fourth                  ¼    &amp;#188; --> &#188;    &amp;frac14; --> &frac14;
+fraction one-half                    ½    &amp;#189; --> &#189;    &amp;frac12; --> &frac12;
+fraction three-fourths               ¾    &amp;#190; --> &#190;    &amp;frac34; --> &frac34;
+inverted question mark               ¿    &amp;#191; --> &#191;    &amp;iquest; --> &iquest;
+capital A, grave accent              À    &amp;#192; --> &#192;    &amp;Agrave; --> &Agrave;
+capital A, acute accent              Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+capital A, circumflex accent         Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+capital A, tilde                     Ã    &amp;#195; --> &#195;    &amp;Atilde; --> &Atilde;
+capital A, dieresis or umlaut mark   Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+capital A, ring                      Å    &amp;#197; --> &#197;    &amp;Aring;  --> &Aring;
+capital AE diphthong (ligature)      Æ    &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+capital C, cedilla                   Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+capital E, grave accent              È    &amp;#200; --> &#200;    &amp;Egrave; --> &Egrave;
+capital E, acute accent              É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+capital E, circumflex accent         Ê    &amp;#202; --> &#202;    &amp;Ecirc;  --> &Ecirc;
+capital E, dieresis or umlaut mark   Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+capital I, grave accent              Ì    &amp;#204; --> &#204;    &amp;Igrave; --> &Igrave;
+capital I, acute accent              Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+capital I, circumflex accent         Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+capital I, dieresis or umlaut mark   Ï    &amp;#207; --> &#207;    &amp;Iuml;   --> &Iuml;
+capital Eth, Icelandic               Ð    &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+                                             Non-standard &amp;Dstrok; --> &Dstrok;
+capital N, tilde                     Ñ    &amp;#209; --> &#209;    &amp;Ntilde; --> &Ntilde;
+capital O, grave accent              Ò    &amp;#210; --> &#210;    &amp;Ograve; --> &Ograve;
+capital O, acute accent              Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+capital O, circumflex accent         Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+capital O, tilde                     Õ    &amp;#213; --> &#213;    &amp;Otilde; --> &Otilde;
+capital O, dieresis or umlaut mark   Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiply sign                        ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+capital O, slash                     Ø    &amp;#216; --> &#216;    &amp;Oslash; --> &Oslash;
+capital U, grave accent              Ù    &amp;#217; --> &#217;    &amp;Ugrave; --> &Ugrave;
+capital U, acute accent              Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+capital U, circumflex accent         Û    &amp;#219; --> &#219;    &amp;Ucirc;  --> &Ucirc;
+capital U, dieresis or umlaut mark   Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+capital Y, acute accent              Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+capital THORN, Icelandic             Þ    &amp;#222; --> &#222;    &amp;THORN;  --> &THORN;
+small sharp s, German (sz ligature)  ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+small a, grave accent                à    &amp;#224; --> &#224;    &amp;agrave; --> &agrave;
+small a, acute accent                á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+small a, circumflex accent           â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+small a, tilde                       ã    &amp;#227; --> &#227;    &amp;atilde; --> &atilde;
+small a, dieresis or umlaut mark     ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+small a, ring                        å    &amp;#229; --> &#229;    &amp;aring;  --> &aring;
+small ae diphthong (ligature)        æ    &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+small c, cedilla                     ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+small e, grave accent                è    &amp;#232; --> &#232;    &amp;egrave; --> &egrave;
+small e, acute accent                é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+small e, circumflex accent           ê    &amp;#234; --> &#234;    &amp;ecirc;  --> &ecirc;
+small e, dieresis or umlaut mark     ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+small i, grave accent                ì    &amp;#236; --> &#236;    &amp;igrave; --> &igrave;
+small i, acute accent                í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+small i, circumflex accent           î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+small i, dieresis or umlaut mark     ï    &amp;#239; --> &#239;    &amp;iuml;   --> &iuml;
+small eth, Icelandic                 ð    &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+small n, tilde                       ñ    &amp;#241; --> &#241;    &amp;ntilde; --> &ntilde;
+small o, grave accent                ò    &amp;#242; --> &#242;    &amp;ograve; --> &ograve;
+small o, acute accent                ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+small o, circumflex accent           ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+small o, tilde                       õ    &amp;#245; --> &#245;    &amp;otilde; --> &otilde;
+small o, dieresis or umlaut mark     ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                        ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+small o, slash                       ø    &amp;#248; --> &#248;    &amp;oslash; --> &oslash;
+small u, grave accent                ù    &amp;#249; --> &#249;    &amp;ugrave; --> &ugrave;
+small u, acute accent                ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+small u, circumflex accent           û    &amp;#251; --> &#251;    &amp;ucirc;  --> &ucirc;
+small u, dieresis or umlaut mark     ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+small y, acute accent                ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+small thorn, Icelandic               þ    &amp;#254; --> &#254;    &amp;thorn;  --> &thorn;
+small y, dieresis or umlaut mark     ÿ    &amp;#255; --> &#255;    &amp;yuml;   --> &yuml;
+</PRE>
+<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+
+<STRONG>How to read</STRONG> this table.  The columns are
+<DL COMPACT>
+<DT>1st:<DD>textual <EM>description</EM> of the character
+<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
+            byte</EM>
+<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
+            format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+            "what your browser does with it"
+<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
+            format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+            "what your browser does with it"
+</DL>
+
+So for example, if you see something like "<CODE>&amp;divide; -->
+&amp;divide;</CODE>" in the 4th column, this means your browser
+doesn't know about the entity name "divide" and just puts it
+literally.
+
+<P>
+<STRONG>This table</STRONG> grew out of an overview of the "ISO
+Latin-1 Character Set" related to the Hyper-G Text Format
+(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
+
+The entity names <CODE>&amp;brkbar;</CODE> and <CODE>&amp;Dstrok;</CODE>
+seem to be unique to HTF.
+
+The entity name <CODE>&amp;hibar;</CODE> has been supported by X Mosaic
+but seems to be replaced with <CODE>&amp;macr;</CODE>.
+
+The entity names <CODE>&amp;uml;</CODE> and <CODE>&amp;die;</CODE> should
+be equivalent.
+
+<P><STRONG>The standards stuff:</STRONG>
+The 
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
+includes a section on
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
+and an overview on the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
+(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
+<BR>
+
+Or have a look at the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
+as listed in a draft for the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
+<BR>
+
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
+of CERN's
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
+contains a
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
+(in PostScript format) of the proposed character entities for HTML+ and their
+corresponding character codes for Unicode and the Adobe Latin-1 &amp; Symbol
+character sets.
+<P>
+
+<STRONG>Please note</STRONG> that there is nothing wrong with using
+characters of ISO Latin-1 above 127: the normal transmission protocol
+for the WWW,
+<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
+uses the 8bit ISO latin-1 as default encoding.
+(Thanks to Roman 
+Czyborra for pointing this out!)
+<P>
+
+<STRONG>Other information:</STRONG>
+<UL>
+
+<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
+  <UL>
+   <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
+   <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
+  </UL>
+
+<LI>The excellent overview on the series of
+    <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
+    character sets</A> compiled by Roman Czyborra.
+
+<LI>Also have a look at Alan Flavell's page of
+    <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
+    to information about ISO8859</A>. It's written very well!
+
+<LI>Maybe also of interest to you is the
+    <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO 
+     8859-1 FAQ</A> by Michael Gschwind
+    (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
+    part of his page on
+    <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
+
+<LI>For users of X11R5 on SunOS systems: the
+    <A HREF="Compose.txt">table of the compose combinations</A>
+    (also coded <A HREF="Compose.html">with entities</A> where possible).
+     It's taken from the MIT X sources in
+     <CODE>server/ddx/sun/Compose.list</CODE>.
+
+<LI>Finally you could have a look at
+    <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345: 
+     Character Mnemonics &amp; Character Sets</A>
+     by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
+
+</UL>
+
+
+<HR>
+
+<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-2.html b/test/iso-8859-2.html
new file mode 100644
index 00000000..3a203b5e
--- /dev/null
+++ b/test/iso-8859-2.html
@@ -0,0 +1,174 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html"  TITLE="iso-8859-2 ALT test">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>iso8859-2 plus table</H1> 
+
+<PRE>
+Description                               Code            Entity name   
+===================================       ============    ==============
+quotation mark                            &amp;#34;  --> &#34;     &amp;quot;   --> &quot;
+ampersand                                 &amp;#38;  --> &#38;     &amp;amp;    --> &amp;
+less-than sign                            &amp;#60;  --> &#60;     &amp;lt;     --> &lt;
+greater-than sign                         &amp;#62;  --> &#62;     &amp;gt;     --> &gt;
+
+Description                          Char Code            Entity name   
+===================================  ==== ============    ==============
+non-breaking space                        &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+capital A, ogonek                    ¡    &amp;#260; --> &#260;    &amp;Aogon;  --> &Aogon;
+breve                               {¢}  {&amp;#728;}-->{&#728;}  {&amp;breve;} -->{&breve;}
+capital L, stroke                    £    &amp;#321; --> &#321;    &amp;Lstrok; --> &Lstrok;
+general currency sign                ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+capital L, caron                     ¥    &amp;#317; --> &#317;    &amp;Lcaron; --> &Lcaron;
+capital S, acute accent              ¦    &amp;#346; --> &#346;    &amp;Sacute; --> &Sacute;
+section sign                         §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                    ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+                                                          &amp;die;    --> &die;
+capital S, caron                     ©    &amp;#352; --> &#352;    &amp;Scaron; --> &Scaron;
+capital S, cedilla                   ª    &amp;#350; --> &#350;    &amp;Scedil; --> &Scedil;
+capital T, caron                     «    &amp;#356; --> &#356;    &amp;Tcaron; --> &Tcaron;
+capital Z, acute accent              ¬    &amp;#377; --> &#377;    &amp;Zacute; --> &Zacute;
+soft hyphen                         [­]  [&amp;#173;]-->[&#173;]  [&amp;shy;]   -->[&shy;]
+capital Z, caron                     ®    &amp;#381; --> &#381;    &amp;Zcaron; --> &Zcaron;
+capital Z, dot above                 ¯    &amp;#379; --> &#379;    &amp;Zdot;   --> &Zdot;
+degree sign                          °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+small a, ogonek                      ±    &amp;#261; --> &#261;    &amp;aogon;  --> &aogon;
+ogonek                              {²}  {&amp;#731;}-->{&#731;}  {&amp;ogon;}  -->{&ogon;}
+small l, stroke                      ³    &amp;#322; --> &#322;    &amp;lstrok; --> &lstrok;
+acute accent                         ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+small l, caron                       µ    &amp;#318; --> &#318;    &amp;lcaron; --> &lcaron;
+small s, acute accent                ¶    &amp;#347; --> &#347;    &amp;sacute; --> &sacute;
+caron                               {·}  {&amp;#711;}-->{&#711;}  {&amp;caron;} -->{&caron;}
+cedilla                              ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+small s, caron                       ¹    &amp;#353; --> &#353;    &amp;scaron; --> &scaron;
+small s, cedilla                     º    &amp;#351; --> &#351;    &amp;scedil; --> &scedil;
+small t, caron                       »    &amp;#357; --> &#357;    &amp;tcaron; --> &tcaron;
+small z, acute accent                ¼    &amp;#378; --> &#378;    &amp;zacute; --> &zacute;
+double acute accent                 {½}  {&amp;#733;}-->{&#733;}  {&amp;dblac;} -->{&dblac;}
+small z, caron                       ¾    &amp;#382; --> &#382;    &amp;zcaron; --> &zcaron;
+small z, dot above                   ¿    &amp;#380; --> &#380;    &amp;zdot;   --> &zdot;  
+capital R, acute accent              À    &amp;#340; --> &#340;    &amp;Racute; --> &Racute;
+capital A, acute accent              Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+capital A, circumflex accent         Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+capital A, breve                     Ã    &amp;#258; --> &#258;    &amp;Abreve; --> &Abreve;
+capital A, dieresis or umlaut mark   Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+capital L, acute accent              Å    &amp;#313; --> &#313;    &amp;Lacute; --> &Lacute;
+capital C, acute accent              Æ    &amp;#262; --> &#262;    &amp;Cacute; --> &Cacute;
+capital C, cedilla                   Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+capital C, caron                     È    &amp;#268; --> &#268;    &amp;Ccaron; --> &Ccaron;
+capital E, acute accent              É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+capital E, ogonek                    Ê    &amp;#280; --> &#280;    &amp;Eogon;  --> &Eogon;
+capital E, dieresis or umlaut mark   Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+capital E, caron                     Ì    &amp;#282; --> &#282;    &amp;Ecaron; --> &Ecaron;
+capital I, acute accent              Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+capital I, circumflex accent         Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+capital D, caron                     Ï    &amp;#270; --> &#270;    &amp;Dcaron; --> &Dcaron;
+capital D, stroke                    Ð    &amp;#272; --> &#272;    &amp;Dstrok; --> &Dstrok;
+capital Eth, Icelandic               N/A  &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+capital N, acute accent              Ñ    &amp;#323; --> &#323;    &amp;Nacute; --> &Nacute;
+capital N, caron                     Ò    &amp;#327; --> &#327;    &amp;Ncaron; --> &Ncaron;
+capital O, acute accent              Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+capital O, circumflex accent         Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+capital O, double acute accent       Õ    &amp;#336; --> &#336;    &amp;Odblac; --> &Odblac;
+capital O, dieresis or umlaut mark   Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiply sign                        ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+capital R, caron                     Ø    &amp;#344; --> &#344;    &amp;Rcaron; --> &Rcaron;
+capital U, ring                      Ù    &amp;#366; --> &#366;    &amp;Uring;  --> &Uring;
+capital U, acute accent              Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+capital U, double acute accent       Û    &amp;#368; --> &#368;    &amp;Udblac; --> &Udblac;
+capital U, dieresis or umlaut mark   Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+capital Y, acute accent              Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+capital T, cedilla                   Þ    &amp;#354; --> &#354;    &amp;Tcedil; --> &Tcedil;
+small sharp s, German (sz ligature)  ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+small r, acute accent                à    &amp;#341; --> &#341;    &amp;racute; --> &racute;
+small a, acute accent                á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+small a, circumflex accent           â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+small a, breve                       ã    &amp;#259; --> &#259;    &amp;abreve; --> &abreve;
+small a, dieresis or umlaut mark     ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+small l, acute accent                å    &amp;#314; --> &#314;    &amp;lacute; --> &lacute;
+small c, acute accent                æ    &amp;#263; --> &#263;    &amp;cacute; --> &cacute;
+small c, cedilla                     ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+small c, caron                       è    &amp;#269; --> &#269;    &amp;ccaron; --> &ccaron;
+small e, acute accent                é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+small e, ogonek                      ê    &amp;#281; --> &#281;    &amp;eogon;  --> &eogon;
+small e, dieresis or umlaut mark     ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+small e, caron                       ì    &amp;#283; --> &#283;    &amp;ecaron; --> &ecaron;
+small i, acute accent                í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+small i, circumflex accent           î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+small d, caron                       ï    &amp;#271; --> &#271;    &amp;dcaron; --> &dcaron;
+small d, stroke                      ð    &amp;#273; --> &#273;    &amp;dstrok; --> &dstrok;
+small eth, Icelandic                 N/A  &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+small n, acute accent                ñ    &amp;#324; --> &#324;    &amp;nacute; --> &nacute;
+small n, caron                       ò    &amp;#328; --> &#328;    &amp;ncaron; --> &ncaron;
+small o, acute accent                ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+small o, circumflex accent           ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+small o, double acute accent         õ    &amp;#337; --> &#337;    &amp;odblac; --> &odblac;
+small o, dieresis or umlaut mark     ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                        ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+small r, caron                       ø    &amp;#345; --> &#345;    &amp;rcaron; --> &rcaron;
+small u, ring                        ù    &amp;#367; --> &#367;    &amp;uring;  --> &uring;
+small u, acute accent                ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+small u, double acute accent         û    &amp;#369; --> &#369;    &amp;udblac; --> &udblac;
+small u, dieresis or umlaut mark     ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+small y, acute accent                ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+small t, cedilla                     þ    &amp;#355; --> &#355;    &amp;tcedil; --> &tcedil;
+dot above                           {ÿ}  {&amp;#729;}-->{&#729;}  {&amp;dot;}   -->{&dot;}
+
+Some other characters of interest    Char Code            Entity name   
+===================================  ==== ============    ==============
+capital AE diphthong (ligature)      N/A  &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+small ae diphthong (ligature)        N/A  &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+capital OE ligature                  N/A {&amp;#338;}-->{&#338;}  {&amp;OElig;} -->{&OElig;}
+small oe ligature                    N/A {&amp;#339;}-->{&#339;}  {&amp;oelig;} -->{&oelig;}
+copyright                            N/A  &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+registered trademark                 N/A  &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+trademark sign                       N/A  &amp;#8482;--> &#8482;   &amp;trade;  --> &trade;
+em space                             N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;]  -->[&emsp;]
+en space                             N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;]  -->[&ensp;]
+1/3-em space                         N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space                         N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space                           N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space                           N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash                              N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash                              N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
+<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either 
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the 
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> 
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+There also is a variation of this table which tests
+<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.  
+This file is mostly just an adaptation of his table 
+to the ISO-8859-2 character set.
+</P>
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-2a.html b/test/iso-8859-2a.html
new file mode 100644
index 00000000..ffaa1cbd
--- /dev/null
+++ b/test/iso-8859-2a.html
@@ -0,0 +1,208 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html"  TITLE="iso-8859-2 ALT test">
+</HEAD>
+
+<BODY> 
+
+<H1 ALIGN=center>iso8859-2 plus table, and cp-1252</H1> 
+
+<PRE>
+Description                               Code            Entity name   
+===================================       ============    ==============
+quotation mark                            &amp;#34;  --> &#34;     &amp;quot;   --> &quot;
+ampersand                                 &amp;#38;  --> &#38;     &amp;amp;    --> &amp;
+less-than sign                            &amp;#60;  --> &#60;     &amp;lt;     --> &lt;
+greater-than sign                         &amp;#62;  --> &#62;     &amp;gt;     --> &gt;
+
+Description                          Char Code            Entity name   
+===================================  ==== ============    ==============
+euro sign                              €    &amp;128; --> &#128;
+undefined                                  &amp;129; --> &#129;
+single low-9 quotation mark            ‚    &amp;130; --> &#130;
+latin small letter f with hook         ƒ    &amp;131; --> &#131;
+double low-9 quotation mark            „    &amp;132; --> &#132;
+horizontal ellipsis                    …    &amp;133; --> &#133;
+dagger                                 †    &amp;134; --> &#134;
+double dagger                          ‡    &amp;135; --> &#135;
+modifier letter circumflex accent      ˆ    &amp;136; --> &#136;
+per mille sign                         ‰    &amp;137; --> &#137;
+latin capital letter s with caron      Š    &amp;138; --> &#138;
+single left-pointing angle quote mark  ‹    &amp;139; --> &#139;
+latin capital ligature oe              Œ    &amp;140; --> &#140;
+undefined                                  &amp;141; --> &#141;
+latin capital letter z with caron      Ž    &amp;142; --> &#142;
+undefined                                  &amp;143; --> &#143;
+
+undefined                                  &amp;144; --> &#144;
+left single quotation mark             ‘    &amp;145; --> &#145;
+right single quotation mark            ’    &amp;146; --> &#146;
+left double quotation mark             “    &amp;147; --> &#147;
+right double quotation mark            ”    &amp;148; --> &#148;
+bullet                                 •    &amp;149; --> &#149;
+en dash                                –    &amp;150; --> &#150;
+em dash                                —    &amp;151; --> &#151;
+small tilde                            ˜    &amp;152; --> &#152;
+trade mark sign                        ™    &amp;153; --> &#153;
+latin small letter s with caron        š    &amp;154; --> &#154;
+single right-pointing angle quote mark ›    &amp;155; --> &#155;
+latin small ligature oe                œ    &amp;156; --> &#156;
+undefined                                  &amp;157; --> &#157;
+latin small letter z with caron        ž    &amp;158; --> &#158;
+latin capital letter y with diaeresis  Ÿ    &amp;159; --> &#159;
+
+non-breaking space                        &amp;#160; --> &#160;    &amp;nbsp;   --> &nbsp;
+capital A, ogonek                    ¡    &amp;#260; --> &#260;    &amp;Aogon;  --> &Aogon;
+breve                               {¢}  {&amp;#728;}-->{&#728;}  {&amp;breve;} -->{&breve;}
+capital L, stroke                    £    &amp;#321; --> &#321;    &amp;Lstrok; --> &Lstrok;
+general currency sign                ¤    &amp;#164; --> &#164;    &amp;curren; --> &curren;
+capital L, caron                     ¥    &amp;#317; --> &#317;    &amp;Lcaron; --> &Lcaron;
+capital S, acute accent              ¦    &amp;#346; --> &#346;    &amp;Sacute; --> &Sacute;
+section sign                         §    &amp;#167; --> &#167;    &amp;sect;   --> &sect;
+umlaut (dieresis)                    ¨    &amp;#168; --> &#168;    &amp;uml;    --> &uml;
+                                                          &amp;die;    --> &die;
+capital S, caron                     ©    &amp;#352; --> &#352;    &amp;Scaron; --> &Scaron;
+capital S, cedilla                   ª    &amp;#350; --> &#350;    &amp;Scedil; --> &Scedil;
+capital T, caron                     «    &amp;#356; --> &#356;    &amp;Tcaron; --> &Tcaron;
+capital Z, acute accent              ¬    &amp;#377; --> &#377;    &amp;Zacute; --> &Zacute;
+soft hyphen                         [­]  [&amp;#173;]-->[&#173;]  [&amp;shy;]   -->[&shy;]
+capital Z, caron                     ®    &amp;#381; --> &#381;    &amp;Zcaron; --> &Zcaron;
+capital Z, dot above                 ¯    &amp;#379; --> &#379;    &amp;Zdot;   --> &Zdot;
+degree sign                          °    &amp;#176; --> &#176;    &amp;deg;    --> &deg;
+small a, ogonek                      ±    &amp;#261; --> &#261;    &amp;aogon;  --> &aogon;
+ogonek                              {²}  {&amp;#731;}-->{&#731;}  {&amp;ogon;}  -->{&ogon;}
+small l, stroke                      ³    &amp;#322; --> &#322;    &amp;lstrok; --> &lstrok;
+acute accent                         ´    &amp;#180; --> &#180;    &amp;acute;  --> &acute;
+small l, caron                       µ    &amp;#318; --> &#318;    &amp;lcaron; --> &lcaron;
+small s, acute accent                ¶    &amp;#347; --> &#347;    &amp;sacute; --> &sacute;
+caron                               {·}  {&amp;#711;}-->{&#711;}  {&amp;caron;} -->{&caron;}
+cedilla                              ¸    &amp;#184; --> &#184;    &amp;cedil;  --> &cedil;
+small s, caron                       ¹    &amp;#353; --> &#353;    &amp;scaron; --> &scaron;
+small s, cedilla                     º    &amp;#351; --> &#351;    &amp;scedil; --> &scedil;
+small t, caron                       »    &amp;#357; --> &#357;    &amp;tcaron; --> &tcaron;
+small z, acute accent                ¼    &amp;#378; --> &#378;    &amp;zacute; --> &zacute;
+double acute accent                 {½}  {&amp;#733;}-->{&#733;}  {&amp;dblac;} -->{&dblac;}
+small z, caron                       ¾    &amp;#382; --> &#382;    &amp;zcaron; --> &zcaron;
+small z, dot above                   ¿    &amp;#380; --> &#380;    &amp;zdot;   --> &zdot;  
+capital R, acute accent              À    &amp;#340; --> &#340;    &amp;Racute; --> &Racute;
+capital A, acute accent              Á    &amp;#193; --> &#193;    &amp;Aacute; --> &Aacute;
+capital A, circumflex accent         Â    &amp;#194; --> &#194;    &amp;Acirc;  --> &Acirc;
+capital A, breve                     Ã    &amp;#258; --> &#258;    &amp;Abreve; --> &Abreve;
+capital A, dieresis or umlaut mark   Ä    &amp;#196; --> &#196;    &amp;Auml;   --> &Auml;
+capital L, acute accent              Å    &amp;#313; --> &#313;    &amp;Lacute; --> &Lacute;
+capital C, acute accent              Æ    &amp;#262; --> &#262;    &amp;Cacute; --> &Cacute;
+capital C, cedilla                   Ç    &amp;#199; --> &#199;    &amp;Ccedil; --> &Ccedil;
+capital C, caron                     È    &amp;#268; --> &#268;    &amp;Ccaron; --> &Ccaron;
+capital E, acute accent              É    &amp;#201; --> &#201;    &amp;Eacute; --> &Eacute;
+capital E, ogonek                    Ê    &amp;#280; --> &#280;    &amp;Eogon;  --> &Eogon;
+capital E, dieresis or umlaut mark   Ë    &amp;#203; --> &#203;    &amp;Euml;   --> &Euml;
+capital E, caron                     Ì    &amp;#282; --> &#282;    &amp;Ecaron; --> &Ecaron;
+capital I, acute accent              Í    &amp;#205; --> &#205;    &amp;Iacute; --> &Iacute;
+capital I, circumflex accent         Î    &amp;#206; --> &#206;    &amp;Icirc;  --> &Icirc;
+capital D, caron                     Ï    &amp;#270; --> &#270;    &amp;Dcaron; --> &Dcaron;
+capital D, stroke                    Ð    &amp;#272; --> &#272;    &amp;Dstrok; --> &Dstrok;
+capital Eth, Icelandic               N/A  &amp;#208; --> &#208;    &amp;ETH;    --> &ETH;
+capital N, acute accent              Ñ    &amp;#323; --> &#323;    &amp;Nacute; --> &Nacute;
+capital N, caron                     Ò    &amp;#327; --> &#327;    &amp;Ncaron; --> &Ncaron;
+capital O, acute accent              Ó    &amp;#211; --> &#211;    &amp;Oacute; --> &Oacute;
+capital O, circumflex accent         Ô    &amp;#212; --> &#212;    &amp;Ocirc;  --> &Ocirc;
+capital O, double acute accent       Õ    &amp;#336; --> &#336;    &amp;Odblac; --> &Odblac;
+capital O, dieresis or umlaut mark   Ö    &amp;#214; --> &#214;    &amp;Ouml;   --> &Ouml;
+multiply sign                        ×    &amp;#215; --> &#215;    &amp;times;  --> &times;
+capital R, caron                     Ø    &amp;#344; --> &#344;    &amp;Rcaron; --> &Rcaron;
+capital U, ring                      Ù    &amp;#366; --> &#366;    &amp;Uring;  --> &Uring;
+capital U, acute accent              Ú    &amp;#218; --> &#218;    &amp;Uacute; --> &Uacute;
+capital U, double acute accent       Û    &amp;#368; --> &#368;    &amp;Udblac; --> &Udblac;
+capital U, dieresis or umlaut mark   Ü    &amp;#220; --> &#220;    &amp;Uuml;   --> &Uuml;
+capital Y, acute accent              Ý    &amp;#221; --> &#221;    &amp;Yacute; --> &Yacute;
+capital T, cedilla                   Þ    &amp;#354; --> &#354;    &amp;Tcedil; --> &Tcedil;
+small sharp s, German (sz ligature)  ß    &amp;#223; --> &#223;    &amp;szlig;  --> &szlig;
+small r, acute accent                à    &amp;#341; --> &#341;    &amp;racute; --> &racute;
+small a, acute accent                á    &amp;#225; --> &#225;    &amp;aacute; --> &aacute;
+small a, circumflex accent           â    &amp;#226; --> &#226;    &amp;acirc;  --> &acirc;
+small a, breve                       ã    &amp;#259; --> &#259;    &amp;abreve; --> &abreve;
+small a, dieresis or umlaut mark     ä    &amp;#228; --> &#228;    &amp;auml;   --> &auml;
+small l, acute accent                å    &amp;#314; --> &#314;    &amp;lacute; --> &lacute;
+small c, acute accent                æ    &amp;#263; --> &#263;    &amp;cacute; --> &cacute;
+small c, cedilla                     ç    &amp;#231; --> &#231;    &amp;ccedil; --> &ccedil;
+small c, caron                       è    &amp;#269; --> &#269;    &amp;ccaron; --> &ccaron;
+small e, acute accent                é    &amp;#233; --> &#233;    &amp;eacute; --> &eacute;
+small e, ogonek                      ê    &amp;#281; --> &#281;    &amp;eogon;  --> &eogon;
+small e, dieresis or umlaut mark     ë    &amp;#235; --> &#235;    &amp;euml;   --> &euml;
+small e, caron                       ì    &amp;#283; --> &#283;    &amp;ecaron; --> &ecaron;
+small i, acute accent                í    &amp;#237; --> &#237;    &amp;iacute; --> &iacute;
+small i, circumflex accent           î    &amp;#238; --> &#238;    &amp;icirc;  --> &icirc;
+small d, caron                       ï    &amp;#271; --> &#271;    &amp;dcaron; --> &dcaron;
+small d, stroke                      ð    &amp;#273; --> &#273;    &amp;dstrok; --> &dstrok;
+small eth, Icelandic                 N/A  &amp;#240; --> &#240;    &amp;eth;    --> &eth;
+small n, acute accent                ñ    &amp;#324; --> &#324;    &amp;nacute; --> &nacute;
+small n, caron                       ò    &amp;#328; --> &#328;    &amp;ncaron; --> &ncaron;
+small o, acute accent                ó    &amp;#243; --> &#243;    &amp;oacute; --> &oacute;
+small o, circumflex accent           ô    &amp;#244; --> &#244;    &amp;ocirc;  --> &ocirc;
+small o, double acute accent         õ    &amp;#337; --> &#337;    &amp;odblac; --> &odblac;
+small o, dieresis or umlaut mark     ö    &amp;#246; --> &#246;    &amp;ouml;   --> &ouml;
+division sign                        ÷    &amp;#247; --> &#247;    &amp;divide; --> &divide;
+small r, caron                       ø    &amp;#345; --> &#345;    &amp;rcaron; --> &rcaron;
+small u, ring                        ù    &amp;#367; --> &#367;    &amp;uring;  --> &uring;
+small u, acute accent                ú    &amp;#250; --> &#250;    &amp;uacute; --> &uacute;
+small u, double acute accent         û    &amp;#369; --> &#369;    &amp;udblac; --> &udblac;
+small u, dieresis or umlaut mark     ü    &amp;#252; --> &#252;    &amp;uuml;   --> &uuml;
+small y, acute accent                ý    &amp;#253; --> &#253;    &amp;yacute; --> &yacute;
+small t, cedilla                     þ    &amp;#355; --> &#355;    &amp;tcedil; --> &tcedil;
+dot above                           {ÿ}  {&amp;#729;}-->{&#729;}  {&amp;dot;}   -->{&dot;}
+
+Some other characters of interest    Char Code            Entity name   
+===================================  ==== ============    ==============
+capital AE diphthong (ligature)      N/A  &amp;#198; --> &#198;    &amp;AElig;  --> &AElig;
+small ae diphthong (ligature)        N/A  &amp;#230; --> &#230;    &amp;aelig;  --> &aelig;
+capital OE ligature                  N/A {&amp;#338;}-->{&#338;}  {&amp;OElig;} -->{&OElig;}
+small oe ligature                    N/A {&amp;#339;}-->{&#339;}  {&amp;oelig;} -->{&oelig;}
+copyright                            N/A  &amp;#169; --> &#169;    &amp;copy;   --> &copy;
+registered trademark                 N/A  &amp;#174; --> &#174;    &amp;reg;    --> &reg;
+trademark sign                       N/A  &amp;#8482;--> &#8482;   &amp;trade;  --> &trade;
+em space                             N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;]  -->[&emsp;]
+en space                             N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;]  -->[&ensp;]
+1/3-em space                         N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space                         N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space                           N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space                           N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash                              N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash                              N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
+<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either 
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the 
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> 
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+There also is a variation of this table which tests
+<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.  
+This file is mostly just an adaptation of his table 
+to the ISO-8859-2 character set.
+</P>
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/koi8-r.html b/test/koi8-r.html
new file mode 100644
index 00000000..d6792195
--- /dev/null
+++ b/test/koi8-r.html
@@ -0,0 +1,321 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of the KOI8-R symbols</TITLE>
+</HEAD>
+<BODY>
+<PRE>
+
+    This table prepared from KOI8-R.TXT available at ftp.unicode.org
+
+         ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
+         (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+#
+#       Name:             KOI8-R (RFC1489) to Unicode
+#       Unicode version:  3.0
+#       Table version:    1.0
+#       Table format:     Format A
+#       Date:             18 August 1999
+#       Authors:          Helmut Richter &lt;richter@lrz.de&gt;
+#
+#       Copyright (c) 1991-1999 Unicode, Inc.  All Rights reserved.
+#
+#       This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+#       No claims are made as to fitness for any particular purpose.  No
+#       warranties of any kind are expressed or implied.  The recipient
+#       agrees to determine applicability of information provided.  If this
+#       file has been provided on optical media by Unicode, Inc., the sole
+#       remedy for any claim will be exchange of defective media within 90
+#       days of receipt.
+#
+#       Unicode, Inc. hereby grants the right to freely use the information
+#       supplied in this file in the creation of products supporting the
+#       Unicode Standard, and to make copies of this file in any form for
+#       internal or external distribution as long as this notice remains
+#       attached.
+#
+#       General notes:
+#
+#       This table contains the data the Unicode Consortium has on how
+#       KOI8-R characters map into Unicode. The underlying document is the
+#       mapping described in RFC 1489. No statements are made as to whether
+#       this mapping is the same as the mapping defined as "Code Page 878"
+#       with some vendors.
+#
+#       Format:  Three tab-separated columns
+#                Column #1 is the KOI8-R code (in hex as 0xXX)
+#                Column #2 is the Unicode (in hex as 0xXXXX)
+#                Column #3 the Unicode name (follows a comment sign, '#')
+#
+#       The entries are in KOI8-R order.
+#
+#       Version history
+#       1.0 version: created.
+#
+#       Any comments or problems, contact &lt;errata@unicode.org&gt;
+#       Please note that &lt;errata@unicode.org&gt; is an archival address;
+#       notices will be checked, but do not expect an immediate response.
+#
+0x00    0x0000 "&#x0000"	  # NULL
+0x01    0x0001 "&#x0001"	  # START OF HEADING
+0x02    0x0002 "&#x0002"	  # START OF TEXT
+0x03    0x0003 "&#x0003"	  # END OF TEXT
+0x04    0x0004 "&#x0004"	  # END OF TRANSMISSION
+0x05    0x0005 "&#x0005"	  # ENQUIRY
+0x06    0x0006 "&#x0006"	  # ACKNOWLEDGE
+0x07    0x0007 "&#x0007"	  # BELL
+0x08    0x0008 "&#x0008"	  # BACKSPACE
+0x09    0x0009 "&#x0009"	  # HORIZONTAL TABULATION
+0x0A    0x000A "&#x000A"	  # LINE FEED
+0x0B    0x000B "&#x000B"	  # VERTICAL TABULATION
+0x0C    0x000C "&#x000C"	  # FORM FEED
+0x0D    0x000D "&#x000D"	  # CARRIAGE RETURN
+0x0E    0x000E "&#x000E"	  # SHIFT OUT
+0x0F    0x000F "&#x000F"	  # SHIFT IN
+0x10    0x0010 "&#x0010"	  # DATA LINK ESCAPE
+0x11    0x0011 "&#x0011"	  # DEVICE CONTROL ONE
+0x12    0x0012 "&#x0012"	  # DEVICE CONTROL TWO
+0x13    0x0013 "&#x0013"	  # DEVICE CONTROL THREE
+0x14    0x0014 "&#x0014"	  # DEVICE CONTROL FOUR
+0x15    0x0015 "&#x0015"	  # NEGATIVE ACKNOWLEDGE
+0x16    0x0016 "&#x0016"	  # SYNCHRONOUS IDLE
+0x17    0x0017 "&#x0017"	  # END OF TRANSMISSION BLOCK
+0x18    0x0018 "&#x0018"	  # CANCEL
+0x19    0x0019 "&#x0019"	  # END OF MEDIUM
+0x1A    0x001A "&#x001A"	  # SUBSTITUTE
+0x1B    0x001B "&#x001B"	  # ESCAPE
+0x1C    0x001C "&#x001C"	  # FILE SEPARATOR
+0x1D    0x001D "&#x001D"	  # GROUP SEPARATOR
+0x1E    0x001E "&#x001E"	  # RECORD SEPARATOR
+0x1F    0x001F "&#x001F"	  # UNIT SEPARATOR
+0x20    0x0020 "&#x0020"	  # SPACE
+0x21    0x0021 "&#x0021"	  # EXCLAMATION MARK
+0x22    0x0022 "&#x0022"	  # QUOTATION MARK
+0x23    0x0023 "&#x0023"	  # NUMBER SIGN
+0x24    0x0024 "&#x0024"	  # DOLLAR SIGN
+0x25    0x0025 "&#x0025"	  # PERCENT SIGN
+0x26    0x0026 "&#x0026"	  # AMPERSAND
+0x27    0x0027 "&#x0027"	  # APOSTROPHE
+0x28    0x0028 "&#x0028"	  # LEFT PARENTHESIS
+0x29    0x0029 "&#x0029"	  # RIGHT PARENTHESIS
+0x2A    0x002A "&#x002A"	  # ASTERISK
+0x2B    0x002B "&#x002B"	  # PLUS SIGN
+0x2C    0x002C "&#x002C"	  # COMMA
+0x2D    0x002D "&#x002D"	  # HYPHEN-MINUS
+0x2E    0x002E "&#x002E"	  # FULL STOP
+0x2F    0x002F "&#x002F"	  # SOLIDUS
+0x30    0x0030 "&#x0030"	  # DIGIT ZERO
+0x31    0x0031 "&#x0031"	  # DIGIT ONE
+0x32    0x0032 "&#x0032"	  # DIGIT TWO
+0x33    0x0033 "&#x0033"	  # DIGIT THREE
+0x34    0x0034 "&#x0034"	  # DIGIT FOUR
+0x35    0x0035 "&#x0035"	  # DIGIT FIVE
+0x36    0x0036 "&#x0036"	  # DIGIT SIX
+0x37    0x0037 "&#x0037"	  # DIGIT SEVEN
+0x38    0x0038 "&#x0038"	  # DIGIT EIGHT
+0x39    0x0039 "&#x0039"	  # DIGIT NINE
+0x3A    0x003A "&#x003A"	  # COLON
+0x3B    0x003B "&#x003B"	  # SEMICOLON
+0x3C    0x003C "&#x003C"	  # LESS-THAN SIGN
+0x3D    0x003D "&#x003D"	  # EQUALS SIGN
+0x3E    0x003E "&#x003E"	  # GREATER-THAN SIGN
+0x3F    0x003F "&#x003F"	  # QUESTION MARK
+0x40    0x0040 "&#x0040"	  # COMMERCIAL AT
+0x41    0x0041 "&#x0041"	  # LATIN CAPITAL LETTER A
+0x42    0x0042 "&#x0042"	  # LATIN CAPITAL LETTER B
+0x43    0x0043 "&#x0043"	  # LATIN CAPITAL LETTER C
+0x44    0x0044 "&#x0044"	  # LATIN CAPITAL LETTER D
+0x45    0x0045 "&#x0045"	  # LATIN CAPITAL LETTER E
+0x46    0x0046 "&#x0046"	  # LATIN CAPITAL LETTER F
+0x47    0x0047 "&#x0047"	  # LATIN CAPITAL LETTER G
+0x48    0x0048 "&#x0048"	  # LATIN CAPITAL LETTER H
+0x49    0x0049 "&#x0049"	  # LATIN CAPITAL LETTER I
+0x4A    0x004A "&#x004A"	  # LATIN CAPITAL LETTER J
+0x4B    0x004B "&#x004B"	  # LATIN CAPITAL LETTER K
+0x4C    0x004C "&#x004C"	  # LATIN CAPITAL LETTER L
+0x4D    0x004D "&#x004D"	  # LATIN CAPITAL LETTER M
+0x4E    0x004E "&#x004E"	  # LATIN CAPITAL LETTER N
+0x4F    0x004F "&#x004F"	  # LATIN CAPITAL LETTER O
+0x50    0x0050 "&#x0050"	  # LATIN CAPITAL LETTER P
+0x51    0x0051 "&#x0051"	  # LATIN CAPITAL LETTER Q
+0x52    0x0052 "&#x0052"	  # LATIN CAPITAL LETTER R
+0x53    0x0053 "&#x0053"	  # LATIN CAPITAL LETTER S
+0x54    0x0054 "&#x0054"	  # LATIN CAPITAL LETTER T
+0x55    0x0055 "&#x0055"	  # LATIN CAPITAL LETTER U
+0x56    0x0056 "&#x0056"	  # LATIN CAPITAL LETTER V
+0x57    0x0057 "&#x0057"	  # LATIN CAPITAL LETTER W
+0x58    0x0058 "&#x0058"	  # LATIN CAPITAL LETTER X
+0x59    0x0059 "&#x0059"	  # LATIN CAPITAL LETTER Y
+0x5A    0x005A "&#x005A"	  # LATIN CAPITAL LETTER Z
+0x5B    0x005B "&#x005B"	  # LEFT SQUARE BRACKET
+0x5C    0x005C "&#x005C"	  # REVERSE SOLIDUS
+0x5D    0x005D "&#x005D"	  # RIGHT SQUARE BRACKET
+0x5E    0x005E "&#x005E"	  # CIRCUMFLEX ACCENT
+0x5F    0x005F "&#x005F"	  # LOW LINE
+0x60    0x0060 "&#x0060"	  # GRAVE ACCENT
+0x61    0x0061 "&#x0061"	  # LATIN SMALL LETTER A
+0x62    0x0062 "&#x0062"	  # LATIN SMALL LETTER B
+0x63    0x0063 "&#x0063"	  # LATIN SMALL LETTER C
+0x64    0x0064 "&#x0064"	  # LATIN SMALL LETTER D
+0x65    0x0065 "&#x0065"	  # LATIN SMALL LETTER E
+0x66    0x0066 "&#x0066"	  # LATIN SMALL LETTER F
+0x67    0x0067 "&#x0067"	  # LATIN SMALL LETTER G
+0x68    0x0068 "&#x0068"	  # LATIN SMALL LETTER H
+0x69    0x0069 "&#x0069"	  # LATIN SMALL LETTER I
+0x6A    0x006A "&#x006A"	  # LATIN SMALL LETTER J
+0x6B    0x006B "&#x006B"	  # LATIN SMALL LETTER K
+0x6C    0x006C "&#x006C"	  # LATIN SMALL LETTER L
+0x6D    0x006D "&#x006D"	  # LATIN SMALL LETTER M
+0x6E    0x006E "&#x006E"	  # LATIN SMALL LETTER N
+0x6F    0x006F "&#x006F"	  # LATIN SMALL LETTER O
+0x70    0x0070 "&#x0070"	  # LATIN SMALL LETTER P
+0x71    0x0071 "&#x0071"	  # LATIN SMALL LETTER Q
+0x72    0x0072 "&#x0072"	  # LATIN SMALL LETTER R
+0x73    0x0073 "&#x0073"	  # LATIN SMALL LETTER S
+0x74    0x0074 "&#x0074"	  # LATIN SMALL LETTER T
+0x75    0x0075 "&#x0075"	  # LATIN SMALL LETTER U
+0x76    0x0076 "&#x0076"	  # LATIN SMALL LETTER V
+0x77    0x0077 "&#x0077"	  # LATIN SMALL LETTER W
+0x78    0x0078 "&#x0078"	  # LATIN SMALL LETTER X
+0x79    0x0079 "&#x0079"	  # LATIN SMALL LETTER Y
+0x7A    0x007A "&#x007A"	  # LATIN SMALL LETTER Z
+0x7B    0x007B "&#x007B"	  # LEFT CURLY BRACKET
+0x7C    0x007C "&#x007C"	  # VERTICAL LINE
+0x7D    0x007D "&#x007D"	  # RIGHT CURLY BRACKET
+0x7E    0x007E "&#x007E"	  # TILDE
+0x7F    0x007F "&#x007F"	  # DELETE
+0x80    0x2500 "&#x2500"	  # BOX DRAWINGS LIGHT HORIZONTAL
+0x81    0x2502 "&#x2502"	  # BOX DRAWINGS LIGHT VERTICAL
+0x82    0x250C "&#x250C"	  # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x83    0x2510 "&#x2510"	  # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x84    0x2514 "&#x2514"	  # BOX DRAWINGS LIGHT UP AND RIGHT
+0x85    0x2518 "&#x2518"	  # BOX DRAWINGS LIGHT UP AND LEFT
+0x86    0x251C "&#x251C"	  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x87    0x2524 "&#x2524"	  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x88    0x252C "&#x252C"	  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x89    0x2534 "&#x2534"	  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x8A    0x253C "&#x253C"	  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x8B    0x2580 "&#x2580"	  # UPPER HALF BLOCK
+0x8C    0x2584 "&#x2584"	  # LOWER HALF BLOCK
+0x8D    0x2588 "&#x2588"	  # FULL BLOCK
+0x8E    0x258C "&#x258C"	  # LEFT HALF BLOCK
+0x8F    0x2590 "&#x2590"	  # RIGHT HALF BLOCK
+0x90    0x2591 "&#x2591"	  # LIGHT SHADE
+0x91    0x2592 "&#x2592"	  # MEDIUM SHADE
+0x92    0x2593 "&#x2593"	  # DARK SHADE
+0x93    0x2320 "&#x2320"	  # TOP HALF INTEGRAL
+0x94    0x25A0 "&#x25A0"	  # BLACK SQUARE
+0x95    0x2219 "&#x2219"	  # BULLET OPERATOR
+0x96    0x221A "&#x221A"	  # SQUARE ROOT
+0x97    0x2248 "&#x2248"	  # ALMOST EQUAL TO
+0x98    0x2264 "&#x2264"	  # LESS-THAN OR EQUAL TO
+0x99    0x2265 "&#x2265"	  # GREATER-THAN OR EQUAL TO
+0x9A    0x00A0 "&#x00A0"	  # NO-BREAK SPACE
+0x9B    0x2321 "&#x2321"	  # BOTTOM HALF INTEGRAL
+0x9C    0x00B0 "&#x00B0"	  # DEGREE SIGN
+0x9D    0x00B2 "&#x00B2"	  # SUPERSCRIPT TWO
+0x9E    0x00B7 "&#x00B7"	  # MIDDLE DOT
+0x9F    0x00F7 "&#x00F7"	  # DIVISION SIGN
+0xA0    0x2550 "&#x2550"	  # BOX DRAWINGS DOUBLE HORIZONTAL
+0xA1    0x2551 "&#x2551"	  # BOX DRAWINGS DOUBLE VERTICAL
+0xA2    0x2552 "&#x2552"	  # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0xA3    0x0451 "&#x0451"	  # CYRILLIC SMALL LETTER IO
+0xA4    0x2553 "&#x2553"	  # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0xA5    0x2554 "&#x2554"	  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0xA6    0x2555 "&#x2555"	  # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0xA7    0x2556 "&#x2556"	  # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0xA8    0x2557 "&#x2557"	  # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0xA9    0x2558 "&#x2558"	  # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0xAA    0x2559 "&#x2559"	  # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0xAB    0x255A "&#x255A"	  # BOX DRAWINGS DOUBLE UP AND RIGHT
+0xAC    0x255B "&#x255B"	  # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0xAD    0x255C "&#x255C"	  # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0xAE    0x255D "&#x255D"	  # BOX DRAWINGS DOUBLE UP AND LEFT
+0xAF    0x255E "&#x255E"	  # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0xB0    0x255F "&#x255F"	  # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0xB1    0x2560 "&#x2560"	  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0xB2    0x2561 "&#x2561"	  # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0xB3    0x0401 "&#x0401"	  # CYRILLIC CAPITAL LETTER IO
+0xB4    0x2562 "&#x2562"	  # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0xB5    0x2563 "&#x2563"	  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0xB6    0x2564 "&#x2564"	  # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0xB7    0x2565 "&#x2565"	  # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0xB8    0x2566 "&#x2566"	  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0xB9    0x2567 "&#x2567"	  # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0xBA    0x2568 "&#x2568"	  # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0xBB    0x2569 "&#x2569"	  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0xBC    0x256A "&#x256A"	  # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0xBD    0x256B "&#x256B"	  # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0xBE    0x256C "&#x256C"	  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0xBF    0x00A9 "&#x00A9"	  # COPYRIGHT SIGN
+0xC0    0x044E "&#x044E"	  # CYRILLIC SMALL LETTER YU
+0xC1    0x0430 "&#x0430"	  # CYRILLIC SMALL LETTER A
+0xC2    0x0431 "&#x0431"	  # CYRILLIC SMALL LETTER BE
+0xC3    0x0446 "&#x0446"	  # CYRILLIC SMALL LETTER TSE
+0xC4    0x0434 "&#x0434"	  # CYRILLIC SMALL LETTER DE
+0xC5    0x0435 "&#x0435"	  # CYRILLIC SMALL LETTER IE
+0xC6    0x0444 "&#x0444"	  # CYRILLIC SMALL LETTER EF
+0xC7    0x0433 "&#x0433"	  # CYRILLIC SMALL LETTER GHE
+0xC8    0x0445 "&#x0445"	  # CYRILLIC SMALL LETTER HA
+0xC9    0x0438 "&#x0438"	  # CYRILLIC SMALL LETTER I
+0xCA    0x0439 "&#x0439"	  # CYRILLIC SMALL LETTER SHORT I
+0xCB    0x043A "&#x043A"	  # CYRILLIC SMALL LETTER KA
+0xCC    0x043B "&#x043B"	  # CYRILLIC SMALL LETTER EL
+0xCD    0x043C "&#x043C"	  # CYRILLIC SMALL LETTER EM
+0xCE    0x043D "&#x043D"	  # CYRILLIC SMALL LETTER EN
+0xCF    0x043E "&#x043E"	  # CYRILLIC SMALL LETTER O
+0xD0    0x043F "&#x043F"	  # CYRILLIC SMALL LETTER PE
+0xD1    0x044F "&#x044F"	  # CYRILLIC SMALL LETTER YA
+0xD2    0x0440 "&#x0440"	  # CYRILLIC SMALL LETTER ER
+0xD3    0x0441 "&#x0441"	  # CYRILLIC SMALL LETTER ES
+0xD4    0x0442 "&#x0442"	  # CYRILLIC SMALL LETTER TE
+0xD5    0x0443 "&#x0443"	  # CYRILLIC SMALL LETTER U
+0xD6    0x0436 "&#x0436"	  # CYRILLIC SMALL LETTER ZHE
+0xD7    0x0432 "&#x0432"	  # CYRILLIC SMALL LETTER VE
+0xD8    0x044C "&#x044C"	  # CYRILLIC SMALL LETTER SOFT SIGN
+0xD9    0x044B "&#x044B"	  # CYRILLIC SMALL LETTER YERU
+0xDA    0x0437 "&#x0437"	  # CYRILLIC SMALL LETTER ZE
+0xDB    0x0448 "&#x0448"	  # CYRILLIC SMALL LETTER SHA
+0xDC    0x044D "&#x044D"	  # CYRILLIC SMALL LETTER E
+0xDD    0x0449 "&#x0449"	  # CYRILLIC SMALL LETTER SHCHA
+0xDE    0x0447 "&#x0447"	  # CYRILLIC SMALL LETTER CHE
+0xDF    0x044A "&#x044A"	  # CYRILLIC SMALL LETTER HARD SIGN
+0xE0    0x042E "&#x042E"	  # CYRILLIC CAPITAL LETTER YU
+0xE1    0x0410 "&#x0410"	  # CYRILLIC CAPITAL LETTER A
+0xE2    0x0411 "&#x0411"	  # CYRILLIC CAPITAL LETTER BE
+0xE3    0x0426 "&#x0426"	  # CYRILLIC CAPITAL LETTER TSE
+0xE4    0x0414 "&#x0414"	  # CYRILLIC CAPITAL LETTER DE
+0xE5    0x0415 "&#x0415"	  # CYRILLIC CAPITAL LETTER IE
+0xE6    0x0424 "&#x0424"	  # CYRILLIC CAPITAL LETTER EF
+0xE7    0x0413 "&#x0413"	  # CYRILLIC CAPITAL LETTER GHE
+0xE8    0x0425 "&#x0425"	  # CYRILLIC CAPITAL LETTER HA
+0xE9    0x0418 "&#x0418"	  # CYRILLIC CAPITAL LETTER I
+0xEA    0x0419 "&#x0419"	  # CYRILLIC CAPITAL LETTER SHORT I
+0xEB    0x041A "&#x041A"	  # CYRILLIC CAPITAL LETTER KA
+0xEC    0x041B "&#x041B"	  # CYRILLIC CAPITAL LETTER EL
+0xED    0x041C "&#x041C"	  # CYRILLIC CAPITAL LETTER EM
+0xEE    0x041D "&#x041D"	  # CYRILLIC CAPITAL LETTER EN
+0xEF    0x041E "&#x041E"	  # CYRILLIC CAPITAL LETTER O
+0xF0    0x041F "&#x041F"	  # CYRILLIC CAPITAL LETTER PE
+0xF1    0x042F "&#x042F"	  # CYRILLIC CAPITAL LETTER YA
+0xF2    0x0420 "&#x0420"	  # CYRILLIC CAPITAL LETTER ER
+0xF3    0x0421 "&#x0421"	  # CYRILLIC CAPITAL LETTER ES
+0xF4    0x0422 "&#x0422"	  # CYRILLIC CAPITAL LETTER TE
+0xF5    0x0423 "&#x0423"	  # CYRILLIC CAPITAL LETTER U
+0xF6    0x0416 "&#x0416"	  # CYRILLIC CAPITAL LETTER ZHE
+0xF7    0x0412 "&#x0412"	  # CYRILLIC CAPITAL LETTER VE
+0xF8    0x042C "&#x042C"	  # CYRILLIC CAPITAL LETTER SOFT SIGN
+0xF9    0x042B "&#x042B"	  # CYRILLIC CAPITAL LETTER YERU
+0xFA    0x0417 "&#x0417"	  # CYRILLIC CAPITAL LETTER ZE
+0xFB    0x0428 "&#x0428"	  # CYRILLIC CAPITAL LETTER SHA
+0xFC    0x042D "&#x042D"	  # CYRILLIC CAPITAL LETTER E
+0xFD    0x0429 "&#x0429"	  # CYRILLIC CAPITAL LETTER SHCHA
+0xFE    0x0427 "&#x0427"	  # CYRILLIC CAPITAL LETTER CHE
+0xFF    0x042A "&#x042A"	  # CYRILLIC CAPITAL LETTER HARD SIGN
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/quickbrown.html b/test/quickbrown.html
new file mode 100644
index 00000000..e3207222
--- /dev/null
+++ b/test/quickbrown.html
@@ -0,0 +1,103 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Markus Kuhn's quick-brown-fox UTF-8 demo</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
+</HEAD>
+
+<BODY> 
+<pre>
+Sentences that contain all letters commonly used in a language
+--------------------------------------------------------------
+
+Markus Kuhn &lt;mkuhn@acm.org&gt; -- 1998-11-30
+
+This file was originally UTF-8 encoded; here the non-ASCII characters are written as numeric character references.
+
+
+German (de)
+-----------
+
+  Falsches &#xdc;ben von Xylophonmusik qu&#xe4;lt jeden gr&#xf6;&#xdf;eren Zwerg
+  (= Wrongful practicing of xylophone music tortures every larger dwarf)
+
+  Zw&#xf6;lf Boxk&#xe4;mpfer jagten Eva quer &#xfc;ber den Sylter Deich
+  (= Twelve boxing fighters hunted Eva across the dike of Sylt)
+
+  Heiz&#xf6;lr&#xfc;cksto&#xdf;abd&#xe4;mpfung
+  (= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one word)
+
+English (en)
+------------
+
+  The quick brown fox jumps over the lazy dog
+
+French (fr)
+-----------
+
+  Portez ce vieux whisky au juge blond qui fume sur son &#xee;le int&#xe9;rieure, &#xe0;
+  c&#xf4;t&#xe9; de l'alc&#xf4;ve ovo&#xef;de, o&#xf9; les b&#xfb;ches se consument dans l'&#xe2;tre, ce qui lui
+  permet de penser &#xe0; la c&#xe6;nogen&#xe8;se de l'&#xea;tre dont il est question dans la
+  cause ambigu&#xeb; entendue &#xe0; Mo&#xff;, dans un capharna&#xfc;m qui, pense-t-il, diminue
+  &#xe7;&#xe0; et l&#xe0; la qualit&#xe9; de son &#x153;uvre. 
+
+  l'&#xee;le exigu&#xeb;
+  O&#xf9; l'ob&#xe8;se jury m&#xfb;r
+  F&#xea;te l'ha&#xef; volap&#xfc;k,
+  &#xc2;ne ex a&#xe9;quo au whist,
+  &#xd4;tez ce v&#x153;u d&#xe9;&#xe7;u.
+
+  Le c&#x153;ur d&#xe9;&#xe7;u mais l'&#xe2;me plut&#xf4;t na&#xef;ve, Lou&#xff;s r&#xea;va de crapa&#xfc;ter en
+  cano&#xeb; au del&#xe0; des &#xee;les, pr&#xe8;s du m&#xe4;lstr&#xf6;m o&#xf9; br&#xfb;lent les nov&#xe6;.
+
+Irish Gaelic (ga)
+-----------------
+
+  D'fhuascail &#xcd;osa, &#xda;rmhac na h&#xd3;ighe Beannaithe, p&#xf3;r &#xc9;ava agus &#xc1;dhaimh
+
+Icelandic (is)
+--------------
+
+  K&#xe6;mi n&#xfd; &#xf6;xi h&#xe9;r ykist &#xfe;j&#xf3;fum n&#xfa; b&#xe6;&#xf0;i v&#xed;l og &#xe1;drepa
+
+  S&#xe6;v&#xf6;r gr&#xe9;t &#xe1;&#xf0;an &#xfe;v&#xed; &#xfa;lpan var &#xf3;n&#xfd;t
+  (some ASCII letters missing)
+
+Hebrew (iw)
+-----------
+
+  &#x5d3;&#x5d2; &#x5e1;&#x5e7;&#x5e8;&#x5df; &#x5e9;&#x5d8; &#x5d1;&#x5d9;&#x5dd; &#x5de;&#x5d0;&#x5d5;&#x5db;&#x5d6;&#x5d1; &#x5d5;&#x5dc;&#x5e4;&#x5ea;&#x5e2; &#x5de;&#x5e6;&#x5d0; &#x5dc;&#x5d5; &#x5d7;&#x5d1;&#x5e8;&#x5d4; &#x5d0;&#x5d9;&#x5da; &#x5d4;&#x5e7;&#x5dc;&#x5d9;&#x5d8;&#x5d4;?
+
+Polish (pl)
+-----------
+
+  Pchn&#x105;&#x107; w t&#x119; &#x142;&#xf3;d&#x17a; je&#x17c;a lub o&#x15b;m skrzy&#x144; fig
+
+Russian (ru)
+------------
+
+  &#x412; &#x447;&#x430;&#x449;&#x430;&#x445; &#x44e;&#x433;&#x430; &#x436;&#x438;&#x43b; &#x431;&#x44b; &#x446;&#x438;&#x442;&#x440;&#x443;&#x441;? &#x414;&#x430;, &#x43d;&#x43e; &#x444;&#x430;&#x43b;&#x44c;&#x448;&#x438;&#x432;&#x44b;&#x439; &#x44d;&#x43a;&#x437;&#x435;&#x43c;&#x43f;&#x43b;&#x44f;&#x440;!
+  (= Would a citrus live in the bushes of south? Yes, but only a fake one!)
+
+
+Please let me know if you find others! Special thanks to the people
+from all over the world who contributed these sentences.
+
+</pre>
+See also:
+<ul>
+<li><a href="http://www.columbia.edu/kermit/utf8.html"
+            >http://www.columbia.edu/kermit/utf8.html</a>
+<li><a href="http://www.kernel.org/"
+            >http://www.kernel.org/</a>
+<li><a href="http://www.unicode.org/"
+            >http://www.unicode.org/</a>
+<br>and
+<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt"
+            >http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt</a>
+<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt"
+            >http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt</a>
+</ul>
+</BODY>
+</HTML>
diff --git a/test/raw8bit.html b/test/raw8bit.html
new file mode 100644
index 00000000..f0d0eeb0
--- /dev/null
+++ b/test/raw8bit.html
@@ -0,0 +1,38 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE> Test of raw 8-bit symbols </TITLE>
+<!-- you may uncomment the next line
+and set the document's charset directly via META tag -->
+<!--META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"-->
+</HEAD>
+<BODY>
+<PRE>
+This is a test of the translation of 8-bit letters for different pairs of
+document charset (assumed charset) and display charset;
+both can be reached from the 'O'ptions menu.
+
+This page (obviously) corresponds to text/html mode
+but you may test text/plain just by pressing '\'
+Try also: '@' for ``raw mode'' and '=' for Information Page.
+
+
+    0 1 2 3 4 5 6 7 8 9 A B C D E F
+20    ! " # $ % & ' ( ) * + , - . /
+30  0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+40  @ A B C D E F G H I J K L M N O
+50  P Q R S T U V W X Y Z [ \ ] ^ _
+60  ` a b c d e f g h i j k l m n o
+70  p q r s t u v w x y z { | } ~ 
+80  €  ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ  Ž 
+90   ‘ ’ “ ” • – — ˜ ™ š › œ  ž Ÿ
+A0    ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
+B0  ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+C0  À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+D0  Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+E0  à á â ã ä å æ ç è é ê ë ì í î ï
+F0  ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/sgml.html b/test/sgml.html
new file mode 100644
index 00000000..94425345
--- /dev/null
+++ b/test/sgml.html
@@ -0,0 +1,1081 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of some Unicode symbols enclosed as SGML entity names</TITLE>
+</HEAD>
+<BODY>
+<PRE>
+
+    This table prepared from SGML.TXT available at ftp.unicode.org
+
+         ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
+         (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+# Author: John Cowan &lt;cowan@ccil.org&gt;
+# Date: 25 July 1997
+#
+# The following table maps SGML character entities from various
+# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
+# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
+# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
+# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
+#
+# The table has four tab-separated columns:
+#	Column 1: SGML character entity name
+#	Column 2: SGML public entity set
+#	Column 3: Unicode 2.0 character code
+#	Column 4: Unicode 2.0 character name (UPPER CASE)
+# Entries which don't have Unicode equivalents have "0x????"
+# in Column 3 and a lower case description (from the public entity
+# set DTD) in Column 4.  The mapping is not reversible, because many
+# distinctions are unified away in Unicode, particularly between
+# mathematical symbols.
+#
+# The table is sorted case-blind by SGML character entity name.
+#
+# The contents of this table are drawn from various sources, and
+# are in the public domain.
+#
+<!-- Changes:
++   {"euro",    0x20AC},  /* EURO SIGN                                     */
+    {"loz",     0x25CA},  /* LOZENGE                                       */
+! /*  {"loz",   0x2727},  WHITE FOUR POINTED STAR                          */
+!  /* Warning: Duplicated &loz; entry.  HTML 4.0 defines it as U+25CA. */
+-   {"b.delta", 0x03B3},  /* GREEK SMALL LETTER GAMMA                      */
++   {"b.delta", 0x03B4},  /* GREEK SMALL LETTER DELTA                      */
+
+-->
+
+This test illuminates the implementation of SGML character entities in your browser.
+The entities are sorted by their Unicode numbers.
+You should see a visible character if your display character set supports it,
+or else some substitution string picked up from src/chrtrans/def7_uni.tbl.
+If you see &amp;somename; then that name is not implemented yet;
+you may search for &amp; to find such names. (Sorry, the ISOgrk4 names, which contain
+a dot, seem to be invisible in most browsers.  Keep in mind that
+this table is much wider than the one in the HTML 4.0 draft.)
+							Leonid Pauzner.
+
+
+0x0021    &excl;	      ISOnum	# EXCLAMATION MARK
+0x0022    &quot;	      ISOnum	# QUOTATION MARK
+0x0023    &num;	      ISOnum	# NUMBER SIGN
+0x0024    &dollar;	      ISOnum	# DOLLAR SIGN
+0x0025    &percnt;	      ISOnum	# PERCENT SIGN
+0x0026    &amp;	      ISOnum	# AMPERSAND
+0x0028    &lpar;	      ISOnum	# LEFT PARENTHESIS
+0x0029    &rpar;	      ISOnum	# RIGHT PARENTHESIS
+0x002A    &ast;	      ISOnum	# ASTERISK
+0x002B    &plus;	      ISOnum	# PLUS SIGN
+0x002C    &comma;	      ISOnum	# COMMA
+0x002D    &hyphen;	      ISOnum	# HYPHEN-MINUS
+0x002E    &period;	      ISOnum	# FULL STOP
+0x002F    &sol;	      ISOnum	# SOLIDUS
+0x003A    &colon;	      ISOnum	# COLON
+0x003B    &semi;	      ISOnum	# SEMICOLON
+0x003C    &lt;	      ISOnum	# LESS-THAN SIGN
+0x003D    &equals;	      ISOnum	# EQUALS SIGN
+0x003E    &gt;	      ISOnum	# GREATER-THAN SIGN
+0x003F    &quest;	      ISOnum	# QUESTION MARK
+0x0040    &commat;	      ISOnum	# COMMERCIAL AT
+0x005B    &lsqb;	      ISOnum	# LEFT SQUARE BRACKET
+0x005C    &bsol;	      ISOnum	# REVERSE SOLIDUS
+0x005C    &sbsol;	      ISOamso	# REVERSE SOLIDUS
+0x005D    &rsqb;	      ISOnum	# RIGHT SQUARE BRACKET
+0x005F    &lowbar;	      ISOnum	# LOW LINE
+0x0060    &grave;	      ISOdia	# GRAVE ACCENT
+0x007B    &lcub;	      ISOnum	# LEFT CURLY BRACKET
+0x007C    &verbar;	      ISOnum	# VERTICAL LINE
+0x007D    &rcub;	      ISOnum	# RIGHT CURLY BRACKET
+0x00A0    &nbsp;	      ISOnum	# NO-BREAK SPACE
+0x00A1    &iexcl;	      ISOnum	# INVERTED EXCLAMATION MARK
+0x00A2    &cent;	      ISOnum	# CENT SIGN
+0x00A3    &pound;	      ISOnum	# POUND SIGN
+0x00A4    &curren;	      ISOnum	# CURRENCY SIGN
+0x00A5    &yen;	      ISOnum	# YEN SIGN
+0x00A6    &brvbar;	      ISOnum	# BROKEN BAR
+0x00A7    &sect;	      ISOnum	# SECTION SIGN
+0x00A8    &Dot;	      ISOtech	# DIAERESIS
+0x00A8    &die;	      ISOdia	# DIAERESIS
+0x00A8    &uml;	      ISOdia	# DIAERESIS
+0x00A9    &copy;	      ISOnum	# COPYRIGHT SIGN
+0x00AA    &ordf;	      ISOnum	# FEMININE ORDINAL INDICATOR
+0x00AB    &laquo;	      ISOnum	# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00AC    &not;	      ISOnum	# NOT SIGN
+0x00AD    &shy;	      ISOnum	# SOFT HYPHEN
+0x00AE    &reg;	      ISOnum	# REGISTERED SIGN
+0x00AF    &macr;	      ISOdia	# MACRON
+0x00B0    &deg;	      ISOnum	# DEGREE SIGN
+0x00B1    &plusmn;	      ISOnum	# PLUS-MINUS SIGN
+0x00B2    &sup2;	      ISOnum	# SUPERSCRIPT TWO
+0x00B3    &sup3;	      ISOnum	# SUPERSCRIPT THREE
+0x00B4    &acute;	      ISOdia	# ACUTE ACCENT
+0x00B5    &micro;	      ISOnum	# MICRO SIGN
+0x00B6    &para;	      ISOnum	# PILCROW SIGN
+0x00B7    &middot;	      ISOnum	# MIDDLE DOT
+0x00B8    &cedil;	      ISOdia	# CEDILLA
+0x00B9    &sup1;	      ISOnum	# SUPERSCRIPT ONE
+0x00BA    &ordm;	      ISOnum	# MASCULINE ORDINAL INDICATOR
+0x00BB    &raquo;	      ISOnum	# RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00BC    &frac14;	      ISOnum	# VULGAR FRACTION ONE QUARTER
+0x00BD    &frac12;	      ISOnum	# VULGAR FRACTION ONE HALF
+0x00BD    &half;	      ISOnum	# VULGAR FRACTION ONE HALF
+0x00BE    &frac34;	      ISOnum	# VULGAR FRACTION THREE QUARTERS
+0x00BF    &iquest;	      ISOnum	# INVERTED QUESTION MARK
+0x00C0    &Agrave;	      ISOlat1	# LATIN CAPITAL LETTER A WITH GRAVE
+0x00C1    &Aacute;	      ISOlat1	# LATIN CAPITAL LETTER A WITH ACUTE
+0x00C2    &Acirc;	      ISOlat1	# LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x00C3    &Atilde;	      ISOlat1	# LATIN CAPITAL LETTER A WITH TILDE
+0x00C4    &Auml;	      ISOlat1	# LATIN CAPITAL LETTER A WITH DIAERESIS
+0x00C5    &Aring;	      ISOlat1	# LATIN CAPITAL LETTER A WITH RING ABOVE
+0x00C6    &AElig;	      ISOlat1	# LATIN CAPITAL LETTER AE
+0x00C7    &Ccedil;	      ISOlat1	# LATIN CAPITAL LETTER C WITH CEDILLA
+0x00C8    &Egrave;	      ISOlat1	# LATIN CAPITAL LETTER E WITH GRAVE
+0x00C9    &Eacute;	      ISOlat1	# LATIN CAPITAL LETTER E WITH ACUTE
+0x00CA    &Ecirc;	      ISOlat1	# LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x00CB    &Euml;	      ISOlat1	# LATIN CAPITAL LETTER E WITH DIAERESIS
+0x00CC    &Igrave;	      ISOlat1	# LATIN CAPITAL LETTER I WITH GRAVE
+0x00CD    &Iacute;	      ISOlat1	# LATIN CAPITAL LETTER I WITH ACUTE
+0x00CE    &Icirc;	      ISOlat1	# LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x00CF    &Iuml;	      ISOlat1	# LATIN CAPITAL LETTER I WITH DIAERESIS
+0x00D0    &ETH;	      ISOlat1	# LATIN CAPITAL LETTER ETH
+0x00D1    &Ntilde;	      ISOlat1	# LATIN CAPITAL LETTER N WITH TILDE
+0x00D2    &Ograve;	      ISOlat1	# LATIN CAPITAL LETTER O WITH GRAVE
+0x00D3    &Oacute;	      ISOlat1	# LATIN CAPITAL LETTER O WITH ACUTE
+0x00D4    &Ocirc;	      ISOlat1	# LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0x00D5    &Otilde;	      ISOlat1	# LATIN CAPITAL LETTER O WITH TILDE
+0x00D6    &Ouml;	      ISOlat1	# LATIN CAPITAL LETTER O WITH DIAERESIS
+0x00D7    &times;	      ISOnum	# MULTIPLICATION SIGN
+0x00D8    &Oslash;	      ISOlat1	# LATIN CAPITAL LETTER O WITH STROKE
+0x00D9    &Ugrave;	      ISOlat1	# LATIN CAPITAL LETTER U WITH GRAVE
+0x00DA    &Uacute;	      ISOlat1	# LATIN CAPITAL LETTER U WITH ACUTE
+0x00DB    &Ucirc;	      ISOlat1	# LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0x00DC    &Uuml;	      ISOlat1	# LATIN CAPITAL LETTER U WITH DIAERESIS
+0x00DD    &Yacute;	      ISOlat1	# LATIN CAPITAL LETTER Y WITH ACUTE
+0x00DE    &THORN;	      ISOlat1	# LATIN CAPITAL LETTER THORN
+0x00DF    &szlig;	      ISOlat1	# LATIN SMALL LETTER SHARP S
+0x00E0    &agrave;	      ISOlat1	# LATIN SMALL LETTER A WITH GRAVE
+0x00E1    &aacute;	      ISOlat1	# LATIN SMALL LETTER A WITH ACUTE
+0x00E2    &acirc;	      ISOlat1	# LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x00E3    &atilde;	      ISOlat1	# LATIN SMALL LETTER A WITH TILDE
+0x00E4    &auml;	      ISOlat1	# LATIN SMALL LETTER A WITH DIAERESIS
+0x00E5    &aring;	      ISOlat1	# LATIN SMALL LETTER A WITH RING ABOVE
+0x00E6    &aelig;	      ISOlat1	# LATIN SMALL LETTER AE
+0x00E7    &ccedil;	      ISOlat1	# LATIN SMALL LETTER C WITH CEDILLA
+0x00E8    &egrave;	      ISOlat1	# LATIN SMALL LETTER E WITH GRAVE
+0x00E9    &eacute;	      ISOlat1	# LATIN SMALL LETTER E WITH ACUTE
+0x00EA    &ecirc;	      ISOlat1	# LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x00EB    &euml;	      ISOlat1	# LATIN SMALL LETTER E WITH DIAERESIS
+0x00EC    &igrave;	      ISOlat1	# LATIN SMALL LETTER I WITH GRAVE
+0x00ED    &iacute;	      ISOlat1	# LATIN SMALL LETTER I WITH ACUTE
+0x00EE    &icirc;	      ISOlat1	# LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x00EF    &iuml;	      ISOlat1	# LATIN SMALL LETTER I WITH DIAERESIS
+0x00F0    &eth;	      ISOlat1	# LATIN SMALL LETTER ETH
+0x00F1    &ntilde;	      ISOlat1	# LATIN SMALL LETTER N WITH TILDE
+0x00F2    &ograve;	      ISOlat1	# LATIN SMALL LETTER O WITH GRAVE
+0x00F3    &oacute;	      ISOlat1	# LATIN SMALL LETTER O WITH ACUTE
+0x00F4    &ocirc;	      ISOlat1	# LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x00F5    &otilde;	      ISOlat1	# LATIN SMALL LETTER O WITH TILDE
+0x00F6    &ouml;	      ISOlat1	# LATIN SMALL LETTER O WITH DIAERESIS
+0x00F7    &divide;	      ISOnum	# DIVISION SIGN
+0x00F8    &oslash;	      ISOlat1	# LATIN SMALL LETTER O WITH STROKE
+0x00F9    &ugrave;	      ISOlat1	# LATIN SMALL LETTER U WITH GRAVE
+0x00FA    &uacute;	      ISOlat1	# LATIN SMALL LETTER U WITH ACUTE
+0x00FB    &ucirc;	      ISOlat1	# LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x00FC    &uuml;	      ISOlat1	# LATIN SMALL LETTER U WITH DIAERESIS
+0x00FD    &yacute;	      ISOlat1	# LATIN SMALL LETTER Y WITH ACUTE
+0x00FE    &thorn;	      ISOlat1	# LATIN SMALL LETTER THORN
+0x00FF    &yuml;	      ISOlat1	# LATIN SMALL LETTER Y WITH DIAERESIS
+0x0100    &Amacr;	      ISOlat2	# LATIN CAPITAL LETTER A WITH MACRON
+0x0101    &amacr;	      ISOlat2	# LATIN SMALL LETTER A WITH MACRON
+0x0102    &Abreve;	      ISOlat2	# LATIN CAPITAL LETTER A WITH BREVE
+0x0103    &abreve;	      ISOlat2	# LATIN SMALL LETTER A WITH BREVE
+0x0104    &Aogon;	      ISOlat2	# LATIN CAPITAL LETTER A WITH OGONEK
+0x0105    &aogon;	      ISOlat2	# LATIN SMALL LETTER A WITH OGONEK
+0x0106    &Cacute;	      ISOlat2	# LATIN CAPITAL LETTER C WITH ACUTE
+0x0107    &cacute;	      ISOlat2	# LATIN SMALL LETTER C WITH ACUTE
+0x0108    &Ccirc;	      ISOlat2	# LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0x0109    &ccirc;	      ISOlat2	# LATIN SMALL LETTER C WITH CIRCUMFLEX
+0x010A    &Cdot;	      ISOlat2	# LATIN CAPITAL LETTER C WITH DOT ABOVE
+0x010B    &cdot;	      ISOlat2	# LATIN SMALL LETTER C WITH DOT ABOVE
+0x010C    &Ccaron;	      ISOlat2	# LATIN CAPITAL LETTER C WITH CARON
+0x010D    &ccaron;	      ISOlat2	# LATIN SMALL LETTER C WITH CARON
+0x010E    &Dcaron;	      ISOlat2	# LATIN CAPITAL LETTER D WITH CARON
+0x010F    &dcaron;	      ISOlat2	# LATIN SMALL LETTER D WITH CARON
+0x0110    &Dstrok;	      ISOlat2	# LATIN CAPITAL LETTER D WITH STROKE
+0x0111    &dstrok;	      ISOlat2	# LATIN SMALL LETTER D WITH STROKE
+0x0112    &Emacr;	      ISOlat2	# LATIN CAPITAL LETTER E WITH MACRON
+0x0113    &emacr;	      ISOlat2	# LATIN SMALL LETTER E WITH MACRON
+0x0116    &Edot;	      ISOlat2	# LATIN CAPITAL LETTER E WITH DOT ABOVE
+0x0117    &edot;	      ISOlat2	# LATIN SMALL LETTER E WITH DOT ABOVE
+0x0118    &Eogon;	      ISOlat2	# LATIN CAPITAL LETTER E WITH OGONEK
+0x0119    &eogon;	      ISOlat2	# LATIN SMALL LETTER E WITH OGONEK
+0x011A    &Ecaron;	      ISOlat2	# LATIN CAPITAL LETTER E WITH CARON
+0x011B    &ecaron;	      ISOlat2	# LATIN SMALL LETTER E WITH CARON
+0x011C    &Gcirc;	      ISOlat2	# LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0x011D    &gcirc;	      ISOlat2	# LATIN SMALL LETTER G WITH CIRCUMFLEX
+0x011E    &Gbreve;	      ISOlat2	# LATIN CAPITAL LETTER G WITH BREVE
+0x011F    &gbreve;	      ISOlat2	# LATIN SMALL LETTER G WITH BREVE
+0x0120    &Gdot;	      ISOlat2	# LATIN CAPITAL LETTER G WITH DOT ABOVE
+0x0121    &gdot;	      ISOlat2	# LATIN SMALL LETTER G WITH DOT ABOVE
+0x0122    &Gcedil;	      ISOlat2	# LATIN CAPITAL LETTER G WITH CEDILLA
+0x0123    &gcedil;	      ISOlat2	# LATIN SMALL LETTER G WITH CEDILLA
+0x0124    &Hcirc;	      ISOlat2	# LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0x0125    &hcirc;	      ISOlat2	# LATIN SMALL LETTER H WITH CIRCUMFLEX
+0x0126    &Hstrok;	      ISOlat2	# LATIN CAPITAL LETTER H WITH STROKE
+0x0127    &hstrok;	      ISOlat2	# LATIN SMALL LETTER H WITH STROKE
+0x0128    &Itilde;	      ISOlat2	# LATIN CAPITAL LETTER I WITH TILDE
+0x0129    &itilde;	      ISOlat2	# LATIN SMALL LETTER I WITH TILDE
+0x012A    &Imacr;	      ISOlat2	# LATIN CAPITAL LETTER I WITH MACRON
+0x012B    &imacr;	      ISOlat2	# LATIN SMALL LETTER I WITH MACRON
+0x012E    &Iogon;	      ISOlat2	# LATIN CAPITAL LETTER I WITH OGONEK
+0x012F    &iogon;	      ISOlat2	# LATIN SMALL LETTER I WITH OGONEK
+0x0130    &Idot;	      ISOlat2	# LATIN CAPITAL LETTER I WITH DOT ABOVE
+0x0131    &inodot;	      ISOamso	# LATIN SMALL LETTER DOTLESS I
+0x0131    &inodot;	      ISOlat2	# LATIN SMALL LETTER DOTLESS I
+0x0132    &IJlig;	      ISOlat2	# LATIN CAPITAL LIGATURE IJ
+0x0133    &ijlig;	      ISOlat2	# LATIN SMALL LIGATURE IJ
+0x0134    &Jcirc;	      ISOlat2	# LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0x0135    &jcirc;	      ISOlat2	# LATIN SMALL LETTER J WITH CIRCUMFLEX
+0x0136    &Kcedil;	      ISOlat2	# LATIN CAPITAL LETTER K WITH CEDILLA
+0x0137    &kcedil;	      ISOlat2	# LATIN SMALL LETTER K WITH CEDILLA
+0x0138    &kgreen;	      ISOlat2	# LATIN SMALL LETTER KRA
+0x0139    &Lacute;	      ISOlat2	# LATIN CAPITAL LETTER L WITH ACUTE
+0x013A    &lacute;	      ISOlat2	# LATIN SMALL LETTER L WITH ACUTE
+0x013B    &Lcedil;	      ISOlat2	# LATIN CAPITAL LETTER L WITH CEDILLA
+0x013C    &lcedil;	      ISOlat2	# LATIN SMALL LETTER L WITH CEDILLA
+0x013D    &Lcaron;	      ISOlat2	# LATIN CAPITAL LETTER L WITH CARON
+0x013E    &lcaron;	      ISOlat2	# LATIN SMALL LETTER L WITH CARON
+0x013F    &Lmidot;	      ISOlat2	# LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0x0140    &lmidot;	      ISOlat2	# LATIN SMALL LETTER L WITH MIDDLE DOT
+0x0141    &Lstrok;	      ISOlat2	# LATIN CAPITAL LETTER L WITH STROKE
+0x0142    &lstrok;	      ISOlat2	# LATIN SMALL LETTER L WITH STROKE
+0x0143    &Nacute;	      ISOlat2	# LATIN CAPITAL LETTER N WITH ACUTE
+0x0144    &nacute;	      ISOlat2	# LATIN SMALL LETTER N WITH ACUTE
+0x0145    &Ncedil;	      ISOlat2	# LATIN CAPITAL LETTER N WITH CEDILLA
+0x0146    &ncedil;	      ISOlat2	# LATIN SMALL LETTER N WITH CEDILLA
+0x0147    &Ncaron;	      ISOlat2	# LATIN CAPITAL LETTER N WITH CARON
+0x0148    &ncaron;	      ISOlat2	# LATIN SMALL LETTER N WITH CARON
+0x0149    &napos;	      ISOlat2	# LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0x014A    &ENG;	      ISOlat2	# LATIN CAPITAL LETTER ENG
+0x014B    &eng;	      ISOlat2	# LATIN SMALL LETTER ENG
+0x014C    &Omacr;	      ISOlat2	# LATIN CAPITAL LETTER O WITH MACRON
+0x014D    &omacr;	      ISOlat2	# LATIN SMALL LETTER O WITH MACRON
+0x0150    &Odblac;	      ISOlat2	# LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0x0151    &odblac;	      ISOlat2	# LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0x0152    &OElig;	      ISOlat2	# LATIN CAPITAL LIGATURE OE
+0x0153    &oelig;	      ISOlat2	# LATIN SMALL LIGATURE OE
+0x0154    &Racute;	      ISOlat2	# LATIN CAPITAL LETTER R WITH ACUTE
+0x0155    &racute;	      ISOlat2	# LATIN SMALL LETTER R WITH ACUTE
+0x0156    &Rcedil;	      ISOlat2	# LATIN CAPITAL LETTER R WITH CEDILLA
+0x0157    &rcedil;	      ISOlat2	# LATIN SMALL LETTER R WITH CEDILLA
+0x0158    &Rcaron;	      ISOlat2	# LATIN CAPITAL LETTER R WITH CARON
+0x0159    &rcaron;	      ISOlat2	# LATIN SMALL LETTER R WITH CARON
+0x015A    &Sacute;	      ISOlat2	# LATIN CAPITAL LETTER S WITH ACUTE
+0x015B    &sacute;	      ISOlat2	# LATIN SMALL LETTER S WITH ACUTE
+0x015C    &Scirc;	      ISOlat2	# LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0x015D    &scirc;	      ISOlat2	# LATIN SMALL LETTER S WITH CIRCUMFLEX
+0x015E    &Scedil;	      ISOlat2	# LATIN CAPITAL LETTER S WITH CEDILLA
+0x015F    &scedil;	      ISOlat2	# LATIN SMALL LETTER S WITH CEDILLA
+0x0160    &Scaron;	      ISOlat2	# LATIN CAPITAL LETTER S WITH CARON
+0x0161    &scaron;	      ISOlat2	# LATIN SMALL LETTER S WITH CARON
+0x0162    &Tcedil;	      ISOlat2	# LATIN CAPITAL LETTER T WITH CEDILLA
+0x0163    &tcedil;	      ISOlat2	# LATIN SMALL LETTER T WITH CEDILLA
+0x0164    &Tcaron;	      ISOlat2	# LATIN CAPITAL LETTER T WITH CARON
+0x0165    &tcaron;	      ISOlat2	# LATIN SMALL LETTER T WITH CARON
+0x0166    &Tstrok;	      ISOlat2	# LATIN CAPITAL LETTER T WITH STROKE
+0x0167    &tstrok;	      ISOlat2	# LATIN SMALL LETTER T WITH STROKE
+0x0168    &Utilde;	      ISOlat2	# LATIN CAPITAL LETTER U WITH TILDE
+0x0169    &utilde;	      ISOlat2	# LATIN SMALL LETTER U WITH TILDE
+0x016A    &Umacr;	      ISOlat2	# LATIN CAPITAL LETTER U WITH MACRON
+0x016B    &umacr;	      ISOlat2	# LATIN SMALL LETTER U WITH MACRON
+0x016C    &Ubreve;	      ISOlat2	# LATIN CAPITAL LETTER U WITH BREVE
+0x016D    &ubreve;	      ISOlat2	# LATIN SMALL LETTER U WITH BREVE
+0x016E    &Uring;	      ISOlat2	# LATIN CAPITAL LETTER U WITH RING ABOVE
+0x016F    &uring;	      ISOlat2	# LATIN SMALL LETTER U WITH RING ABOVE
+0x0170    &Udblac;	      ISOlat2	# LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0x0171    &udblac;	      ISOlat2	# LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0x0172    &Uogon;	      ISOlat2	# LATIN CAPITAL LETTER U WITH OGONEK
+0x0173    &uogon;	      ISOlat2	# LATIN SMALL LETTER U WITH OGONEK
+0x0174    &Wcirc;	      ISOlat2	# LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0x0175    &wcirc;	      ISOlat2	# LATIN SMALL LETTER W WITH CIRCUMFLEX
+0x0176    &Ycirc;	      ISOlat2	# LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0x0177    &ycirc;	      ISOlat2	# LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0x0178    &Yuml;	      ISOlat2	# LATIN CAPITAL LETTER Y WITH DIAERESIS
+0x0179    &Zacute;	      ISOlat2	# LATIN CAPITAL LETTER Z WITH ACUTE
+0x017A    &zacute;	      ISOlat2	# LATIN SMALL LETTER Z WITH ACUTE
+0x017B    &Zdot;	      ISOlat2	# LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0x017C    &zdot;	      ISOlat2	# LATIN SMALL LETTER Z WITH DOT ABOVE
+0x017D    &Zcaron;	      ISOlat2	# LATIN CAPITAL LETTER Z WITH CARON
+0x017E    &zcaron;	      ISOlat2	# LATIN SMALL LETTER Z WITH CARON
+0x0192    &fnof;	      ISOtech	# LATIN SMALL LETTER F WITH HOOK
+0x01F5    &gacute;	      ISOlat2	# LATIN SMALL LETTER G WITH ACUTE
+0x02BC    &apos;	      ISOnum	# MODIFIER LETTER APOSTROPHE
+0x02C6    &circ;	      ISOdia	# MODIFIER LETTER CIRCUMFLEX ACCENT
+0x02C7    &caron;	      ISOdia	# CARON
+0x02D8    &breve;	      ISOdia	# BREVE
+0x02D9    &dot;	      ISOdia	# DOT ABOVE
+0x02DA    &ring;	      ISOdia	# RING ABOVE
+0x02DB    &ogon;	      ISOdia	# OGONEK
+0x02DC    &tilde;	      ISOdia	# SMALL TILDE
+0x02DD    &dblac;	      ISOdia	# DOUBLE ACUTE ACCENT
+0x0386    &Aacgr;	      ISOgrk2	# GREEK CAPITAL LETTER ALPHA WITH TONOS
+0x0388    &Eacgr;	      ISOgrk2	# GREEK CAPITAL LETTER EPSILON WITH TONOS
+0x0389    &EEacgr;	      ISOgrk2	# GREEK CAPITAL LETTER ETA WITH TONOS
+0x038A    &Iacgr;	      ISOgrk2	# GREEK CAPITAL LETTER IOTA WITH TONOS
+0x038C    &Oacgr;	      ISOgrk2	# GREEK CAPITAL LETTER OMICRON WITH TONOS
+0x038E    &Uacgr;	      ISOgrk2	# GREEK CAPITAL LETTER UPSILON WITH TONOS
+0x038F    &OHacgr;	      ISOgrk2	# GREEK CAPITAL LETTER OMEGA WITH TONOS
+0x0390    &idiagr;	      ISOgrk2	# GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0x0391    &Agr;	      ISOgrk1	# GREEK CAPITAL LETTER ALPHA
+0x0391    &Alpha;	      HTMLsymbol	# GREEK CAPITAL LETTER ALPHA
+0x0392    &Beta;	      HTMLsymbol	# GREEK CAPITAL LETTER BETA
+0x0392    &Bgr;	      ISOgrk1	# GREEK CAPITAL LETTER BETA
+0x0393    &Gamma;	      ISOgrk3	# GREEK CAPITAL LETTER GAMMA
+0x0393    &Ggr;	      ISOgrk1	# GREEK CAPITAL LETTER GAMMA
+0x0393    &b.Gamma;	      ISOgrk4	# GREEK CAPITAL LETTER GAMMA
+0x0394    &Delta;	      ISOgrk3	# GREEK CAPITAL LETTER DELTA
+0x0394    &Dgr;	      ISOgrk1	# GREEK CAPITAL LETTER DELTA
+0x0394    &b.Delta;	      ISOgrk4	# GREEK CAPITAL LETTER DELTA
+0x0395    &Egr;	      ISOgrk1	# GREEK CAPITAL LETTER EPSILON
+0x0395    &Epsilon;	      HTMLsymbol	# GREEK CAPITAL LETTER EPSILON
+0x0396    &Zeta;	      HTMLsymbol	# GREEK CAPITAL LETTER ZETA
+0x0396    &Zgr;	      ISOgrk1	# GREEK CAPITAL LETTER ZETA
+0x0397    &EEgr;	      ISOgrk1	# GREEK CAPITAL LETTER ETA
+0x0397    &Eta;	      HTMLsymbol	# GREEK CAPITAL LETTER ETA
+0x0398    &THgr;	      ISOgrk1	# GREEK CAPITAL LETTER THETA
+0x0398    &Theta;	      ISOgrk3	# GREEK CAPITAL LETTER THETA
+0x0398    &b.Theta;	      ISOgrk4	# GREEK CAPITAL LETTER THETA
+0x0399    &Igr;	      ISOgrk1	# GREEK CAPITAL LETTER IOTA
+0x0399    &Iota;	      HTMLsymbol	# GREEK CAPITAL LETTER IOTA
+0x039A    &Kappa;	      HTMLsymbol	# GREEK CAPITAL LETTER KAPPA
+0x039A    &Kgr;	      ISOgrk1	# GREEK CAPITAL LETTER KAPPA
+0x039B    &Lambda;	      ISOgrk3	# GREEK CAPITAL LETTER LAMDA
+0x039B    &Lgr;	      ISOgrk1	# GREEK CAPITAL LETTER LAMDA
+0x039B    &b.Lambda;	      ISOgrk4	# GREEK CAPITAL LETTER LAMDA
+0x039C    &Mgr;	      ISOgrk1	# GREEK CAPITAL LETTER MU
+0x039C    &Mu;	      HTMLsymbol	# GREEK CAPITAL LETTER MU
+0x039D    &Ngr;	      ISOgrk1	# GREEK CAPITAL LETTER NU
+0x039D    &Nu;	      HTMLsymbol	# GREEK CAPITAL LETTER NU
+0x039E    &Xgr;	      ISOgrk1	# GREEK CAPITAL LETTER XI
+0x039E    &Xi;	      ISOgrk3	# GREEK CAPITAL LETTER XI
+0x039E    &b.Xi;	      ISOgrk4	# GREEK CAPITAL LETTER XI
+0x039F    &Ogr;	      ISOgrk1	# GREEK CAPITAL LETTER OMICRON
+0x039F    &Omicron;	      HTMLsymbol	# GREEK CAPITAL LETTER OMICRON
+0x03A0    &Pgr;	      ISOgrk1	# GREEK CAPITAL LETTER PI
+0x03A0    &Pi;	      ISOgrk3	# GREEK CAPITAL LETTER PI
+0x03A0    &b.Pi;	      ISOgrk4	# GREEK CAPITAL LETTER PI
+0x03A1    &Rgr;	      ISOgrk1	# GREEK CAPITAL LETTER RHO
+0x03A1    &Rho;	      HTMLsymbol	# GREEK CAPITAL LETTER RHO
+0x03A3    &Sgr;	      ISOgrk1	# GREEK CAPITAL LETTER SIGMA
+0x03A3    &Sigma;	      ISOgrk3	# GREEK CAPITAL LETTER SIGMA
+0x03A3    &b.Sigma;	      ISOgrk4	# GREEK CAPITAL LETTER SIGMA
+0x03A4    &Tau;	      HTMLsymbol	# GREEK CAPITAL LETTER TAU
+0x03A4    &Tgr;	      ISOgrk1	# GREEK CAPITAL LETTER TAU
+0x03A5    &Ugr;	      ISOgrk1	# GREEK CAPITAL LETTER UPSILON
+0x03A5    &Upsi;	      ISOgrk3	# GREEK CAPITAL LETTER UPSILON
+0x03A5    &Upsilon;	      HTMLsymbol	# GREEK CAPITAL LETTER UPSILON
+0x03A5    &b.Upsi;	      ISOgrk4	# GREEK CAPITAL LETTER UPSILON
+0x03A6    &PHgr;	      ISOgrk1	# GREEK CAPITAL LETTER PHI
+0x03A6    &Phi;	      ISOgrk3	# GREEK CAPITAL LETTER PHI
+0x03A6    &b.Phi;	      ISOgrk4	# GREEK CAPITAL LETTER PHI
+0x03A7    &Chi;	      HTMLsymbol	# GREEK CAPITAL LETTER CHI
+0x03A7    &KHgr;	      ISOgrk1	# GREEK CAPITAL LETTER CHI
+0x03A8    &PSgr;	      ISOgrk1	# GREEK CAPITAL LETTER PSI
+0x03A8    &Psi;	      ISOgrk3	# GREEK CAPITAL LETTER PSI
+0x03A8    &b.Psi;	      ISOgrk4	# GREEK CAPITAL LETTER PSI
+0x03A9    &OHgr;	      ISOgrk1	# GREEK CAPITAL LETTER OMEGA
+0x03A9    &Omega;	      ISOgrk3	# GREEK CAPITAL LETTER OMEGA
+0x03A9    &b.Omega;	      ISOgrk4	# GREEK CAPITAL LETTER OMEGA
+0x03AA    &Idigr;	      ISOgrk2	# GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+0x03AB    &Udigr;	      ISOgrk2	# GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+0x03AC    &aacgr;	      ISOgrk2	# GREEK SMALL LETTER ALPHA WITH TONOS
+0x03AD    &eacgr;	      ISOgrk2	# GREEK SMALL LETTER EPSILON WITH TONOS
+0x03AE    &eeacgr;	      ISOgrk2	# GREEK SMALL LETTER ETA WITH TONOS
+0x03AF    &iacgr;	      ISOgrk2	# GREEK SMALL LETTER IOTA WITH TONOS
+0x03B0    &udiagr;	      ISOgrk2	# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+0x03B1    &agr;	      ISOgrk1	# GREEK SMALL LETTER ALPHA
+0x03B1    &alpha;	      ISOgrk3	# GREEK SMALL LETTER ALPHA
+0x03B1    &b.alpha;	      ISOgrk4	# GREEK SMALL LETTER ALPHA
+0x03B2    &b.beta;	      ISOgrk4	# GREEK SMALL LETTER BETA
+0x03B2    &beta;	      ISOgrk3	# GREEK SMALL LETTER BETA
+0x03B2    &bgr;	      ISOgrk1	# GREEK SMALL LETTER BETA
+0x03B3    &b.gamma;	      ISOgrk4	# GREEK SMALL LETTER GAMMA
+0x03B3    &gamma;	      ISOgrk3	# GREEK SMALL LETTER GAMMA
+0x03B3    &ggr;	      ISOgrk1	# GREEK SMALL LETTER GAMMA
+0x03B4    &b.delta;	      ISOgrk4   # GREEK SMALL LETTER DELTA
+0x03B4    &delta;	      ISOgrk3	# GREEK SMALL LETTER DELTA
+0x03B4    &dgr;	      ISOgrk1	# GREEK SMALL LETTER DELTA
+0x03B5    &b.epsi;	      ISOgrk4	# GREEK SMALL LETTER EPSILON
+0x03B5    &b.epsis;	      ISOgrk4	# GREEK SMALL LETTER EPSILON
+0x03B5    &b.epsiv;	      ISOgrk4	# GREEK SMALL LETTER EPSILON
+0x03B5    &egr;	      ISOgrk1	# GREEK SMALL LETTER EPSILON
+0x03B5    &epsi;	      ISOgrk3	# GREEK SMALL LETTER EPSILON
+0x03B5    &epsilon;	      HTMLsymbol	# GREEK SMALL LETTER EPSILON
+0x03B6    &b.zeta;	      ISOgrk4	# GREEK SMALL LETTER ZETA
+0x03B6    &zeta;	      ISOgrk3	# GREEK SMALL LETTER ZETA
+0x03B6    &zgr;	      ISOgrk1	# GREEK SMALL LETTER ZETA
+0x03B7    &b.eta;	      ISOgrk4	# GREEK SMALL LETTER ETA
+0x03B7    &eegr;	      ISOgrk1	# GREEK SMALL LETTER ETA
+0x03B7    &eta;	      ISOgrk3	# GREEK SMALL LETTER ETA
+0x03B8    &b.thetas;	      ISOgrk4	# GREEK SMALL LETTER THETA
+0x03B8    &theta;	      HTMLsymbol	# GREEK SMALL LETTER THETA
+0x03B8    &thetas;	      ISOgrk3	# GREEK SMALL LETTER THETA
+0x03B8    &thgr;	      ISOgrk1	# GREEK SMALL LETTER THETA
+0x03B9    &b.iota;	      ISOgrk4	# GREEK SMALL LETTER IOTA
+0x03B9    &igr;	      ISOgrk1	# GREEK SMALL LETTER IOTA
+0x03B9    &iota;	      ISOgrk3	# GREEK SMALL LETTER IOTA
+0x03BA    &b.kappa;	      ISOgrk4	# GREEK SMALL LETTER KAPPA
+0x03BA    &kappa;	      ISOgrk3	# GREEK SMALL LETTER KAPPA
+0x03BA    &kgr;	      ISOgrk1	# GREEK SMALL LETTER KAPPA
+0x03BB    &b.lambda;	      ISOgrk4	# GREEK SMALL LETTER LAMDA
+0x03BB    &lambda;	      ISOgrk3	# GREEK SMALL LETTER LAMDA
+0x03BB    &lgr;	      ISOgrk1	# GREEK SMALL LETTER LAMDA
+0x03BC    &b.mu;	      ISOgrk4	# GREEK SMALL LETTER MU
+0x03BC    &mgr;	      ISOgrk1	# GREEK SMALL LETTER MU
+0x03BC    &mu;	      ISOgrk3	# GREEK SMALL LETTER MU
+0x03BD    &b.nu;	      ISOgrk4	# GREEK SMALL LETTER NU
+0x03BD    &ngr;	      ISOgrk1	# GREEK SMALL LETTER NU
+0x03BD    &nu;	      ISOgrk3	# GREEK SMALL LETTER NU
+0x03BE    &b.xi;	      ISOgrk4	# GREEK SMALL LETTER XI
+0x03BE    &xgr;	      ISOgrk1	# GREEK SMALL LETTER XI
+0x03BE    &xi;	      ISOgrk3	# GREEK SMALL LETTER XI
+0x03BF    &ogr;	      ISOgrk1	# GREEK SMALL LETTER OMICRON
+0x03BF    &omicron;	      HTMLsymbol	# GREEK SMALL LETTER OMICRON
+0x03C0    &b.pi;	      ISOgrk4	# GREEK SMALL LETTER PI
+0x03C0    &pgr;	      ISOgrk1	# GREEK SMALL LETTER PI
+0x03C0    &pi;	      ISOgrk3	# GREEK SMALL LETTER PI
+0x03C1    &b.rho;	      ISOgrk4	# GREEK SMALL LETTER RHO
+0x03C1    &rgr;	      ISOgrk1	# GREEK SMALL LETTER RHO
+0x03C1    &rho;	      ISOgrk3	# GREEK SMALL LETTER RHO
+0x03C2    &b.sigmav;	      ISOgrk4	# GREEK SMALL LETTER FINAL SIGMA
+0x03C2    &sfgr;	      ISOgrk1	# GREEK SMALL LETTER FINAL SIGMA
+0x03C2    &sigmaf;	      HTMLsymbol	# GREEK SMALL LETTER FINAL SIGMA
+0x03C2    &sigmav;	      ISOgrk3	# GREEK SMALL LETTER FINAL SIGMA
+0x03C3    &b.sigma;	      ISOgrk4	# GREEK SMALL LETTER SIGMA
+0x03C3    &sgr;	      ISOgrk1	# GREEK SMALL LETTER SIGMA
+0x03C3    &sigma;	      ISOgrk3	# GREEK SMALL LETTER SIGMA
+0x03C4    &b.tau;	      ISOgrk4	# GREEK SMALL LETTER TAU
+0x03C4    &tau;	      ISOgrk3	# GREEK SMALL LETTER TAU
+0x03C4    &tgr;	      ISOgrk1	# GREEK SMALL LETTER TAU
+0x03C5    &b.upsi;	      ISOgrk4	# GREEK SMALL LETTER UPSILON
+0x03C5    &ugr;	      ISOgrk1	# GREEK SMALL LETTER UPSILON
+0x03C5    &upsi;	      ISOgrk3	# GREEK SMALL LETTER UPSILON
+0x03C5    &upsilon;	      HTMLsymbol	# GREEK SMALL LETTER UPSILON
+0x03C6    &b.phis;	      ISOgrk4	# GREEK SMALL LETTER PHI
+0x03C6    &phgr;	      ISOgrk1	# GREEK SMALL LETTER PHI
+0x03C6    &phi;	      HTMLsymbol	# GREEK SMALL LETTER PHI
+0x03C6    &phis;	      ISOgrk3	# GREEK SMALL LETTER PHI
+0x03C7    &b.chi;	      ISOgrk4	# GREEK SMALL LETTER CHI
+0x03C7    &chi;	      ISOgrk3	# GREEK SMALL LETTER CHI
+0x03C7    &khgr;	      ISOgrk1	# GREEK SMALL LETTER CHI
+0x03C8    &b.psi;	      ISOgrk4	# GREEK SMALL LETTER PSI
+0x03C8    &psgr;	      ISOgrk1	# GREEK SMALL LETTER PSI
+0x03C8    &psi;	      ISOgrk3	# GREEK SMALL LETTER PSI
+0x03C9    &ohgr;	      ISOgrk1	# GREEK SMALL LETTER OMEGA
+0x03C9    &omega;	      ISOgrk3	# GREEK SMALL LETTER OMEGA
+0x03CA    &idigr;	      ISOgrk2	# GREEK SMALL LETTER IOTA WITH DIALYTIKA
+0x03CB    &udigr;	      ISOgrk2	# GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+0x03CC    &oacgr;	      ISOgrk2	# GREEK SMALL LETTER OMICRON WITH TONOS
+0x03CD    &uacgr;	      ISOgrk2	# GREEK SMALL LETTER UPSILON WITH TONOS
+0x03CE    &b.omega;	      ISOgrk4	# GREEK SMALL LETTER OMEGA WITH TONOS
+0x03CE    &ohacgr;	      ISOgrk2	# GREEK SMALL LETTER OMEGA WITH TONOS
+0x03D1    &b.thetav;	      ISOgrk4	# GREEK THETA SYMBOL
+0x03D1    &thetasym;	      HTMLsymbol	# GREEK THETA SYMBOL
+0x03D1    &thetav;	      ISOgrk3	# GREEK THETA SYMBOL
+0x03D2    &upsih;	      HTMLsymbol	# GREEK UPSILON WITH HOOK SYMBOL
+0x03D5    &b.phiv;	      ISOgrk4	# GREEK PHI SYMBOL
+0x03D5    &phiv;	      ISOgrk3	# GREEK PHI SYMBOL
+0x03D6    &b.piv;	      ISOgrk4	# GREEK PI SYMBOL
+0x03D6    &piv;	      ISOgrk3	# GREEK PI SYMBOL
+0x03DC    &b.gammad;	      ISOgrk4	# GREEK LETTER DIGAMMA
+0x03DC    &gammad;	      ISOgrk3	# GREEK LETTER DIGAMMA
+0x03F0    &b.kappav;	      ISOgrk4	# GREEK KAPPA SYMBOL
+0x03F0    &kappav;	      ISOgrk3	# GREEK KAPPA SYMBOL
+0x03F1    &b.rhov;	      ISOgrk4	# GREEK RHO SYMBOL
+0x03F1    &rhov;	      ISOgrk3	# GREEK RHO SYMBOL
+0x0401    &IOcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER IO
+0x0402    &DJcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER DJE
+0x0403    &GJcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER GJE
+0x0404    &Jukcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0x0405    &DScy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER DZE
+0x0406    &Iukcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0407    &YIcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER YI
+0x0408    &Jsercy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER JE
+0x0409    &LJcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER LJE
+0x040A    &NJcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER NJE
+0x040B    &TSHcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER TSHE
+0x040C    &KJcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER KJE
+0x040E    &Ubrcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER SHORT U
+0x040F    &DZcy;	      ISOcyr2	# CYRILLIC CAPITAL LETTER DZHE
+0x0410    &Acy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER A
+0x0411    &Bcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER BE
+0x0412    &Vcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER VE
+0x0413    &Gcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER GHE
+0x0414    &Dcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER DE
+0x0415    &IEcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER IE
+0x0416    &ZHcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER ZHE
+0x0417    &Zcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER ZE
+0x0418    &Icy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER I
+0x0419    &Jcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER SHORT I
+0x041A    &Kcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER KA
+0x041B    &Lcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER EL
+0x041C    &Mcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER EM
+0x041D    &Ncy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER EN
+0x041E    &Ocy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER O
+0x041F    &Pcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER PE
+0x0420    &Rcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER ER
+0x0421    &Scy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER ES
+0x0422    &Tcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER TE
+0x0423    &Ucy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER U
+0x0424    &Fcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER EF
+0x0425    &KHcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER HA
+0x0426    &TScy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER TSE
+0x0427    &CHcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER CHE
+0x0428    &SHcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER SHA
+0x0429    &SHCHcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER SHCHA
+0x042A    &HARDcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER HARD SIGN
+0x042B    &Ycy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER YERU
+0x042C    &SOFTcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER SOFT SIGN
+0x042D    &Ecy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER E
+0x042E    &YUcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER YU
+0x042F    &YAcy;	      ISOcyr1	# CYRILLIC CAPITAL LETTER YA
+0x0430    &acy;	      ISOcyr1	# CYRILLIC SMALL LETTER A
+0x0431    &bcy;	      ISOcyr1	# CYRILLIC SMALL LETTER BE
+0x0432    &vcy;	      ISOcyr1	# CYRILLIC SMALL LETTER VE
+0x0433    &gcy;	      ISOcyr1	# CYRILLIC SMALL LETTER GHE
+0x0434    &dcy;	      ISOcyr1	# CYRILLIC SMALL LETTER DE
+0x0435    &iecy;	      ISOcyr1	# CYRILLIC SMALL LETTER IE
+0x0436    &zhcy;	      ISOcyr1	# CYRILLIC SMALL LETTER ZHE
+0x0437    &zcy;	      ISOcyr1	# CYRILLIC SMALL LETTER ZE
+0x0438    &icy;	      ISOcyr1	# CYRILLIC SMALL LETTER I
+0x0439    &jcy;	      ISOcyr1	# CYRILLIC SMALL LETTER SHORT I
+0x043A    &kcy;	      ISOcyr1	# CYRILLIC SMALL LETTER KA
+0x043B    &lcy;	      ISOcyr1	# CYRILLIC SMALL LETTER EL
+0x043C    &mcy;	      ISOcyr1	# CYRILLIC SMALL LETTER EM
+0x043D    &ncy;	      ISOcyr1	# CYRILLIC SMALL LETTER EN
+0x043E    &ocy;	      ISOcyr1	# CYRILLIC SMALL LETTER O
+0x043F    &pcy;	      ISOcyr1	# CYRILLIC SMALL LETTER PE
+0x0440    &rcy;	      ISOcyr1	# CYRILLIC SMALL LETTER ER
+0x0441    &scy;	      ISOcyr1	# CYRILLIC SMALL LETTER ES
+0x0442    &tcy;	      ISOcyr1	# CYRILLIC SMALL LETTER TE
+0x0443    &ucy;	      ISOcyr1	# CYRILLIC SMALL LETTER U
+0x0444    &fcy;	      ISOcyr1	# CYRILLIC SMALL LETTER EF
+0x0445    &khcy;	      ISOcyr1	# CYRILLIC SMALL LETTER HA
+0x0446    &tscy;	      ISOcyr1	# CYRILLIC SMALL LETTER TSE
+0x0447    &chcy;	      ISOcyr1	# CYRILLIC SMALL LETTER CHE
+0x0448    &shcy;	      ISOcyr1	# CYRILLIC SMALL LETTER SHA
+0x0449    &shchcy;	      ISOcyr1	# CYRILLIC SMALL LETTER SHCHA
+0x044A    &hardcy;	      ISOcyr1	# CYRILLIC SMALL LETTER HARD SIGN
+0x044B    &ycy;	      ISOcyr1	# CYRILLIC SMALL LETTER YERU
+0x044C    &softcy;	      ISOcyr1	# CYRILLIC SMALL LETTER SOFT SIGN
+0x044D    &ecy;	      ISOcyr1	# CYRILLIC SMALL LETTER E
+0x044E    &yucy;	      ISOcyr1	# CYRILLIC SMALL LETTER YU
+0x044F    &yacy;	      ISOcyr1	# CYRILLIC SMALL LETTER YA
+0x0451    &iocy;	      ISOcyr1	# CYRILLIC SMALL LETTER IO
+0x0452    &djcy;	      ISOcyr2	# CYRILLIC SMALL LETTER DJE
+0x0453    &gjcy;	      ISOcyr2	# CYRILLIC SMALL LETTER GJE
+0x0454    &jukcy;	      ISOcyr2	# CYRILLIC SMALL LETTER UKRAINIAN IE
+0x0455    &dscy;	      ISOcyr2	# CYRILLIC SMALL LETTER DZE
+0x0456    &iukcy;	      ISOcyr2	# CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0457    &yicy;	      ISOcyr2	# CYRILLIC SMALL LETTER YI
+0x0458    &jsercy;	      ISOcyr2	# CYRILLIC SMALL LETTER JE
+0x0459    &ljcy;	      ISOcyr2	# CYRILLIC SMALL LETTER LJE
+0x045A    &njcy;	      ISOcyr2	# CYRILLIC SMALL LETTER NJE
+0x045B    &tshcy;	      ISOcyr2	# CYRILLIC SMALL LETTER TSHE
+0x045C    &kjcy;	      ISOcyr2	# CYRILLIC SMALL LETTER KJE
+0x045E    &ubrcy;	      ISOcyr2	# CYRILLIC SMALL LETTER SHORT U
+0x045F    &dzcy;	      ISOcyr2	# CYRILLIC SMALL LETTER DZHE
+0x2002    &ensp;	      ISOpub	# EN SPACE
+0x2003    &emsp;	      ISOpub	# EM SPACE
+0x2004    &emsp13;	      ISOpub	# THREE-PER-EM SPACE
+0x2005    &emsp14;	      ISOpub	# FOUR-PER-EM SPACE
+0x2007    &numsp;	      ISOpub	# FIGURE SPACE
+0x2008    &puncsp;	      ISOpub	# PUNCTUATION SPACE
+0x2009    &thinsp;	      ISOpub	# THIN SPACE
+0x200A    &hairsp;	      ISOpub	# HAIR SPACE
+0x200C    &zwnj;	      HTMLspecial	# ZERO WIDTH NON-JOINER
+0x200D    &zwj;	      HTMLspecial	# ZERO WIDTH JOINER
+0x200E    &lrm;	      HTMLspecial	# LEFT-TO-RIGHT MARK
+0x200F    &rlm;	      HTMLspecial	# RIGHT-TO-LEFT MARK
+0x2010    &dash;	      ISOpub	# HYPHEN
+0x2013    &ndash;	      ISOpub	# EN DASH
+0x2014    &mdash;	      ISOpub	# EM DASH
+0x2015    &horbar;	      ISOnum	# HORIZONTAL BAR
+0x2016    &Verbar;	      ISOtech	# DOUBLE VERTICAL LINE
+0x2018    &lsquo;	      ISOnum	# LEFT SINGLE QUOTATION MARK
+0x2018    &rsquor;	      ISOpub	# LEFT SINGLE QUOTATION MARK
+0x2019    &rsquo;	      ISOnum	# RIGHT SINGLE QUOTATION MARK
+0x201A    &lsquor;	      ISOpub	# SINGLE LOW-9 QUOTATION MARK
+0x201A    &sbquo;	      HTMLspecial	# SINGLE LOW-9 QUOTATION MARK
+0x201C    &ldquo;	      ISOnum	# LEFT DOUBLE QUOTATION MARK
+0x201C    &rdquor;	      ISOpub	# LEFT DOUBLE QUOTATION MARK
+0x201D    &rdquo;	      ISOnum	# RIGHT DOUBLE QUOTATION MARK
+0x201E    &bdquo;	      HTMLspecial	# DOUBLE LOW-9 QUOTATION MARK
+0x201E    &ldquor;	      ISOpub	# DOUBLE LOW-9 QUOTATION MARK
+0x2020    &dagger;	      ISOpub	# DAGGER
+0x2021    &Dagger;	      ISOpub	# DOUBLE DAGGER
+0x2022    &bull;	      ISOpub	# BULLET
+0x2025    &nldr;	      ISOpub	# TWO DOT LEADER
+0x2026    &hellip;	      ISOpub	# HORIZONTAL ELLIPSIS
+0x2026    &mldr;	      ISOpub	# HORIZONTAL ELLIPSIS
+0x2030    &permil;	      ISOtech	# PER MILLE SIGN
+0x2032    &prime;	      ISOtech	# PRIME
+0x2032    &vprime;	      ISOamso	# PRIME
+0x2033    &Prime;	      ISOtech	# DOUBLE PRIME
+0x2034    &tprime;	      ISOtech	# TRIPLE PRIME
+0x2035    &bprime;	      ISOamso	# REVERSED PRIME
+0x2039    &lsaquo;	      HTMLspecial	# SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x203A    &rsaquo;	      HTMLspecial	# SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x203E    &oline;	      HTMLsymbol	# OVERLINE
+0x2041    &caret;	      ISOpub	# CARET INSERTION POINT
+0x2043    &hybull;	      ISOpub	# HYPHEN BULLET
+0x2044    &frasl;	      HTMLsymbol	# FRACTION SLASH
+0x20AC    &euro;	      new       # EURO SIGN
+0x20DB    &tdot;	      ISOtech	# COMBINING THREE DOTS ABOVE
+0x20DC    &DotDot;	      ISOtech	# COMBINING FOUR DOTS ABOVE
+0x2105    &incare;	      ISOpub	# CARE OF
+0x210B    &hamilt;	      ISOtech	# SCRIPT CAPITAL H
+0x210F    &planck;	      ISOamso	# PLANCK CONSTANT OVER TWO PI
+0x2111    &image;	      ISOamso	# BLACK-LETTER CAPITAL I
+0x2112    &lagran;	      ISOtech	# SCRIPT CAPITAL L
+0x2113    &ell;	      ISOamso	# SCRIPT SMALL L
+0x2116    &numero;	      ISOcyr1	# NUMERO SIGN
+0x2117    &copysr;	      ISOpub	# SOUND RECORDING COPYRIGHT
+0x2118    &weierp;	      ISOamso	# SCRIPT CAPITAL P
+0x211C    &real;	      ISOamso	# BLACK-LETTER CAPITAL R
+0x211E    &rx;	      ISOpub	# PRESCRIPTION TAKE
+0x2122    &trade;	      ISOnum	# TRADE MARK SIGN
+0x2126    &ohm;	      ISOnum	# OHM SIGN
+0x212B    &angst;	      ISOtech	# ANGSTROM SIGN
+0x212C    &bernou;	      ISOtech	# SCRIPT CAPITAL B
+0x2133    &phmmat;	      ISOtech	# SCRIPT CAPITAL M
+0x2134    &order;	      ISOtech	# SCRIPT SMALL O
+0x2135    &alefsym;	      HTMLsymbol	# ALEF SYMBOL
+0x2135    &aleph;	      ISOtech	# ALEF SYMBOL
+0x2136    &beth;	      ISOamso	# BET SYMBOL
+0x2137    &gimel;	      ISOamso	# GIMEL SYMBOL
+0x2138    &daleth;	      ISOamso	# DALET SYMBOL
+0x2153    &frac13;	      ISOpub	# VULGAR FRACTION ONE THIRD
+0x2154    &frac23;	      ISOpub	# VULGAR FRACTION TWO THIRDS
+0x2155    &frac15;	      ISOpub	# VULGAR FRACTION ONE FIFTH
+0x2156    &frac25;	      ISOpub	# VULGAR FRACTION TWO FIFTHS
+0x2157    &frac35;	      ISOpub	# VULGAR FRACTION THREE FIFTHS
+0x2158    &frac45;	      ISOpub	# VULGAR FRACTION FOUR FIFTHS
+0x2159    &frac16;	      ISOpub	# VULGAR FRACTION ONE SIXTH
+0x215A    &frac56;	      ISOpub	# VULGAR FRACTION FIVE SIXTHS
+0x215B    &frac18;	      ISOnum	# VULGAR FRACTION ONE EIGHTH
+0x215C    &frac38;	      ISOnum	# VULGAR FRACTION THREE EIGHTHS
+0x215D    &frac58;	      ISOnum	# VULGAR FRACTION FIVE EIGHTHS
+0x215E    &frac78;	      ISOnum	# VULGAR FRACTION SEVEN EIGHTHS
+0x2190    &larr;	      ISOnum	# LEFTWARDS ARROW
+0x2191    &uarr;	      ISOnum	# UPWARDS ARROW
+0x2192    &rarr;	      ISOnum	# RIGHTWARDS ARROW
+0x2193    &darr;	      ISOnum	# DOWNWARDS ARROW
+0x2194    &harr;	      ISOamsa	# LEFT RIGHT ARROW
+0x2194    &xhArr;	      ISOamsa	# LEFT RIGHT ARROW
+0x2194    &xharr;	      ISOamsa	# LEFT RIGHT ARROW
+0x2195    &varr;	      ISOamsa	# UP DOWN ARROW
+0x2196    &nwarr;	      ISOamsa	# NORTH WEST ARROW
+0x2197    &nearr;	      ISOamsa	# NORTH EAST ARROW
+0x2198    &drarr;	      ISOamsa	# SOUTH EAST ARROW
+0x2199    &dlarr;	      ISOamsa	# SOUTH WEST ARROW
+0x219A    &nlarr;	      ISOamsa	# LEFTWARDS ARROW WITH STROKE
+0x219B    &nrarr;	      ISOamsa	# RIGHTWARDS ARROW WITH STROKE
+0x219D    &rarrw;	      ISOamsa	# RIGHTWARDS WAVE ARROW
+0x219E    &Larr;	      ISOamsa	# LEFTWARDS TWO HEADED ARROW
+0x21A0    &Rarr;	      ISOamsa	# RIGHTWARDS TWO HEADED ARROW
+0x21A2    &larrtl;	      ISOamsa	# LEFTWARDS ARROW WITH TAIL
+0x21A3    &rarrtl;	      ISOamsa	# RIGHTWARDS ARROW WITH TAIL
+0x21A6    &map;	      ISOamsa	# RIGHTWARDS ARROW FROM BAR
+0x21A9    &larrhk;	      ISOamsa	# LEFTWARDS ARROW WITH HOOK
+0x21AA    &rarrhk;	      ISOamsa	# RIGHTWARDS ARROW WITH HOOK
+0x21AB    &larrlp;	      ISOamsa	# LEFTWARDS ARROW WITH LOOP
+0x21AC    &rarrlp;	      ISOamsa	# RIGHTWARDS ARROW WITH LOOP
+0x21AD    &harrw;	      ISOamsa	# LEFT RIGHT WAVE ARROW
+0x21AE    &nharr;	      ISOamsa	# LEFT RIGHT ARROW WITH STROKE
+0x21B0    &lsh;	      ISOamsa	# UPWARDS ARROW WITH TIP LEFTWARDS
+0x21B1    &rsh;	      ISOamsa	# UPWARDS ARROW WITH TIP RIGHTWARDS
+0x21B5    &crarr;	      HTMLsymbol	# DOWNWARDS ARROW WITH CORNER LEFTWARDS
+0x21B6    &cularr;	      ISOamsa	# ANTICLOCKWISE TOP SEMICIRCLE ARROW
+0x21B7    &curarr;	      ISOamsa	# CLOCKWISE TOP SEMICIRCLE ARROW
+0x21BA    &olarr;	      ISOamsa	# ANTICLOCKWISE OPEN CIRCLE ARROW
+0x21BB    &orarr;	      ISOamsa	# CLOCKWISE OPEN CIRCLE ARROW
+0x21BC    &lharu;	      ISOamsa	# LEFTWARDS HARPOON WITH BARB UPWARDS
+0x21BD    &lhard;	      ISOamsa	# LEFTWARDS HARPOON WITH BARB DOWNWARDS
+0x21BE    &uharr;	      ISOamsa	# UPWARDS HARPOON WITH BARB RIGHTWARDS
+0x21BF    &uharl;	      ISOamsa	# UPWARDS HARPOON WITH BARB LEFTWARDS
+0x21C0    &rharu;	      ISOamsa	# RIGHTWARDS HARPOON WITH BARB UPWARDS
+0x21C1    &rhard;	      ISOamsa	# RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+0x21C2    &dharr;	      ISOamsa	# DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+0x21C3    &dharl;	      ISOamsa	# DOWNWARDS HARPOON WITH BARB LEFTWARDS
+0x21C4    &rlarr2;	      ISOamsa	# RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+0x21C6    &lrarr2;	      ISOamsa	# LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+0x21C7    &larr2;	      ISOamsa	# LEFTWARDS PAIRED ARROWS
+0x21C8    &uarr2;	      ISOamsa	# UPWARDS PAIRED ARROWS
+0x21C9    &rarr2;	      ISOamsa	# RIGHTWARDS PAIRED ARROWS
+0x21CA    &darr2;	      ISOamsa	# DOWNWARDS PAIRED ARROWS
+0x21CB    &lrhar2;	      ISOamsa	# LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+0x21CC    &rlhar2;	      ISOamsa	# RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+0x21CD    &nlArr;	      ISOamsa	# LEFTWARDS DOUBLE ARROW WITH STROKE
+0x21CE    &nhArr;	      ISOamsa	# LEFT RIGHT DOUBLE ARROW WITH STROKE
+0x21CF    &nrArr;	      ISOamsa	# RIGHTWARDS DOUBLE ARROW WITH STROKE
+0x21D0    &lArr;	      ISOtech	# LEFTWARDS DOUBLE ARROW
+0x21D0    &xlArr;	      ISOamsa	# LEFTWARDS DOUBLE ARROW
+0x21D1    &uArr;	      ISOamsa	# UPWARDS DOUBLE ARROW
+0x21D2    &rArr;	      ISOtech	# RIGHTWARDS DOUBLE ARROW
+0x21D2    &xrArr;	      ISOamsa	# RIGHTWARDS DOUBLE ARROW
+0x21D3    &dArr;	      ISOamsa	# DOWNWARDS DOUBLE ARROW
+0x21D4    &hArr;	      ISOamsa	# LEFT RIGHT DOUBLE ARROW
+0x21D4    &iff;	      ISOtech	# LEFT RIGHT DOUBLE ARROW
+0x21D5    &vArr;	      ISOamsa	# UP DOWN DOUBLE ARROW
+0x21DA    &lAarr;	      ISOamsa	# LEFTWARDS TRIPLE ARROW
+0x21DB    &rAarr;	      ISOamsa	# RIGHTWARDS TRIPLE ARROW
+0x2200    &forall;	      ISOtech	# FOR ALL
+0x2201    &comp;	      ISOamso	# COMPLEMENT
+0x2202    &part;	      ISOtech	# PARTIAL DIFFERENTIAL
+0x2203    &exist;	      ISOtech	# THERE EXISTS
+0x2204    &nexist;	      ISOamso	# THERE DOES NOT EXIST
+0x2205    &empty;	      ISOamso	# EMPTY SET
+0x2207    &nabla;	      ISOtech	# NABLA
+0x2208    &isin;	      ISOtech	# ELEMENT OF
+0x2209    &notin;	      ISOtech	# NOT AN ELEMENT OF
+0x220A    &epsis;	      ISOgrk3	# SMALL ELEMENT OF
+0x220B    &ni;	      ISOtech	# CONTAINS AS MEMBER
+0x220D    &bepsi;	      ISOamsr	# SMALL CONTAINS AS MEMBER
+0x220F    &prod;	      ISOamsb	# N-ARY PRODUCT
+0x2210    &amalg;	      ISOamsb	# N-ARY COPRODUCT
+0x2210    &coprod;	      ISOamsb	# N-ARY COPRODUCT
+0x2210    &samalg;	      ISOamsr	# N-ARY COPRODUCT
+0x2211    &sum;	      ISOamsb	# N-ARY SUMMATION
+0x2212    &minus;	      ISOtech	# MINUS SIGN
+0x2213    &mnplus;	      ISOtech	# MINUS-OR-PLUS SIGN
+0x2214    &plusdo;	      ISOamsb	# DOT PLUS
+0x2216    &setmn;	      ISOamsb	# SET MINUS
+0x2216    &ssetmn;	      ISOamsb	# SET MINUS
+0x2217    &lowast;	      ISOtech	# ASTERISK OPERATOR
+0x2218    &compfn;	      ISOtech	# RING OPERATOR
+0x221A    &radic;	      ISOtech	# SQUARE ROOT
+0x221D    &prop;	      ISOtech	# PROPORTIONAL TO
+0x221D    &vprop;	      ISOamsr	# PROPORTIONAL TO
+0x221E    &infin;	      ISOtech	# INFINITY
+0x221F    &ang90;	      ISOtech	# RIGHT ANGLE
+0x2220    &ang;	      ISOamso	# ANGLE
+0x2221    &angmsd;	      ISOamso	# MEASURED ANGLE
+0x2222    &angsph;	      ISOtech	# SPHERICAL ANGLE
+0x2223    &mid;	      ISOamsr	# DIVIDES
+0x2224    &nmid;	      ISOamsn	# DOES NOT DIVIDE
+0x2225    &par;	      ISOtech	# PARALLEL TO
+0x2225    &spar;	      ISOamsr	# PARALLEL TO
+0x2226    &npar;	      ISOamsn	# NOT PARALLEL TO
+0x2226    &nspar;	      ISOamsn	# NOT PARALLEL TO
+0x2227    &and;	      ISOtech	# LOGICAL AND
+0x2228    &or;	      ISOtech	# LOGICAL OR
+0x2229    &cap;	      ISOtech	# INTERSECTION
+0x222A    &cup;	      ISOtech	# UNION
+0x222B    &int;	      ISOtech	# INTEGRAL
+0x222E    &conint;	      ISOtech	# CONTOUR INTEGRAL
+0x2234    &there4;	      ISOtech	# THEREFORE
+0x2235    &becaus;	      ISOtech	# BECAUSE
+0x223C    &sim;	      ISOtech	# TILDE OPERATOR
+0x223C    &thksim;	      ISOamsr	# TILDE OPERATOR
+0x223D    &bsim;	      ISOamsr	# REVERSED TILDE
+0x2240    &wreath;	      ISOamsb	# WREATH PRODUCT
+0x2241    &nsim;	      ISOamsn	# NOT TILDE
+0x2243    &sime;	      ISOtech	# ASYMPTOTICALLY EQUAL TO
+0x2244    &nsime;	      ISOamsn	# NOT ASYMPTOTICALLY EQUAL TO
+0x2245    &cong;	      ISOtech	# APPROXIMATELY EQUAL TO
+0x2247    &ncong;	      ISOamsn	# NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+0x2248    &ap;	      ISOtech	# ALMOST EQUAL TO
+0x2248    &asymp;	      ISOamsr	# ALMOST EQUAL TO
+0x2248    &thkap;	      ISOamsr	# ALMOST EQUAL TO
+0x2249    &nap;	      ISOamsn	# NOT ALMOST EQUAL TO
+0x224A    &ape;	      ISOamsr	# ALMOST EQUAL OR EQUAL TO
+0x224C    &bcong;	      ISOamsr	# ALL EQUAL TO
+0x224E    &bump;	      ISOamsr	# GEOMETRICALLY EQUIVALENT TO
+0x224F    &bumpe;	      ISOamsr	# DIFFERENCE BETWEEN
+0x2250    &esdot;	      ISOamsr	# APPROACHES THE LIMIT
+0x2251    &eDot;	      ISOamsr	# GEOMETRICALLY EQUAL TO
+0x2252    &efDot;	      ISOamsr	# APPROXIMATELY EQUAL TO OR THE IMAGE OF
+0x2253    &erDot;	      ISOamsr	# IMAGE OF OR APPROXIMATELY EQUAL TO
+0x2254    &colone;	      ISOamsr	# COLON EQUALS
+0x2255    &ecolon;	      ISOamsr	# EQUALS COLON
+0x2256    &ecir;	      ISOamsr	# RING IN EQUAL TO
+0x2257    &cire;	      ISOamsr	# RING EQUAL TO
+0x2259    &wedgeq;	      ISOtech	# ESTIMATES
+0x225C    &trie;	      ISOamsr	# DELTA EQUAL TO
+0x2260    &ne;	      ISOtech	# NOT EQUAL TO
+0x2261    &equiv;	      ISOtech	# IDENTICAL TO
+0x2262    &nequiv;	      ISOamsn	# NOT IDENTICAL TO
+0x2264    &le;	      ISOtech	# LESS-THAN OR EQUAL TO
+0x2264    &les;	      ISOamsr	# LESS-THAN OR EQUAL TO
+0x2265    &ge;	      ISOtech	# GREATER-THAN OR EQUAL TO
+0x2265    &ges;	      ISOamsr	# GREATER-THAN OR EQUAL TO
+0x2266    &lE;	      ISOamsr	# LESS-THAN OVER EQUAL TO
+0x2267    &gE;	      ISOamsr	# GREATER-THAN OVER EQUAL TO
+0x2268    &lnE;	      ISOamsn	# LESS-THAN BUT NOT EQUAL TO
+0x2268    &lne;	      ISOamsn	# LESS-THAN BUT NOT EQUAL TO
+0x2268    &lvnE;	      ISOamsn	# LESS-THAN BUT NOT EQUAL TO
+0x2269    &gnE;	      ISOamsn	# GREATER-THAN BUT NOT EQUAL TO
+0x2269    &gne;	      ISOamsn	# GREATER-THAN BUT NOT EQUAL TO
+0x2269    &gvnE;	      ISOamsn	# GREATER-THAN BUT NOT EQUAL TO
+0x226A    &Lt;	      ISOamsr	# MUCH LESS-THAN
+0x226B    &Gt;	      ISOamsr	# MUCH GREATER-THAN
+0x226C    &twixt;	      ISOamsr	# BETWEEN
+0x226E    &nlt;	      ISOamsn	# NOT LESS-THAN
+0x226F    &ngt;	      ISOamsn	# NOT GREATER-THAN
+0x2270    &nle;	      ISOamsn	# NEITHER LESS-THAN NOR EQUAL TO
+0x2270    &nles;	      ISOamsn	# NEITHER LESS-THAN NOR EQUAL TO
+0x2271    &nge;	      ISOamsn	# NEITHER GREATER-THAN NOR EQUAL TO
+0x2271    &nges;	      ISOamsn	# NEITHER GREATER-THAN NOR EQUAL TO
+0x2272    &lsim;	      ISOamsr	# LESS-THAN OR EQUIVALENT TO
+0x2273    &gsim;	      ISOamsr	# GREATER-THAN OR EQUIVALENT TO
+0x2276    &lg;	      ISOamsr	# LESS-THAN OR GREATER-THAN
+0x2277    &gl;	      ISOamsr	# GREATER-THAN OR LESS-THAN
+0x227A    &pr;	      ISOamsr	# PRECEDES
+0x227B    &sc;	      ISOamsr	# SUCCEEDS
+0x227C    &cupre;	      ISOamsr	# PRECEDES OR EQUAL TO
+0x227C    &pre;	      ISOamsr	# PRECEDES OR EQUAL TO
+0x227D    &sccue;	      ISOamsr	# SUCCEEDS OR EQUAL TO
+0x227D    &sce;	      ISOamsr	# SUCCEEDS OR EQUAL TO
+0x227E    &prsim;	      ISOamsr	# PRECEDES OR EQUIVALENT TO
+0x227F    &scsim;	      ISOamsr	# SUCCEEDS OR EQUIVALENT TO
+0x2280    &npr;	      ISOamsn	# DOES NOT PRECEDE
+0x2281    &nsc;	      ISOamsn	# DOES NOT SUCCEED
+0x2282    &sub;	      ISOtech	# SUBSET OF
+0x2283    &sup;	      ISOtech	# SUPERSET OF
+0x2284    &nsub;	      ISOamsn	# NOT A SUBSET OF
+0x2285    &nsup;	      ISOamsn	# NOT A SUPERSET OF
+0x2286    &subE;	      ISOamsr	# SUBSET OF OR EQUAL TO
+0x2286    &sube;	      ISOtech	# SUBSET OF OR EQUAL TO
+0x2287    &supE;	      ISOamsr	# SUPERSET OF OR EQUAL TO
+0x2287    &supe;	      ISOtech	# SUPERSET OF OR EQUAL TO
+0x2288    &nsubE;	      ISOamsn	# NEITHER A SUBSET OF NOR EQUAL TO
+0x2288    &nsube;	      ISOamsn	# NEITHER A SUBSET OF NOR EQUAL TO
+0x2289    &nsupE;	      ISOamsn	# NEITHER A SUPERSET OF NOR EQUAL TO
+0x2289    &nsupe;	      ISOamsn	# NEITHER A SUPERSET OF NOR EQUAL TO
+0x228A    &subnE;	      ISOamsn	# SUBSET OF WITH NOT EQUAL TO
+0x228A    &subne;	      ISOamsn	# SUBSET OF WITH NOT EQUAL TO
+0x228A    &vsubnE;	      ISOamsn	# SUBSET OF WITH NOT EQUAL TO
+0x228A    &vsubne;	      ISOamsn	# SUBSET OF WITH NOT EQUAL TO
+0x228B    &supnE;	      ISOamsn	# SUPERSET OF WITH NOT EQUAL TO
+0x228B    &supne;	      ISOamsn	# SUPERSET OF WITH NOT EQUAL TO
+0x228B    &vsupnE;	      ISOamsn	# SUPERSET OF WITH NOT EQUAL TO
+0x228B    &vsupne;	      ISOamsn	# SUPERSET OF WITH NOT EQUAL TO
+0x228E    &uplus;	      ISOamsb	# MULTISET UNION
+0x228F    &sqsub;	      ISOamsr	# SQUARE IMAGE OF
+0x2290    &sqsup;	      ISOamsr	# SQUARE ORIGINAL OF
+0x2291    &sqsube;	      ISOamsr	# SQUARE IMAGE OF OR EQUAL TO
+0x2292    &sqsupe;	      ISOamsr	# SQUARE ORIGINAL OF OR EQUAL TO
+0x2293    &sqcap;	      ISOamsb	# SQUARE CAP
+0x2294    &sqcup;	      ISOamsb	# SQUARE CUP
+0x2295    &oplus;	      ISOamsb	# CIRCLED PLUS
+0x2296    &ominus;	      ISOamsb	# CIRCLED MINUS
+0x2297    &otimes;	      ISOamsb	# CIRCLED TIMES
+0x2298    &osol;	      ISOamsb	# CIRCLED DIVISION SLASH
+0x2299    &odot;	      ISOamsb	# CIRCLED DOT OPERATOR
+0x229A    &ocir;	      ISOamsb	# CIRCLED RING OPERATOR
+0x229B    &oast;	      ISOamsb	# CIRCLED ASTERISK OPERATOR
+0x229D    &odash;	      ISOamsb	# CIRCLED DASH
+0x229E    &plusb;	      ISOamsb	# SQUARED PLUS
+0x229F    &minusb;	      ISOamsb	# SQUARED MINUS
+0x22A0    &timesb;	      ISOamsb	# SQUARED TIMES
+0x22A1    &sdotb;	      ISOamsb	# SQUARED DOT OPERATOR
+0x22A2    &vdash;	      ISOamsr	# RIGHT TACK
+0x22A3    &dashv;	      ISOamsr	# LEFT TACK
+0x22A4    &top;	      ISOamsb	# DOWN TACK
+0x22A5    &bottom;	      ISOtech	# UP TACK
+0x22A5    &perp;	      ISOtech	# UP TACK
+0x22A7    &models;	      ISOamsr	# MODELS
+0x22A8    &vDash;	      ISOamsr	# TRUE
+0x22A9    &Vdash;	      ISOamsr	# FORCES
+0x22AA    &Vvdash;	      ISOamsr	# TRIPLE VERTICAL BAR RIGHT TURNSTILE
+0x22AC    &nvdash;	      ISOamsn	# DOES NOT PROVE
+0x22AD    &nvDash;	      ISOamsn	# NOT TRUE
+0x22AE    &nVdash;	      ISOamsn	# DOES NOT FORCE
+0x22AF    &nVDash;	      ISOamsn	# NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+0x22B2    &vltri;	      ISOamsr	# NORMAL SUBGROUP OF
+0x22B3    &vrtri;	      ISOamsr	# CONTAINS AS NORMAL SUBGROUP
+0x22B4    &ltrie;	      ISOamsr	# NORMAL SUBGROUP OF OR EQUAL TO
+0x22B5    &rtrie;	      ISOamsr	# CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+0x22B8    &mumap;	      ISOamsa	# MULTIMAP
+0x22BA    &intcal;	      ISOamsb	# INTERCALATE
+0x22BB    &veebar;	      ISOamsr	# XOR
+0x22BC    &barwed;	      ISOamsb	# NAND
+0x22C4    &diam;	      ISOamsb	# DIAMOND OPERATOR
+0x22C5    &sdot;	      ISOamsb	# DOT OPERATOR
+0x22C6    &sstarf;	      ISOamsb	# STAR OPERATOR
+0x22C7    &divonx;	      ISOamsb	# DIVISION TIMES
+0x22C8    &bowtie;	      ISOamsr	# BOWTIE
+0x22C9    &ltimes;	      ISOamsb	# LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CA    &rtimes;	      ISOamsb	# RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CB    &lthree;	      ISOamsb	# LEFT SEMIDIRECT PRODUCT
+0x22CC    &rthree;	      ISOamsb	# RIGHT SEMIDIRECT PRODUCT
+0x22CD    &bsime;	      ISOamsr	# REVERSED TILDE EQUALS
+0x22CE    &cuvee;	      ISOamsb	# CURLY LOGICAL OR
+0x22CF    &cuwed;	      ISOamsb	# CURLY LOGICAL AND
+0x22D0    &Sub;	      ISOamsr	# DOUBLE SUBSET
+0x22D1    &Sup;	      ISOamsr	# DOUBLE SUPERSET
+0x22D2    &Cap;	      ISOamsb	# DOUBLE INTERSECTION
+0x22D3    &Cup;	      ISOamsb	# DOUBLE UNION
+0x22D4    &fork;	      ISOamsr	# PITCHFORK
+0x22D6    &ldot;	      ISOamsr	# LESS-THAN WITH DOT
+0x22D7    &gsdot;	      ISOamsr	# GREATER-THAN WITH DOT
+0x22D8    &Ll;	      ISOamsr	# VERY MUCH LESS-THAN
+0x22D9    &Gg;	      ISOamsr	# VERY MUCH GREATER-THAN
+0x22DA    &leg;	      ISOamsr	# LESS-THAN EQUAL TO OR GREATER-THAN
+0x22DB    &gel;	      ISOamsr	# GREATER-THAN EQUAL TO OR LESS-THAN
+0x22DC    &els;	      ISOamsr	# EQUAL TO OR LESS-THAN
+0x22DD    &egs;	      ISOamsr	# EQUAL TO OR GREATER-THAN
+0x22DE    &cuepr;	      ISOamsr	# EQUAL TO OR PRECEDES
+0x22DF    &cuesc;	      ISOamsr	# EQUAL TO OR SUCCEEDS
+0x22E0    &npre;	      ISOamsn	# DOES NOT PRECEDE OR EQUAL
+0x22E1    &nsce;	      ISOamsn	# DOES NOT SUCCEED OR EQUAL
+0x22E6    &lnsim;	      ISOamsn	# LESS-THAN BUT NOT EQUIVALENT TO
+0x22E7    &gnsim;	      ISOamsn	# GREATER-THAN BUT NOT EQUIVALENT TO
+0x22E8    &prnsim;	      ISOamsn	# PRECEDES BUT NOT EQUIVALENT TO
+0x22E9    &scnsim;	      ISOamsn	# SUCCEEDS BUT NOT EQUIVALENT TO
+0x22EA    &nltri;	      ISOamsn	# NOT NORMAL SUBGROUP OF
+0x22EB    &nrtri;	      ISOamsn	# DOES NOT CONTAIN AS NORMAL SUBGROUP
+0x22EC    &nltrie;	      ISOamsn	# NOT NORMAL SUBGROUP OF OR EQUAL TO
+0x22ED    &nrtrie;	      ISOamsn	# DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+0x22EE    &vellip;	      ISOpub	# VERTICAL ELLIPSIS
+0x2306    &Barwed;	      ISOamsb	# PERSPECTIVE
+0x2308    &lceil;	      ISOamsc	# LEFT CEILING
+0x2309    &rceil;	      ISOamsc	# RIGHT CEILING
+0x230A    &lfloor;	      ISOamsc	# LEFT FLOOR
+0x230B    &rfloor;	      ISOamsc	# RIGHT FLOOR
+0x230C    &drcrop;	      ISOpub	# BOTTOM RIGHT CROP
+0x230D    &dlcrop;	      ISOpub	# BOTTOM LEFT CROP
+0x230E    &urcrop;	      ISOpub	# TOP RIGHT CROP
+0x230F    &ulcrop;	      ISOpub	# TOP LEFT CROP
+0x2315    &telrec;	      ISOpub	# TELEPHONE RECORDER
+0x2316    &target;	      ISOpub	# POSITION INDICATOR
+0x231C    &ulcorn;	      ISOamsc	# TOP LEFT CORNER
+0x231D    &urcorn;	      ISOamsc	# TOP RIGHT CORNER
+0x231E    &dlcorn;	      ISOamsc	# BOTTOM LEFT CORNER
+0x231F    &drcorn;	      ISOamsc	# BOTTOM RIGHT CORNER
+0x2322    &frown;	      ISOamsr	# FROWN
+0x2322    &sfrown;	      ISOamsr	# FROWN
+0x2323    &smile;	      ISOamsr	# SMILE
+0x2323    &ssmile;	      ISOamsr	# SMILE
+0x2329    &lang;	      ISOtech	# LEFT-POINTING ANGLE BRACKET
+0x232A    &rang;	      ISOtech	# RIGHT-POINTING ANGLE BRACKET
+0x2423    &blank;	      ISOpub	# OPEN BOX
+0x24C8    &oS;	      ISOamso	# CIRCLED LATIN CAPITAL LETTER S
+0x2500    &boxh;	      ISObox	# BOX DRAWINGS LIGHT HORIZONTAL
+0x2502    &boxv;	      ISObox	# BOX DRAWINGS LIGHT VERTICAL
+0x250C    &boxdr;	      ISObox	# BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x2510    &boxdl;	      ISObox	# BOX DRAWINGS LIGHT DOWN AND LEFT
+0x2514    &boxur;	      ISObox	# BOX DRAWINGS LIGHT UP AND RIGHT
+0x2518    &boxul;	      ISObox	# BOX DRAWINGS LIGHT UP AND LEFT
+0x251C    &boxvr;	      ISObox	# BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x2524    &boxvl;	      ISObox	# BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x252C    &boxhd;	      ISObox	# BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x2534    &boxhu;	      ISObox	# BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x253C    &boxvh;	      ISObox	# BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x2550    &boxH;	      ISObox	# BOX DRAWINGS DOUBLE HORIZONTAL
+0x2551    &boxV;	      ISObox	# BOX DRAWINGS DOUBLE VERTICAL
+0x2552    &boxdR;	      ISObox	# BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0x2553    &boxDr;	      ISObox	# BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0x2554    &boxDR;	      ISObox	# BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0x2555    &boxdL;	      ISObox	# BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0x2556    &boxDl;	      ISObox	# BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0x2557    &boxDL;	      ISObox	# BOX DRAWINGS DOUBLE DOWN AND LEFT
+0x2558    &boxuR;	      ISObox	# BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0x2559    &boxUr;	      ISObox	# BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0x255A    &boxUR;	      ISObox	# BOX DRAWINGS DOUBLE UP AND RIGHT
+0x255B    &boxuL;	      ISObox	# BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0x255C    &boxUl;	      ISObox	# BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0x255D    &boxUL;	      ISObox	# BOX DRAWINGS DOUBLE UP AND LEFT
+0x255E    &boxvR;	      ISObox	# BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0x255F    &boxVr;	      ISObox	# BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0x2560    &boxVR;	      ISObox	# BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0x2561    &boxvL;	      ISObox	# BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0x2562    &boxVl;	      ISObox	# BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0x2563    &boxVL;	      ISObox	# BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0x2564    &boxHd;	      ISObox	# BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0x2565    &boxhD;	      ISObox	# BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0x2566    &boxHD;	      ISObox	# BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0x2567    &boxHu;	      ISObox	# BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0x2568    &boxhU;	      ISObox	# BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0x2569    &boxHU;	      ISObox	# BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0x256A    &boxvH;	      ISObox	# BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0x256B    &boxVh;	      ISObox	# BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0x256C    &boxVH;	      ISObox	# BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0x2580    &uhblk;	      ISOpub	# UPPER HALF BLOCK
+0x2584    &lhblk;	      ISOpub	# LOWER HALF BLOCK
+0x2588    &block;	      ISOpub	# FULL BLOCK
+0x2591    &blk14;	      ISOpub	# LIGHT SHADE
+0x2592    &blk12;	      ISOpub	# MEDIUM SHADE
+0x2593    &blk34;	      ISOpub	# DARK SHADE
+0x25A1    &squ;	      ISOpub	# WHITE SQUARE
+0x25A1    &square;	      ISOtech	# WHITE SQUARE
+0x25AA    &squf;	      ISOpub	# BLACK SMALL SQUARE
+0x25AD    &rect;	      ISOpub	# WHITE RECTANGLE
+0x25AE    &marker;	      ISOpub	# BLACK VERTICAL RECTANGLE
+0x25B3    &xutri;	      ISOamsb	# WHITE UP-POINTING TRIANGLE
+0x25B4    &utrif;	      ISOpub	# BLACK UP-POINTING SMALL TRIANGLE
+0x25B5    &utri;	      ISOpub	# WHITE UP-POINTING SMALL TRIANGLE
+0x25B8    &rtrif;	      ISOpub	# BLACK RIGHT-POINTING SMALL TRIANGLE
+0x25B9    &rtri;	      ISOpub	# WHITE RIGHT-POINTING SMALL TRIANGLE
+0x25BD    &xdtri;	      ISOamsb	# WHITE DOWN-POINTING TRIANGLE
+0x25BE    &dtrif;	      ISOpub	# BLACK DOWN-POINTING SMALL TRIANGLE
+0x25BF    &dtri;	      ISOpub	# WHITE DOWN-POINTING SMALL TRIANGLE
+0x25C2    &ltrif;	      ISOpub	# BLACK LEFT-POINTING SMALL TRIANGLE
+0x25C3    &ltri;	      ISOpub	# WHITE LEFT-POINTING SMALL TRIANGLE
+0x25CA    &loz;	      ISOpub	# LOZENGE
+0x25CB    &cir;	      ISOpub	# WHITE CIRCLE
+0x25CB    &xcirc;	      ISOamsb	# WHITE CIRCLE
+0x2605    &starf;	      ISOpub	# BLACK STAR
+0x2606    &star;	      ISOpub	# WHITE STAR
+0x260E    &phone;	      ISOpub	# BLACK TELEPHONE
+0x2640    &female;	      ISOpub	# FEMALE SIGN
+0x2642    &male;	      ISOpub	# MALE SIGN
+0x2660    &spades;	      ISOpub	# BLACK SPADE SUIT
+0x2663    &clubs;	      ISOpub	# BLACK CLUB SUIT
+0x2665    &hearts;	      ISOpub	# BLACK HEART SUIT
+0x2666    &diams;	      ISOpub	# BLACK DIAMOND SUIT
+0x266A    &sung;	      ISOnum	# EIGHTH NOTE
+0x266D    &flat;	      ISOpub	# MUSIC FLAT SIGN
+0x266E    &natur;	      ISOpub	# MUSIC NATURAL SIGN
+0x266F    &sharp;	      ISOpub	# MUSIC SHARP SIGN
+0x2713    &check;	      ISOpub	# CHECK MARK
+0x2717    &cross;	      ISOpub	# BALLOT X
+0x2720    &malt;	      ISOpub	# MALTESE CROSS
+0x2726    &lozf;	      ISOpub	# BLACK FOUR POINTED STAR
+<!-- 0x2727    &loz;         ISOpub    # WHITE FOUR POINTED STAR -->
+0x2736    &sext;	      ISOpub	# SIX POINTED BLACK STAR
+0x????    &epsiv;	      ISOgrk3	# variant epsilon
+0x????    &fjlig;	      ISOpub	# fj ligature
+0x????    &gEl;	      ISOamsr	# greater-than, double equals, less-than
+0x????    &gap;	      ISOamsr	# greater-than, approximately equal to
+0x????    &gnap;	      ISOamsn	# greater-than, not approximately equal to
+0x????    &jnodot;	      ISOamso	# latin small letter dotless j
+0x????    &lEg;	      ISOamsr	# less-than, double equals, greater-than
+0x????    &lap;	      ISOamsr	# less-than, approximately equal to
+0x????    &lnap;	      ISOamsn	# less-than, not approximately equal to
+0x????    &lpargt;	      ISOamsc	# left parenthesis, greater-than
+0x????    &ngE;	      ISOamsn	# not greater-than, double equals
+0x????    &nlE;	      ISOamsn	# not less-than, double equals
+0x????    &nsmid;	      ISOamsn	# nshortmid
+0x????    &prap;	      ISOamsr	# precedes, approximately equal to
+0x????    &prnE;	      ISOamsn	# precedes, not double equal
+0x????    &prnap;	      ISOamsn	# precedes, not approximately equal to
+0x????    &rpargt;	      ISOamsc	# right parenthesis, greater-than
+0x????    &scap;	      ISOamsr	# succeeds, approximately equal to
+0x????    &scnE;	      ISOamsn	# succeeds, not double equals
+0x????    &scnap;	      ISOamsn	# succeeds, not approximately equal to
+0x????    &smid;	      ISOamsr	# shortmid
+0xFB00    &fflig;	      ISOpub	# LATIN SMALL LIGATURE FF
+0xFB01    &filig;	      ISOpub	# LATIN SMALL LIGATURE FI
+0xFB02    &fllig;	      ISOpub	# LATIN SMALL LIGATURE FL
+0xFB03    &ffilig;	      ISOpub	# LATIN SMALL LIGATURE FFI
+0xFB04    &ffllig;	      ISOpub	# LATIN SMALL LIGATURE FFL
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/spaces.html b/test/spaces.html
new file mode 100644
index 00000000..d527a193
--- /dev/null
+++ b/test/spaces.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE> Test of some symbols </TITLE>
+</HEAD>
+<BODY>
+<!-- Multiple spaces are normally collapsed unless we are in a <PRE> mode
+or use "special" spaces like &nbsp; or &emsp; - try playing around with this page
+by adding more spaces inside brackets or using <PRE>.
+-->
+<!-- PRE -->
+
+You may press '\' to view the source of this test<br>
+<em>UNICODE   NCR  alt-NCR  named  alt-named</em><br>
+<p>
+0x2000    [&#x2000;]   <IMG SRC=X ALT="[&#x2000;]">                            # EN QUAD<br>
+0x2001    [&#x2001;]   <IMG SRC=X ALT="[&#x2001;]">                            # EM QUAD<br>
+0x2002    [&#x2002;]   <IMG SRC=X ALT="[&#x2002;]">    [&ensp;]        <IMG SRC=X ALT="[&ensp;]">              # EN SPACE<br>
+0x2003    [&#x2003;]   <IMG SRC=X ALT="[&#x2003;]">    [&emsp;]        <IMG SRC=X ALT="[&emsp;]">              # EM SPACE<br>
+0x2004    [&#x2004;]   <IMG SRC=X ALT="[&#x2004;]">    [&emsp13;]      <IMG SRC=X ALT="[&emsp13;]">            # THREE-PER-EM SPACE<br>
+0x2005    [&#x2005;]   <IMG SRC=X ALT="[&#x2005;]">    [&emsp14;]      <IMG SRC=X ALT="[&emsp14;]">            # FOUR-PER-EM SPACE<br>
+0x2007    [&#x2007;]   <IMG SRC=X ALT="[&#x2007;]">    [&numsp;]       <IMG SRC=X ALT="[&numsp;]">             # FIGURE SPACE<br>
+0x2008    [&#x2008;]   <IMG SRC=X ALT="[&#x2008;]">    [&puncsp;]      <IMG SRC=X ALT="[&puncsp;]">            # PUNCTUATION SPACE<br>
+0x2009    [&#x2009;]   <IMG SRC=X ALT="[&#x2009;]">    [&thinsp;]      <IMG SRC=X ALT="[&thinsp;]">            # THIN SPACE<br>
+0x200A    [&#x200A;]   <IMG SRC=X ALT="[&#x200A;]">    [&hairsp;]      <IMG SRC=X ALT="[&hairsp;]">            # HAIR SPACE<br>
+0x200C    [&#x200C;]   <IMG SRC=X ALT="[&#x200C;]">    [&zwnj;]        <IMG SRC=X ALT="[&zwnj;]">              # ZERO WIDTH NON-JOINER<br>
+0x200D    [&#x200D;]   <IMG SRC=X ALT="[&#x200D;]">    [&zwj;] <IMG SRC=X ALT="[&zwj;]">               # ZERO WIDTH JOINER<br>
+0x200E    [&#x200E;]   <IMG SRC=X ALT="[&#x200E;]">    [&lrm;] <IMG SRC=X ALT="[&lrm;]">               # LEFT-TO-RIGHT MARK<br>
+0x200F    [&#x200F;]   <IMG SRC=X ALT="[&#x200F;]">    [&rlm;] <IMG SRC=X ALT="[&rlm;]">               # RIGHT-TO-LEFT MARK<br>
+0x2010    [&#x2010;]   <IMG SRC=X ALT="[&#x2010;]">    [&dash;]        <IMG SRC=X ALT="[&dash;]">              # HYPHEN<br>
+0x2013    [&#x2013;]   <IMG SRC=X ALT="[&#x2013;]">    [&ndash;]       <IMG SRC=X ALT="[&ndash;]">             # EN DASH<br>
+0x2014    [&#x2014;]   <IMG SRC=X ALT="[&#x2014;]">    [&mdash;]       <IMG SRC=X ALT="[&mdash;]">             # EM DASH<br>
+
+
+<!-- /PRE -->
+</BODY>
+</HTML>
diff --git a/test/special_urls.html b/test/special_urls.html
new file mode 100644
index 00000000..c9d35066
--- /dev/null
+++ b/test/special_urls.html
@@ -0,0 +1,22 @@
+<html>
+<head>
+<title>Lynx Special URLs</title>
+<link rev="made" href="mailto:WebMaster@foo.blah.dom">
+</head>
+<body>
+<h1>Lynx Special URLs</h1>
+<dl compact>
+<dd>LYNXCFG:<a href="LYNXCFG:">LYNXCFG (ok)</a>
+<dd>LYNXCOMPILEOPTS:<a href="LYNXCOMPILEOPTS:">LYNXCOMPILEOPTS (ok)</a>
+<dd>LYNXCOOKIE:<a href="LYNXCOOKIE:">LYNXCOOKIE is not allowed</a>
+<dd>LYNXDIRED:<a href="LYNXDIRED:">LYNXDIRED is not allowed</a>
+<dd>LYNXDOWNLOAD:<a href="LYNXDOWNLOAD:">LYNXDOWNLOAD is not allowed</a>
+<dd>LYNXHIST:<a href="LYNXHIST:">LYNXHIST is not allowed</a>
+<dd>LYNXIMGMAP:<a href="LYNXIMGMAP:">LYNXIMGMAP is not allowed</a>
+<dd>LYNXKEYMAP:<a href="LYNXKEYMAP:">LYNXKEYMAP (ok)</a>
+<dd>LYNXMESSAGES:<a href="LYNXMESSAGES:">LYNXMESSAGES (ok)</a>
+<dd>LYNXOPTIONS:<a href="LYNXOPTIONS:">LYNXOPTIONS (ok)</a>
+<dd>LYNXPRINT:<a href="LYNXPRINT:">LYNXPRINT is not allowed</a>
+</dl>
+</body>
+</html>
diff --git a/test/tabtest.html b/test/tabtest.html
new file mode 100644
index 00000000..45184a88
--- /dev/null
+++ b/test/tabtest.html
@@ -0,0 +1,39 @@
+<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 3.0//EN">
+<html>
+<head>
+<title>Tests of TAB element.</title>
+<link rev="made" href="mailto:lynx-dev@nongnu.org">
+</head>
+
+<body>
+<h1>Tests of TAB element.</h1>
+
+<TAB INDENT="16" ID="t0"><em>Normal Style:</em><br>
+One<TAB INDENT="26" ID="t1">Two<TAB INDENT="44" ID="t2">Three
+<TAB INDENT="62" ID="t3">Four<TAB INDENT="80" ID="t4">Five
+<TAB INDENT="98" ID="t5">Six<TAB INDENT="116" ID="t6">Seven
+<TAB INDENT="132" ID="t7">Eight<br>
+1.<TAB TO="t1">2.<TAB TO="t2">3.<TAB TO="t3">4.<TAB TO="t4">5.
+<TAB TO="t5">6.<TAB TO="t6">7.<TAB TO="t7">8.<br>
+i.<TAB TO="t1">ii.<TAB TO="t2">iii.<TAB TO="t3">iv.<TAB TO="t4">v.
+<TAB TO="t5">vi.<TAB TO="t6">vii.<TAB TO="t7">viii.
+
+<p><pre><TAB TO="t0"><em>In PRE block:</em>
+One<TAB TO="t1">Two<TAB TO="t3">Three<TAB TO="t5">Four<TAB TO="t7">Five
+1.<TAB TO="t1">2.<TAB TO="t3">3.<TAB TO="t5">4.<TAB TO="t7">5.
+i.<TAB TO="t1">ii.<TAB TO="t3">iii.<TAB TO="t5">iv.<TAB TO="t7">v.
+</pre>
+
+<bq>
+<TAB TO="t0"><em>In BQ block:</em><br>
+One<TAB TO="t2">Two<TAB TO="t4">Three<TAB TO="t6">Four<br>
+1.<TAB TO="t2">2.<TAB TO="t4">3.<TAB TO="t6">4.<br>
+i.<TAB TO="t2">ii.<TAB TO="t4">iii.<TAB TO="t6">iv.
+</bq>
+
+<p><b>noct<TAB ID="tn">ambulant</b> - walking at night<br>
+<TAB TO="tn">(from Latin: <i>nox noctis</i> night + <i>ambulare</i> walk)
+<pre>|<TAB INDENT="78">|<TAB INDENT="156">|
+0<TAB INDENT="76">80<TAB INDENT="152">158</pre>
+</body>
+</html>
diff --git a/test/tags.html b/test/tags.html
new file mode 100644
index 00000000..b64b0f98
--- /dev/null
+++ b/test/tags.html
@@ -0,0 +1,219 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Tags to Test Color-Style</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<link href="nobody" rev="made">
+</HEAD>
+
+<BODY alink="green" bgcolor="yellow">
+<!-- ====================================================================== -->
+<br>
+<h1>Content of an H1 Tag</h1>
+Text after an H1 Tag.
+<p>Paragraph after an H1 Tag.
+<br>
+<h2>Content of an H2 Tag</h2>
+Text after an H2 Tag.
+<p>Paragraph after an H2 Tag.
+<br>
+<h3>Content of an H3 Tag</h3>
+Text after an H3 Tag.
+<p>Paragraph after an H3 Tag.
+<br>
+<h4>Content of an H4 Tag</h4>
+Text after an H4 Tag.
+<p>Paragraph after an H4 Tag.
+<br>
+<h5>Content of an H5 Tag</h5>
+Text after an H5 Tag.
+<p>Paragraph after an H5 Tag.
+<br>
+<h6>Content of an H6 Tag</h6>
+Text after an H6 Tag.
+<p>Paragraph after an H6 Tag.
+<!-- ====================================================================== -->
+This is an <a href="next">"a"</a> tag.
+<br>
+This is an <address>"address"</address> tag.
+<br>
+This is a <b>"b"</b> tag.
+<br>
+This is a <big>"big"</big> tag.
+<br>
+Before quote, <blockquote>this is a "blockquote"</blockquote>, after quote.
+<br>
+This is a <center>"center"</center> tag.
+<br>
+This is a <cite>"cite"</cite> tag.
+<br>
+This is a <code>"code"</code> tag.
+<br>
+This is a <div>div</div> tag.
+<br>
+This is an <em>"em"</em> tag.
+<br>
+This is a <font>"font"</font> tag.
+<!-- ====================================================================== -->
+<br>
+This is an <hr>"hr"<hr> tag.
+<br>
+This is an <i>"i"</i> tag.
+<br>
+This is an <iframe>"iframe"</iframe> tag.
+<br>
+This is an <img alt="img" src="image.jpg"> tag.
+<br>
+This is a <label>"label"</label> tag.
+<br>
+map:		normal:			lightgray:	blue
+<br>
+<pre>
+This is
+pre-formatted
+text (three lines, with pre's on preceding/following lines).
+</pre>
+<br>
+This is a <q>"q"</q> tag.
+<br>
+This is a <samp>"samp"</samp> tag.
+<br>
+This is a <small>"small"</small> tag.
+<br>
+This is a <strong>"strong"</strong> tag.
+<br>
+This is a <sub>"sub"</sub> tag.
+<br>
+This is a <sup>"sup"</sup> tag.
+<br>
+This is a <tt>"tt"</tt> tag.
+<br>
+This is a <var>"var"</var> tag.
+<!-- ====================================================================== -->
+<h1>Forms</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+First: <input type="text" name="First" size=20>
+Last: <input type="text" name="Last" size=20>
+Description: <textarea rows=3 cols=40>
+contents of textarea
+</textarea>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+
+<h1 align="left">Another form</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<hr>
+<input type="checkbox" value="first">first
+<br><input type="checkbox" value="second">second
+<br><input type="checkbox" value="third">third
+<br><input type="checkbox" value="">empty
+<hr>
+<input type="submit" value="done">done
+</form>
+
+<h1 align="right">Another form</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</select>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+<!-- ====================================================================== -->
+<table border=2 summary="unquoted table">
+<caption>Unquoted Table</caption>
+<tr>
+<td>First:</td>
+<td>the first row</td>
+<td>short</td>
+<td>last</td></tr>
+<tr>
+<td>Second:</td>
+<td>the second row</td>
+<td>very long string</td>
+<td>lower-right</td></tr>
+</table>
+<!-- ====================================================================== -->
+<blockquote><table border=2 summary="quoted table">
+<caption>Quoted Table</caption>
+<tr>
+<td>First:</td>
+<td>the first row</td>
+<td>very long string</td>
+<td>last</td></tr>
+<tr>
+<td>Second:</td>
+<td>the second row</td>
+<td>short</td>
+<td>lower-right</td></tr>
+</table></blockquote>
+<!-- ====================================================================== -->
+<br>
+<h1>An image map</h1>
+<map name="IMAGEMAP">
+<area alt="Square" shape="rect" coords="18,18,82,80" href="square.html">
+<area alt="Circle" shape="circle" coords="127,48,31" href="circle.html">
+<area alt="Triangle" shape="poly" coords="232,78,303,78,263,14,232,76"
+   href="triangle.html">
+   </map>
+<!-- ====================================================================== -->
+<br>
+<h1>Definition List</h1>
+This is a definition list:
+<dl>
+<dt>the first dt
+<dd>the first dd
+<dt>the second dt
+<dd>the second dd
+<dl>
+<dt>the first dt
+<dd>the first dd
+<dt>the second dt
+<dd>the second dd
+<dt>the third dt
+<dd>the third dd
+</dl>
+<dt>the third dt
+<dd>the third dd
+</dl>
+<!-- ====================================================================== -->
+<br>
+<h1>Unordered List</h1>
+This is an unordered list:
+<ul>
+<li>first item
+<li>second item
+<ul>
+<li>first item
+<li>second item
+<li>third item
+</ul>
+<li>third item
+</ul>
+<!-- ====================================================================== -->
+<br>
+<h1>Ordered List</h1>
+This is an ordered list:
+<ol>
+<li>first item
+<li>second item
+<ol>
+<li>first item
+<li>second item
+<li>third item
+</ol>
+<li>third item
+</ol>
+
+</BODY>
+</HTML>
diff --git a/test/test-styles.html b/test/test-styles.html
new file mode 100644
index 00000000..39dfb9fa
--- /dev/null
+++ b/test/test-styles.html
@@ -0,0 +1,106 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test Color-Styles</TITLE>
+</HEAD>
+<BODY>
+<h1>Heading 1</h1>
+<h2>Heading 2</h2>
+<h3>Heading 3</h3>
+<h4>Heading 4</h4>
+<h5>Heading 5</h5>
+<h6>Heading 6</h6>
+
+<h1>Heading 1 - Ordinary Text</h1>
+<h2><a name="fontlike_text">Heading 2 - <b>Fontlike</b> Text</a></h2> <!-- target of href="#fontlike_text"; the name itself carries no '#' -->
+<!-- STYLE,BR,TAB -->
+This is <b>b (bold)</b>.
+<br>
+This is <big>big</big>.
+<br>
+This is <blink>blink</blink>.
+<br>
+This is <i>i (italicized)</i>.
+<br>
+This is <small>small</small>.
+<br>
+This is <strike>strike</strike>.
+<br>
+This is <tt>tt (typewriter)</tt>.
+<br>
+This is <u>u (underlined)</u>.
+
+<h2><a name="emphasized_text">Heading 2 - <em>Emphasized</em> Text</a></h2> <!-- target of href="#emphasized_text"; the name itself carries no '#' -->
+This is <cite>cite (citation)</cite>.
+<br>
+This is <code>code</code>.
+<br>
+This is <del>del</del>.
+<br>
+This is <dfn>dfn (definition)</dfn>.
+<br>
+This is <em>emphasized</em>.
+<br>
+This is <ins>ins</ins>.
+<br>
+This is <kbd>kbd (keyboard)</kbd>.
+<br>
+This is <q>q (quoted)</q>.
+<br>
+This is <samp>samp (sample)</samp>.
+<br>
+This is <span>span</span>.
+<br>
+This is <strong>strong</strong>.
+<br>
+This is <var>var</var>.
+
+<h1>Heading 1 - Ordinary Links</h1>
+<a href="#fontlike_text">This is a link</a> to fontlike text.
+<br>
+<a href="#emphasized_text">This is a link</a> to emphasized text.
+
+<h1>Heading 1 - Emphasized Links</h1>
+<h2><a name="fontlike_links">Heading 2 - <b>Fontlike</b> Links</a></h2> <!-- anchor name is the bare fragment id, without '#' -->
+<br>
+This is <a href="#fontlike_text"><b>b (bold)</b> link</a>.
+<br>
+This is <a href="#fontlike_text"><big>big</big> link</a>.
+<br>
+This is <a href="#fontlike_text"><blink>blink</blink> link</a>.
+<br>
+This is <a href="#fontlike_text"><i>i (italicized)</i> link</a>.
+<br>
+This is <a href="#fontlike_text"><small>small</small> link</a>.
+<br>
+This is <a href="#fontlike_text"><strike>strike</strike> link</a>.
+<br>
+This is <a href="#fontlike_text"><tt>tt (typewriter)</tt> link</a>.
+<br>
+This is <a href="#fontlike_text"><u>u (underlined)</u> link</a>.
+
+<h2><a name="emphasized_links">Heading 2 - <b>Emphasized</b> Links</a></h2> <!-- anchor name is the bare fragment id, without '#' -->
+This is <a href="#emphasized_text"><cite>cite (citation)</cite> link</a>.
+<br>
+This is <a href="#emphasized_text"><code>code</code> link</a>.
+<br>
+This is <a href="#emphasized_text"><del>del</del> link</a>.
+<br>
+This is <a href="#emphasized_text"><dfn>dfn (definition)</dfn> link</a>.
+<br>
+This is <a href="#emphasized_text"><em>emphasized</em> link</a>.
+<br>
+This is <a href="#emphasized_text"><ins>ins</ins> link</a>.
+<br>
+This is <a href="#emphasized_text"><kbd>kbd (keyboard)</kbd> link</a>.
+<br>
+This is <a href="#emphasized_text"><q>q (quoted)</q> link</a>.
+<br>
+This is <a href="#emphasized_text"><samp>samp (sample)</samp> link</a>.
+<br>
+This is <a href="#emphasized_text"><span>span</span> link</a>.
+<br>
+This is <a href="#emphasized_text"><strong>strong</strong> link</a>.
+<br>
+This is <a href="#emphasized_text"><var>var</var> link</a>.
+</BODY>
diff --git a/test/unicode.html b/test/unicode.html
new file mode 100644
index 00000000..7abcd1a2
--- /dev/null
+++ b/test/unicode.html
@@ -0,0 +1,915 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of some Unicode symbols in numeric character reference form</TITLE>
+</HEAD>
+<BODY>
+<PRE>
+
+    This table prepared from SGML.TXT available at ftp.unicode.org
+
+         ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
+         (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+# Author: John Cowan &lt;cowan@ccil.org&gt;
+# Date: 25 July 1997
+#
+# The following table maps SGML character entities from various
+# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
+# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
+# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
+# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
+#
+# The table has four tab-separated columns:
+#	Column 1: SGML character entity name
+#	Column 2: SGML public entity set
+#	Column 3: Unicode 2.0 character code
+#	Column 4: Unicode 2.0 character name (UPPER CASE)
+# Entries which don't have Unicode equivalents have "0x????"
+# in Column 3 and a lower case description (from the public entity
+# set DTD) in Column 4.  The mapping is not reversible, because many
+# distinctions are unified away in Unicode, particularly between
+# mathematical symbols.
+#
+# The table is sorted case-blind by SGML character entity name.
+#
+# The contents of this table are drawn from various sources, and
+# are in the public domain.
+#
+<!-- Changes:
++   {"euro",    0x20AC},  /* EURO SIGN                                     */
+
+-->
+
+This test illustrates Unicode numeric entities like &amp;#x22AB;
+We sort the entities according to Unicode numbers.
+You should see visible characters if your display character set supports them,
+or some substitution string picked up from src/chrtrans/def7_uni.tbl
+
+If you see something like &amp;#x34D2; - this number is unknown to def7_uni.tbl
+or the browser's internal implementation is broken.
+							Leonid Pauzner.
+
+
+
+
+0x0021    &#x0021;  		# EXCLAMATION MARK
+0x0022    &#x0022;  		# QUOTATION MARK
+0x0023    &#x0023;  		# NUMBER SIGN
+0x0024    &#x0024;  		# DOLLAR SIGN
+0x0025    &#x0025;  		# PERCENT SIGN
+0x0026    &#x0026;  		# AMPERSAND
+0x0028    &#x0028;  		# LEFT PARENTHESIS
+0x0029    &#x0029;  		# RIGHT PARENTHESIS
+0x002A    &#x002A;  		# ASTERISK
+0x002B    &#x002B;  		# PLUS SIGN
+0x002C    &#x002C;  		# COMMA
+0x002D    &#x002D;  		# HYPHEN-MINUS
+0x002E    &#x002E;  		# FULL STOP
+0x002F    &#x002F;  		# SOLIDUS
+0x003A    &#x003A;  		# COLON
+0x003B    &#x003B;  		# SEMICOLON
+0x003C    &#x003C;  		# LESS-THAN SIGN
+0x003D    &#x003D;  		# EQUALS SIGN
+0x003E    &#x003E;  		# GREATER-THAN SIGN
+0x003F    &#x003F;  		# QUESTION MARK
+0x0040    &#x0040;  		# COMMERCIAL AT
+0x005B    &#x005B;  		# LEFT SQUARE BRACKET
+0x005C    &#x005C;  		# REVERSE SOLIDUS
+0x005C    &#x005C;  		# REVERSE SOLIDUS
+0x005D    &#x005D;  		# RIGHT SQUARE BRACKET
+0x005F    &#x005F;  		# LOW LINE
+0x0060    &#x0060;  		# GRAVE ACCENT
+0x007B    &#x007B;  		# LEFT CURLY BRACKET
+0x007C    &#x007C;  		# VERTICAL LINE
+0x007D    &#x007D;  		# RIGHT CURLY BRACKET
+0x00A0    &#x00A0;  		# NO-BREAK SPACE
+0x00A1    &#x00A1;  		# INVERTED EXCLAMATION MARK
+0x00A2    &#x00A2;  		# CENT SIGN
+0x00A3    &#x00A3;  		# POUND SIGN
+0x00A4    &#x00A4;  		# CURRENCY SIGN
+0x00A5    &#x00A5;  		# YEN SIGN
+0x00A6    &#x00A6;  		# BROKEN BAR
+0x00A7    &#x00A7;  		# SECTION SIGN
+0x00A8    &#x00A8;  		# DIAERESIS
+0x00A9    &#x00A9;  		# COPYRIGHT SIGN
+0x00AA    &#x00AA;  		# FEMININE ORDINAL INDICATOR
+0x00AB    &#x00AB;  		# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00AC    &#x00AC;  		# NOT SIGN
+0x00AD    &#x00AD;  		# SOFT HYPHEN
+0x00AE    &#x00AE;  		# REGISTERED SIGN
+0x00AF    &#x00AF;  		# MACRON
+0x00B0    &#x00B0;  		# DEGREE SIGN
+0x00B1    &#x00B1;  		# PLUS-MINUS SIGN
+0x00B2    &#x00B2;  		# SUPERSCRIPT TWO
+0x00B3    &#x00B3;  		# SUPERSCRIPT THREE
+0x00B4    &#x00B4;  		# ACUTE ACCENT
+0x00B5    &#x00B5;  		# MICRO SIGN
+0x00B6    &#x00B6;  		# PILCROW SIGN
+0x00B7    &#x00B7;  		# MIDDLE DOT
+0x00B8    &#x00B8;  		# CEDILLA
+0x00B9    &#x00B9;  		# SUPERSCRIPT ONE
+0x00BA    &#x00BA;  		# MASCULINE ORDINAL INDICATOR
+0x00BB    &#x00BB;  		# RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00BC    &#x00BC;  		# VULGAR FRACTION ONE QUARTER
+0x00BD    &#x00BD;  		# VULGAR FRACTION ONE HALF
+0x00BE    &#x00BE;  		# VULGAR FRACTION THREE QUARTERS
+0x00BF    &#x00BF;  		# INVERTED QUESTION MARK
+0x00C0    &#x00C0;  		# LATIN CAPITAL LETTER A WITH GRAVE
+0x00C1    &#x00C1;  		# LATIN CAPITAL LETTER A WITH ACUTE
+0x00C2    &#x00C2;  		# LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x00C3    &#x00C3;  		# LATIN CAPITAL LETTER A WITH TILDE
+0x00C4    &#x00C4;  		# LATIN CAPITAL LETTER A WITH DIAERESIS
+0x00C5    &#x00C5;  		# LATIN CAPITAL LETTER A WITH RING ABOVE
+0x00C6    &#x00C6;  		# LATIN CAPITAL LETTER AE
+0x00C7    &#x00C7;  		# LATIN CAPITAL LETTER C WITH CEDILLA
+0x00C8    &#x00C8;  		# LATIN CAPITAL LETTER E WITH GRAVE
+0x00C9    &#x00C9;  		# LATIN CAPITAL LETTER E WITH ACUTE
+0x00CA    &#x00CA;  		# LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x00CB    &#x00CB;  		# LATIN CAPITAL LETTER E WITH DIAERESIS
+0x00CC    &#x00CC;  		# LATIN CAPITAL LETTER I WITH GRAVE
+0x00CD    &#x00CD;  		# LATIN CAPITAL LETTER I WITH ACUTE
+0x00CE    &#x00CE;  		# LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x00CF    &#x00CF;  		# LATIN CAPITAL LETTER I WITH DIAERESIS
+0x00D0    &#x00D0;  		# LATIN CAPITAL LETTER ETH
+0x00D1    &#x00D1;  		# LATIN CAPITAL LETTER N WITH TILDE
+0x00D2    &#x00D2;  		# LATIN CAPITAL LETTER O WITH GRAVE
+0x00D3    &#x00D3;  		# LATIN CAPITAL LETTER O WITH ACUTE
+0x00D4    &#x00D4;  		# LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0x00D5    &#x00D5;  		# LATIN CAPITAL LETTER O WITH TILDE
+0x00D6    &#x00D6;  		# LATIN CAPITAL LETTER O WITH DIAERESIS
+0x00D7    &#x00D7;  		# MULTIPLICATION SIGN
+0x00D8    &#x00D8;  		# LATIN CAPITAL LETTER O WITH STROKE
+0x00D9    &#x00D9;  		# LATIN CAPITAL LETTER U WITH GRAVE
+0x00DA    &#x00DA;  		# LATIN CAPITAL LETTER U WITH ACUTE
+0x00DB    &#x00DB;  		# LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0x00DC    &#x00DC;  		# LATIN CAPITAL LETTER U WITH DIAERESIS
+0x00DD    &#x00DD;  		# LATIN CAPITAL LETTER Y WITH ACUTE
+0x00DE    &#x00DE;  		# LATIN CAPITAL LETTER THORN
+0x00DF    &#x00DF;  		# LATIN SMALL LETTER SHARP S
+0x00E0    &#x00E0;  		# LATIN SMALL LETTER A WITH GRAVE
+0x00E1    &#x00E1;  		# LATIN SMALL LETTER A WITH ACUTE
+0x00E2    &#x00E2;  		# LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x00E3    &#x00E3;  		# LATIN SMALL LETTER A WITH TILDE
+0x00E4    &#x00E4;  		# LATIN SMALL LETTER A WITH DIAERESIS
+0x00E5    &#x00E5;  		# LATIN SMALL LETTER A WITH RING ABOVE
+0x00E6    &#x00E6;  		# LATIN SMALL LETTER AE
+0x00E7    &#x00E7;  		# LATIN SMALL LETTER C WITH CEDILLA
+0x00E8    &#x00E8;  		# LATIN SMALL LETTER E WITH GRAVE
+0x00E9    &#x00E9;  		# LATIN SMALL LETTER E WITH ACUTE
+0x00EA    &#x00EA;  		# LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x00EB    &#x00EB;  		# LATIN SMALL LETTER E WITH DIAERESIS
+0x00EC    &#x00EC;  		# LATIN SMALL LETTER I WITH GRAVE
+0x00ED    &#x00ED;  		# LATIN SMALL LETTER I WITH ACUTE
+0x00EE    &#x00EE;  		# LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x00EF    &#x00EF;  		# LATIN SMALL LETTER I WITH DIAERESIS
+0x00F0    &#x00F0;  		# LATIN SMALL LETTER ETH
+0x00F1    &#x00F1;  		# LATIN SMALL LETTER N WITH TILDE
+0x00F2    &#x00F2;  		# LATIN SMALL LETTER O WITH GRAVE
+0x00F3    &#x00F3;  		# LATIN SMALL LETTER O WITH ACUTE
+0x00F4    &#x00F4;  		# LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x00F5    &#x00F5;  		# LATIN SMALL LETTER O WITH TILDE
+0x00F6    &#x00F6;  		# LATIN SMALL LETTER O WITH DIAERESIS
+0x00F7    &#x00F7;  		# DIVISION SIGN
+0x00F8    &#x00F8;  		# LATIN SMALL LETTER O WITH STROKE
+0x00F9    &#x00F9;  		# LATIN SMALL LETTER U WITH GRAVE
+0x00FA    &#x00FA;  		# LATIN SMALL LETTER U WITH ACUTE
+0x00FB    &#x00FB;  		# LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x00FC    &#x00FC;  		# LATIN SMALL LETTER U WITH DIAERESIS
+0x00FD    &#x00FD;  		# LATIN SMALL LETTER Y WITH ACUTE
+0x00FE    &#x00FE;  		# LATIN SMALL LETTER THORN
+0x00FF    &#x00FF;  		# LATIN SMALL LETTER Y WITH DIAERESIS
+0x0100    &#x0100;  		# LATIN CAPITAL LETTER A WITH MACRON
+0x0101    &#x0101;  		# LATIN SMALL LETTER A WITH MACRON
+0x0102    &#x0102;  		# LATIN CAPITAL LETTER A WITH BREVE
+0x0103    &#x0103;  		# LATIN SMALL LETTER A WITH BREVE
+0x0104    &#x0104;  		# LATIN CAPITAL LETTER A WITH OGONEK
+0x0105    &#x0105;  		# LATIN SMALL LETTER A WITH OGONEK
+0x0106    &#x0106;  		# LATIN CAPITAL LETTER C WITH ACUTE
+0x0107    &#x0107;  		# LATIN SMALL LETTER C WITH ACUTE
+0x0108    &#x0108;  		# LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0x0109    &#x0109;  		# LATIN SMALL LETTER C WITH CIRCUMFLEX
+0x010A    &#x010A;  		# LATIN CAPITAL LETTER C WITH DOT ABOVE
+0x010B    &#x010B;  		# LATIN SMALL LETTER C WITH DOT ABOVE
+0x010C    &#x010C;  		# LATIN CAPITAL LETTER C WITH CARON
+0x010D    &#x010D;  		# LATIN SMALL LETTER C WITH CARON
+0x010E    &#x010E;  		# LATIN CAPITAL LETTER D WITH CARON
+0x010F    &#x010F;  		# LATIN SMALL LETTER D WITH CARON
+0x0110    &#x0110;  		# LATIN CAPITAL LETTER D WITH STROKE
+0x0111    &#x0111;  		# LATIN SMALL LETTER D WITH STROKE
+0x0112    &#x0112;  		# LATIN CAPITAL LETTER E WITH MACRON
+0x0113    &#x0113;  		# LATIN SMALL LETTER E WITH MACRON
+0x0116    &#x0116;  		# LATIN CAPITAL LETTER E WITH DOT ABOVE
+0x0117    &#x0117;  		# LATIN SMALL LETTER E WITH DOT ABOVE
+0x0118    &#x0118;  		# LATIN CAPITAL LETTER E WITH OGONEK
+0x0119    &#x0119;  		# LATIN SMALL LETTER E WITH OGONEK
+0x011A    &#x011A;  		# LATIN CAPITAL LETTER E WITH CARON
+0x011B    &#x011B;  		# LATIN SMALL LETTER E WITH CARON
+0x011C    &#x011C;  		# LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0x011D    &#x011D;  		# LATIN SMALL LETTER G WITH CIRCUMFLEX
+0x011E    &#x011E;  		# LATIN CAPITAL LETTER G WITH BREVE
+0x011F    &#x011F;  		# LATIN SMALL LETTER G WITH BREVE
+0x0120    &#x0120;  		# LATIN CAPITAL LETTER G WITH DOT ABOVE
+0x0121    &#x0121;  		# LATIN SMALL LETTER G WITH DOT ABOVE
+0x0122    &#x0122;  		# LATIN CAPITAL LETTER G WITH CEDILLA
+0x0123    &#x0123;  		# LATIN SMALL LETTER G WITH CEDILLA
+0x0124    &#x0124;  		# LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0x0125    &#x0125;  		# LATIN SMALL LETTER H WITH CIRCUMFLEX
+0x0126    &#x0126;  		# LATIN CAPITAL LETTER H WITH STROKE
+0x0127    &#x0127;  		# LATIN SMALL LETTER H WITH STROKE
+0x0128    &#x0128;  		# LATIN CAPITAL LETTER I WITH TILDE
+0x0129    &#x0129;  		# LATIN SMALL LETTER I WITH TILDE
+0x012A    &#x012A;  		# LATIN CAPITAL LETTER I WITH MACRON
+0x012B    &#x012B;  		# LATIN SMALL LETTER I WITH MACRON
+0x012E    &#x012E;  		# LATIN CAPITAL LETTER I WITH OGONEK
+0x012F    &#x012F;  		# LATIN SMALL LETTER I WITH OGONEK
+0x0130    &#x0130;  		# LATIN CAPITAL LETTER I WITH DOT ABOVE
+0x0131    &#x0131;  		# LATIN SMALL LETTER DOTLESS I
+0x0131    &#x0131;  		# LATIN SMALL LETTER DOTLESS I
+0x0132    &#x0132;  		# LATIN CAPITAL LIGATURE IJ
+0x0133    &#x0133;  		# LATIN SMALL LIGATURE IJ
+0x0134    &#x0134;  		# LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0x0135    &#x0135;  		# LATIN SMALL LETTER J WITH CIRCUMFLEX
+0x0136    &#x0136;  		# LATIN CAPITAL LETTER K WITH CEDILLA
+0x0137    &#x0137;  		# LATIN SMALL LETTER K WITH CEDILLA
+0x0138    &#x0138;  		# LATIN SMALL LETTER KRA
+0x0139    &#x0139;  		# LATIN CAPITAL LETTER L WITH ACUTE
+0x013A    &#x013A;  		# LATIN SMALL LETTER L WITH ACUTE
+0x013B    &#x013B;  		# LATIN CAPITAL LETTER L WITH CEDILLA
+0x013C    &#x013C;  		# LATIN SMALL LETTER L WITH CEDILLA
+0x013D    &#x013D;  		# LATIN CAPITAL LETTER L WITH CARON
+0x013E    &#x013E;  		# LATIN SMALL LETTER L WITH CARON
+0x013F    &#x013F;  		# LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0x0140    &#x0140;  		# LATIN SMALL LETTER L WITH MIDDLE DOT
+0x0141    &#x0141;  		# LATIN CAPITAL LETTER L WITH STROKE
+0x0142    &#x0142;  		# LATIN SMALL LETTER L WITH STROKE
+0x0143    &#x0143;  		# LATIN CAPITAL LETTER N WITH ACUTE
+0x0144    &#x0144;  		# LATIN SMALL LETTER N WITH ACUTE
+0x0145    &#x0145;  		# LATIN CAPITAL LETTER N WITH CEDILLA
+0x0146    &#x0146;  		# LATIN SMALL LETTER N WITH CEDILLA
+0x0147    &#x0147;  		# LATIN CAPITAL LETTER N WITH CARON
+0x0148    &#x0148;  		# LATIN SMALL LETTER N WITH CARON
+0x0149    &#x0149;  		# LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0x014A    &#x014A;  		# LATIN CAPITAL LETTER ENG
+0x014B    &#x014B;  		# LATIN SMALL LETTER ENG
+0x014C    &#x014C;  		# LATIN CAPITAL LETTER O WITH MACRON
+0x014D    &#x014D;  		# LATIN SMALL LETTER O WITH MACRON
+0x0150    &#x0150;  		# LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0x0151    &#x0151;  		# LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0x0152    &#x0152;  		# LATIN CAPITAL LIGATURE OE
+0x0153    &#x0153;  		# LATIN SMALL LIGATURE OE
+0x0154    &#x0154;  		# LATIN CAPITAL LETTER R WITH ACUTE
+0x0155    &#x0155;  		# LATIN SMALL LETTER R WITH ACUTE
+0x0156    &#x0156;  		# LATIN CAPITAL LETTER R WITH CEDILLA
+0x0157    &#x0157;  		# LATIN SMALL LETTER R WITH CEDILLA
+0x0158    &#x0158;  		# LATIN CAPITAL LETTER R WITH CARON
+0x0159    &#x0159;  		# LATIN SMALL LETTER R WITH CARON
+0x015A    &#x015A;  		# LATIN CAPITAL LETTER S WITH ACUTE
+0x015B    &#x015B;  		# LATIN SMALL LETTER S WITH ACUTE
+0x015C    &#x015C;  		# LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0x015D    &#x015D;  		# LATIN SMALL LETTER S WITH CIRCUMFLEX
+0x015E    &#x015E;  		# LATIN CAPITAL LETTER S WITH CEDILLA
+0x015F    &#x015F;  		# LATIN SMALL LETTER S WITH CEDILLA
+0x0160    &#x0160;  		# LATIN CAPITAL LETTER S WITH CARON
+0x0161    &#x0161;  		# LATIN SMALL LETTER S WITH CARON
+0x0162    &#x0162;  		# LATIN CAPITAL LETTER T WITH CEDILLA
+0x0163    &#x0163;  		# LATIN SMALL LETTER T WITH CEDILLA
+0x0164    &#x0164;  		# LATIN CAPITAL LETTER T WITH CARON
+0x0165    &#x0165;  		# LATIN SMALL LETTER T WITH CARON
+0x0166    &#x0166;  		# LATIN CAPITAL LETTER T WITH STROKE
+0x0167    &#x0167;  		# LATIN SMALL LETTER T WITH STROKE
+0x0168    &#x0168;  		# LATIN CAPITAL LETTER U WITH TILDE
+0x0169    &#x0169;  		# LATIN SMALL LETTER U WITH TILDE
+0x016A    &#x016A;  		# LATIN CAPITAL LETTER U WITH MACRON
+0x016B    &#x016B;  		# LATIN SMALL LETTER U WITH MACRON
+0x016C    &#x016C;  		# LATIN CAPITAL LETTER U WITH BREVE
+0x016D    &#x016D;  		# LATIN SMALL LETTER U WITH BREVE
+0x016E    &#x016E;  		# LATIN CAPITAL LETTER U WITH RING ABOVE
+0x016F    &#x016F;  		# LATIN SMALL LETTER U WITH RING ABOVE
+0x0170    &#x0170;  		# LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0x0171    &#x0171;  		# LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0x0172    &#x0172;  		# LATIN CAPITAL LETTER U WITH OGONEK
+0x0173    &#x0173;  		# LATIN SMALL LETTER U WITH OGONEK
+0x0174    &#x0174;  		# LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0x0175    &#x0175;  		# LATIN SMALL LETTER W WITH CIRCUMFLEX
+0x0176    &#x0176;  		# LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0x0177    &#x0177;  		# LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0x0178    &#x0178;  		# LATIN CAPITAL LETTER Y WITH DIAERESIS
+0x0179    &#x0179;  		# LATIN CAPITAL LETTER Z WITH ACUTE
+0x017A    &#x017A;  		# LATIN SMALL LETTER Z WITH ACUTE
+0x017B    &#x017B;  		# LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0x017C    &#x017C;  		# LATIN SMALL LETTER Z WITH DOT ABOVE
+0x017D    &#x017D;  		# LATIN CAPITAL LETTER Z WITH CARON
+0x017E    &#x017E;  		# LATIN SMALL LETTER Z WITH CARON
+0x0192    &#x0192;  		# LATIN SMALL LETTER F WITH HOOK
+0x01F5    &#x01F5;  		# LATIN SMALL LETTER G WITH ACUTE
+0x02BC    &#x02BC;  		# MODIFIER LETTER APOSTROPHE
+0x02C6    &#x02C6;  		# MODIFIER LETTER CIRCUMFLEX ACCENT
+0x02C7    &#x02C7;  		# CARON
+0x02D8    &#x02D8;  		# BREVE
+0x02D9    &#x02D9;  		# DOT ABOVE
+0x02DA    &#x02DA;  		# RING ABOVE
+0x02DB    &#x02DB;  		# OGONEK
+0x02DC    &#x02DC;  		# SMALL TILDE
+0x02DD    &#x02DD;  		# DOUBLE ACUTE ACCENT
+0x0386    &#x0386;  		# GREEK CAPITAL LETTER ALPHA WITH TONOS
+0x0388    &#x0388;  		# GREEK CAPITAL LETTER EPSILON WITH TONOS
+0x0389    &#x0389;  		# GREEK CAPITAL LETTER ETA WITH TONOS
+0x038A    &#x038A;  		# GREEK CAPITAL LETTER IOTA WITH TONOS
+0x038C    &#x038C;  		# GREEK CAPITAL LETTER OMICRON WITH TONOS
+0x038E    &#x038E;  		# GREEK CAPITAL LETTER UPSILON WITH TONOS
+0x038F    &#x038F;  		# GREEK CAPITAL LETTER OMEGA WITH TONOS
+0x0390    &#x0390;  		# GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0x0391    &#x0391;  		# GREEK CAPITAL LETTER ALPHA
+0x0392    &#x0392;  		# GREEK CAPITAL LETTER BETA
+0x0393    &#x0393;  		# GREEK CAPITAL LETTER GAMMA
+0x0394    &#x0394;  		# GREEK CAPITAL LETTER DELTA
+0x0395    &#x0395;  		# GREEK CAPITAL LETTER EPSILON
+0x0396    &#x0396;  		# GREEK CAPITAL LETTER ZETA
+0x0397    &#x0397;  		# GREEK CAPITAL LETTER ETA
+0x0398    &#x0398;  		# GREEK CAPITAL LETTER THETA
+0x0399    &#x0399;  		# GREEK CAPITAL LETTER IOTA
+0x039A    &#x039A;  		# GREEK CAPITAL LETTER KAPPA
+0x039B    &#x039B;  		# GREEK CAPITAL LETTER LAMDA
+0x039C    &#x039C;  		# GREEK CAPITAL LETTER MU
+0x039D    &#x039D;  		# GREEK CAPITAL LETTER NU
+0x039E    &#x039E;  		# GREEK CAPITAL LETTER XI
+0x039F    &#x039F;  		# GREEK CAPITAL LETTER OMICRON
+0x03A0    &#x03A0;  		# GREEK CAPITAL LETTER PI
+0x03A1    &#x03A1;  		# GREEK CAPITAL LETTER RHO
+0x03A3    &#x03A3;  		# GREEK CAPITAL LETTER SIGMA
+0x03A4    &#x03A4;  		# GREEK CAPITAL LETTER TAU
+0x03A5    &#x03A5;  		# GREEK CAPITAL LETTER UPSILON
+0x03A6    &#x03A6;  		# GREEK CAPITAL LETTER PHI
+0x03A7    &#x03A7;  		# GREEK CAPITAL LETTER CHI
+0x03A8    &#x03A8;  		# GREEK CAPITAL LETTER PSI
+0x03A9    &#x03A9;  		# GREEK CAPITAL LETTER OMEGA
+0x03AA    &#x03AA;  		# GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+0x03AB    &#x03AB;  		# GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+0x03AC    &#x03AC;  		# GREEK SMALL LETTER ALPHA WITH TONOS
+0x03AD    &#x03AD;  		# GREEK SMALL LETTER EPSILON WITH TONOS
+0x03AE    &#x03AE;  		# GREEK SMALL LETTER ETA WITH TONOS
+0x03AF    &#x03AF;  		# GREEK SMALL LETTER IOTA WITH TONOS
+0x03B0    &#x03B0;  		# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+0x03B1    &#x03B1;  		# GREEK SMALL LETTER ALPHA
+0x03B2    &#x03B2;  		# GREEK SMALL LETTER BETA
+0x03B3    &#x03B3;  		# GREEK SMALL LETTER GAMMA
+0x03B4    &#x03B4;  		# GREEK SMALL LETTER DELTA
+0x03B5    &#x03B5;  		# GREEK SMALL LETTER EPSILON
+0x03B6    &#x03B6;  		# GREEK SMALL LETTER ZETA
+0x03B7    &#x03B7;  		# GREEK SMALL LETTER ETA
+0x03B8    &#x03B8;  		# GREEK SMALL LETTER THETA
+0x03B9    &#x03B9;  		# GREEK SMALL LETTER IOTA
+0x03BA    &#x03BA;  		# GREEK SMALL LETTER KAPPA
+0x03BB    &#x03BB;  		# GREEK SMALL LETTER LAMDA
+0x03BC    &#x03BC;  		# GREEK SMALL LETTER MU
+0x03BD    &#x03BD;  		# GREEK SMALL LETTER NU
+0x03BE    &#x03BE;  		# GREEK SMALL LETTER XI
+0x03BF    &#x03BF;  		# GREEK SMALL LETTER OMICRON
+0x03C0    &#x03C0;  		# GREEK SMALL LETTER PI
+0x03C1    &#x03C1;  		# GREEK SMALL LETTER RHO
+0x03C2    &#x03C2;  		# GREEK SMALL LETTER FINAL SIGMA
+0x03C3    &#x03C3;  		# GREEK SMALL LETTER SIGMA
+0x03C4    &#x03C4;  		# GREEK SMALL LETTER TAU
+0x03C5    &#x03C5;  		# GREEK SMALL LETTER UPSILON
+0x03C6    &#x03C6;  		# GREEK SMALL LETTER PHI
+0x03C7    &#x03C7;  		# GREEK SMALL LETTER CHI
+0x03C8    &#x03C8;  		# GREEK SMALL LETTER PSI
+0x03C9    &#x03C9;  		# GREEK SMALL LETTER OMEGA
+0x03CA    &#x03CA;  		# GREEK SMALL LETTER IOTA WITH DIALYTIKA
+0x03CB    &#x03CB;  		# GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+0x03CC    &#x03CC;  		# GREEK SMALL LETTER OMICRON WITH TONOS
+0x03CE    &#x03CE;  		# GREEK SMALL LETTER OMEGA WITH TONOS
+0x03D1    &#x03D1;  		# GREEK THETA SYMBOL
+0x03D2    &#x03D2;  		# GREEK UPSILON WITH HOOK SYMBOL
+0x03D5    &#x03D5;  		# GREEK PHI SYMBOL
+0x03D6    &#x03D6;  		# GREEK PI SYMBOL
+0x03DC    &#x03DC;  		# GREEK LETTER DIGAMMA
+0x03F0    &#x03F0;  		# GREEK KAPPA SYMBOL
+0x03F1    &#x03F1;  		# GREEK RHO SYMBOL
+0x0401    &#x0401;  		# CYRILLIC CAPITAL LETTER IO
+0x0402    &#x0402;  		# CYRILLIC CAPITAL LETTER DJE
+0x0403    &#x0403;  		# CYRILLIC CAPITAL LETTER GJE
+0x0404    &#x0404;  		# CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0x0405    &#x0405;  		# CYRILLIC CAPITAL LETTER DZE
+0x0406    &#x0406;  		# CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0407    &#x0407;  		# CYRILLIC CAPITAL LETTER YI
+0x0408    &#x0408;  		# CYRILLIC CAPITAL LETTER JE
+0x0409    &#x0409;  		# CYRILLIC CAPITAL LETTER LJE
+0x040A    &#x040A;  		# CYRILLIC CAPITAL LETTER NJE
+0x040B    &#x040B;  		# CYRILLIC CAPITAL LETTER TSHE
+0x040C    &#x040C;  		# CYRILLIC CAPITAL LETTER KJE
+0x040E    &#x040E;  		# CYRILLIC CAPITAL LETTER SHORT U
+0x040F    &#x040F;  		# CYRILLIC CAPITAL LETTER DZHE
+0x0410    &#x0410;  		# CYRILLIC CAPITAL LETTER A
+0x0411    &#x0411;  		# CYRILLIC CAPITAL LETTER BE
+0x0412    &#x0412;  		# CYRILLIC CAPITAL LETTER VE
+0x0413    &#x0413;  		# CYRILLIC CAPITAL LETTER GHE
+0x0414    &#x0414;  		# CYRILLIC CAPITAL LETTER DE
+0x0415    &#x0415;  		# CYRILLIC CAPITAL LETTER IE
+0x0416    &#x0416;  		# CYRILLIC CAPITAL LETTER ZHE
+0x0417    &#x0417;  		# CYRILLIC CAPITAL LETTER ZE
+0x0418    &#x0418;  		# CYRILLIC CAPITAL LETTER I
+0x0419    &#x0419;  		# CYRILLIC CAPITAL LETTER SHORT I
+0x041A    &#x041A;  		# CYRILLIC CAPITAL LETTER KA
+0x041B    &#x041B;  		# CYRILLIC CAPITAL LETTER EL
+0x041C    &#x041C;  		# CYRILLIC CAPITAL LETTER EM
+0x041D    &#x041D;  		# CYRILLIC CAPITAL LETTER EN
+0x041E    &#x041E;  		# CYRILLIC CAPITAL LETTER O
+0x041F    &#x041F;  		# CYRILLIC CAPITAL LETTER PE
+0x0420    &#x0420;  		# CYRILLIC CAPITAL LETTER ER
+0x0421    &#x0421;  		# CYRILLIC CAPITAL LETTER ES
+0x0422    &#x0422;  		# CYRILLIC CAPITAL LETTER TE
+0x0423    &#x0423;  		# CYRILLIC CAPITAL LETTER U
+0x0424    &#x0424;  		# CYRILLIC CAPITAL LETTER EF
+0x0425    &#x0425;  		# CYRILLIC CAPITAL LETTER HA
+0x0426    &#x0426;  		# CYRILLIC CAPITAL LETTER TSE
+0x0427    &#x0427;  		# CYRILLIC CAPITAL LETTER CHE
+0x0428    &#x0428;  		# CYRILLIC CAPITAL LETTER SHA
+0x0429    &#x0429;  		# CYRILLIC CAPITAL LETTER SHCHA
+0x042A    &#x042A;  		# CYRILLIC CAPITAL LETTER HARD SIGN
+0x042B    &#x042B;  		# CYRILLIC CAPITAL LETTER YERU
+0x042C    &#x042C;  		# CYRILLIC CAPITAL LETTER SOFT SIGN
+0x042D    &#x042D;  		# CYRILLIC CAPITAL LETTER E
+0x042E    &#x042E;  		# CYRILLIC CAPITAL LETTER YU
+0x042F    &#x042F;  		# CYRILLIC CAPITAL LETTER YA
+0x0430    &#x0430;  		# CYRILLIC SMALL LETTER A
+0x0431    &#x0431;  		# CYRILLIC SMALL LETTER BE
+0x0432    &#x0432;  		# CYRILLIC SMALL LETTER VE
+0x0433    &#x0433;  		# CYRILLIC SMALL LETTER GHE
+0x0434    &#x0434;  		# CYRILLIC SMALL LETTER DE
+0x0435    &#x0435;  		# CYRILLIC SMALL LETTER IE
+0x0436    &#x0436;  		# CYRILLIC SMALL LETTER ZHE
+0x0437    &#x0437;  		# CYRILLIC SMALL LETTER ZE
+0x0438    &#x0438;  		# CYRILLIC SMALL LETTER I
+0x0439    &#x0439;  		# CYRILLIC SMALL LETTER SHORT I
+0x043A    &#x043A;  		# CYRILLIC SMALL LETTER KA
+0x043B    &#x043B;  		# CYRILLIC SMALL LETTER EL
+0x043C    &#x043C;  		# CYRILLIC SMALL LETTER EM
+0x043D    &#x043D;  		# CYRILLIC SMALL LETTER EN
+0x043E    &#x043E;  		# CYRILLIC SMALL LETTER O
+0x043F    &#x043F;  		# CYRILLIC SMALL LETTER PE
+0x0440    &#x0440;  		# CYRILLIC SMALL LETTER ER
+0x0441    &#x0441;  		# CYRILLIC SMALL LETTER ES
+0x0442    &#x0442;  		# CYRILLIC SMALL LETTER TE
+0x0443    &#x0443;  		# CYRILLIC SMALL LETTER U
+0x0444    &#x0444;  		# CYRILLIC SMALL LETTER EF
+0x0445    &#x0445;  		# CYRILLIC SMALL LETTER HA
+0x0446    &#x0446;  		# CYRILLIC SMALL LETTER TSE
+0x0447    &#x0447;  		# CYRILLIC SMALL LETTER CHE
+0x0448    &#x0448;  		# CYRILLIC SMALL LETTER SHA
+0x0449    &#x0449;  		# CYRILLIC SMALL LETTER SHCHA
+0x044A    &#x044A;  		# CYRILLIC SMALL LETTER HARD SIGN
+0x044B    &#x044B;  		# CYRILLIC SMALL LETTER YERU
+0x044C    &#x044C;  		# CYRILLIC SMALL LETTER SOFT SIGN
+0x044D    &#x044D;  		# CYRILLIC SMALL LETTER E
+0x044E    &#x044E;  		# CYRILLIC SMALL LETTER YU
+0x044F    &#x044F;  		# CYRILLIC SMALL LETTER YA
+0x0451    &#x0451;  		# CYRILLIC SMALL LETTER IO
+0x0452    &#x0452;  		# CYRILLIC SMALL LETTER DJE
+0x0453    &#x0453;  		# CYRILLIC SMALL LETTER GJE
+0x0454    &#x0454;  		# CYRILLIC SMALL LETTER UKRAINIAN IE
+0x0455    &#x0455;  		# CYRILLIC SMALL LETTER DZE
+0x0456    &#x0456;  		# CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0457    &#x0457;  		# CYRILLIC SMALL LETTER YI
+0x0458    &#x0458;  		# CYRILLIC SMALL LETTER JE
+0x0459    &#x0459;  		# CYRILLIC SMALL LETTER LJE
+0x045A    &#x045A;  		# CYRILLIC SMALL LETTER NJE
+0x045B    &#x045B;  		# CYRILLIC SMALL LETTER TSHE
+0x045C    &#x045C;  		# CYRILLIC SMALL LETTER KJE
+0x045E    &#x045E;  		# CYRILLIC SMALL LETTER SHORT U
+0x045F    &#x045F;  		# CYRILLIC SMALL LETTER DZHE
+0x2002    &#x2002;  		# EN SPACE
+0x2003    &#x2003;  		# EM SPACE
+0x2004    &#x2004;  		# THREE-PER-EM SPACE
+0x2005    &#x2005;  		# FOUR-PER-EM SPACE
+0x2007    &#x2007;  		# FIGURE SPACE
+0x2008    &#x2008;  		# PUNCTUATION SPACE
+0x2009    &#x2009;  		# THIN SPACE
+0x200A    &#x200A;  		# HAIR SPACE
+0x200C    &#x200C;  		# ZERO WIDTH NON-JOINER
+0x200D    &#x200D;  		# ZERO WIDTH JOINER
+0x200E    &#x200E;  		# LEFT-TO-RIGHT MARK
+0x200F    &#x200F;  		# RIGHT-TO-LEFT MARK
+0x2010    &#x2010;  		# HYPHEN
+0x2013    &#x2013;  		# EN DASH
+0x2014    &#x2014;  		# EM DASH
+0x2015    &#x2015;  		# HORIZONTAL BAR
+0x2016    &#x2016;  		# DOUBLE VERTICAL LINE
+0x2018    &#x2018;  		# LEFT SINGLE QUOTATION MARK
+0x2018    &#x2018;  		# LEFT SINGLE QUOTATION MARK
+0x2019    &#x2019;  		# RIGHT SINGLE QUOTATION MARK
+0x201A    &#x201A;  		# SINGLE LOW-9 QUOTATION MARK
+0x201A    &#x201A;  		# SINGLE LOW-9 QUOTATION MARK
+0x201C    &#x201C;  		# LEFT DOUBLE QUOTATION MARK
+0x201C    &#x201C;  		# LEFT DOUBLE QUOTATION MARK
+0x201D    &#x201D;  		# RIGHT DOUBLE QUOTATION MARK
+0x201E    &#x201E;  		# DOUBLE LOW-9 QUOTATION MARK
+0x201E    &#x201E;  		# DOUBLE LOW-9 QUOTATION MARK
+0x2020    &#x2020;  		# DAGGER
+0x2021    &#x2021;  		# DOUBLE DAGGER
+0x2022    &#x2022;  		# BULLET
+0x2025    &#x2025;  		# TWO DOT LEADER
+0x2026    &#x2026;  		# HORIZONTAL ELLIPSIS
+0x2026    &#x2026;  		# HORIZONTAL ELLIPSIS
+0x2030    &#x2030;  		# PER MILLE SIGN
+0x2032    &#x2032;  		# PRIME
+0x2032    &#x2032;  		# PRIME
+0x2033    &#x2033;  		# DOUBLE PRIME
+0x2034    &#x2034;  		# TRIPLE PRIME
+0x2035    &#x2035;  		# REVERSED PRIME
+0x2039    &#x2039;  		# SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x203A    &#x203A;  		# SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x203E    &#x203E;  		# OVERLINE
+0x2041    &#x2041;  		# CARET INSERTION POINT
+0x2043    &#x2043;  		# HYPHEN BULLET
+0x2044    &#x2044;  		# FRACTION SLASH
+0x20AC    &#x20AC;  		# EURO SIGN
+0x20DB    &#x20DB;  		# COMBINING THREE DOTS ABOVE
+0x20DC    &#x20DC;  		# COMBINING FOUR DOTS ABOVE
+0x2105    &#x2105;  		# CARE OF
+0x210B    &#x210B;  		# SCRIPT CAPITAL H
+0x210F    &#x210F;  		# PLANCK CONSTANT OVER TWO PI
+0x2111    &#x2111;  		# BLACK-LETTER CAPITAL I
+0x2112    &#x2112;  		# SCRIPT CAPITAL L
+0x2113    &#x2113;  		# SCRIPT SMALL L
+0x2116    &#x2116;  		# NUMERO SIGN
+0x2117    &#x2117;  		# SOUND RECORDING COPYRIGHT
+0x2118    &#x2118;  		# SCRIPT CAPITAL P
+0x211C    &#x211C;  		# BLACK-LETTER CAPITAL R
+0x211E    &#x211E;  		# PRESCRIPTION TAKE
+0x2122    &#x2122;  		# TRADE MARK SIGN
+0x2126    &#x2126;  		# OHM SIGN
+0x212B    &#x212B;  		# ANGSTROM SIGN
+0x212C    &#x212C;  		# SCRIPT CAPITAL B
+0x2133    &#x2133;  		# SCRIPT CAPITAL M
+0x2134    &#x2134;  		# SCRIPT SMALL O
+0x2135    &#x2135;  		# ALEF SYMBOL
+0x2135    &#x2135;  		# ALEF SYMBOL
+0x2136    &#x2136;  		# BET SYMBOL
+0x2137    &#x2137;  		# GIMEL SYMBOL
+0x2138    &#x2138;  		# DALET SYMBOL
+0x2153    &#x2153;  		# VULGAR FRACTION ONE THIRD
+0x2154    &#x2154;  		# VULGAR FRACTION TWO THIRDS
+0x2155    &#x2155;  		# VULGAR FRACTION ONE FIFTH
+0x2156    &#x2156;  		# VULGAR FRACTION TWO FIFTHS
+0x2157    &#x2157;  		# VULGAR FRACTION THREE FIFTHS
+0x2158    &#x2158;  		# VULGAR FRACTION FOUR FIFTHS
+0x2159    &#x2159;  		# VULGAR FRACTION ONE SIXTH
+0x215A    &#x215A;  		# VULGAR FRACTION FIVE SIXTHS
+0x215B    &#x215B;  		# VULGAR FRACTION ONE EIGHTH
+0x215C    &#x215C;  		# VULGAR FRACTION THREE EIGHTHS
+0x215D    &#x215D;  		# VULGAR FRACTION FIVE EIGHTHS
+0x215E    &#x215E;  		# VULGAR FRACTION SEVEN EIGHTHS
+0x2190    &#x2190;  		# LEFTWARDS ARROW
+0x2191    &#x2191;  		# UPWARDS ARROW
+0x2192    &#x2192;  		# RIGHTWARDS ARROW
+0x2193    &#x2193;  		# DOWNWARDS ARROW
+0x2194    &#x2194;  		# LEFT RIGHT ARROW
+0x2195    &#x2195;  		# UP DOWN ARROW
+0x2196    &#x2196;  		# NORTH WEST ARROW
+0x2197    &#x2197;  		# NORTH EAST ARROW
+0x2198    &#x2198;  		# SOUTH EAST ARROW
+0x2199    &#x2199;  		# SOUTH WEST ARROW
+0x219A    &#x219A;  		# LEFTWARDS ARROW WITH STROKE
+0x219B    &#x219B;  		# RIGHTWARDS ARROW WITH STROKE
+0x219D    &#x219D;  		# RIGHTWARDS WAVE ARROW
+0x219E    &#x219E;  		# LEFTWARDS TWO HEADED ARROW
+0x21A0    &#x21A0;  		# RIGHTWARDS TWO HEADED ARROW
+0x21A2    &#x21A2;  		# LEFTWARDS ARROW WITH TAIL
+0x21A3    &#x21A3;  		# RIGHTWARDS ARROW WITH TAIL
+0x21A6    &#x21A6;  		# RIGHTWARDS ARROW FROM BAR
+0x21A9    &#x21A9;  		# LEFTWARDS ARROW WITH HOOK
+0x21AA    &#x21AA;  		# RIGHTWARDS ARROW WITH HOOK
+0x21AB    &#x21AB;  		# LEFTWARDS ARROW WITH LOOP
+0x21AC    &#x21AC;  		# RIGHTWARDS ARROW WITH LOOP
+0x21AD    &#x21AD;  		# LEFT RIGHT WAVE ARROW
+0x21AE    &#x21AE;  		# LEFT RIGHT ARROW WITH STROKE
+0x21B0    &#x21B0;  		# UPWARDS ARROW WITH TIP LEFTWARDS
+0x21B1    &#x21B1;  		# UPWARDS ARROW WITH TIP RIGHTWARDS
+0x21B5    &#x21B5;  		# DOWNWARDS ARROW WITH CORNER LEFTWARDS
+0x21B6    &#x21B6;  		# ANTICLOCKWISE TOP SEMICIRCLE ARROW
+0x21B7    &#x21B7;  		# CLOCKWISE TOP SEMICIRCLE ARROW
+0x21BA    &#x21BA;  		# ANTICLOCKWISE OPEN CIRCLE ARROW
+0x21BB    &#x21BB;  		# CLOCKWISE OPEN CIRCLE ARROW
+0x21BC    &#x21BC;  		# LEFTWARDS HARPOON WITH BARB UPWARDS
+0x21BD    &#x21BD;  		# LEFTWARDS HARPOON WITH BARB DOWNWARDS
+0x21BE    &#x21BE;  		# UPWARDS HARPOON WITH BARB RIGHTWARDS
+0x21BF    &#x21BF;  		# UPWARDS HARPOON WITH BARB LEFTWARDS
+0x21C0    &#x21C0;  		# RIGHTWARDS HARPOON WITH BARB UPWARDS
+0x21C1    &#x21C1;  		# RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+0x21C2    &#x21C2;  		# DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+0x21C3    &#x21C3;  		# DOWNWARDS HARPOON WITH BARB LEFTWARDS
+0x21C4    &#x21C4;  		# RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+0x21C6    &#x21C6;  		# LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+0x21C7    &#x21C7;  		# LEFTWARDS PAIRED ARROWS
+0x21C8    &#x21C8;  		# UPWARDS PAIRED ARROWS
+0x21C9    &#x21C9;  		# RIGHTWARDS PAIRED ARROWS
+0x21CA    &#x21CA;  		# DOWNWARDS PAIRED ARROWS
+0x21CB    &#x21CB;  		# LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+0x21CC    &#x21CC;  		# RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+0x21CD    &#x21CD;  		# LEFTWARDS DOUBLE ARROW WITH STROKE
+0x21CE    &#x21CE;  		# LEFT RIGHT DOUBLE ARROW WITH STROKE
+0x21CF    &#x21CF;  		# RIGHTWARDS DOUBLE ARROW WITH STROKE
+0x21D0    &#x21D0;  		# LEFTWARDS DOUBLE ARROW
+0x21D1    &#x21D1;  		# UPWARDS DOUBLE ARROW
+0x21D2    &#x21D2;  		# RIGHTWARDS DOUBLE ARROW
+0x21D3    &#x21D3;  		# DOWNWARDS DOUBLE ARROW
+0x21D4    &#x21D4;  		# LEFT RIGHT DOUBLE ARROW
+0x21D5    &#x21D5;  		# UP DOWN DOUBLE ARROW
+0x21DA    &#x21DA;  		# LEFTWARDS TRIPLE ARROW
+0x21DB    &#x21DB;  		# RIGHTWARDS TRIPLE ARROW
+0x2200    &#x2200;  		# FOR ALL
+0x2201    &#x2201;  		# COMPLEMENT
+0x2202    &#x2202;  		# PARTIAL DIFFERENTIAL
+0x2203    &#x2203;  		# THERE EXISTS
+0x2204    &#x2204;  		# THERE DOES NOT EXIST
+0x2205    &#x2205;  		# EMPTY SET
+0x2207    &#x2207;  		# NABLA
+0x2208    &#x2208;  		# ELEMENT OF
+0x2209    &#x2209;  		# NOT AN ELEMENT OF
+0x220A    &#x220A;  		# SMALL ELEMENT OF
+0x220B    &#x220B;  		# CONTAINS AS MEMBER
+0x220D    &#x220D;  		# SMALL CONTAINS AS MEMBER
+0x220F    &#x220F;  		# N-ARY PRODUCT
+0x2210    &#x2210;  		# N-ARY COPRODUCT
+0x2211    &#x2211;  		# N-ARY SUMMATION
+0x2212    &#x2212;  		# MINUS SIGN
+0x2213    &#x2213;  		# MINUS-OR-PLUS SIGN
+0x2214    &#x2214;  		# DOT PLUS
+0x2216    &#x2216;  		# SET MINUS
+0x2217    &#x2217;  		# ASTERISK OPERATOR
+0x2218    &#x2218;  		# RING OPERATOR
+0x221A    &#x221A;  		# SQUARE ROOT
+0x221D    &#x221D;  		# PROPORTIONAL TO
+0x221E    &#x221E;  		# INFINITY
+0x221F    &#x221F;  		# RIGHT ANGLE
+0x2220    &#x2220;  		# ANGLE
+0x2221    &#x2221;  		# MEASURED ANGLE
+0x2222    &#x2222;  		# SPHERICAL ANGLE
+0x2223    &#x2223;  		# DIVIDES
+0x2224    &#x2224;  		# DOES NOT DIVIDE
+0x2225    &#x2225;  		# PARALLEL TO
+0x2226    &#x2226;  		# NOT PARALLEL TO
+0x2227    &#x2227;  		# LOGICAL AND
+0x2228    &#x2228;  		# LOGICAL OR
+0x2229    &#x2229;  		# INTERSECTION
+0x222A    &#x222A;  		# UNION
+0x222B    &#x222B;  		# INTEGRAL
+0x222E    &#x222E;  		# CONTOUR INTEGRAL
+0x2234    &#x2234;  		# THEREFORE
+0x2235    &#x2235;  		# BECAUSE
+0x223C    &#x223C;  		# TILDE OPERATOR
+0x223D    &#x223D;  		# REVERSED TILDE
+0x2240    &#x2240;  		# WREATH PRODUCT
+0x2241    &#x2241;  		# NOT TILDE
+0x2243    &#x2243;  		# ASYMPTOTICALLY EQUAL TO
+0x2244    &#x2244;  		# NOT ASYMPTOTICALLY EQUAL TO
+0x2245    &#x2245;  		# APPROXIMATELY EQUAL TO
+0x2247    &#x2247;  		# NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+0x2248    &#x2248;  		# ALMOST EQUAL TO
+0x2249    &#x2249;  		# NOT ALMOST EQUAL TO
+0x224A    &#x224A;  		# ALMOST EQUAL OR EQUAL TO
+0x224C    &#x224C;  		# ALL EQUAL TO
+0x224E    &#x224E;  		# GEOMETRICALLY EQUIVALENT TO
+0x224F    &#x224F;  		# DIFFERENCE BETWEEN
+0x2250    &#x2250;  		# APPROACHES THE LIMIT
+0x2251    &#x2251;  		# GEOMETRICALLY EQUAL TO
+0x2252    &#x2252;  		# APPROXIMATELY EQUAL TO OR THE IMAGE OF
+0x2253    &#x2253;  		# IMAGE OF OR APPROXIMATELY EQUAL TO
+0x2254    &#x2254;  		# COLON EQUALS
+0x2255    &#x2255;  		# EQUALS COLON
+0x2256    &#x2256;  		# RING IN EQUAL TO
+0x2257    &#x2257;  		# RING EQUAL TO
+0x2259    &#x2259;  		# ESTIMATES
+0x225C    &#x225C;  		# DELTA EQUAL TO
+0x2260    &#x2260;  		# NOT EQUAL TO
+0x2261    &#x2261;  		# IDENTICAL TO
+0x2262    &#x2262;  		# NOT IDENTICAL TO
+0x2264    &#x2264;  		# LESS-THAN OR EQUAL TO
+0x2265    &#x2265;  		# GREATER-THAN OR EQUAL TO
+0x2266    &#x2266;  		# LESS-THAN OVER EQUAL TO
+0x2267    &#x2267;  		# GREATER-THAN OVER EQUAL TO
+0x2268    &#x2268;  		# LESS-THAN BUT NOT EQUAL TO
+0x2269    &#x2269;  		# GREATER-THAN BUT NOT EQUAL TO
+0x226A    &#x226A;  		# MUCH LESS-THAN
+0x226B    &#x226B;  		# MUCH GREATER-THAN
+0x226C    &#x226C;  		# BETWEEN
+0x226E    &#x226E;  		# NOT LESS-THAN
+0x226F    &#x226F;  		# NOT GREATER-THAN
+0x2270    &#x2270;  		# NEITHER LESS-THAN NOR EQUAL TO
+0x2271    &#x2271;  		# NEITHER GREATER-THAN NOR EQUAL TO
+0x2272    &#x2272;  		# LESS-THAN OR EQUIVALENT TO
+0x2273    &#x2273;  		# GREATER-THAN OR EQUIVALENT TO
+0x2276    &#x2276;  		# LESS-THAN OR GREATER-THAN
+0x2277    &#x2277;  		# GREATER-THAN OR LESS-THAN
+0x227A    &#x227A;  		# PRECEDES
+0x227B    &#x227B;  		# SUCCEEDS
+0x227C    &#x227C;  		# PRECEDES OR EQUAL TO
+0x227D    &#x227D;  		# SUCCEEDS OR EQUAL TO
+0x227E    &#x227E;  		# PRECEDES OR EQUIVALENT TO
+0x227F    &#x227F;  		# SUCCEEDS OR EQUIVALENT TO
+0x2280    &#x2280;  		# DOES NOT PRECEDE
+0x2281    &#x2281;  		# DOES NOT SUCCEED
+0x2282    &#x2282;  		# SUBSET OF
+0x2283    &#x2283;  		# SUPERSET OF
+0x2284    &#x2284;  		# NOT A SUBSET OF
+0x2285    &#x2285;  		# NOT A SUPERSET OF
+0x2286    &#x2286;  		# SUBSET OF OR EQUAL TO
+0x2287    &#x2287;  		# SUPERSET OF OR EQUAL TO
+0x2288    &#x2288;  		# NEITHER A SUBSET OF NOR EQUAL TO
+0x2289    &#x2289;  		# NEITHER A SUPERSET OF NOR EQUAL TO
+0x228A    &#x228A;  		# SUBSET OF WITH NOT EQUAL TO
+0x228B    &#x228B;  		# SUPERSET OF WITH NOT EQUAL TO
+0x228E    &#x228E;  		# MULTISET UNION
+0x228F    &#x228F;  		# SQUARE IMAGE OF
+0x2290    &#x2290;  		# SQUARE ORIGINAL OF
+0x2291    &#x2291;  		# SQUARE IMAGE OF OR EQUAL TO
+0x2292    &#x2292;  		# SQUARE ORIGINAL OF OR EQUAL TO
+0x2293    &#x2293;  		# SQUARE CAP
+0x2294    &#x2294;  		# SQUARE CUP
+0x2295    &#x2295;  		# CIRCLED PLUS
+0x2296    &#x2296;  		# CIRCLED MINUS
+0x2297    &#x2297;  		# CIRCLED TIMES
+0x2298    &#x2298;  		# CIRCLED DIVISION SLASH
+0x2299    &#x2299;  		# CIRCLED DOT OPERATOR
+0x229A    &#x229A;  		# CIRCLED RING OPERATOR
+0x229B    &#x229B;  		# CIRCLED ASTERISK OPERATOR
+0x229D    &#x229D;  		# CIRCLED DASH
+0x229E    &#x229E;  		# SQUARED PLUS
+0x229F    &#x229F;  		# SQUARED MINUS
+0x22A0    &#x22A0;  		# SQUARED TIMES
+0x22A1    &#x22A1;  		# SQUARED DOT OPERATOR
+0x22A2    &#x22A2;  		# RIGHT TACK
+0x22A3    &#x22A3;  		# LEFT TACK
+0x22A4    &#x22A4;  		# DOWN TACK
+0x22A5    &#x22A5;  		# UP TACK
+0x22A7    &#x22A7;  		# MODELS
+0x22A8    &#x22A8;  		# TRUE
+0x22A9    &#x22A9;  		# FORCES
+0x22AA    &#x22AA;  		# TRIPLE VERTICAL BAR RIGHT TURNSTILE
+0x22AC    &#x22AC;  		# DOES NOT PROVE
+0x22AD    &#x22AD;  		# NOT TRUE
+0x22AE    &#x22AE;  		# DOES NOT FORCE
+0x22AF    &#x22AF;  		# NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+0x22B2    &#x22B2;  		# NORMAL SUBGROUP OF
+0x22B3    &#x22B3;  		# CONTAINS AS NORMAL SUBGROUP
+0x22B4    &#x22B4;  		# NORMAL SUBGROUP OF OR EQUAL TO
+0x22B5    &#x22B5;  		# CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+0x22B8    &#x22B8;  		# MULTIMAP
+0x22BA    &#x22BA;  		# INTERCALATE
+0x22BB    &#x22BB;  		# XOR
+0x22BC    &#x22BC;  		# NAND
+0x22C4    &#x22C4;  		# DIAMOND OPERATOR
+0x22C5    &#x22C5;  		# DOT OPERATOR
+0x22C6    &#x22C6;  		# STAR OPERATOR
+0x22C7    &#x22C7;  		# DIVISION TIMES
+0x22C8    &#x22C8;  		# BOWTIE
+0x22C9    &#x22C9;  		# LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CA    &#x22CA;  		# RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CB    &#x22CB;  		# LEFT SEMIDIRECT PRODUCT
+0x22CC    &#x22CC;  		# RIGHT SEMIDIRECT PRODUCT
+0x22CD    &#x22CD;  		# REVERSED TILDE EQUALS
+0x22CE    &#x22CE;  		# CURLY LOGICAL OR
+0x22CF    &#x22CF;  		# CURLY LOGICAL AND
+0x22D0    &#x22D0;  		# DOUBLE SUBSET
+0x22D1    &#x22D1;  		# DOUBLE SUPERSET
+0x22D2    &#x22D2;  		# DOUBLE INTERSECTION
+0x22D3    &#x22D3;  		# DOUBLE UNION
+0x22D4    &#x22D4;  		# PITCHFORK
+0x22D6    &#x22D6;  		# LESS-THAN WITH DOT
+0x22D7    &#x22D7;  		# GREATER-THAN WITH DOT
+0x22D8    &#x22D8;  		# VERY MUCH LESS-THAN
+0x22D9    &#x22D9;  		# VERY MUCH GREATER-THAN
+0x22DA    &#x22DA;  		# LESS-THAN EQUAL TO OR GREATER-THAN
+0x22DB    &#x22DB;  		# GREATER-THAN EQUAL TO OR LESS-THAN
+0x22DC    &#x22DC;  		# EQUAL TO OR LESS-THAN
+0x22DD    &#x22DD;  		# EQUAL TO OR GREATER-THAN
+0x22DE    &#x22DE;  		# EQUAL TO OR PRECEDES
+0x22DF    &#x22DF;  		# EQUAL TO OR SUCCEEDS
+0x22E0    &#x22E0;  		# DOES NOT PRECEDE OR EQUAL
+0x22E1    &#x22E1;  		# DOES NOT SUCCEED OR EQUAL
+0x22E6    &#x22E6;  		# LESS-THAN BUT NOT EQUIVALENT TO
+0x22E7    &#x22E7;  		# GREATER-THAN BUT NOT EQUIVALENT TO
+0x22E8    &#x22E8;  		# PRECEDES BUT NOT EQUIVALENT TO
+0x22E9    &#x22E9;  		# SUCCEEDS BUT NOT EQUIVALENT TO
+0x22EA    &#x22EA;  		# NOT NORMAL SUBGROUP OF
+0x22EB    &#x22EB;  		# DOES NOT CONTAIN AS NORMAL SUBGROUP
+0x22EC    &#x22EC;  		# NOT NORMAL SUBGROUP OF OR EQUAL TO
+0x22ED    &#x22ED;  		# DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+0x22EE    &#x22EE;  		# VERTICAL ELLIPSIS
+0x2306    &#x2306;  		# PERSPECTIVE
+0x2308    &#x2308;  		# LEFT CEILING
+0x2309    &#x2309;  		# RIGHT CEILING
+0x230A    &#x230A;  		# LEFT FLOOR
+0x230B    &#x230B;  		# RIGHT FLOOR
+0x230C    &#x230C;  		# BOTTOM RIGHT CROP
+0x230D    &#x230D;  		# BOTTOM LEFT CROP
+0x230E    &#x230E;  		# TOP RIGHT CROP
+0x230F    &#x230F;  		# TOP LEFT CROP
+0x2315    &#x2315;  		# TELEPHONE RECORDER
+0x2316    &#x2316;  		# POSITION INDICATOR
+0x231C    &#x231C;  		# TOP LEFT CORNER
+0x231D    &#x231D;  		# TOP RIGHT CORNER
+0x231E    &#x231E;  		# BOTTOM LEFT CORNER
+0x231F    &#x231F;  		# BOTTOM RIGHT CORNER
+0x2322    &#x2322;  		# FROWN
+0x2323    &#x2323;  		# SMILE
+0x2329    &#x2329;  		# LEFT-POINTING ANGLE BRACKET
+0x232A    &#x232A;  		# RIGHT-POINTING ANGLE BRACKET
+0x2423    &#x2423;  		# OPEN BOX
+0x24C8    &#x24C8;  		# CIRCLED LATIN CAPITAL LETTER S
+0x2500    &#x2500;  		# BOX DRAWINGS LIGHT HORIZONTAL
+0x2502    &#x2502;  		# BOX DRAWINGS LIGHT VERTICAL
+0x250C    &#x250C;  		# BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x2510    &#x2510;  		# BOX DRAWINGS LIGHT DOWN AND LEFT
+0x2514    &#x2514;  		# BOX DRAWINGS LIGHT UP AND RIGHT
+0x2518    &#x2518;  		# BOX DRAWINGS LIGHT UP AND LEFT
+0x251C    &#x251C;  		# BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x2524    &#x2524;  		# BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x252C    &#x252C;  		# BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x2534    &#x2534;  		# BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x253C    &#x253C;  		# BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x2550    &#x2550;  		# BOX DRAWINGS DOUBLE HORIZONTAL
+0x2551    &#x2551;  		# BOX DRAWINGS DOUBLE VERTICAL
+0x2552    &#x2552;  		# BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0x2553    &#x2553;  		# BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0x2554    &#x2554;  		# BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0x2555    &#x2555;  		# BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0x2556    &#x2556;  		# BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0x2557    &#x2557;  		# BOX DRAWINGS DOUBLE DOWN AND LEFT
+0x2558    &#x2558;  		# BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0x2559    &#x2559;  		# BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0x255A    &#x255A;  		# BOX DRAWINGS DOUBLE UP AND RIGHT
+0x255B    &#x255B;  		# BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0x255C    &#x255C;  		# BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0x255D    &#x255D;  		# BOX DRAWINGS DOUBLE UP AND LEFT
+0x255E    &#x255E;  		# BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0x255F    &#x255F;  		# BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0x2560    &#x2560;  		# BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0x2561    &#x2561;  		# BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0x2562    &#x2562;  		# BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0x2563    &#x2563;  		# BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0x2564    &#x2564;  		# BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0x2565    &#x2565;  		# BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0x2566    &#x2566;  		# BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0x2567    &#x2567;  		# BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0x2568    &#x2568;  		# BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0x2569    &#x2569;  		# BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0x256A    &#x256A;  		# BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0x256B    &#x256B;  		# BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0x256C    &#x256C;  		# BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0x2580    &#x2580;  		# UPPER HALF BLOCK
+0x2584    &#x2584;  		# LOWER HALF BLOCK
+0x2588    &#x2588;  		# FULL BLOCK
+0x2591    &#x2591;  		# LIGHT SHADE
+0x2592    &#x2592;  		# MEDIUM SHADE
+0x2593    &#x2593;  		# DARK SHADE
+0x25A1    &#x25A1;  		# WHITE SQUARE
+0x25AA    &#x25AA;  		# BLACK SMALL SQUARE
+0x25AD    &#x25AD;  		# WHITE RECTANGLE
+0x25AE    &#x25AE;  		# BLACK VERTICAL RECTANGLE
+0x25B3    &#x25B3;  		# WHITE UP-POINTING TRIANGLE
+0x25B4    &#x25B4;  		# BLACK UP-POINTING SMALL TRIANGLE
+0x25B5    &#x25B5;  		# WHITE UP-POINTING SMALL TRIANGLE
+0x25B8    &#x25B8;  		# BLACK RIGHT-POINTING SMALL TRIANGLE
+0x25B9    &#x25B9;  		# WHITE RIGHT-POINTING SMALL TRIANGLE
+0x25BD    &#x25BD;  		# WHITE DOWN-POINTING TRIANGLE
+0x25BE    &#x25BE;  		# BLACK DOWN-POINTING SMALL TRIANGLE
+0x25BF    &#x25BF;  		# WHITE DOWN-POINTING SMALL TRIANGLE
+0x25C2    &#x25C2;  		# BLACK LEFT-POINTING SMALL TRIANGLE
+0x25C3    &#x25C3;  		# WHITE LEFT-POINTING SMALL TRIANGLE
+0x25CA    &#x25CA;  		# LOZENGE
+0x25CB    &#x25CB;  		# WHITE CIRCLE
+0x2605    &#x2605;  		# BLACK STAR
+0x2606    &#x2606;  		# WHITE STAR
+0x260E    &#x260E;  		# BLACK TELEPHONE
+0x2640    &#x2640;  		# FEMALE SIGN
+0x2642    &#x2642;  		# MALE SIGN
+0x2660    &#x2660;  		# BLACK SPADE SUIT
+0x2663    &#x2663;  		# BLACK CLUB SUIT
+0x2665    &#x2665;  		# BLACK HEART SUIT
+0x2666    &#x2666;  		# BLACK DIAMOND SUIT
+0x266A    &#x266A;  		# EIGHTH NOTE
+0x266D    &#x266D;  		# MUSIC FLAT SIGN
+0x266E    &#x266E;  		# MUSIC NATURAL SIGN
+0x266F    &#x266F;  		# MUSIC SHARP SIGN
+0x2713    &#x2713;  		# CHECK MARK
+0x2717    &#x2717;  		# BALLOT X
+0x2720    &#x2720;  		# MALTESE CROSS
+0x2726    &#x2726;  		# BLACK FOUR POINTED STAR
+0x2727    &#x2727;  		# WHITE FOUR POINTED STAR
+0x2736    &#x2736;  		# SIX POINTED BLACK STAR
+0xFB00    &#xFB00;  		# LATIN SMALL LIGATURE FF
+0xFB01    &#xFB01;  		# LATIN SMALL LIGATURE FI
+0xFB02    &#xFB02;  		# LATIN SMALL LIGATURE FL
+0xFB03    &#xFB03;  		# LATIN SMALL LIGATURE FFI
+0xFB04    &#xFB04;  		# LATIN SMALL LIGATURE FFL
+
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/utf-8-demo.html b/test/utf-8-demo.html
new file mode 100644
index 00000000..d792903f
--- /dev/null
+++ b/test/utf-8-demo.html
@@ -0,0 +1,216 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Markus Kuhn's UTF-8 demo</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
+</HEAD>
+
+<BODY> 
+<pre>
+UTF-8 encoded sample plain-text file
+&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;
+
+Markus Kuhn [&#x2c8;ma&#x2b3;k&#x28a;s ku&#x2d0;n] &lt;mkuhn@acm.org&gt; &#x2014; 1999-08-20
+
+
+The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode
+plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R.
+
+
+Using Unicode/UTF-8, you can write in emails and source code things such as
+
+Mathematics and Sciences:
+
+  &#x222e; E&#x22c5;da = Q,  n &#x2192; &#x221e;, &#x2211; f(i) = &#x220f; g(i), &#x2200;x&#x2208;&#x211d;: &#x2308;x&#x2309; = &#x2212;&#x230a;&#x2212;x&#x230b;, &#x3b1; &#x2227; &#xac;&#x3b2; = &#xac;(&#xac;&#x3b1; &#x2228; &#x3b2;),
+
+  &#x2115; &#x2286; &#x2115;&#x2080; &#x2282; &#x2124; &#x2282; &#x211a; &#x2282; &#x211d; &#x2282; &#x2102;, &#x22a5; &lt; a &#x2260; b &#x2261; c &#x2264; d &#x226a; &#x22a4; &#x21d2; (A &#x21d4; B),
+
+  2H&#x2082; + O&#x2082; &#x21cc; 2H&#x2082;O, R = 4.7 k&#x3a9;, &#x2300; 200 mm
+
+Linguistics and dictionaries:
+
+  &#xf0;i &#x131;nt&#x259;&#x2c8;n&#xe6;&#x283;&#x259;n&#x259;l f&#x259;&#x2c8;n&#x25b;t&#x131;k &#x259;so&#x28a;si&#x2c8;e&#x131;&#x283;n
+  Y [&#x2c8;&#x28f;psil&#x254;n], Yen [j&#x25b;n], Yoga [&#x2c8;jo&#x2d0;g&#x251;]
+
+APL:
+
+  ((V&#x2373;V)=&#x2373;&#x2374;V)/V&#x2190;,V    &#x2337;&#x2190;&#x2373;&#x2192;&#x2374;&#x2206;&#x2207;&#x2283;&#x203e;&#x234e;&#x2355;&#x2308;
+
+Nicer typography in plain text files:
+
+  &#x2554;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2557;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; &#x2018;single&#x2019; and &#x201c;double&#x201d; quotes         &#x2551;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; Curly apostrophes: &#x201c;We&#x2019;ve been here&#x201d; &#x2551;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; Latin-1 apostrophe and accents: '&#xb4;`  &#x2551;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; &#x201a;deutsche&#x2018; &#x201e;Anf&#xfc;hrungszeichen&#x201c;       &#x2551;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; &#x2020;, &#x2021;, &#x2030;, &#x2022;, 3&#x2013;4, &#x2014;, &#x2212;5/+5, &#x2122;, &#x2026;      &#x2551;
+  &#x2551;                                          &#x2551;
+  &#x2551;   &#x2022; ASCII safety test: 1lI|, 0OD, 8B     &#x2551;
+  &#x2551;                      &#x256d;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x256e;         &#x2551;
+  &#x2551;   &#x2022; the euro symbol: &#x2502; &#x20ac; 14.95 &#x2502;         &#x2551;
+  &#x2551;                      &#x2570;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x256f;         &#x2551;
+  &#x255a;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x255d;
+
+Greek (in Polytonic):
+
+  The Greek anthem:
+
+  &#x3a3;&#x1f72; &#x3b3;&#x3bd;&#x3c9;&#x3c1;&#x1f77;&#x3b6;&#x3c9; &#x1f00;&#x3c0;&#x1f78; &#x3c4;&#x1f74;&#x3bd; &#x3ba;&#x1f79;&#x3c8;&#x3b7;
+  &#x3c4;&#x3bf;&#x1fe6; &#x3c3;&#x3c0;&#x3b1;&#x3b8;&#x3b9;&#x3bf;&#x1fe6; &#x3c4;&#x1f74;&#x3bd; &#x3c4;&#x3c1;&#x3bf;&#x3bc;&#x3b5;&#x3c1;&#x1f75;,
+  &#x3c3;&#x1f72; &#x3b3;&#x3bd;&#x3c9;&#x3c1;&#x1f77;&#x3b6;&#x3c9; &#x1f00;&#x3c0;&#x1f78; &#x3c4;&#x1f74;&#x3bd; &#x1f44;&#x3c8;&#x3b7;
+  &#x3c0;&#x3bf;&#x1f7a; &#x3bc;&#x1f72; &#x3b2;&#x1f77;&#x3b1; &#x3bc;&#x3b5;&#x3c4;&#x3c1;&#x1f71;&#x3b5;&#x3b9; &#x3c4;&#x1f74; &#x3b3;&#x1fc6;.
+
+  &#x1fbf;&#x391;&#x3c0;&#x1fbf; &#x3c4;&#x1f70; &#x3ba;&#x1f79;&#x3ba;&#x3ba;&#x3b1;&#x3bb;&#x3b1; &#x3b2;&#x3b3;&#x3b1;&#x3bb;&#x3bc;&#x1f73;&#x3bd;&#x3b7;
+  &#x3c4;&#x1ff6;&#x3bd; &#x1ffe;&#x395;&#x3bb;&#x3bb;&#x1f75;&#x3bd;&#x3c9;&#x3bd; &#x3c4;&#x1f70; &#x1f31;&#x3b5;&#x3c1;&#x1f71;
+  &#x3ba;&#x3b1;&#x1f76; &#x3c3;&#x1f70;&#x3bd; &#x3c0;&#x3c1;&#x1ff6;&#x3c4;&#x3b1; &#x1f00;&#x3bd;&#x3b4;&#x3c1;&#x3b5;&#x3b9;&#x3c9;&#x3bc;&#x1f73;&#x3bd;&#x3b7;
+  &#x3c7;&#x3b1;&#x1fd6;&#x3c1;&#x3b5;, &#x1f66; &#x3c7;&#x3b1;&#x1fd6;&#x3c1;&#x3b5;, &#x1fbf;&#x395;&#x3bb;&#x3b5;&#x3c5;&#x3b8;&#x3b5;&#x3c1;&#x3b9;&#x1f71;!
+
+  From a speech of Demosthenes in the 4th century BC:
+
+  &#x39f;&#x1f50;&#x3c7;&#x1f76; &#x3c4;&#x3b1;&#x1f50;&#x3c4;&#x1f70; &#x3c0;&#x3b1;&#x3c1;&#x1f77;&#x3c3;&#x3c4;&#x3b1;&#x3c4;&#x3b1;&#x1f77; &#x3bc;&#x3bf;&#x3b9; &#x3b3;&#x3b9;&#x3b3;&#x3bd;&#x1f7d;&#x3c3;&#x3ba;&#x3b5;&#x3b9;&#x3bd;, &#x1f66; &#x1f04;&#x3bd;&#x3b4;&#x3c1;&#x3b5;&#x3c2; &#x1fbf;&#x391;&#x3b8;&#x3b7;&#x3bd;&#x3b1;&#x1fd6;&#x3bf;&#x3b9;,
+  &#x1f45;&#x3c4;&#x3b1;&#x3bd; &#x3c4;&#x1fbf; &#x3b5;&#x1f30;&#x3c2; &#x3c4;&#x1f70; &#x3c0;&#x3c1;&#x1f71;&#x3b3;&#x3bc;&#x3b1;&#x3c4;&#x3b1; &#x1f00;&#x3c0;&#x3bf;&#x3b2;&#x3bb;&#x1f73;&#x3c8;&#x3c9; &#x3ba;&#x3b1;&#x1f76; &#x1f45;&#x3c4;&#x3b1;&#x3bd; &#x3c0;&#x3c1;&#x1f78;&#x3c2; &#x3c4;&#x3bf;&#x1f7a;&#x3c2;
+  &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3c5;&#x3c2; &#x3bf;&#x1f53;&#x3c2; &#x1f00;&#x3ba;&#x3bf;&#x1f7b;&#x3c9;&#x387; &#x3c4;&#x3bf;&#x1f7a;&#x3c2; &#x3bc;&#x1f72;&#x3bd; &#x3b3;&#x1f70;&#x3c1; &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3c5;&#x3c2; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x3bf;&#x1fe6;
+  &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3a6;&#x1f77;&#x3bb;&#x3b9;&#x3c0;&#x3c0;&#x3bf;&#x3bd; &#x1f41;&#x3c1;&#x1ff6; &#x3b3;&#x3b9;&#x3b3;&#x3bd;&#x3bf;&#x3bc;&#x1f73;&#x3bd;&#x3bf;&#x3c5;&#x3c2;, &#x3c4;&#x1f70; &#x3b4;&#x1f72; &#x3c0;&#x3c1;&#x1f71;&#x3b3;&#x3bc;&#x3b1;&#x3c4;&#x1fbf; 
+  &#x3b5;&#x1f30;&#x3c2; &#x3c4;&#x3bf;&#x1fe6;&#x3c4;&#x3bf; &#x3c0;&#x3c1;&#x3bf;&#x1f75;&#x3ba;&#x3bf;&#x3bd;&#x3c4;&#x3b1;,  &#x1f65;&#x3c3;&#x3b8;&#x1fbf; &#x1f45;&#x3c0;&#x3c9;&#x3c2; &#x3bc;&#x1f74; &#x3c0;&#x3b5;&#x3b9;&#x3c3;&#x1f79;&#x3bc;&#x3b5;&#x3b8;&#x1fbf; &#x3b1;&#x1f50;&#x3c4;&#x3bf;&#x1f76;
+  &#x3c0;&#x3c1;&#x1f79;&#x3c4;&#x3b5;&#x3c1;&#x3bf;&#x3bd; &#x3ba;&#x3b1;&#x3ba;&#x1ff6;&#x3c2; &#x3c3;&#x3ba;&#x1f73;&#x3c8;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3b4;&#x1f73;&#x3bf;&#x3bd;. &#x3bf;&#x1f50;&#x3b4;&#x1f73;&#x3bd; &#x3bf;&#x1f56;&#x3bd; &#x1f04;&#x3bb;&#x3bb;&#x3bf; &#x3bc;&#x3bf;&#x3b9; &#x3b4;&#x3bf;&#x3ba;&#x3bf;&#x1fe6;&#x3c3;&#x3b9;&#x3bd;
+  &#x3bf;&#x1f31; &#x3c4;&#x1f70; &#x3c4;&#x3bf;&#x3b9;&#x3b1;&#x1fe6;&#x3c4;&#x3b1; &#x3bb;&#x1f73;&#x3b3;&#x3bf;&#x3bd;&#x3c4;&#x3b5;&#x3c2; &#x1f22; &#x3c4;&#x1f74;&#x3bd; &#x1f51;&#x3c0;&#x1f79;&#x3b8;&#x3b5;&#x3c3;&#x3b9;&#x3bd;, &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x1f27;&#x3c2; &#x3b2;&#x3bf;&#x3c5;&#x3bb;&#x3b5;&#x1f7b;&#x3b5;&#x3c3;&#x3b8;&#x3b1;&#x3b9;,
+  &#x3bf;&#x1f50;&#x3c7;&#x1f76; &#x3c4;&#x1f74;&#x3bd; &#x3bf;&#x1f56;&#x3c3;&#x3b1;&#x3bd; &#x3c0;&#x3b1;&#x3c1;&#x3b9;&#x3c3;&#x3c4;&#x1f71;&#x3bd;&#x3c4;&#x3b5;&#x3c2; &#x1f51;&#x3bc;&#x1fd6;&#x3bd; &#x1f01;&#x3bc;&#x3b1;&#x3c1;&#x3c4;&#x1f71;&#x3bd;&#x3b5;&#x3b9;&#x3bd;. &#x1f10;&#x3b3;&#x1f7c; &#x3b4;&#x1f73;, &#x1f45;&#x3c4;&#x3b9; &#x3bc;&#x1f73;&#x3bd;
+  &#x3c0;&#x3bf;&#x3c4;&#x1fbf; &#x1f10;&#x3be;&#x1fc6;&#x3bd; &#x3c4;&#x1fc7; &#x3c0;&#x1f79;&#x3bb;&#x3b5;&#x3b9; &#x3ba;&#x3b1;&#x1f76; &#x3c4;&#x1f70; &#x3b1;&#x1f51;&#x3c4;&#x1fc6;&#x3c2; &#x1f14;&#x3c7;&#x3b5;&#x3b9;&#x3bd; &#x1f00;&#x3c3;&#x3c6;&#x3b1;&#x3bb;&#x1ff6;&#x3c2; &#x3ba;&#x3b1;&#x1f76; &#x3a6;&#x1f77;&#x3bb;&#x3b9;&#x3c0;&#x3c0;&#x3bf;&#x3bd;
+  &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9;, &#x3ba;&#x3b1;&#x1f76; &#x3bc;&#x1f71;&#x3bb;&#x1fbf; &#x1f00;&#x3ba;&#x3c1;&#x3b9;&#x3b2;&#x1ff6;&#x3c2; &#x3bf;&#x1f36;&#x3b4;&#x3b1;&#x387; &#x1f10;&#x3c0;&#x1fbf; &#x1f10;&#x3bc;&#x3bf;&#x1fe6; &#x3b3;&#x1f71;&#x3c1;, &#x3bf;&#x1f50; &#x3c0;&#x1f71;&#x3bb;&#x3b1;&#x3b9;
+  &#x3b3;&#x1f73;&#x3b3;&#x3bf;&#x3bd;&#x3b5;&#x3bd; &#x3c4;&#x3b1;&#x1fe6;&#x3c4;&#x1fbf; &#x1f00;&#x3bc;&#x3c6;&#x1f79;&#x3c4;&#x3b5;&#x3c1;&#x3b1;&#x387; &#x3bd;&#x1fe6;&#x3bd; &#x3bc;&#x1f73;&#x3bd;&#x3c4;&#x3bf;&#x3b9; &#x3c0;&#x1f73;&#x3c0;&#x3b5;&#x3b9;&#x3c3;&#x3bc;&#x3b1;&#x3b9; &#x3c4;&#x3bf;&#x1fe6;&#x3b8;&#x1fbf; &#x1f31;&#x3ba;&#x3b1;&#x3bd;&#x1f78;&#x3bd;
+  &#x3c0;&#x3c1;&#x3bf;&#x3bb;&#x3b1;&#x3b2;&#x3b5;&#x1fd6;&#x3bd; &#x1f21;&#x3bc;&#x1fd6;&#x3bd; &#x3b5;&#x1f36;&#x3bd;&#x3b1;&#x3b9; &#x3c4;&#x1f74;&#x3bd; &#x3c0;&#x3c1;&#x1f7d;&#x3c4;&#x3b7;&#x3bd;, &#x1f45;&#x3c0;&#x3c9;&#x3c2; &#x3c4;&#x3bf;&#x1f7a;&#x3c2; &#x3c3;&#x3c5;&#x3bc;&#x3bc;&#x1f71;&#x3c7;&#x3bf;&#x3c5;&#x3c2;
+  &#x3c3;&#x1f7d;&#x3c3;&#x3bf;&#x3bc;&#x3b5;&#x3bd;. &#x1f10;&#x1f70;&#x3bd; &#x3b3;&#x1f70;&#x3c1; &#x3c4;&#x3bf;&#x1fe6;&#x3c4;&#x3bf; &#x3b2;&#x3b5;&#x3b2;&#x3b1;&#x1f77;&#x3c9;&#x3c2; &#x1f51;&#x3c0;&#x1f71;&#x3c1;&#x3be;&#x1fc3;, &#x3c4;&#x1f79;&#x3c4;&#x3b5; &#x3ba;&#x3b1;&#x1f76; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x3bf;&#x1fe6;
+  &#x3c4;&#x1f77;&#x3bd;&#x3b1; &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b5;&#x3c4;&#x3b1;&#x1f77; &#x3c4;&#x3b9;&#x3c2; &#x3ba;&#x3b1;&#x1f76; &#x1f43;&#x3bd; &#x3c4;&#x3c1;&#x1f79;&#x3c0;&#x3bf;&#x3bd; &#x1f10;&#x3be;&#x1f73;&#x3c3;&#x3c4;&#x3b1;&#x3b9; &#x3c3;&#x3ba;&#x3bf;&#x3c0;&#x3b5;&#x1fd6;&#x3bd;&#x387; &#x3c0;&#x3c1;&#x1f76;&#x3bd; &#x3b4;&#x1f72;
+  &#x3c4;&#x1f74;&#x3bd; &#x1f00;&#x3c1;&#x3c7;&#x1f74;&#x3bd; &#x1f40;&#x3c1;&#x3b8;&#x1ff6;&#x3c2; &#x1f51;&#x3c0;&#x3bf;&#x3b8;&#x1f73;&#x3c3;&#x3b8;&#x3b1;&#x3b9;, &#x3bc;&#x1f71;&#x3c4;&#x3b1;&#x3b9;&#x3bf;&#x3bd; &#x1f21;&#x3b3;&#x3bf;&#x1fe6;&#x3bc;&#x3b1;&#x3b9; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x1fc6;&#x3c2;
+  &#x3c4;&#x3b5;&#x3bb;&#x3b5;&#x3c5;&#x3c4;&#x1fc6;&#x3c2; &#x1f41;&#x3bd;&#x3c4;&#x3b9;&#x3bd;&#x3bf;&#x1fe6;&#x3bd; &#x3c0;&#x3bf;&#x3b9;&#x3b5;&#x1fd6;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3bd;.
+
+  &#x394;&#x3b7;&#x3bc;&#x3bf;&#x3c3;&#x3b8;&#x1f73;&#x3bd;&#x3bf;&#x3c5;&#x3c2;, &#x393;&#x1ffd; &#x1fbf;&#x39f;&#x3bb;&#x3c5;&#x3bd;&#x3b8;&#x3b9;&#x3b1;&#x3ba;&#x1f78;&#x3c2;
+
+Georgian:
+
+  From a Unicode conference invitation:
+
+  &#x10d2;&#x10d7;&#x10ee;&#x10dd;&#x10d5;&#x10d7; &#x10d0;&#x10ee;&#x10da;&#x10d0;&#x10d5;&#x10d4; &#x10d2;&#x10d0;&#x10d8;&#x10d0;&#x10e0;&#x10dd;&#x10d7; &#x10e0;&#x10d4;&#x10d2;&#x10d8;&#x10e1;&#x10e2;&#x10e0;&#x10d0;&#x10ea;&#x10d8;&#x10d0; Unicode-&#x10d8;&#x10e1; &#x10db;&#x10d4;&#x10d0;&#x10d7;&#x10d4; &#x10e1;&#x10d0;&#x10d4;&#x10e0;&#x10d7;&#x10d0;&#x10e8;&#x10dd;&#x10e0;&#x10d8;&#x10e1;&#x10dd;
+  &#x10d9;&#x10dd;&#x10dc;&#x10e4;&#x10d4;&#x10e0;&#x10d4;&#x10dc;&#x10ea;&#x10d8;&#x10d0;&#x10d6;&#x10d4; &#x10d3;&#x10d0;&#x10e1;&#x10d0;&#x10e1;&#x10ec;&#x10e0;&#x10d4;&#x10d1;&#x10d0;&#x10d3;, &#x10e0;&#x10dd;&#x10db;&#x10d4;&#x10da;&#x10d8;&#x10ea; &#x10d2;&#x10d0;&#x10d8;&#x10db;&#x10d0;&#x10e0;&#x10d7;&#x10d4;&#x10d1;&#x10d0; 10-12 &#x10db;&#x10d0;&#x10e0;&#x10e2;&#x10e1;,
+  &#x10e5;. &#x10db;&#x10d0;&#x10d8;&#x10dc;&#x10ea;&#x10e8;&#x10d8;, &#x10d2;&#x10d4;&#x10e0;&#x10db;&#x10d0;&#x10dc;&#x10d8;&#x10d0;&#x10e8;&#x10d8;. &#x10d9;&#x10dd;&#x10dc;&#x10e4;&#x10d4;&#x10e0;&#x10d4;&#x10dc;&#x10ea;&#x10d8;&#x10d0; &#x10e8;&#x10d4;&#x10f0;&#x10d9;&#x10e0;&#x10d4;&#x10d1;&#x10e1; &#x10d4;&#x10e0;&#x10d7;&#x10d0;&#x10d3; &#x10db;&#x10e1;&#x10dd;&#x10e4;&#x10da;&#x10d8;&#x10dd;&#x10e1;
+  &#x10d4;&#x10e5;&#x10e1;&#x10de;&#x10d4;&#x10e0;&#x10e2;&#x10d4;&#x10d1;&#x10e1; &#x10d8;&#x10e1;&#x10d4;&#x10d7; &#x10d3;&#x10d0;&#x10e0;&#x10d2;&#x10d4;&#x10d1;&#x10e8;&#x10d8; &#x10e0;&#x10dd;&#x10d2;&#x10dd;&#x10e0;&#x10d8;&#x10ea;&#x10d0;&#x10d0; &#x10d8;&#x10dc;&#x10e2;&#x10d4;&#x10e0;&#x10dc;&#x10d4;&#x10e2;&#x10d8; &#x10d3;&#x10d0; Unicode-&#x10d8;,
+  &#x10d8;&#x10dc;&#x10e2;&#x10d4;&#x10e0;&#x10dc;&#x10d0;&#x10ea;&#x10d8;&#x10dd;&#x10dc;&#x10d0;&#x10da;&#x10d8;&#x10d6;&#x10d0;&#x10ea;&#x10d8;&#x10d0; &#x10d3;&#x10d0; &#x10da;&#x10dd;&#x10d9;&#x10d0;&#x10da;&#x10d8;&#x10d6;&#x10d0;&#x10ea;&#x10d8;&#x10d0;, Unicode-&#x10d8;&#x10e1; &#x10d2;&#x10d0;&#x10db;&#x10dd;&#x10e7;&#x10d4;&#x10dc;&#x10d4;&#x10d1;&#x10d0;
+  &#x10dd;&#x10de;&#x10d4;&#x10e0;&#x10d0;&#x10ea;&#x10d8;&#x10e3;&#x10da; &#x10e1;&#x10d8;&#x10e1;&#x10e2;&#x10d4;&#x10db;&#x10d4;&#x10d1;&#x10e1;&#x10d0;, &#x10d3;&#x10d0; &#x10d2;&#x10d0;&#x10db;&#x10dd;&#x10e7;&#x10d4;&#x10dc;&#x10d4;&#x10d1;&#x10d8;&#x10d7; &#x10de;&#x10e0;&#x10dd;&#x10d2;&#x10e0;&#x10d0;&#x10db;&#x10d4;&#x10d1;&#x10e8;&#x10d8;, &#x10e8;&#x10e0;&#x10d8;&#x10e4;&#x10e2;&#x10d4;&#x10d1;&#x10e8;&#x10d8;,
+  &#x10e2;&#x10d4;&#x10e5;&#x10e1;&#x10e2;&#x10d4;&#x10d1;&#x10d8;&#x10e1; &#x10d3;&#x10d0;&#x10db;&#x10e3;&#x10e8;&#x10d0;&#x10d5;&#x10d4;&#x10d1;&#x10d0;&#x10e1;&#x10d0; &#x10d3;&#x10d0; &#x10db;&#x10e0;&#x10d0;&#x10d5;&#x10d0;&#x10da;&#x10d4;&#x10dc;&#x10dd;&#x10d5;&#x10d0;&#x10dc; &#x10d9;&#x10dd;&#x10db;&#x10de;&#x10d8;&#x10e3;&#x10e2;&#x10d4;&#x10e0;&#x10e3;&#x10da; &#x10e1;&#x10d8;&#x10e1;&#x10e2;&#x10d4;&#x10db;&#x10d4;&#x10d1;&#x10e8;&#x10d8;.
+
+Russian:
+
+  From a Unicode conference invitation:
+
+  &#x417;&#x430;&#x440;&#x435;&#x433;&#x438;&#x441;&#x442;&#x440;&#x438;&#x440;&#x443;&#x439;&#x442;&#x435;&#x441;&#x44c; &#x441;&#x435;&#x439;&#x447;&#x430;&#x441; &#x43d;&#x430; &#x414;&#x435;&#x441;&#x44f;&#x442;&#x443;&#x44e; &#x41c;&#x435;&#x436;&#x434;&#x443;&#x43d;&#x430;&#x440;&#x43e;&#x434;&#x43d;&#x443;&#x44e; &#x41a;&#x43e;&#x43d;&#x444;&#x435;&#x440;&#x435;&#x43d;&#x446;&#x438;&#x44e; &#x43f;&#x43e;
+  Unicode, &#x43a;&#x43e;&#x442;&#x43e;&#x440;&#x430;&#x44f; &#x441;&#x43e;&#x441;&#x442;&#x43e;&#x438;&#x442;&#x441;&#x44f; 10-12 &#x43c;&#x430;&#x440;&#x442;&#x430; 1997 &#x433;&#x43e;&#x434;&#x430; &#x432; &#x41c;&#x430;&#x439;&#x43d;&#x446;&#x435; &#x432; &#x413;&#x435;&#x440;&#x43c;&#x430;&#x43d;&#x438;&#x438;.
+  &#x41a;&#x43e;&#x43d;&#x444;&#x435;&#x440;&#x435;&#x43d;&#x446;&#x438;&#x44f; &#x441;&#x43e;&#x431;&#x435;&#x440;&#x435;&#x442; &#x448;&#x438;&#x440;&#x43e;&#x43a;&#x438;&#x439; &#x43a;&#x440;&#x443;&#x433; &#x44d;&#x43a;&#x441;&#x43f;&#x435;&#x440;&#x442;&#x43e;&#x432; &#x43f;&#x43e;  &#x432;&#x43e;&#x43f;&#x440;&#x43e;&#x441;&#x430;&#x43c; &#x433;&#x43b;&#x43e;&#x431;&#x430;&#x43b;&#x44c;&#x43d;&#x43e;&#x433;&#x43e;
+  &#x418;&#x43d;&#x442;&#x435;&#x440;&#x43d;&#x435;&#x442;&#x430; &#x438; Unicode, &#x43b;&#x43e;&#x43a;&#x430;&#x43b;&#x438;&#x437;&#x430;&#x446;&#x438;&#x438; &#x438; &#x438;&#x43d;&#x442;&#x435;&#x440;&#x43d;&#x430;&#x446;&#x438;&#x43e;&#x43d;&#x430;&#x43b;&#x438;&#x437;&#x430;&#x446;&#x438;&#x438;, &#x432;&#x43e;&#x43f;&#x43b;&#x43e;&#x449;&#x435;&#x43d;&#x438;&#x44e; &#x438;
+  &#x43f;&#x440;&#x438;&#x43c;&#x435;&#x43d;&#x435;&#x43d;&#x438;&#x44e; Unicode &#x432; &#x440;&#x430;&#x437;&#x43b;&#x438;&#x447;&#x43d;&#x44b;&#x445; &#x43e;&#x43f;&#x435;&#x440;&#x430;&#x446;&#x438;&#x43e;&#x43d;&#x43d;&#x44b;&#x445; &#x441;&#x438;&#x441;&#x442;&#x435;&#x43c;&#x430;&#x445; &#x438; &#x43f;&#x440;&#x43e;&#x433;&#x440;&#x430;&#x43c;&#x43c;&#x43d;&#x44b;&#x445;
+  &#x43f;&#x440;&#x438;&#x43b;&#x43e;&#x436;&#x435;&#x43d;&#x438;&#x44f;&#x445;, &#x448;&#x440;&#x438;&#x444;&#x442;&#x430;&#x445;, &#x432;&#x435;&#x440;&#x441;&#x442;&#x43a;&#x435; &#x438; &#x43c;&#x43d;&#x43e;&#x433;&#x43e;&#x44f;&#x437;&#x44b;&#x447;&#x43d;&#x44b;&#x445; &#x43a;&#x43e;&#x43c;&#x43f;&#x44c;&#x44e;&#x442;&#x435;&#x440;&#x43d;&#x44b;&#x445; &#x441;&#x438;&#x441;&#x442;&#x435;&#x43c;&#x430;&#x445;.
+
+Thai (UCS Level 2):
+
+  Excerpt from a poem on The Romance of The Three Kingdoms (a Chinese
+  classic 'San Guo'):
+
+  [----------------------------|------------------------]
+    &#xe4f; &#xe41;&#xe1c;&#xe48;&#xe19;&#xe14;&#xe34;&#xe19;&#xe2e;&#xe31;&#xe48;&#xe19;&#xe40;&#xe2a;&#xe37;&#xe48;&#xe2d;&#xe21;&#xe42;&#xe17;&#xe23;&#xe21;&#xe41;&#xe2a;&#xe19;&#xe2a;&#xe31;&#xe07;&#xe40;&#xe27;&#xe0a;  &#xe1e;&#xe23;&#xe30;&#xe1b;&#xe01;&#xe40;&#xe01;&#xe28;&#xe01;&#xe2d;&#xe07;&#xe1a;&#xe39;&#xe4a;&#xe01;&#xe39;&#xe49;&#xe02;&#xe36;&#xe49;&#xe19;&#xe43;&#xe2b;&#xe21;&#xe48;
+  &#xe2a;&#xe34;&#xe1a;&#xe2a;&#xe2d;&#xe07;&#xe01;&#xe29;&#xe31;&#xe15;&#xe23;&#xe34;&#xe22;&#xe4c;&#xe01;&#xe48;&#xe2d;&#xe19;&#xe2b;&#xe19;&#xe49;&#xe32;&#xe41;&#xe25;&#xe16;&#xe31;&#xe14;&#xe44;&#xe1b;       &#xe2a;&#xe2d;&#xe07;&#xe2d;&#xe07;&#xe04;&#xe4c;&#xe44;&#xe0b;&#xe23;&#xe49;&#xe42;&#xe07;&#xe48;&#xe40;&#xe02;&#xe25;&#xe32;&#xe40;&#xe1a;&#xe32;&#xe1b;&#xe31;&#xe0d;&#xe0d;&#xe32;
+    &#xe17;&#xe23;&#xe07;&#xe19;&#xe31;&#xe1a;&#xe16;&#xe37;&#xe2d;&#xe02;&#xe31;&#xe19;&#xe17;&#xe35;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe17;&#xe35;&#xe48;&#xe1e;&#xe36;&#xe48;&#xe07;           &#xe1a;&#xe49;&#xe32;&#xe19;&#xe40;&#xe21;&#xe37;&#xe2d;&#xe07;&#xe08;&#xe36;&#xe07;&#xe27;&#xe34;&#xe1b;&#xe23;&#xe34;&#xe15;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe19;&#xe31;&#xe01;&#xe2b;&#xe19;&#xe32;
+  &#xe42;&#xe2e;&#xe08;&#xe34;&#xe4b;&#xe19;&#xe40;&#xe23;&#xe35;&#xe22;&#xe01;&#xe17;&#xe31;&#xe1e;&#xe17;&#xe31;&#xe48;&#xe27;&#xe2b;&#xe31;&#xe27;&#xe40;&#xe21;&#xe37;&#xe2d;&#xe07;&#xe21;&#xe32;         &#xe2b;&#xe21;&#xe32;&#xe22;&#xe08;&#xe30;&#xe06;&#xe48;&#xe32;&#xe21;&#xe14;&#xe0a;&#xe31;&#xe48;&#xe27;&#xe15;&#xe31;&#xe27;&#xe2a;&#xe33;&#xe04;&#xe31;&#xe0d;
+    &#xe40;&#xe2b;&#xe21;&#xe37;&#xe2d;&#xe19;&#xe02;&#xe31;&#xe1a;&#xe44;&#xe2a;&#xe44;&#xe25;&#xe48;&#xe40;&#xe2a;&#xe37;&#xe2d;&#xe08;&#xe32;&#xe01;&#xe40;&#xe04;&#xe2b;&#xe32;      &#xe23;&#xe31;&#xe1a;&#xe2b;&#xe21;&#xe32;&#xe1b;&#xe48;&#xe32;&#xe40;&#xe02;&#xe49;&#xe32;&#xe21;&#xe32;&#xe40;&#xe25;&#xe22;&#xe2d;&#xe32;&#xe2a;&#xe31;&#xe0d;
+  &#xe1d;&#xe48;&#xe32;&#xe22;&#xe2d;&#xe49;&#xe2d;&#xe07;&#xe2d;&#xe38;&#xe49;&#xe19;&#xe22;&#xe38;&#xe41;&#xe22;&#xe01;&#xe43;&#xe2b;&#xe49;&#xe41;&#xe15;&#xe01;&#xe01;&#xe31;&#xe19;          &#xe43;&#xe0a;&#xe49;&#xe2a;&#xe32;&#xe27;&#xe19;&#xe31;&#xe49;&#xe19;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe0a;&#xe19;&#xe27;&#xe19;&#xe0a;&#xe37;&#xe48;&#xe19;&#xe0a;&#xe27;&#xe19;&#xe43;&#xe08;
+    &#xe1e;&#xe25;&#xe31;&#xe19;&#xe25;&#xe34;&#xe09;&#xe38;&#xe22;&#xe01;&#xe38;&#xe22;&#xe01;&#xe35;&#xe01;&#xe25;&#xe31;&#xe1a;&#xe01;&#xe48;&#xe2d;&#xe40;&#xe2b;&#xe15;&#xe38;          &#xe0a;&#xe48;&#xe32;&#xe07;&#xe2d;&#xe32;&#xe40;&#xe1e;&#xe28;&#xe08;&#xe23;&#xe34;&#xe07;&#xe2b;&#xe19;&#xe32;&#xe1f;&#xe49;&#xe32;&#xe23;&#xe49;&#xe2d;&#xe07;&#xe44;&#xe2b;&#xe49;
+  &#xe15;&#xe49;&#xe2d;&#xe07;&#xe23;&#xe1a;&#xe23;&#xe32;&#xe06;&#xe48;&#xe32;&#xe1f;&#xe31;&#xe19;&#xe08;&#xe19;&#xe1a;&#xe23;&#xe23;&#xe25;&#xe31;&#xe22;           &#xe24;&#xe45;&#xe2b;&#xe32;&#xe43;&#xe04;&#xe23;&#xe04;&#xe49;&#xe33;&#xe0a;&#xe39;&#xe01;&#xe39;&#xe49;&#xe1a;&#xe23;&#xe23;&#xe25;&#xe31;&#xe07;&#xe01;&#xe4c; &#xe2f;
+
+  (The above is a two-column text. If combining characters are handled
+  correctly, the lines of the second column should be aligned with the
+  | character above.)
+
+Ethiopian:
+
+  Proverbs in the Amharic language:
+
+  &#x1230;&#x121b;&#x12ed; &#x12a0;&#x12ed;&#x1273;&#x1228;&#x1235; &#x1295;&#x1309;&#x1225; &#x12a0;&#x12ed;&#x12a8;&#x1230;&#x1235;&#x1362;
+  &#x1265;&#x120b; &#x12ab;&#x1208;&#x129d; &#x12a5;&#x1295;&#x12f0;&#x12a0;&#x1263;&#x1274; &#x1260;&#x1246;&#x1218;&#x1320;&#x129d;&#x1362;
+  &#x130c;&#x1325; &#x12eb;&#x1208;&#x1264;&#x1271; &#x1241;&#x121d;&#x1325;&#x1293; &#x1290;&#x12cd;&#x1362;
+  &#x12f0;&#x1200; &#x1260;&#x1215;&#x120d;&#x1219; &#x1245;&#x1264; &#x1263;&#x12ed;&#x1320;&#x1323; &#x1295;&#x1323;&#x1275; &#x1260;&#x1308;&#x12f0;&#x1208;&#x12cd;&#x1362;
+  &#x12e8;&#x12a0;&#x134d; &#x12c8;&#x1208;&#x121d;&#x1273; &#x1260;&#x1245;&#x1264; &#x12a0;&#x12ed;&#x1273;&#x123d;&#x121d;&#x1362;
+  &#x12a0;&#x12ed;&#x1325; &#x1260;&#x1260;&#x120b; &#x12f3;&#x12cb; &#x1270;&#x1218;&#x1273;&#x1362;
+  &#x1232;&#x1270;&#x1228;&#x1309;&#x1219; &#x12ed;&#x12f0;&#x1228;&#x130d;&#x1219;&#x1362;
+  &#x1240;&#x1235; &#x1260;&#x1240;&#x1235;&#x1365; &#x12d5;&#x1295;&#x1241;&#x120b;&#x120d; &#x1260;&#x12a5;&#x130d;&#x1229; &#x12ed;&#x1204;&#x12f3;&#x120d;&#x1362;
+  &#x12f5;&#x122d; &#x1262;&#x12eb;&#x1265;&#x122d; &#x12a0;&#x1295;&#x1260;&#x1233; &#x12eb;&#x1235;&#x122d;&#x1362;
+  &#x1230;&#x12cd; &#x12a5;&#x1295;&#x12f0;&#x1264;&#x1271; &#x12a5;&#x1295;&#x1305; &#x12a5;&#x1295;&#x12f0; &#x1309;&#x1228;&#x1264;&#x1271; &#x12a0;&#x12ed;&#x1270;&#x12f3;&#x12f0;&#x122d;&#x121d;&#x1362;
+  &#x12a5;&#x130d;&#x12dc;&#x122d; &#x12e8;&#x12a8;&#x1348;&#x1270;&#x12cd;&#x1295; &#x1309;&#x122e;&#x122e; &#x1233;&#x12ed;&#x12d8;&#x130b;&#x12cd; &#x12a0;&#x12ed;&#x12f5;&#x122d;&#x121d;&#x1362;
+  &#x12e8;&#x130e;&#x1228;&#x1264;&#x1275; &#x120c;&#x1263;&#x1365; &#x1262;&#x12eb;&#x12e9;&#x1275; &#x12ed;&#x1235;&#x1245; &#x1263;&#x12eb;&#x12e9;&#x1275; &#x12eb;&#x1320;&#x120d;&#x1245;&#x1362;
+  &#x1225;&#x122b; &#x12a8;&#x1218;&#x134d;&#x1273;&#x1275; &#x120d;&#x1304;&#x1295; &#x120b;&#x134b;&#x1273;&#x1275;&#x1362;
+  &#x12d3;&#x1263;&#x12ed; &#x121b;&#x12f0;&#x122a;&#x12eb; &#x12e8;&#x1208;&#x12cd;&#x1365; &#x130d;&#x1295;&#x12f5; &#x12ed;&#x12de; &#x12ed;&#x12de;&#x122b;&#x120d;&#x1362;
+  &#x12e8;&#x12a5;&#x1235;&#x120b;&#x121d; &#x12a0;&#x1308;&#x1229; &#x1218;&#x12ab; &#x12e8;&#x12a0;&#x121e;&#x122b; &#x12a0;&#x1308;&#x1229; &#x12cb;&#x122d;&#x12ab;&#x1362;
+  &#x1270;&#x1295;&#x130b;&#x120e; &#x1262;&#x1270;&#x1349; &#x1270;&#x1218;&#x120d;&#x1236; &#x1263;&#x1349;&#x1362;
+  &#x12c8;&#x12f3;&#x1305;&#x1205; &#x121b;&#x122d; &#x1262;&#x1206;&#x1295; &#x1328;&#x122d;&#x1235;&#x1205; &#x12a0;&#x1275;&#x120b;&#x1230;&#x12cd;&#x1362;
+  &#x12a5;&#x130d;&#x122d;&#x1205;&#x1295; &#x1260;&#x134d;&#x122b;&#x123d;&#x1205; &#x120d;&#x12ad; &#x12d8;&#x122d;&#x130b;&#x1362;
+
+Runes:
+
+  &#x16bb;&#x16d6; &#x16b3;&#x16b9;&#x16ab;&#x16a6; &#x16a6;&#x16ab;&#x16cf; &#x16bb;&#x16d6; &#x16d2;&#x16a2;&#x16de;&#x16d6; &#x16a9;&#x16be; &#x16a6;&#x16ab;&#x16d7; &#x16da;&#x16aa;&#x16be;&#x16de;&#x16d6; &#x16be;&#x16a9;&#x16b1;&#x16a6;&#x16b9;&#x16d6;&#x16aa;&#x16b1;&#x16de;&#x16a2;&#x16d7; &#x16b9;&#x16c1;&#x16a6; &#x16a6;&#x16aa; &#x16b9;&#x16d6;&#x16e5;&#x16ab;
+
+  (Old English, which, transcribed into Latin, reads 'He cwaeth that he
+  bude on thaem lande northweardum with tha Westsae.' and means 'He said
+  that he lived in the northern land near the Western Sea.')
+
+Braille:
+
+  &#x284c;&#x2801;&#x2827;&#x2811; &#x283c;&#x2801;&#x2812;  &#x284d;&#x281c;&#x2807;&#x2811;&#x2839;&#x2830;&#x280e; &#x2863;&#x2815;&#x280c;
+
+  &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819;&#x2812; &#x281e;&#x2815; &#x2803;&#x2811;&#x281b;&#x2814; &#x283a;&#x280a;&#x2839;&#x2832; &#x2879;&#x283b;&#x2811; &#x280a;&#x280e; &#x281d;&#x2815; &#x2819;&#x2833;&#x2803;&#x281e;
+  &#x2831;&#x2801;&#x281e;&#x2811;&#x2827;&#x283b; &#x2801;&#x2803;&#x2833;&#x281e; &#x2839;&#x2801;&#x281e;&#x2832; &#x2879;&#x2811; &#x2817;&#x2811;&#x281b;&#x280a;&#x280c;&#x283b; &#x2815;&#x280b; &#x2819;&#x280a;&#x280e; &#x2803;&#x2825;&#x2817;&#x280a;&#x2801;&#x2807; &#x283a;&#x2801;&#x280e;
+  &#x280e;&#x280a;&#x281b;&#x281d;&#x282b; &#x2803;&#x2839; &#x2839;&#x2811; &#x280a;&#x2807;&#x283b;&#x281b;&#x2839;&#x280d;&#x2801;&#x281d;&#x2802; &#x2839;&#x2811; &#x280a;&#x2807;&#x283b;&#x2805;&#x2802; &#x2839;&#x2811; &#x2825;&#x281d;&#x2819;&#x283b;&#x281e;&#x2801;&#x2805;&#x283b;&#x2802;
+  &#x2801;&#x281d;&#x2819; &#x2839;&#x2811; &#x2821;&#x280a;&#x2811;&#x280b; &#x280d;&#x2833;&#x2817;&#x281d;&#x283b;&#x2832; &#x284e;&#x280a;&#x2817;&#x2815;&#x2815;&#x281b;&#x2811; &#x280e;&#x280a;&#x281b;&#x281d;&#x282b; &#x280a;&#x281e;&#x2832; &#x2841;&#x281d;&#x2819;
+  &#x284e;&#x280a;&#x2817;&#x2815;&#x2815;&#x281b;&#x2811;&#x2830;&#x280e; &#x281d;&#x2801;&#x280d;&#x2811; &#x283a;&#x2801;&#x280e; &#x281b;&#x2815;&#x2815;&#x2819; &#x2825;&#x280f;&#x2815;&#x281d; &#x2830;&#x2861;&#x2801;&#x281d;&#x281b;&#x2811;&#x2802; &#x280b;&#x2815;&#x2817; &#x2801;&#x281d;&#x2839;&#x2839;&#x2814;&#x281b; &#x2819;&#x2811; 
+  &#x2821;&#x2815;&#x280e;&#x2811; &#x281e;&#x2815; &#x280f;&#x2825;&#x281e; &#x2819;&#x280a;&#x280e; &#x2819;&#x2801;&#x281d;&#x2819; &#x281e;&#x2815;&#x2832;
+
+  &#x2855;&#x2807;&#x2819; &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x280e; &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832;
+
+  &#x284d;&#x2814;&#x2819;&#x2816; &#x284a; &#x2819;&#x2815;&#x281d;&#x2830;&#x281e; &#x280d;&#x2811;&#x2801;&#x281d; &#x281e;&#x2815; &#x280e;&#x2801;&#x2839; &#x2839;&#x2801;&#x281e; &#x284a; &#x2805;&#x281d;&#x282a;&#x2802; &#x2815;&#x280b; &#x280d;&#x2839;
+  &#x282a;&#x281d; &#x2805;&#x281d;&#x282a;&#x2807;&#x282b;&#x281b;&#x2811;&#x2802; &#x2831;&#x2801;&#x281e; &#x2839;&#x283b;&#x2811; &#x280a;&#x280e; &#x280f;&#x281c;&#x281e;&#x280a;&#x280a;&#x2825;&#x2807;&#x281c;&#x2807;&#x2839; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x2803;&#x2833;&#x281e;
+  &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832; &#x284a; &#x280d;&#x280a;&#x2823;&#x281e; &#x2819;&#x2801;&#x2827;&#x2811; &#x2803;&#x2811;&#x2832; &#x2814;&#x280a;&#x2807;&#x2814;&#x282b;&#x2802; &#x280d;&#x2839;&#x280e;&#x2811;&#x2807;&#x280b;&#x2802; &#x281e;&#x2815;
+  &#x2817;&#x2811;&#x281b;&#x281c;&#x2819; &#x2801; &#x280a;&#x2815;&#x280b;&#x280b;&#x2814;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807; &#x2801;&#x280e; &#x2839;&#x2811; &#x2819;&#x2811;&#x2801;&#x2819;&#x2811;&#x280c; &#x280f;&#x280a;&#x2811;&#x280a;&#x2811; &#x2815;&#x280b; &#x280a;&#x2817;&#x2815;&#x281d;&#x280d;&#x2815;&#x281d;&#x281b;&#x283b;&#x2839; 
+  &#x2814; &#x2839;&#x2811; &#x281e;&#x2817;&#x2801;&#x2819;&#x2811;&#x2832; &#x2843;&#x2825;&#x281e; &#x2839;&#x2811; &#x283a;&#x280a;&#x280e;&#x2819;&#x2815;&#x280d; &#x2815;&#x280b; &#x2833;&#x2817; &#x2801;&#x281d;&#x280a;&#x2811;&#x280c;&#x2815;&#x2817;&#x280e; 
+  &#x280a;&#x280e; &#x2814; &#x2839;&#x2811; &#x280e;&#x280a;&#x280d;&#x280a;&#x2807;&#x2811;&#x2806; &#x2801;&#x281d;&#x2819; &#x280d;&#x2839; &#x2825;&#x281d;&#x2819;&#x2801;&#x2807;&#x2807;&#x282a;&#x282b; &#x2819;&#x2801;&#x281d;&#x2819;&#x280e;
+  &#x2829;&#x2801;&#x2807;&#x2807; &#x281d;&#x2815;&#x281e; &#x2819;&#x280a;&#x280c;&#x2825;&#x2817;&#x2803; &#x280a;&#x281e;&#x2802; &#x2815;&#x2817; &#x2839;&#x2811; &#x284a;&#x2833;&#x281d;&#x281e;&#x2817;&#x2839;&#x2830;&#x280e; &#x2819;&#x2815;&#x281d;&#x2811; &#x280b;&#x2815;&#x2817;&#x2832; &#x2879;&#x2833;
+  &#x283a;&#x280a;&#x2807;&#x2807; &#x2839;&#x283b;&#x2811;&#x280b;&#x2815;&#x2817;&#x2811; &#x280f;&#x283b;&#x280d;&#x280a;&#x281e; &#x280d;&#x2811; &#x281e;&#x2815; &#x2817;&#x2811;&#x280f;&#x2811;&#x2801;&#x281e;&#x2802; &#x2811;&#x280d;&#x280f;&#x2819;&#x2801;&#x281e;&#x280a;&#x280a;&#x2801;&#x2807;&#x2807;&#x2839;&#x2802; &#x2839;&#x2801;&#x281e;
+  &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x280e; &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832;
+
+  (The first couple of paragraphs of "A Christmas Carol" by Dickens)
+
+Compact font selection example text:
+
+  ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
+  abcdefghijklmnopqrstuvwxyz &#xa3;&#xa9;&#xb5;&#xc0;&#xc6;&#xd6;&#xde;&#xdf;&#xe9;&#xf6;&#xff;
+  &#x2013;&#x2014;&#x2018;&#x201c;&#x201d;&#x201e;&#x2020;&#x2022;&#x2026;&#x2030;&#x2122;&#x153;&#x160;&#x178;&#x17e;&#x20ac; &#x391;&#x392;&#x393;&#x394;&#x3a9;&#x3b1;&#x3b2;&#x3b3;&#x3b4;&#x3c9; &#x410;&#x411;&#x412;&#x413;&#x414;&#x430;&#x431;&#x432;&#x433;&#x434;
+  &#x2200;&#x2202;&#x2208;&#x211d;&#x2227;&#x222a;&#x2261;&#x221e; &#x2191;&#x2197;&#x21a8;&#x21bb;&#x21e3; &#x2510;&#x253c;&#x2554;&#x2558;&#x2591;&#x25ba;&#x263a;&#x2640; &#xfb01;&#xfffd;&#x2440;&#x2082;&#x1f20;&#x1e02;&#x4e5;&#x1e84;&#x250;&#x2d0;&#x234e;&#x5d0;&#x531;&#x10d0;
+
+Greetings in various languages:
+
+  Hello world, &#x39a;&#x3b1;&#x3bb;&#x3b7;&#x3bc;&#x1f73;&#x3c1;&#x3b1; &#x3ba;&#x1f79;&#x3c3;&#x3bc;&#x3b5;, &#x30b3;&#x30f3;&#x30cb;&#x30c1;&#x30cf;
+
+Box drawing alignment tests:                                          &#x2588;
+                                                                      &#x2589;
+  &#x2554;&#x2550;&#x2550;&#x2566;&#x2550;&#x2550;&#x2557;  &#x250c;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x2510;  &#x256d;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x256e;  &#x256d;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x256e;  &#x250f;&#x2501;&#x2501;&#x2533;&#x2501;&#x2501;&#x2513;  &#x250e;&#x2512;&#x250f;&#x2511;   &#x2577;  &#x257b; &#x250f;&#x252f;&#x2513; &#x250c;&#x2530;&#x2510;    &#x258a; &#x2571;&#x2572;&#x2571;&#x2572;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x250c;&#x2500;&#x2568;&#x2500;&#x2510;&#x2551;  &#x2502;&#x2554;&#x2550;&#x2567;&#x2550;&#x2557;&#x2502;  &#x2502;&#x2552;&#x2550;&#x256a;&#x2550;&#x2555;&#x2502;  &#x2502;&#x2553;&#x2500;&#x2541;&#x2500;&#x2556;&#x2502;  &#x2503;&#x250c;&#x2500;&#x2542;&#x2500;&#x2510;&#x2503;  &#x2517;&#x2543;&#x2544;&#x2519;  &#x2576;&#x253c;&#x2574;&#x257a;&#x254b;&#x2578;&#x2520;&#x253c;&#x2528; &#x251d;&#x254b;&#x2525;    &#x258b; &#x2572;&#x2571;&#x2572;&#x2571;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x2502;&#x2572; &#x2571;&#x2502;&#x2551;  &#x2502;&#x2551;   &#x2551;&#x2502;  &#x2502;&#x2502; &#x2502; &#x2502;&#x2502;  &#x2502;&#x2551; &#x2503; &#x2551;&#x2502;  &#x2503;&#x2502; &#x257f; &#x2502;&#x2503;  &#x250d;&#x2545;&#x2546;&#x2513;   &#x2575;  &#x2579; &#x2517;&#x2537;&#x251b; &#x2514;&#x2538;&#x2518;    &#x258c; &#x2571;&#x2572;&#x2571;&#x2572;&#x2573;&#x2573;&#x2573;
+  &#x2560;&#x2561; &#x2573; &#x255e;&#x2563;  &#x251c;&#x2562;   &#x255f;&#x2524;  &#x251c;&#x253c;&#x2500;&#x253c;&#x2500;&#x253c;&#x2524;  &#x251c;&#x256b;&#x2500;&#x2542;&#x2500;&#x256b;&#x2524;  &#x2523;&#x253f;&#x257e;&#x253c;&#x257c;&#x253f;&#x252b;  &#x2515;&#x251b;&#x2516;&#x251a;     &#x250c;&#x2504;&#x2504;&#x2510; &#x254e; &#x250f;&#x2505;&#x2505;&#x2513; &#x250b; &#x258d; &#x2572;&#x2571;&#x2572;&#x2571;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x2502;&#x2571; &#x2572;&#x2502;&#x2551;  &#x2502;&#x2551;   &#x2551;&#x2502;  &#x2502;&#x2502; &#x2502; &#x2502;&#x2502;  &#x2502;&#x2551; &#x2503; &#x2551;&#x2502;  &#x2503;&#x2502; &#x257d; &#x2502;&#x2503;  &#x2591;&#x2591;&#x2592;&#x2592;&#x2593;&#x2593;&#x2588;&#x2588; &#x250a;  &#x2506; &#x254e; &#x254f;  &#x2507; &#x250b; &#x258e;
+  &#x2551;&#x2514;&#x2500;&#x2565;&#x2500;&#x2518;&#x2551;  &#x2502;&#x255a;&#x2550;&#x2564;&#x2550;&#x255d;&#x2502;  &#x2502;&#x2558;&#x2550;&#x256a;&#x2550;&#x255b;&#x2502;  &#x2502;&#x2559;&#x2500;&#x2540;&#x2500;&#x255c;&#x2502;  &#x2503;&#x2514;&#x2500;&#x2542;&#x2500;&#x2518;&#x2503;  &#x2591;&#x2591;&#x2592;&#x2592;&#x2593;&#x2593;&#x2588;&#x2588; &#x250a;  &#x2506; &#x254e; &#x254f;  &#x2507; &#x250b; &#x258f;
+  &#x255a;&#x2550;&#x2550;&#x2569;&#x2550;&#x2550;&#x255d;  &#x2514;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x2518;  &#x2570;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x256f;  &#x2570;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x256f;  &#x2517;&#x2501;&#x2501;&#x253b;&#x2501;&#x2501;&#x251b;           &#x2514;&#x254c;&#x254c;&#x2518; &#x254e; &#x2517;&#x254d;&#x254d;&#x251b; &#x250b;  &#x2581;&#x2582;&#x2583;&#x2584;&#x2585;&#x2586;&#x2587;&#x2588;
+
+</pre>
+</BODY>
+</HTML>