/*
* $LynxId: dtd_util.c,v 1.78 2015/12/13 23:22:17 tom Exp $
*
* Given a SGML_dtd structure, write a corresponding flat file, or "C" source.
* Given the flat-file, write the "C" source.
*
* TODO: use symbols for HTMLA_NORMAL, etc.
*/
#include <HTUtils.h>
#include <HTMLDTD.h>
#include <string.h>
/*
* Tweaks to build standalone.
*/
/*
 * Drop any macro named "exit" so the exit() calls below reach the C library.
 * (Presumably one of the Lynx headers redefines it - TODO confirm.)
 */
#undef exit
/* Global trace switch defined here for the standalone build; initialised off. */
BOOLEAN WWW_TraceFlag = FALSE;
/*
 * Stream used for trace output in the standalone build: always stderr.
 */
FILE *TraceFP(void)
{
    FILE *trace_stream = stderr;

    return trace_stream;
}
/*
* Begin the actual utility.
*/
#define GETOPT "chl:o:ts"
#define NOTE(message) fprintf(output, message "\n");
/* *INDENT-OFF* */
/*
 * Helpers for the attribute tables below.  With USE_PRETTYSRC each entry
 * carries a second field: T(t) expands to ", t" and the one-letter macros
 * select the HTMLA_* value.  Without it T(t) expands to nothing.
 *
 * Only T is #undef'd after the tables, so with USE_PRETTYSRC the one-letter
 * macros N, i, h, c and x remain defined for the rest of this file; do not
 * use those names as identifiers.
 */
#ifdef USE_PRETTYSRC
# define N HTMLA_NORMAL
# define i HTMLA_ANAME
# define h HTMLA_HREF
# define c HTMLA_CLASS
# define x HTMLA_AUXCLASS
# define T(t) , t
#else
# define T(t) /*nothing */
#endif
/* Builds one AttrType initializer: the quoted name plus the matching <name>_attr_list table. */
#define ATTR_TYPE(name) { #name, name##_attr_list }
/*
 * Generic attribute groups.  dump_source() refers to these through
 * ATTR_TYPE(core), ATTR_TYPE(i18n), etc.  Each table ends with an entry
 * whose name is 0.
 */
/* "core" group */
static const attr core_attr_list[] = {
{ "CLASS" T(c) },
{ "ID" T(i) },
{ "STYLE" T(N) },
{ "TITLE" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* "i18n" group */
static const attr i18n_attr_list[] = {
{ "DIR" T(N) },
{ "LANG" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* "events" group; merge_AttrLists() skips this group by name */
static const attr events_attr_list[] = {
{ "ONCLICK" T(N) },
{ "ONDBLCLICK" T(N) },
{ "ONKEYDOWN" T(N) },
{ "ONKEYPRESS" T(N) },
{ "ONKEYUP" T(N) },
{ "ONMOUSEDOWN" T(N) },
{ "ONMOUSEMOVE" T(N) },
{ "ONMOUSEOUT" T(N) },
{ "ONMOUSEOVER" T(N) },
{ "ONMOUSEUP" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* "align" group */
static const attr align_attr_list[] = {
{ "ALIGN" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* "cellalign" group */
static const attr cellalign_attr_list[] = {
{ "ALIGN" T(N) },
{ "CHAR" T(N) },
{ "CHAROFF" T(N) },
{ "VALIGN" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* "bgcolor" group */
static const attr bgcolor_attr_list[] = {
{ "BGCOLOR" T(N) },
{ 0 T(N) } /* Terminate list */
};
/* T is only needed for the tables above */
#undef T
/* *INDENT-ON* */
/*
 * Print the system error text for the current errno, prefixed by "s",
 * and terminate the program with a failure status.  Does not return.
 */
static void failed(const char *s)
{
    perror(s);

    exit(EXIT_FAILURE);
}
/*
 * Print the command-line summary on stderr, one table entry per line, and
 * exit with a failure status.  Does not return.
 */
static void usage(void)
{
    /*
     * Every entry needs its own trailing comma: adjacent string literals are
     * concatenated by the compiler, which would fuse several options into a
     * single output line.
     */
    static const char *tbl[] =
    {
        "Usage: dtd_util [options]",
        "",
        "Options:",
        " -c generate C-source",
        " -h generate C-header",
        " -l filename load",
        " -o filename specify output (default: stdout)",
        " -s strict (HTML DTD 0)",
        " -t tagsoup (HTML DTD 1)",
    };
    unsigned n;

    for (n = 0; n < (unsigned) (sizeof(tbl) / sizeof(tbl[0])); ++n) {
        fprintf(stderr, "%s\n", tbl[n]);
    }
    exit(EXIT_FAILURE);
}
/*
 * Map an SGMLContent value to the spelling of its enumerator.
 * Values not listed here yield "?".
 */
static const char *SGMLContent2s(SGMLContent contents)
{
    switch (contents) {
    case SGML_EMPTY:
        return "SGML_EMPTY";
    case SGML_LITTERAL:
        return "SGML_LITTERAL";
    case SGML_CDATA:
        return "SGML_CDATA";
    case SGML_SCRIPT:
        return "SGML_SCRIPT";
    case SGML_RCDATA:
        return "SGML_RCDATA";
    case SGML_MIXED:
        return "SGML_MIXED";
    case SGML_ELEMENT:
        return "SGML_ELEMENT";
    case SGML_PCDATA:
        return "SGML_PCDATA";
    }
    return "?";
}
/*
 * Inverse of SGMLContent2s(): find the SGMLContent value whose printed name
 * equals "value".  Unrecognised names map to SGML_EMPTY.
 */
static SGMLContent s2SGMLContent(const char *value)
{
    static SGMLContent candidates[] =
    {
        SGML_EMPTY,
        SGML_LITTERAL,
        SGML_CDATA,
        SGML_SCRIPT,
        SGML_RCDATA,
        SGML_MIXED,
        SGML_ELEMENT,
        SGML_PCDATA
    };
    const unsigned limit = (unsigned) (sizeof(candidates) / sizeof(candidates[0]));
    unsigned pos = 0;

    while (pos < limit) {
        if (strcmp(SGMLContent2s(candidates[pos]), value) == 0)
            return candidates[pos];
        ++pos;
    }
    return SGML_EMPTY;
}
/*
 * Forward declaration of PrintF() so that GCC_PRINTFLIKE can be attached:
 * argument 3 is the format, variadic values start at argument 4.
 * (GCC_PRINTFLIKE is defined outside this file - presumably it expands to
 * the compiler's printf-format attribute; TODO confirm.)
 */
static void PrintF(FILE *, int, const char *,...) GCC_PRINTFLIKE(3, 4);
/*
 * Format like printf(), then write the result left-justified in a field of
 * at least "width" columns (padded with blanks on the right).
 *
 * The text is built in a BUFSIZ-byte scratch buffer; anything that would not
 * fit is truncated instead of overrunning the buffer.
 */
static void PrintF(FILE *output, int width, const char *fmt,...)
{
    char buffer[BUFSIZ];
    va_list ap;

    buffer[0] = '\0';           /* stays empty if formatting fails */
    va_start(ap, fmt);
    vsnprintf(buffer, sizeof(buffer), fmt, ap);
    va_end(ap);
    fprintf(output, "%-*s", width, buffer);
}
/*
 * Compare two 0-name-terminated attribute lists entry by entry.
 * Returns 1 only if both lists are non-null, have the same length and the
 * same names in the same order; otherwise 0.
 */
static int same_AttrList(AttrList a, AttrList b)
{
    if (!a || !b)
        return 0;

    for (; a->name && b->name; ++a, ++b) {
        if (strcmp(a->name, b->name) != 0)
            return 0;
    }
    /* equal only if both lists ended together */
    return (!a->name && !b->name) ? 1 : 0;
}
/*
 * Tell whether tag "which" is the first tag in the table to use its
 * attribute list.
 *
 * Returns FALSE if an earlier tag has the same list pointer, or a list with
 * identical names.  In the latter case tag "which" is re-pointed at the
 * earlier tag's list (a side effect on the DTD's tag table), so later checks
 * match by pointer.
 */
static int first_attrs(const SGML_dtd * dtd, int which)
{
    int earlier;

    for (earlier = 0; earlier < which; ++earlier) {
        if (dtd->tags[earlier].attributes == dtd->tags[which].attributes)
            return FALSE;

        if (same_AttrList(dtd->tags[earlier].attributes,
                          dtd->tags[which].attributes)) {
            dtd->tags[which].attributes = dtd->tags[earlier].attributes;
            return FALSE;
        }
    }
    return TRUE;
}
/*
 * Copy "source" into "target", replacing every character that is not
 * alphanumeric with '_' so the result can be used in a C identifier.
 * "target" must have room for strlen(source) + 1 bytes.  Returns "target".
 */
static char *no_dashes(char *target, const char *source)
{
    int j;

    for (j = 0; (target[j] = source[j]) != '\0'; ++j) {
        /* <ctype.h> functions need an unsigned char value; a negative
         * plain char is undefined behavior */
        if (!isalnum((unsigned char) target[j]))
            target[j] = '_';
    }
    return target;
}
/*
 * The second "OBJECT" is treated specially: return nonzero only when
 * "which" is the index of the first tag named "OBJECT" at or before it.
 */
static int first_object(const SGML_dtd * dtd, int which)
{
    int pos = 0;

    while (pos <= which && strcmp(dtd->tags[pos].name, "OBJECT") != 0)
        ++pos;

    return (pos == which);
}
/*
 * Name used for the attribute table of tag "which": the name of the first
 * tag that shares the same attribute-list pointer, passed through a fixed
 * set of aliases that match the existing headers.
 */
static const char *NameOfAttrs(const SGML_dtd * dtd, int which)
{
    /* special cases to match existing headers */
    static const struct {
        const char *tag;
        const char *alias;
    } renames[] = {
        { "ABBR", "GEN" },
        { "ARTICLE", "GEN5" },
        { "BLOCKQUOTE", "BQ" },
        { "BASEFONT", "FONT" },
        { "CENTER", "DIV" },
        { "DIR", "UL" },
        { "H1", "H" },
        { "TBODY", "TR" },
    };
    const char *owner = dtd->tags[which].name;
    unsigned r;
    int earlier;

    /* the earliest tag sharing this list gives the table its name */
    for (earlier = 0; earlier < which; ++earlier) {
        if (dtd->tags[earlier].attributes == dtd->tags[which].attributes) {
            owner = dtd->tags[earlier].name;
            break;
        }
    }

    for (r = 0; r < (unsigned) (sizeof(renames) / sizeof(renames[0])); ++r) {
        if (strcmp(owner, renames[r].tag) == 0)
            return renames[r].alias;
    }
    return owner;
}
/*
 * Suffix used in the T_<name> definitions: the tag's own name, except that
 * any "OBJECT" entry other than the first becomes "OBJECT_PCDATA".
 */
static const char *DEF_name(const SGML_dtd * dtd, int which)
{
    const char *tag_name = dtd->tags[which].name;

    if (strcmp(tag_name, "OBJECT") != 0)
        return tag_name;

    return first_object(dtd, which) ? tag_name : "OBJECT_PCDATA";
}
/*
 * One distinct attribute table, as collected by sorted_attrs().
 * "name" must stay the first member: compare_attr() reads it through an
 * AttrInfo pointer.
 */
typedef struct {
const char *name;		/* table name, from NameOfAttrs() */
const attr *attrs;		/* the tag's attribute list */
int count;			/* number of entries in attrs, from AttrCount() */
int which;			/* index of the owning tag in dtd->tags */
} AttrInfo;
/*
 * qsort() comparator for AttrType entries.
 *
 * Keeps names starting with a lowercase letter before names starting with
 * an uppercase letter (the latter are derived); within each group the order
 * is strcmp() on the name.
 */
static int compare_attr_types(const void *a, const void *b)
{
    const AttrType *p = (const AttrType *) a;
    const AttrType *q = (const AttrType *) b;
    /*
     * isupper() only promises "nonzero" for a match, so reduce each result
     * to 0/1 before comparing; the argument must be an unsigned char value.
     */
    int p_upper = (isupper((unsigned char) p->name[0]) != 0);
    int q_upper = (isupper((unsigned char) q->name[0]) != 0);

    if (p_upper != q_upper)
        return p_upper ? 1 : -1;

    return strcmp(p->name, q->name);
}
/*
 * Count the entries of an AttrType array up to (not including) the
 * terminating entry whose name is 0.
 */
static int len_AttrTypes(const AttrType * data)
{
    int length = 0;

    while (data[length].name != 0)
        ++length;

    return length;
}
/*
 * Return a newly allocated, 0-terminated copy of "source" sorted with
 * compare_attr_types().  Returns 0 for an empty list or when allocation
 * fails.  The caller owns the returned array.
 */
static AttrType *sorted_AttrTypes(const AttrType * source)
{
    unsigned number = len_AttrTypes(source);
    AttrType *copy;

    if (number == 0)
        return 0;

    /* one extra zeroed slot serves as the terminator */
    copy = typecallocn(AttrType, number + 1);
    if (copy == 0)
        return 0;

    MemCpy(copy, source, number * sizeof(*copy));
    qsort(copy, number, sizeof(*copy), compare_attr_types);
    return copy;
}
/*
 * qsort() comparator ordering entries by strcmp() on their name.
 *
 * Elements are read as AttrInfo; this file also passes it arrays of "attr",
 * which presumably works because "name" is the first member of both
 * structures - TODO confirm against the attr declaration.
 */
static int compare_attr(const void *a, const void *b)
{
    const char *left = ((const AttrInfo *) a)->name;
    const char *right = ((const AttrInfo *) b)->name;

    return strcmp(left, right);
}
/*
 * Count the entries of an attribute list up to (not including) the
 * terminating entry whose name is 0.
 */
static int len_AttrList(AttrList data)
{
    int length = 0;

    while (data[length].name != 0)
        ++length;

    return length;
}
/*
 * Sort a 0-name-terminated attribute list by name, in place, and remove
 * entries whose name repeats.
 *
 * Of a run of equal names the last entry (in sorted order) is the one kept.
 * The unused tail, up to and including the old terminator slot, is zeroed so
 * the list is terminated right after the last kept entry.
 */
static void sort_uniq_AttrList(attr * data)
{
    unsigned have = len_AttrList(data);
    unsigned keep = 0;
    unsigned j;

    qsort(data, have, sizeof(*data), compare_attr);
    /*
     * Eliminate duplicates: copy forward only the last entry of each run of
     * equal names, compacting the array as we go.
     */
    for (j = 0; j < have; ++j) {
        if (j + 1 < have && !strcmp(data[j].name, data[j + 1].name))
            continue;
        data[keep++] = data[j];
    }
    /* data[have] is the original terminator, so this stays in bounds */
    memset(data + keep, 0, (have - keep + 1) * sizeof(data[0]));
}
/*
 * Return a newly allocated copy of a 0-name-terminated attribute list,
 * passed through sort_uniq_AttrList().  The caller frees the result.
 * Exits through failed() if the copy cannot be allocated.
 */
static attr *copy_AttrList(AttrList data)
{
    unsigned need = len_AttrList(data);
    unsigned n;
    attr *result = (attr *) calloc(need + 1, sizeof(attr));

    if (result == 0)
        failed("calloc");

    for (n = 0; n < need; ++n)
        result[n] = data[n];
    /* slot "need" is already zero from calloc and terminates the list */
    sort_uniq_AttrList(result);
    return result;
}
/*
 * Build one attribute list from all of the lists referenced by a
 * 0-name-terminated AttrType array, skipping the group named "events",
 * then pass it through sort_uniq_AttrList().
 *
 * The caller frees the result.  Exits through failed() if the list cannot
 * be allocated.
 */
static attr *merge_AttrLists(const AttrType * data)
{
    const AttrType *at;
    attr *result = 0;
    unsigned need = 1;
    unsigned have = 0;
    unsigned j;

    /* upper bound: "events" is counted here although it is skipped below */
    for (at = data; at->name; ++at) {
        need += len_AttrList(at->list);
    }
    result = (attr *) calloc(need + 1, sizeof(attr));
    if (result == 0)
        failed("calloc");

    for (at = data; at->name; ++at) {
        if (!strcmp(at->name, "events")) {
            ;                   /* lynx does not use events */
        } else {
            for (j = 0; at->list[j].name; ++j) {
                result[have++] = at->list[j];
            }
        }
    }
    sort_uniq_AttrList(result);
    return result;
}
/*
 * Remove from "target" every entry whose name also occurs in "source".
 * Both lists are 0-name-terminated; "target" is edited in place by shifting
 * the following entries (terminator included) down one slot per removal.
 * Returns the number of entries removed.
 */
static int clean_AttrList(attr * target, AttrList source)
{
    int removed = 0;
    int pos = 0;

    while (target[pos].name != 0) {
        int hit = 0;
        int s;

        for (s = 0; source[s].name != 0; ++s) {
            if (strcmp(target[pos].name, source[s].name) == 0) {
                hit = 1;
                break;
            }
        }
        if (!hit) {
            ++pos;
            continue;
        }
        /* close the gap; "pos" now holds the next candidate, so re-test it */
        {
            int at = pos;

            do {
                target[at] = target[at + 1];
            } while (target[at++].name != 0);
        }
        ++removed;
    }
    return removed;
}
/*
 * Number of attributes of a tag, obtained by actually walking its attribute
 * list rather than trusting a stored count.  That makes it possible to edit
 * an attribute-table in src0_HTMLDTD.h and have all of the files updated by
 * just doing a "make sources".
 */
static int AttrCount(HTTag * tag)
{
    int total = len_AttrList(tag->attributes);

    return total;
}
/*
 * Collect one AttrInfo per distinct attribute table in the DTD (see
 * first_attrs()), sorted by table name.
 *
 * The number of entries is stored in *countp.  The caller frees the
 * returned array.  Exits through failed() if it cannot be allocated.
 */
static AttrInfo *sorted_attrs(const SGML_dtd * dtd, unsigned *countp)
{
    AttrInfo *data;
    unsigned count = 0;
    int j;

    /* one spare slot, so a DTD without tags still gets a real allocation */
    data = (AttrInfo *) calloc((size_t) dtd->number_of_tags + 1, sizeof(*data));
    if (data == 0)
        failed("calloc");

    /* get the attribute-data */
    for (j = 0; j < dtd->number_of_tags; ++j) {
        if (first_attrs(dtd, j)) {
            data[count].name = NameOfAttrs(dtd, j);
            data[count].attrs = dtd->tags[j].attributes;
            data[count].count = AttrCount(&(dtd->tags[j]));
            data[count].which = j;
            ++count;
        }
    }
    /* sort the data by the name of their associated tag */
    qsort(data, count, sizeof(*data), compare_attr);
    *countp = count;
    return data;
}
/*
 * Write the "#define T_<name> ..." line for tag "which": seven hexadecimal
 * fields holding tagclass, contains, icontains, contained, icontained,
 * canclose and flags, in that order.
 */
static void dump_src_HTTag_Defines(FILE *output, const SGML_dtd * dtd, int which)
{
    HTTag *tag = &(dtd->tags[which]);
    const char *suffix = DEF_name(dtd, which);

    fprintf(output,
            "#define T_%-13s "
            "0x%05X,0x%05X,0x%05X,0x%05X,0x%05X,0x%05X,0x%05X\n",
            suffix,
            tag->tagclass,
            tag->contains,
            tag->icontains,
            tag->contained,
            tag->icontained,
            tag->canclose,
            tag->flags);
}
/*
 * Write one attribute-table entry: the quoted attribute name followed by
 * T(<letter>).  The letter reflects the attribute's type when
 * USE_PRETTYSRC is defined and is 'N' otherwise.
 */
static void dump_AttrItem(FILE *output, const attr * data)
{
    char quoted[BUFSIZ];
    char letter = 'N';

#ifdef USE_PRETTYSRC
    switch (data->type) {
    case HTMLA_NORMAL:
        letter = 'N';
        break;
    case HTMLA_ANAME:
        letter = 'i';
        break;
    case HTMLA_HREF:
        letter = 'h';
        break;
    case HTMLA_CLASS:
        letter = 'c';
        break;
    case HTMLA_AUXCLASS:
        letter = 'x';
        break;
    }
#endif
    /* quote first, so the padding in the next call covers the quotes too */
    sprintf(quoted, "\"%s\"", data->name);
    fprintf(output, "\t{ %-15s T(%c) },\n", quoted, letter);
}
/*
 * Write the terminating entry of an attribute table.
 */
static void dump_AttrItem0(FILE *output)
{
    fputs("\t{ 0 T(N) }\t/* Terminate list */\n", output);
}
/*
 * Write two tables for "name":
 *   <name>_attr_list - the attributes in "data" (may be 0 for none);
 *   <name>_attr_type - one ATTR_TYPE() entry per string in the 0-terminated
 *                      array "from", or a single entry for "name" itself
 *                      when "from" is 0.
 * Each table is followed by a blank line.
 */
static void dump_src_AttrType(FILE *output, const char *name, AttrList data, const char **from)
{
    fprintf(output, "static const attr %s_attr_list[] = {\n", name);
    if (data != 0) {
        const attr *item;

        for (item = data; item->name != 0; ++item)
            dump_AttrItem(output, item);
    }
    fprintf(output, "\t{ 0 T(N) } /* Terminate list */\n");
    fprintf(output, "};\n");
    fprintf(output, "\n");

    fprintf(output, "static const AttrType %s_attr_type[] = {\n", name);
    if (from == 0) {
        fprintf(output, "\t{ ATTR_TYPE(%s) },\n", name);
    } else {
        for (; *from != 0; ++from)
            fprintf(output, "\t{ ATTR_TYPE(%s) },\n", *from);
    }
    fprintf(output, "\t{ 0, 0 },\n");
    fprintf(output, "};\n");
    fprintf(output, "\n");
}
/*
 * Write the runtime table "<name>_attr" for tag "which": the merged list of
 * all attributes from the tag's attr_types, then the terminator entry and a
 * blank line.
 */
static void dump_src_HTTag_Attrs(FILE *output, const SGML_dtd * dtd, int which)
{
    HTTag *tag = &(dtd->tags[which]);
    attr *merged = merge_AttrLists(tag->attr_types);
    const int total = len_AttrList(merged);
    char heading[BUFSIZ];
    int pos = 0;

    sprintf(heading, "static const attr %s_attr[] = {", NameOfAttrs(dtd, which));
    fprintf(output, "%-40s/* %s attributes */\n", heading, tag->name);

    while (pos < total) {
        dump_AttrItem(output, merged + pos);
        ++pos;
    }
    dump_AttrItem0(output);
    fprintf(output, "};\n");
    fprintf(output, "\n");
    free(merged);
}
/*
 * Write one row of the tags table for tag "which":
 * P(name) or P0(name), ATTR_DATA(...), the contents type and T_<name>.
 * P0 is used only when USE_JUSTIFY_ELTS is defined and the tag cannot be
 * justified.
 */
static void dump_src_HTTag(FILE *output, const SGML_dtd * dtd, int which)
{
    HTTag *tag = &(dtd->tags[which]);
    const char *wrapper = "P";

#ifdef USE_JUSTIFY_ELTS
    if (!tag->can_justify)
        wrapper = "P0";
#endif
    PrintF(output, 19, " { %s(%s),", wrapper, tag->name);
    PrintF(output, 24, "ATTR_DATA(%s), ", NameOfAttrs(dtd, which));
    PrintF(output, 14, "%s,", SGMLContent2s(tag->contents));
    fprintf(output, "T_%s},\n", DEF_name(dtd, which));
}
/*
 * Write the generated "C" source for a DTD to "output".
 *
 * Sections, in order: include guard (suffixed with dtd_version), T_<tag>
 * defines, generic attribute tables, per-tag editable tables, runtime
 * attribute lists, helper-macro definitions, and the tags_table<version>
 * array.
 *
 * Loops that run to "j <= dtd->number_of_tags" deliberately include one
 * extra entry past the counted tags: the alternative OBJECT variant
 * announced by the comment written in the tags-table loop.
 */
static void dump_source(FILE *output, const SGML_dtd * dtd, int dtd_version)
{
/* the generic groups, in the order they are written and checked */
static AttrType generic_types[] =
{
ATTR_TYPE(core),
ATTR_TYPE(i18n),
ATTR_TYPE(events),
ATTR_TYPE(align),
ATTR_TYPE(cellalign),
ATTR_TYPE(bgcolor),
{0, 0}
};
AttrType *gt;
const char *marker = "src_HTMLDTD_H";
int j;
unsigned count = 0;
AttrInfo *data = sorted_attrs(dtd, &count);
/* '$' is passed as an argument so the keyword is not written literally in this file */
fprintf(output, "/* %cLynxId%c */\n", '$', '$');
fprintf(output, "#ifndef %s%d\n", marker, dtd_version);
fprintf(output, "#define %s%d 1\n\n", marker, dtd_version);
/*
* If we ifdef this for once, and make the table names distinct, we can
* #include the strict- and tagsoup-output directly in HTMLDTD.c
*/
NOTE("#ifndef once_HTMLDTD");
NOTE("#define once_HTMLDTD 1");
NOTE("");
/* construct TagClass-define's */
for (j = 0; j <= dtd->number_of_tags; ++j) {
dump_src_HTTag_Defines(output, dtd, j);
}
NOTE("#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000");
/* construct attribute-tables */
NOTE("#ifdef USE_PRETTYSRC");
NOTE("# define N HTMLA_NORMAL");
NOTE("# define i HTMLA_ANAME");
NOTE("# define h HTMLA_HREF");
NOTE("# define c HTMLA_CLASS");
NOTE("# define x HTMLA_AUXCLASS");
NOTE("# define T(t) , t");
NOTE("#else");
NOTE("# define T(t) /*nothing */");
NOTE("#endif");
NOTE("/* *INDENT-OFF* */");
NOTE("");
/* the generated ATTR_TYPE has no braces; dump_src_AttrType() writes them */
NOTE("#define ATTR_TYPE(name) #name, name##_attr_list");
NOTE("");
NOTE("/* generic attributes, used in different tags */");
for (gt = generic_types; gt->name != 0; ++gt) {
dump_src_AttrType(output, gt->name, gt->list, 0);
}
NOTE("");
NOTE("/* tables defining attributes per-tag in terms of generic attributes (editable) */");
for (j = 0; j < (int) count; ++j) {
int which = data[j].which;
if (first_attrs(dtd, which)) {
HTTag *tag = &(dtd->tags[which]);
const AttrType *types = tag->attr_types;
const char *name = NameOfAttrs(dtd, which);
attr *list = 0;
/* NOTE(review): fixed capacity, and from_size is never checked against it */
const char *from_attr[10];
int from_size = 0;
while (types->name != 0) {
from_attr[from_size++] = types->name;
/* the attr_type named after the table holds the tag's own attributes */
if (!strcmp(types->name, name)) {
list = copy_AttrList(types->list);
/* strip the first generic group found in the copy and record that group once */
for (gt = generic_types; gt->name != 0; ++gt) {
if (clean_AttrList(list, gt->list)) {
int k;
int found = 0;
for (k = 0; k < from_size; ++k) {
if (!strcmp(from_attr[k], gt->name)) {
found = 1;
break;
}
}
if (!found)
from_attr[from_size++] = gt->name;
break;
}
}
}
++types;
}
from_attr[from_size] = 0;
if (list != 0) {
dump_src_AttrType(output, name, list, from_attr);
free(list);
}
}
}
NOTE("");
NOTE("/* attribute lists for the runtime (generated by dtd_util) */");
for (j = 0; j < (int) count; ++j) {
dump_src_HTTag_Attrs(output, dtd, data[j].which);
}
NOTE("/* *INDENT-ON* */");
NOTE("");
NOTE("/* justification-flags */");
NOTE("#undef N");
NOTE("#undef i");
NOTE("#undef h");
NOTE("#undef c");
NOTE("#undef x");
NOTE("");
NOTE("#undef T");
NOTE("");
NOTE("/* tag-names */");
for (j = 0; j <= dtd->number_of_tags; ++j) {
fprintf(output, "#undef %s\n", DEF_name(dtd, j));
}
NOTE("");
NOTE("/* these definitions are used in the tags-tables */");
NOTE("#undef P");
NOTE("#undef P_");
NOTE("#ifdef USE_COLOR_STYLE");
NOTE("#define P_(x) #x, (sizeof #x) -1");
NOTE("#define NULL_HTTag_ NULL, 0");
NOTE("#else");
NOTE("#define P_(x) #x");
NOTE("#define NULL_HTTag_ NULL");
NOTE("#endif");
NOTE("");
NOTE("#ifdef USE_JUSTIFY_ELTS");
NOTE("#define P(x) P_(x), 1");
NOTE("#define P0(x) P_(x), 0");
NOTE("#define NULL_HTTag NULL_HTTag_,0");
NOTE("#else");
NOTE("#define P(x) P_(x)");
NOTE("#define P0(x) P_(x)");
NOTE("#define NULL_HTTag NULL_HTTag_");
NOTE("#endif");
NOTE("");
NOTE("#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type");
NOTE("");
NOTE("#endif /* once_HTMLDTD */");
NOTE("/* *INDENT-OFF* */");
/* construct the tags table */
fprintf(output,
"static const HTTag tags_table%d[HTML_ALL_ELEMENTS] = {\n",
dtd_version);
for (j = 0; j <= dtd->number_of_tags; ++j) {
/* announce the extra entry before writing it */
if (j == dtd->number_of_tags) {
NOTE("/* additional (alternative variants), not counted in HTML_ELEMENTS: */");
NOTE("/* This one will be used as a temporary substitute within the parser when");
NOTE(" it has been signalled to parse OBJECT content as MIXED. - kw */");
}
dump_src_HTTag(output, dtd, j);
}
fprintf(output, "};\n");
NOTE("/* *INDENT-ON* */");
NOTE("");
fprintf(output, "#endif /* %s%d */\n", marker, dtd_version);
free(data);
}
/*
 * Write the header defines for one attribute table:
 * "#define HTML_<table>_<attribute> <index>" for each attribute (with
 * non-alphanumeric characters of the attribute name turned into '_'), then
 * "#define HTML_<table>_ATTRIBUTES <count>" and a blank line.
 */
static void dump_hdr_attr(FILE *output, AttrInfo * data)
{
    char ident[BUFSIZ];
    int pos = 0;

    while (pos < data->count) {
        PrintF(output, 33, "#define HTML_%s_%s",
               data->name,
               no_dashes(ident, data->attrs[pos].name));
        fprintf(output, "%2d\n", pos);
        ++pos;
    }
    PrintF(output, 33, "#define HTML_%s_ATTRIBUTES", data->name);
    fprintf(output, "%2d\n\n", data->count);
}
/*
 * Write the generated "C" header for a DTD to "output": include guard,
 * the HTMLElement enum (one HTML_<tag> per counted tag plus
 * HTML_ALT_OBJECT), the HTML_ELEMENTS / HTML_ALL_ELEMENTS counts, and the
 * attribute-number defines for every distinct attribute table.
 */
static void dump_header(FILE *output, const SGML_dtd * dtd)
{
const char *marker = "hdr_HTMLDTD_H";
int j;
unsigned count = 0;
AttrInfo *data = sorted_attrs(dtd, &count);
/* '$' is passed as an argument so the keyword is not written literally in this file */
fprintf(output, "/* %cLynxId%c */\n", '$', '$');
fprintf(output, "#ifndef %s\n", marker);
fprintf(output, "#define %s 1\n\n", marker);
NOTE("#ifdef __cplusplus");
NOTE("extern \"C\" {");
NOTE("#endif");
NOTE("/*");
NOTE("");
NOTE(" Element Numbers");
NOTE("");
NOTE(" Must Match all tables by element!");
NOTE(" These include tables in HTMLDTD.c");
NOTE(" and code in HTML.c.");
NOTE("");
NOTE(" */");
fprintf(output, " typedef enum {\n");
/* counted tags only; the extra OBJECT variant is written by name below */
for (j = 0; j < dtd->number_of_tags; ++j) {
fprintf(output, "\tHTML_%s,\n", dtd->tags[j].name);
}
NOTE("\tHTML_ALT_OBJECT");
NOTE(" } HTMLElement;\n");
NOTE("/* Notes: HTML.c uses a different extension of the");
NOTE(" HTML_ELEMENTS space privately, see");
NOTE(" HTNestedList.h.");
NOTE("");
NOTE(" Do NOT replace HTML_ELEMENTS with");
NOTE(" TABLESIZE(mumble_dtd.tags).");
NOTE("");
NOTE(" Keep the following defines in synch with");
NOTE(" the above enum!");
NOTE(" */");
NOTE("");
NOTE("/* # of elements generally visible to Lynx code */");
fprintf(output, "#define HTML_ELEMENTS %d\n", dtd->number_of_tags);
NOTE("");
NOTE("/* # of elements visible to SGML parser */");
/* one more than HTML_ELEMENTS, for HTML_ALT_OBJECT */
fprintf(output, "#define HTML_ALL_ELEMENTS %d\n", dtd->number_of_tags + 1);
NOTE("");
NOTE("/*");
NOTE("");
NOTE(" Attribute numbers");
NOTE("");
NOTE(" Identifier is HTML_<element>_<attribute>.");
NOTE(" These must match the tables in HTML.c!");
NOTE("");
NOTE(" */");
/* output the sorted list */
for (j = 0; j < (int) count; ++j) {
dump_hdr_attr(output, data + j);
}
free(data);
NOTE("#ifdef __cplusplus");
NOTE("}");
NOTE("#endif");
fprintf(output, "#endif\t\t\t\t/* %s */\n", marker);
}
/*
 * Flat-file formats shared by the writer (dump_flat_attrs) and the reader
 * (load_flat_AttrList), so both sides stay in step.
 */
/* header line: number of attributes that follow */
#define FMT_NUM_ATTRS "%d attributes:\n"
/* one attribute: index, type code, name */
#define FMT_ONE_ATTR "%d:%d:%s\n"
/* number of conversions in FMT_ONE_ATTR, i.e. the fscanf() result for a complete line */
#define NUM_ONE_ATTR 3
/*
 * Write an attribute list in flat-file form: a count line followed by one
 * "index:type:name" line per attribute.  The type field is the attribute's
 * type with USE_PRETTYSRC and 0 otherwise.
 */
static void dump_flat_attrs(FILE *output,
                            const attr * attributes,
                            int number_of_attributes)
{
    int pos;

    fprintf(output, "\t\t" FMT_NUM_ATTRS, number_of_attributes);
    for (pos = 0; pos < number_of_attributes; ++pos) {
#ifdef USE_PRETTYSRC
        int type_code = attributes[pos].type;
#else
        int type_code = 0;      /* need placeholder for source-compat */
#endif

        fprintf(output, "\t\t\t" FMT_ONE_ATTR, pos, type_code,
                attributes[pos].name);
    }
}
/*
 * Write the names of a tag's attr_types in flat-file form: a count line,
 * then one name per line in compare_attr_types() order.
 *
 * The sorted copy made by sorted_AttrTypes() is temporary and is released
 * here (it comes from typecallocn(), presumably calloc-based like the other
 * arrays this file passes to free() - TODO confirm).
 */
static void dump_flat_attr_types(FILE *output, const AttrType * attr_types)
{
    AttrType *sorted = sorted_AttrTypes(attr_types);
    const AttrType *p;
    int number = len_AttrTypes(attr_types);

    fprintf(output, "\t\t%d attr_types\n", number);
    if (sorted != 0) {
        for (p = sorted; p->name != 0; ++p) {
            fprintf(output, "\t\t\t%s\n", p->name);
        }
        free(sorted);
    }
}
/*
 * Write a "<name>: <contents>" line in flat-file form, with the contents
 * value spelled by SGMLContent2s().
 */
static void dump_flat_SGMLContent(FILE *output, const char *name, SGMLContent contents)
{
    const char *spelled = SGMLContent2s(contents);

    fprintf(output, "\t\t%s: %s\n", name, spelled);
}
/*
 * Write a "<name>:" line listing the symbolic names of all Tgc_* bits set in
 * theClass, in a fixed order.  Any bits left over afterwards are reported
 * as " OOPS:<hex>".
 */
static void dump_flat_TagClass(FILE *output, const char *name, TagClass theClass)
{
#define TGC_ENTRY(id) { Tgc_##id, #id }
    static const struct {
        TagClass mask;
        const char *label;
    } known[] = {
        TGC_ENTRY(FONTlike),
        TGC_ENTRY(EMlike),
        TGC_ENTRY(MATHlike),
        TGC_ENTRY(Alike),
        TGC_ENTRY(formula),
        TGC_ENTRY(TRlike),
        TGC_ENTRY(SELECTlike),
        TGC_ENTRY(FORMlike),
        TGC_ENTRY(Plike),
        TGC_ENTRY(DIVlike),
        TGC_ENTRY(LIlike),
        TGC_ENTRY(ULlike),
        TGC_ENTRY(BRlike),
        TGC_ENTRY(APPLETlike),
        TGC_ENTRY(HRlike),
        TGC_ENTRY(MAPlike),
        TGC_ENTRY(outer),
        TGC_ENTRY(BODYlike),
        TGC_ENTRY(HEADstuff),
        TGC_ENTRY(same),
    };
#undef TGC_ENTRY
    unsigned pos;

    fprintf(output, "\t\t%s:", name);
    for (pos = 0; pos < (unsigned) (sizeof(known) / sizeof(known[0])); ++pos) {
        if (theClass & known[pos].mask) {
            fprintf(output, " %s", known[pos].label);
            /* clear the bit so only unknown bits remain at the end */
            theClass &= ~(known[pos].mask);
        }
    }
    if (theClass)
        fprintf(output, " OOPS:%#x", theClass);
    fprintf(output, "\n");
}
/*
 * Write a "<name>:" line listing the symbolic names of all Tgf_* bits set in
 * theFlags, in a fixed order.  Any bits left over afterwards are reported
 * as " OOPS:<hex>".
 */
static void dump_flat_TagFlags(FILE *output, const char *name, TagFlags theFlags)
{
#define TGF_ENTRY(id) { Tgf_##id, #id }
    static const struct {
        TagFlags mask;
        const char *label;
    } known[] = {
        TGF_ENTRY(endO),
        TGF_ENTRY(startO),
        TGF_ENTRY(mafse),
        TGF_ENTRY(strict),
        TGF_ENTRY(nreie),
        TGF_ENTRY(frecyc),
        TGF_ENTRY(nolyspcl),
    };
#undef TGF_ENTRY
    unsigned pos;

    fprintf(output, "\t\t%s:", name);
    for (pos = 0; pos < (unsigned) (sizeof(known) / sizeof(known[0])); ++pos) {
        if (theFlags & known[pos].mask) {
            fprintf(output, " %s", known[pos].label);
            /* clear the bit so only unknown bits remain at the end */
            theFlags &= ~(known[pos].mask);
        }
    }
    if (theFlags)
        fprintf(output, " OOPS:%#x", theFlags);
    fprintf(output, "\n");
}
/*
 * Write one tag in flat-file form: "<n>:<name>", the justify setting (only
 * with USE_JUSTIFY_ELTS), its attributes, attr_types, contents, the six
 * TagClass fields and finally the flags.  load_flat_HTTag() reads the
 * fields back in this same order.
 */
static void dump_flat_HTTag(FILE *output, unsigned n, HTTag * tag)
{
    const int attr_total = AttrCount(tag);

    fprintf(output, "\t%u:%s\n", n, tag->name);
#ifdef USE_JUSTIFY_ELTS
    fprintf(output, "\t\t%s\n", tag->can_justify ? "justify" : "nojustify");
#endif
    dump_flat_attrs(output, tag->attributes, attr_total);
    dump_flat_attr_types(output, tag->attr_types);
    dump_flat_SGMLContent(output, "contents", tag->contents);

    /* class masks */
    dump_flat_TagClass(output, "tagclass", tag->tagclass);
    dump_flat_TagClass(output, "contains", tag->contains);
    dump_flat_TagClass(output, "icontains", tag->icontains);
    dump_flat_TagClass(output, "contained", tag->contained);
    dump_flat_TagClass(output, "icontained", tag->icontained);
    dump_flat_TagClass(output, "canclose", tag->canclose);

    dump_flat_TagFlags(output, "flags", tag->flags);
}
/*
 * Merge the attr_types of "tag" into the 0-name-terminated table
 * "attr_types" and return how many of them were not in the table before.
 *
 * Each entry of the tag is written into the table, either over the entry
 * with the same name or at the current end.  When "attr_types" is 0 nothing
 * is stored and every entry of the tag is counted, giving an upper bound
 * for sizing the table.  The table must have room for the added entries
 * plus the terminator.
 */
static int count_attr_types(AttrType * attr_types, HTTag * tag)
{
    const AttrType *wanted = tag->attr_types;
    int added = 0;

    if (wanted == 0)
        return 0;

    for (; wanted->name != 0; ++wanted) {
        int is_new = 1;

        if (attr_types != 0) {
            AttrType *slot = attr_types;

            while (slot->name != 0 && strcmp(slot->name, wanted->name) != 0)
                ++slot;
            /* decide before storing: the store makes slot->name non-null */
            if (slot->name != 0)
                is_new = 0;
            *slot = *wanted;
        }
        if (is_new)
            ++added;
    }
    return added;
}
/*
 * Write the whole DTD in flat-file form: first the merged, sorted set of
 * attr_types used by any tag (each with its attribute list), then every
 * counted tag.
 *
 * Exits through failed() if the temporary attr_types table cannot be
 * allocated; the table is released before returning.
 */
static void dump_flatfile(FILE *output, const SGML_dtd * dtd)
{
    AttrType *attr_types;
    unsigned count = 0;
    unsigned n;

    /* first pass: without a table every entry is counted (upper bound) */
    for (n = 0; (int) n < dtd->number_of_tags; ++n) {
        count += count_attr_types(0, &(dtd->tags[n]));
    }
    attr_types = typecallocn(AttrType, count + 1);
    if (attr_types == 0)
        failed("calloc");

    /* second pass: merge all of the attr_types data into the table */
    for (n = 0; (int) n < dtd->number_of_tags; ++n) {
        count_attr_types(attr_types, &(dtd->tags[n]));
    }
    count = len_AttrTypes(attr_types);
    qsort(attr_types, count, sizeof(*attr_types), compare_attr_types);

    fprintf(output, "%u attr_types\n", count);
    for (n = 0; n < count; ++n) {
        fprintf(output, "\t%u:%s\n", n, attr_types[n].name);
        dump_flat_attrs(output, attr_types[n].list,
                        len_AttrList(attr_types[n].list));
    }
    /* entries only borrow name/list pointers from the DTD */
    free(attr_types);

    fprintf(output, "%d tags\n", dtd->number_of_tags);
    for (n = 0; (int) n < dtd->number_of_tags; ++n) {
        dump_flat_HTTag(output, n, &(dtd->tags[n]));
    }
#if 0
    fprintf(output, "%d entities\n", dtd->number_of_entities);
    for (n = 0; n < dtd->number_of_entities; ++n) {
    }
#endif
}
/*
 * Read the next line (at most 1023 characters, newline included if it fits)
 * from "input" and return it as a newly allocated string that the caller
 * frees.  Returns 0 when nothing could be read.
 */
static char *get_line(FILE *input)
{
    char temp[1024];

    if (fgets(temp, (int) sizeof(temp), input) == 0)
        return 0;

    return strdup(temp);
}
/*
 * Used inside the token loop of load_flat_TagClass(): if the current token
 * spells this class name, set its bit and go on to the next token.
 */
#define LOAD(name) \
    if (!strcmp(data, #name)) {\
        *theClass |= Tgc_##name; \
        continue; \
    }
/*
 * Read one "<name>: <class> <class> ..." line, as written by
 * dump_flat_TagClass(), and store the combined Tgc_* bits in *theClass.
 *
 * Returns 1 on success.  Returns 0, with a message on stderr, if the line is
 * missing, starts with a different field name, or contains an unknown class
 * name.  *theClass is reset to 0 first.
 */
static int load_flat_TagClass(FILE *input, const char *name, TagClass * theClass)
{
    char prefix[80];
    char *next = get_line(input);
    char *data;
    int result = 0;

    *theClass = 0;
    if (next == 0) {
        fprintf(stderr, "Did not find %s\n", name);
        return 0;
    }

    /* bounded, so an over-long field name cannot overrun prefix[] */
    snprintf(prefix, sizeof(prefix), "\t\t%s:", name);
    data = strtok(next, "\n ");
    if (data != 0 && !strcmp(data, prefix)) {
        result = 1;
        while ((data = strtok(NULL, "\n ")) != 0) {
            LOAD(FONTlike);
            LOAD(EMlike);
            LOAD(MATHlike);
            LOAD(Alike);
            LOAD(formula);
            LOAD(TRlike);
            LOAD(SELECTlike);
            LOAD(FORMlike);
            LOAD(Plike);
            LOAD(DIVlike);
            LOAD(LIlike);
            LOAD(ULlike);
            LOAD(BRlike);
            LOAD(APPLETlike);
            LOAD(HRlike);
            LOAD(MAPlike);
            LOAD(outer);
            LOAD(BODYlike);
            LOAD(HEADstuff);
            LOAD(same);
            /* only reached when no LOAD() above matched the token */
            fprintf(stderr, "Unexpected TagClass '%s'\n", data);
            result = 0;
            break;
        }
    } else if (data) {
        fprintf(stderr, "load_flat_TagClass: '%s' vs '%s'\n", data, prefix);
    }
    free(next);
    return result;
}
#undef LOAD
/*
 * Used inside the token loop of load_flat_TagFlags(): if the current token
 * spells this flag name, set its bit and go on to the next token.
 */
#define LOAD(name) \
    if (!strcmp(data, #name)) {\
        *flags |= Tgf_##name; \
        continue; \
    }
/*
 * Read one "<name>: <flag> <flag> ..." line, as written by
 * dump_flat_TagFlags(), and store the combined Tgf_* bits in *flags.
 *
 * Returns 1 on success.  Returns 0, with a message on stderr, if the line is
 * missing, starts with a different field name, or contains an unknown flag
 * name.  *flags is reset to 0 first.
 */
static int load_flat_TagFlags(FILE *input, const char *name, TagFlags * flags)
{
    char prefix[80];
    char *next = get_line(input);
    char *data;
    int result = 0;

    *flags = 0;
    if (next == 0) {
        /* report it, as load_flat_TagClass() does for a missing line */
        fprintf(stderr, "Did not find %s\n", name);
        return 0;
    }

    /* bounded, so an over-long field name cannot overrun prefix[] */
    snprintf(prefix, sizeof(prefix), "\t\t%s:", name);
    data = strtok(next, "\n ");
    if (data != 0 && !strcmp(data, prefix)) {
        result = 1;
        while ((data = strtok(NULL, "\n ")) != 0) {
            LOAD(endO);
            LOAD(startO);
            LOAD(mafse);
            LOAD(strict);
            LOAD(nreie);
            LOAD(frecyc);
            LOAD(nolyspcl);
            /* only reached when no LOAD() above matched the token */
            fprintf(stderr, "Unexpected TagFlag '%s'\n", data);
            result = 0;
            break;
        }
    } else if (data) {
        fprintf(stderr, "load_flat_TagFlags: '%s' vs '%s'\n", data, prefix);
    }
    free(next);
    return result;
}
#undef LOAD
/*
 * Read an attribute list written by dump_flat_attrs(): a count line
 * followed by that many "index:type:name" lines.
 *
 * On success the new 0-name-terminated list, sorted by name, is stored in
 * *attrs and its length in *length; the function returns 1.  If the count
 * line is absent, the count is not positive, or allocation fails, *attrs is
 * left untouched and 1 is still returned.  A malformed or out-of-sequence
 * attribute line yields 0, with a message on stderr.
 *
 * NOTE(review): the "%s" in FMT_ONE_ATTR is unbounded; a name of 1024 or
 * more characters would overrun name[].
 */
static int load_flat_AttrList(FILE *input, AttrList * attrs, int *length)
{
    attr *attributes;
    int j, jcmp, code;
    int result = 1;
    char name[1024];
    /* always scanned, so it must exist even when only USE_PRETTYSRC stores it */
    int atype = 0;

    if (fscanf(input, FMT_NUM_ATTRS, length) == 1
        && *length > 0
        && (attributes = typecallocn(attr, (size_t) (*length + 1))) != 0) {
        *attrs = attributes;
        for (j = 0; j < *length; ++j) {
            code = fscanf(input, FMT_ONE_ATTR,
                          &jcmp,
                          &atype,
                          name
                );
            if (code == NUM_ONE_ATTR && (j == jcmp)) {
                attributes[j].name = strdup(name);
#ifdef USE_PRETTYSRC
                attributes[j].type = atype;
#endif
            } else {
                fprintf(stderr, "Did not find attributes\n");
                result = 0;
                break;
            }
        }
        /* a partly filled list has null names, which compare_attr() cannot handle */
        if (result && *length > 1)
            qsort(attributes, *length, sizeof(attributes[0]), compare_attr);
    }
    return result;
}
/*
 * Read one tag written by dump_flat_HTTag() into *tag.
 *
 * "nref" is the index the tag is expected to carry.  "allTypes" is the
 * 0-name-terminated table of known attr_types (may be 0); each attr_type
 * named by the tag must be found there, and the tag's entry borrows that
 * table's attribute list.
 *
 * Returns 1 on success, 0 (with a message on stderr) as soon as any field is
 * missing or malformed.  Strings and arrays stored in *tag are newly
 * allocated; line buffers obtained from get_line() are released here.
 */
static int load_flat_HTTag(FILE *input, unsigned nref, HTTag * tag, AttrType * allTypes)
{
    int result = 0;
    unsigned ncmp = 0;
    char name[1024];
    char *next;
    int code;
    int j;

    /* the field width keeps an over-long name from overrunning name[] */
    code = fscanf(input, "%u:%1023s\n", &ncmp, name);
    if (code != 2 || nref != ncmp) {
        fprintf(stderr, "load_flat_HTTag error\n");
        return 0;
    }

    result = 1;
    tag->name = strdup(name);
#ifdef USE_COLOR_STYLE
    tag->name_len = strlen(tag->name);
#endif
#ifdef USE_JUSTIFY_ELTS
    if (fscanf(input, "%1023s\n", name) == 1) {
        tag->can_justify = !strcmp(name, "justify");
    } else {
        fprintf(stderr, "Did not find can_justify\n");
        result = 0;
    }
#endif
    if (result) {
        result = load_flat_AttrList(input, &(tag->attributes), &(tag->number_of_attributes));
    }
    if (result) {
        AttrType *myTypes = 0;
        int count = 0;
        int k;

        next = get_line(input);
        /* sscanf() may return EOF, which is nonzero: compare against 1 */
        if (next != 0
            && sscanf(next, "%d attr_types\n", &count) == 1
            && count >= 0
            && (myTypes = typecallocn(AttrType, (size_t) (count + 1))) != 0) {
            tag->attr_types = myTypes;
        } else {
            fprintf(stderr, "Did not find attr_types\n");
            result = 0;
        }
        free(next);

        for (k = 0; result && k < count; ++k) {
            int found = 0;

            next = get_line(input);
            if (next == 0 || sscanf(next, "%s\n", name) != 1) {
                fprintf(stderr, "Did not find attr_type name\n");
                result = 0;
            }
            free(next);
            if (!result)
                break;

            for (j = 0; allTypes != 0 && allTypes[j].name != 0; ++j) {
                if (!strcmp(allTypes[j].name, name)) {
                    myTypes[k].name = strdup(name);
                    myTypes[k].list = allTypes[j].list;
                    found = 1;
                    break;
                }
            }
            if (!found) {
                /* a null name would end the list early and break the sort */
                fprintf(stderr, "Unknown attr_type '%s'\n", name);
                result = 0;
            }
        }
        if (result && count > 1)
            qsort(myTypes, count, sizeof(myTypes[0]), compare_attr_types);
    }
    if (result) {
        next = get_line(input);
        if (next != 0
            && sscanf(next, "\t\tcontents: %s\n", name) == 1) {
            tag->contents = s2SGMLContent(name);
        } else {
            fprintf(stderr, "Did not find contents\n");
            result = 0;
        }
        free(next);
    }
    /* remaining fields, in the order dump_flat_HTTag() writes them */
    result = result
        && load_flat_TagClass(input, "tagclass", &(tag->tagclass))
        && load_flat_TagClass(input, "contains", &(tag->contains))
        && load_flat_TagClass(input, "icontains", &(tag->icontains))
        && load_flat_TagClass(input, "contained", &(tag->contained))
        && load_flat_TagClass(input, "icontained", &(tag->icontained))
        && load_flat_TagClass(input, "canclose", &(tag->canclose))
        && load_flat_TagFlags(input, "flags", &(tag->flags));
    return result;
}
/*
 * Read one attr_type entry of the flat file: an "<index>:<name>" line whose
 * index must equal "ncmp", followed by the entry's attribute list.
 * The name (a fresh copy) and list are stored in *types.
 * Returns 1 on success, 0 otherwise.
 */
static int load_flat_AttrType(FILE *input, AttrType * types, size_t ncmp)
{
    char name[1024];
    int ntst;

    if (fscanf(input, "%d:%s\n", &ntst, name) != 2)
        return 0;
    if (ntst != (int) ncmp)
        return 0;

    types->name = strdup(name);
    /* ntst is reused to receive the list length, which is not needed here */
    if (!load_flat_AttrList(input, &(types->list), &ntst))
        return 0;

    return 1;
}
/*
 * Build an SGML_dtd from a flat file written by dump_flatfile().
 *
 * Reads the attr_types section, then the tags.  number_of_tags in the result
 * counts the tags loaded before the first failure.  One extra slot after the
 * tags receives a copy of the "OBJECT" tag with contents SGML_MIXED and
 * flags Tgf_strict.
 *
 * Returns 0 if the "tags" count line is missing, if the SGML_dtd itself
 * cannot be allocated, or if no OBJECT tag was found.  If only the tag array
 * cannot be allocated, the SGML_dtd is returned with a null tags pointer.
 */
static SGML_dtd *load_flatfile(FILE *input)
{
    AttrType *attr_types = 0;
    SGML_dtd *result = 0;
    HTTag *alt_object = 0;
    int number_of_attrs = 0;
    int number_of_tags = 0;
    size_t n;

    /* attr_types section; reading stops quietly at the first bad entry */
    if (fscanf(input, "%d attr_types\n", &number_of_attrs) != 0
        && number_of_attrs != 0
        && (attr_types = typecallocn(AttrType, number_of_attrs + 1)) != 0) {
        n = 0;
        while (n < (size_t) number_of_attrs
               && load_flat_AttrType(input, attr_types + n, n)) {
            ++n;
        }
    }

    if (fscanf(input, "%d tags\n", &number_of_tags) != 1)
        return 0;

    if ((result = typecalloc(SGML_dtd)) == 0)
        return 0;
    if ((result->tags = typecallocn(HTTag, (number_of_tags + 2))) == 0)
        return result;

    for (n = 0; n < (size_t) number_of_tags; ++n) {
        if (!load_flat_HTTag(input, n, &(result->tags[n]), attr_types))
            break;
        result->number_of_tags = (n + 1);
    }

    /* clone OBJECT into the slot just past the counted tags */
    for (n = 0; n < (size_t) number_of_tags; ++n) {
        if (result->tags[n].name == 0)
            continue;
        if (strcmp(result->tags[n].name, "OBJECT") != 0)
            continue;
        alt_object = result->tags + number_of_tags;
        *alt_object = result->tags[n];
        alt_object->contents = SGML_MIXED;
        alt_object->flags = Tgf_strict;
        break;
    }
    if (alt_object == 0) {
        fprintf(stderr, "Did not find OBJECT tag\n");
        result = 0;
    }
    return result;
}
/*
 * Entry point.
 *
 * Options (see GETOPT): -c writes C source, -h writes the C header, with
 * neither the flat file is written; -l FILE loads the DTD from a flat file
 * instead of using the built-in HTML_dtd; -o FILE redirects output;
 * -s / -t select DTD version 0 / 1.
 *
 * Returns EXIT_SUCCESS when the requested output was written, EXIT_FAILURE
 * if the DTD could not be loaded; file and write errors exit through
 * failed().
 */
int main(int argc, char *argv[])
{
    const SGML_dtd *the_dtd = &HTML_dtd;
    int ch;
    int dtd_version = 0;
    int c_option = FALSE;
    int h_option = FALSE;
    int l_option = FALSE;
    FILE *input = stdin;
    FILE *output = stdout;

    while ((ch = getopt(argc, argv, GETOPT)) != -1) {
        switch (ch) {
        case 'c':
            c_option = TRUE;
            break;
        case 'h':
            h_option = TRUE;
            break;
        case 'l':
            l_option = TRUE;
            input = fopen(optarg, "r");
            if (input == 0)
                failed(optarg);
            break;
        case 'o':
            output = fopen(optarg, "w");
            if (output == 0)
                failed(optarg);
            break;
        case 't':
            dtd_version = 1;
            break;
        case 's':
            dtd_version = 0;
            break;
        default:
            usage();            /* does not return */
        }
    }

    HTSwitchDTD(dtd_version);
    if (l_option)
        the_dtd = load_flatfile(input);
    if (input != stdin)
        fclose(input);

    if (the_dtd == 0) {
        /* make a failed load visible to the caller (e.g. a makefile) */
        fprintf(stderr, "Cannot load DTD\n");
        return EXIT_FAILURE;
    }

    if (c_option)
        dump_source(output, the_dtd, dtd_version);
    if (h_option)
        dump_header(output, the_dtd);
    if (!c_option && !h_option)
        dump_flatfile(output, the_dtd);

    /* a short or failed write must not be reported as success */
    if (fflush(output) != 0 || ferror(output))
        failed("write");
    if (output != stdout && fclose(output) != 0)
        failed("fclose");

    return EXIT_SUCCESS;
}