#!/usr/bin/perl -w
# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $
#
# Translate one or more ".tbl" files into ".html" files which can be used to
# test the charset support in lynx. Each of the ".html" files will use the
# charset that corresponds to the input ".tbl" file.
use strict;
use Getopt::Std;
use File::Basename;
use POSIX qw(strtod);
sub field($$) {
my $value = $_[0];
my $count = $_[1];
while ( $count > 0 ) {
$count -= 1;
$value =~ s/^\S*\s*//;
}
$value =~ s/\s.*//;
return $value;
}
sub notes($) {
my $value = $_[0];
$value =~ s/^[^#]*//;
$value =~ s/^#//;
$value =~ s/^\s+//;
return $value;
}
sub make_header($$$) {
my $source = $_[0];
my $charset = $_[1];
my $official = $_[2];
printf FP "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
printf FP "<HTML>\n";
printf FP "<HEAD>\n";
printf FP "<!-- $source -->\n";
printf FP "<TITLE>%s table</TITLE>\n", &escaped($official);
printf FP "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", &escaped($charset);
printf FP "</HEAD>\n";
printf FP "\n";
printf FP "<BODY> \n";
printf FP "\n";
printf FP "<H1 ALIGN=center>%s table</H1> \n", &escaped($charset);
printf FP "\n";
printf FP "<PRE>\n";
printf FP "Code Char Entity Render Description\n";
}
sub make_mark() {
printf FP "---- ---- ------ ------ -----------------------------------\n";
}
sub escaped($) {
my $result = $_[0];
$result =~ s/&/&/g;
$result =~ s/</</g;
$result =~ s/>/>/g;
return $result;
}
sub make_row($$$) {
my $old_code = $_[0];
my $new_code = $_[1];
my $comments = $_[2];
# printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
my $visible = sprintf("&#%d; ", $new_code);
if ($old_code < 256) {
printf FP "%4x %c %.13s &#%d; %s\n",
$old_code, $old_code,
$visible, $new_code,
&escaped($comments);
} else {
printf FP "%4x . %.13s &#%d; %s\n",
$old_code,
$visible, $new_code,
&escaped($comments);
}
}
sub null_row($$) {
my $old_code = $_[0];
my $comments = $_[1];
if ($old_code < 256) {
printf FP "%4x %c %s\n",
$old_code, $old_code,
&escaped($comments);
} else {
printf FP "%4x . %s\n",
$old_code,
&escaped($comments);
}
}
sub make_footer() {
printf FP "</PRE>\n";
printf FP "</BODY>\n";
printf FP "</HTML>\n";
}
# return true if the string describes a range
sub is_range($) {
return ($_[0] =~ /.*-.*/);
}
# convert the U+'s to 0x's so strtod() can convert them.
sub zeroxes($) {
my $result = $_[0];
$result =~ s/^U\+/0x/;
$result =~ s/-U\+/-0x/;
return $result;
}
# convert a string to a number (-1's are outside the range of Unicode).
sub value_of($) {
my ($result, $oops) = strtod($_[0]);
$result = -1 if ($oops ne 0);
return $result;
}
# return the first number in a range
sub first_of($) {
my $range = &zeroxes($_[0]);
$range =~ s/-.*//;
return &value_of($range);
}
# return the last number in a range
sub last_of($) {
my $range = &zeroxes($_[0]);
$range =~ s/^.*-//;
return &value_of($range);
}
sub one_many($$$) {
my $oldcode = $_[0];
my $newcode = &zeroxes($_[1]);
my $comment = $_[2];
my $old_code = &value_of($oldcode);
if ( $old_code lt 0 ) {
printf "? Problem with number \"%s\"\n", $oldcode;
} else {
&make_mark if (( $old_code % 8 ) == 0 );
if ( $newcode =~ /^#.*/ ) {
&null_row($old_code, $comment);
} elsif ( &is_range($newcode) ) {
my $first_item = &first_of($newcode);
my $last_item = &last_of($newcode);
my $item;
if ( $first_item lt 0 or $last_item lt 0 ) {
printf "? Problem with one:many numbers \"%s\"\n", $newcode;
} else {
if ( $comment =~ /^$/ ) {
$comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item);
} else {
$comment = $comment . " (range)";
}
for $item ( $first_item..$last_item) {
&make_row($old_code, $item, $comment);
}
}
} else {
my $new_code = &value_of($newcode);
if ( $new_code lt 0 ) {
printf "? Problem with number \"%s\"\n", $newcode;
} else {
if ( $comment =~ /^$/ ) {
$comment = sprintf("mapped: %#x to %#x", $old_code, $new_code);
}
&make_row($old_code, $new_code, $comment);
}
}
}
}
sub many_many($$$) {
my $oldcode = $_[0];
my $newcode = $_[1];
my $comment = $_[2];
my $first_old = &first_of($oldcode);
my $last_old = &last_of($oldcode);
my $item;
if (&is_range($newcode)) {
my $first_new = &first_of($newcode);
my $last_new = &last_of($newcode);
for $item ( $first_old..$last_old) {
&one_many($item, $first_new, $comment);
$first_new += 1;
}
} else {
for $item ( $first_old..$last_old) {
&one_many($item, $newcode, $comment);
}
}
}
sub approximate($$$) {
my $values = $_[0];
my $expect = sprintf("%-8s", $_[1]);
my $comment = $_[2];
my $escaped = &escaped($expect);
my $left;
my $this;
my $next;
$escaped =~ s/\\134/\\/g;
$escaped =~ s/\\015/\
\;/g;
$escaped =~ s/\\012/\
\;/g;
while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) {
$left = $escaped;
$left =~ s/\\[0-7]{3}.*//;
$this = substr $escaped,length($left)+1,3;
$next = substr $escaped,length($left)+4;
$escaped = sprintf("%s&#%d;%s", $left, oct $this, $next);
}
my $visible = sprintf("&#%d; ", $values);
if ($values < 256) {
printf FP "%4x %c %.13s &#%d; approx: %s\n",
$values, $values,
$visible,
$values,
$escaped;
} else {
printf FP "%4x . %.13s &#%d; approx: %s\n",
$values,
$visible,
$values,
$escaped;
}
}
sub doit($) {
my $source = $_[0];
printf "** %s\n", $source;
my $target = basename($source, ".tbl");
# Read the file into an array in memory.
open(FP,$source) || do {
print STDERR "Can't open input $source: $!\n";
return;
};
my (@input) = <FP>;
chomp @input;
close(FP);
my $n;
my $charset = "";
my $official = "";
my $empty = 1;
for $n (0..$#input) {
$input[$n] =~ s/\s*$//; # trim trailing blanks
$input[$n] =~ s/^\s*//; # trim leading blanks
$input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff
next if $input[$n] =~ /^$/;
next if $input[$n] =~ /^#.*$/;
if ( $empty
and ( $input[$n] =~ /^\d/
or $input[$n] =~ /^U\+/ ) ) {
$target = $charset . ".html";
printf "=> %s\n", $target;
open(FP,">$target") || do {
print STDERR "Can't open output $target: $!\n";
return;
};
&make_header($source, $charset, $official);
$empty = 0;
}
if ( $input[$n] =~ /^M.*/ ) {
$charset = $input[$n];
$charset =~ s/^.//;
} elsif ( $input[$n] =~ /^O.*/ ) {
$official = $input[$n];
$official =~ s/^.//;
} elsif ( $input[$n] =~ /^\d/ ) {
my $newcode = &field($input[$n], 1);
next if ( $newcode eq "idem" );
next if ( $newcode eq "" );
my $oldcode = &field($input[$n], 0);
if ( &is_range($oldcode) ) {
&many_many($oldcode, $newcode, ¬es($input[$n]));
} else {
&one_many($oldcode, $newcode, ¬es($input[$n]));
}
} elsif ( $input[$n] =~ /^U\+/ ) {
if ( $input[$n] =~ /^U\+\w+:/ ) {
my $values = $input[$n];
my $expect = $input[$n];
$values =~ s/:.*//;
$values = &zeroxes($values);
$expect =~ s/^[^:]+://;
if ( &is_range($values) ) {
printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect;
} else {
&approximate(&value_of($values), $expect, ¬es($input[$n]));
}
} else {
my $value = $input[$n];
$value =~ s/\s*".*//;
$value = &value_of(&zeroxes($value));
if ($value gt 0) {
my $quote = $input[$n];
my $comment = ¬es($input[$n]);
$quote =~ s/^[^"]*"//;
$quote =~ s/".*//;
&approximate($value, $quote, $comment);
} else {
printf "fixme:%d(%s)\n", $n, $input[$n];
}
}
} else {
# printf "skipping line %d:%s\n", $n + 1, $input[$n];
}
}
if ( ! $empty ) {
&make_footer();
}
close FP;
}
sub usage() {
print <<USAGE;
Usage: $0 [tbl-files]
The script writes a new ".html" file for each input, using
the same name as the input, stripping the ".tbl" suffix.
USAGE
exit(1);
}
if ( $#ARGV < 0 ) {
usage();
} else {
while ( $#ARGV >= 0 ) {
&doit ( shift @ARGV );
}
}
exit (0);