#!/bin/perl # Copyright (c) 2010,2011,2012 Todd T. Fries # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # read in 'mandoc -Tascii' formatted man pages, spit out txt useful for further # processing by other utilities use strict; use warnings; our $fileinfo = $ARGV[0]; our $verbose = 0; my $line; my @lines; while() { $line = $_; push @lines,$line; } my $oline = ""; my $fmtline = "%s"; foreach $line (@lines) { my $newline = ""; foreach my $seg (split(/(.\x08.)/,$line)) { my $newseg = $seg; $newseg =~ m/^(.)\x08(.)$/; if (!defined($1) || !defined($2)) { $newline .= $seg; next; } if ($1 eq $2) { $newline .= "${2}"; next; } if ($1 eq "_") { $newline .= "${2}"; next; } $newline .= $seg; next; } if ($verbose > 0) { printf STDERR "==> text{bf,it}\n line: <%s>\nnewline: <%s>\n",$line,$newline; } $line = $newline; $line =~ m/(.)\x08/; if (defined($1)) { printf STDERR "Removing %s\\x08\n",$1; } $line =~ s/.\x08//g; # combine adjacent entries foreach my $macro (("textbf", "textit")) { $oline = ""; while ($oline ne $line) { #printf STDERR "combine adjacent\n"; $oline = $line; $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1$2\}\xbb/g; } } # combine space separated foreach my $macro (("textbf")) { #printf STDERR "combine space\n"; $oline = ""; while ($oline ne $line) { $oline = $line; $line =~ s/\xab\\${macro}\{([^\}]*)\}\xbb[ ]+\xab\\${macro}\{([^\}]*)\}\xbb/\xab\\${macro}\{$1 $2\}\xbb/g; } } # do the substitution one at a time to be sure to add all man pages, not just the last ones per line. # XXX provide an exceptions list, audio(9) has mono(1) and stereo(2) # XXX references, which are _not_ man pages $oline = ""; while ($oline ne $line) { $oline=$line; $line =~ s/\{(http|ftp|https):\/\/(.*)\}/ $1:\/\/$2 /; if (0) { if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/) { my $quote = texquote($1); $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)([,\.\) ])/ \xab\\man{$quote}{$2}\xbb$3/; } if ($line =~ m/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/) { my $quote = texquote($1); $line =~ s/ ([a-z][a-z0-9\.\-\_]*)\(([1-9])\)$/ \xab\\man{$quote}{$2}\xbb/; } } } my @macros = ("textbf","textit","man","href"); # quote arguments for tex foreach my $macro (@macros) { my $newline = ""; foreach my $seg (split(/(\xab\\${macro}\{[^\xbb]*\}\xbb)/,$line)) { #printf STDERR "quote args\n"; my $newseg = $seg; # check for nesting first; we only want to escape the # inner most argument, process nested macro if it has a nested macro # since the nested macro won't catch in the other regex cases my $foundnest = 0; foreach my $nest (@macros) { if ($macro eq $nest) { next; } $newseg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\{([^\xbb]*)\}\}\xbb$/; if (defined($2)) { $foundnest = 1; $newline .= "\xab\\${macro}\{\\${nest}\{".texquote($1)."\}\{".texquote(${2})."\}\}\xbb"; last; } $newseg =~ m/^\xab\\${macro}\{[ ]*\\${nest}\{([^\xbb]*)\}\}\xbb$/; if (defined($1)) { $foundnest = 1; $newline .= "\xab\\${macro}\{\\${nest}\{".texquote($1)."\}\}\xbb"; last; } } if ($foundnest > 0) { next; } # check for 2 args first $newseg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\{([^\xbb]*)\}\xbb$/; if (defined($2)) { $newline .= "\xab\\${macro}\{".texquote($1)."\}\{".texquote(${2})."\}\xbb"; next; } $newseg =~ m/^\xab\\${macro}\{([^\xbb]*)\}\xbb$/; if (defined($1)) { $newline .= "\xab\\${macro}\{".texquote($1)."\}\xbb"; next; } $newline .= $seg; } $line = $newline; } printf $fmtline,$line; } 1; sub texquote { my ($text) = @_; my ($ret) = ""; my ($esctest) = ""; my ($escbase) = "BaCkSlAsH"; my ($esccount) = 0; #$verbose++; if ($verbose > 0) { printf STDERR "\ntexquote: '%s' -> ",$text; } if ($text =~ m/\\/) { $esctest=sprintf "%s%d",$escbase,$esccount++; while ($text =~ m/$esctest/) { $esctest=sprintf "%s%d",$escbase,$esccount++; } $text =~ s/\\/$esctest/g; if ($verbose > 0) { printf STDERR "'%s' -> ",$text; } } $text =~ s/([%\{\}_#\&\$\^])/\\$1/g; $text =~ s/([<>\|\*~])/\{\$$1\$\}/g; if ($esccount > 0) { $text =~ s/$esctest/\$\\backslash\$/g; } if ($verbose > 0) { printf STDERR "'%s'\n",$text; } #$verbose--; return $text; }