#! /usr/bin/perl -w
# xml2tex: a perl script to process XML documents and generate TeX code
# Copyright (C) 1999 Ed Cashin
#
# version 1.1
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
use strict;
use XML::Parser;
use Time::localtime;
use Getopt::Long;
#----------command line handling
# --help is the only option.  Asking for help, passing an option that
# GetOptions rejects, or naming no input files prints a pointer to the
# embedded POD and stops right there, so the hint can never end up mixed
# into generated TeX on standard output.
my ($help_wanted);
my $options = GetOptions("help" => \$help_wanted);
if (! $options || $help_wanted || ! @ARGV) {
    print "run perldoc $0 for usage\n";
    # A rejected option is an error; a plain request for usage is not.
    exit($options ? 0 : 1);
}
my @files = @ARGV;

#----------parser setup
# The handler subs are defined further down in this file; taking a
# reference to them here is fine because subs are compiled before any of
# this top-level code runs.
my $parser = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => {
        'Init'  => \&inithandler,
        'Start' => \&starthandler,
        'End'   => \&endhandler,
        'Char'  => \&charhandler,
        'Final' => \&finalhandler,
    },
);

# Each file is parsed separately, so the Init and Final handlers (the TeX
# preamble and the closing \bye) are emitted once per input file.
foreach my $filename (@files) {
    $parser->parsefile($filename);
}
# Registered as the parser's Init handler.  Prints the TeX preamble:
# it loads eplain, sets paragraph spacing and indentation, declares the
# \tmphsize scratch dimension and loads the fonts that the element
# handlers later select by name.
sub inithandler {
    my $preamble = <<'END_OF_PREAMBLE';
\input eplain
\def\normalparskip{1 ex plus .5ex}
\def\itemskip{1 em}
\parskip=\normalparskip
\parindent=0in
\newdimen\tmphsize
\font\tenvtt=cmvtt10 at 10 truept
\font\twelvevtt=cmvtt10 at 12 truept
\font\tenrm=cmr10 at 10 truept
\font\tenbx=cmbx10 at 10 truept
\font\twelvessbx=cmssbx10 at 12 truept
\font\fifteenssbx=cmssbx10 at 15 truept
END_OF_PREAMBLE
    print $preamble;
}
# Registered as the parser's Start handler.  Receives the parser object,
# the element name and the flattened attribute name/value list, and hands
# the event to the first emitter whose pattern matches the element name.
# Element names that match no pattern are ignored.
sub starthandler {
    my ($p, $el, @attrlist) = @_;

    # Ordered rules: [ pattern, emitter, pass-attributes? ].  The order is
    # significant because the first match wins.
    my @rules = (
        [ qr/\btitle\b/,    \&starttitle    ],
        [ qr/\bsubtitle\b/, \&startsubtitle ],
        [ qr/\bit\b/,       \&startitalic   ],
        [ qr/\bbf\b/,       \&startbold     ],
        [ qr/\bsect\b/,     \&startsect     ],
        [ qr/\bcode\b/,     \&startcode     ],
        [ qr/\bitemize\b/,  \&startitemize  ],
        [ qr/\bitem\b/,     \&startitem, 1  ],
        #----------now the empty XML tags
        [ qr/\btoday\b/,    \&fancy_today   ],
        [ qr/\bp\b/,        \&par           ],
        [ qr/\bhr\b/,       \&hrule         ],
    );

    foreach my $rule (@rules) {
        my ($pattern, $emitter, $wants_attrs) = @$rule;
        next unless $el =~ $pattern;
        # Only the item emitter is given the attribute list.
        return $wants_attrs
            ? $emitter->($p, $el, \@attrlist)
            : $emitter->($p, $el);
    }
}
# Registered as the parser's Char handler.  Prints the character data it
# is given after running it through tex_escaped().
sub charhandler {
    my ($p, $data) = @_;
    my $escaped = tex_escaped($data);
    print $escaped;
}
# Return a copy of $text that can be typeset as ordinary TeX text.
#
#   - Straight double quotes become alternating `` and '' pairs.  The
#     alternation restarts with `` on every call; no quote state is
#     carried from one call to the next.
#   - Every character TeX treats specially is replaced by a sequence that
#     prints the character itself.
#
# All special characters are translated in one pass over the text, so the
# backslashes and dollar signs introduced by one replacement can never be
# escaped a second time by another, and no placeholder string is needed
# to protect the backslash.
sub tex_escaped {
    my $text = shift;

    # Built inside the sub on purpose: the top-level parsing code runs
    # before a file-scoped initialisation placed here would execute.
    my %tex_for = (
        '\\' => '$\backslash$',
        '$'  => '\$',
        '{'  => '$\lbrace$',
        '}'  => '$\rbrace$',
        '&'  => '\&',
        '^'  => q{\char'136\relax },
        '~'  => q{\char'176\relax },
        '|'  => '$|$',
        '_'  => '\_',
        '#'  => q{\char'043\relax },
        '%'  => '\%',
        '<'  => '$<$',
        '>'  => '$>$',
    );

    #----------handle quotes: 1st, 3rd, ... open; 2nd, 4th, ... close
    my $quotes_seen = 0;
    $text =~ s/"/ ($quotes_seen++ % 2) ? q{''} : q{``} /ge;

    #----------escape special chars
    $text =~ s/([\\\$\{\}&^~|_#%<>])/$tex_for{$1}/g;

    return $text;
}
# Registered as the parser's End handler.  Receives the parser object and
# the name of the element being closed, and calls the first closing
# emitter whose pattern matches that name.  Element names that match no
# pattern are ignored.
sub endhandler {
    my ($p, $el) = @_;

    # Ordered rules: [ pattern, emitter ].  The first match wins.
    my @rules = (
        [ qr/\bhead\b/,     \&endhead     ],
        [ qr/\btitle\b/,    \&endtitle    ],
        [ qr/\bsubtitle\b/, \&endsubtitle ],
        [ qr/\bit\b/,       \&enditalic   ],
        [ qr/\bbf\b/,       \&endbold     ],
        [ qr/\bsect\b/,     \&endsect     ],
        [ qr/\bcode\b/,     \&endcode     ],
        [ qr/\bitemize\b/,  \&enditemize  ],
        [ qr/\bitem\b/,     \&enditem     ],
    );

    foreach my $rule (@rules) {
        my ($pattern, $emitter) = @$rule;
        return $emitter->($p, $el) if $el =~ $pattern;
    }
}
# Registered as the parser's Final handler.  Prints the \bye that ends
# the TeX document.
sub finalhandler {
    print "\\bye\n";
}
# Heading and inline markup emitters.  Every start* sub opens a TeX group
# and selects a font; the matching end* sub closes that group.  None of
# them looks at the arguments it is called with.

# title: \fifteenssbx, pushed over with \hfil, ended by \par.
sub starttitle {
    print '\hfil{\fifteenssbx ';
}

sub endtitle {
    print '}\par';
}

# subtitle: same layout as the title, in \twelvevtt.
sub startsubtitle {
    print '\hfil{\twelvevtt ';
}

sub endsubtitle {
    print '}\par';
}

# it: \tenit, closed with an italic correction.
sub startitalic {
    print '{\tenit ';
}

sub enditalic {
    print '\/}';
}

# bf: \tenbx, closed the same way as italic text.
sub startbold {
    print '{\tenbx ';
}

sub endbold {
    print '\/}';
}

# sect: a \bigbreak on its own line, then the heading in \twelvessbx.
sub startsect {
    print "\n" . '\bigbreak' . "\n" . '{\twelvessbx ';
}

sub endsect {
    print '}\par' . "\n";
}
# Opens an itemize list: sets \tmphsize to \hsize reduced by \itemskip.
# The trailing "% itemize" is a TeX comment marking the spot in the
# generated file.
sub startitemize {
    my $narrow = '\tmphsize=\hsize\advance\tmphsize by -\itemskip';
    print "\n$narrow\t% itemize\n";
}
# Closes an itemize list: adds \itemskip back onto \tmphsize.  The
# trailing "% end itemize" is a TeX comment in the generated file.
sub enditemize {
    my $widen = '\advance\tmphsize by \itemskip';
    print "$widen\t% end itemize\n";
}
# Opens one list item.  The third argument is a reference to the flat
# attribute name/value list; the list is emptied while it is scanned.
# The value of the last "name" attribute with a defined value, if any, is
# TeX-escaped and printed in \tenbx as the item's label.  The item body
# goes into a \vbox of width \tmphsize, left open for enditem to close.
sub startitem {
    my (undef, undef, $attrsR) = @_;

    # Take every pair out of the caller's list in one go.
    my @flat = splice @$attrsR;

    my $itemname;
    for (my $i = 0; $i < @flat; $i += 2) {
        my ($attr, $val) = @flat[ $i, $i + 1 ];
        $itemname = $val if defined $val and $attr eq "name";
    }

    my $label = defined $itemname ? tex_escaped($itemname) : "";

    print "\\medskip\\hskip\\itemskip\\vbox{\\hsize\\tmphsize%\n";
    print " {\\tenbx $label}\\quad\n";
}
# Closes a list item: prints the brace that ends the \vbox opened by
# startitem, followed by a newline.
sub enditem {
    my $closing = "}\n";
    print $closing;
}
# Opens a code listing: starts a group in the \tenvtt font, sets
# \tmphsize to \hsize minus 4em, skips 2em, and opens a \vbox of that
# width with \obeylines and \obeywhitespace in force.  The group and the
# box are left open for endcode to close.
sub startcode {
    my @opening = (
        '\begingroup\tenvtt',
        '\tmphsize=\hsize',
        '\advance\tmphsize by -4em',
        '\null\hskip 2em',
        '\obeylines\obeywhitespace\vbox{\hsize\tmphsize',
    );
    my $tex = join '', map { "$_\n" } @opening;
    print $tex;
}
# Closes a code listing: ends the \vbox and then the group that
# startcode opened.
sub endcode {
    print "}\n\\endgroup\n";
}
# Called when the head element closes.  Draws a 1pt rule 5in wide, sets
# \hsize to 6in, switches to the \tenrm font and adds a \bigskip.
sub endhead {
    my @closing = (
        '\par\vrule height 1pt depth 0pt width 5in\par',
        '\hsize 6in % narrow the right margin',
        '\tenrm',
        '\bigskip',
    );
    my $tex = join '', map { "$_\n" } @closing;
    print $tex;
}
# Prints the current local date as "D Mon, YYYY", e.g. "5 Mar, 2024".
# No newline is printed after it.
#
# The clock is read exactly once.  Taking day, month and year from
# separate localtime calls could give a date that never existed if the
# calls fell on either side of midnight.
sub fancy_today {
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    # Time::localtime's localtime() returns an object with by-name
    # accessors; mon is zero-based and year counts from 1900.
    my $now = localtime();

    my $stamp = $now->mday . " " . $months[ $now->mon ] . ", "
        . ($now->year + 1900);
    print $stamp;
}
# Handles the empty p tag: prints \par followed by a single space.
sub par {
    print '\par ';
}
# Handles the empty hr tag: prints a \bigbreak, 1ex of vertical space and
# a horizontal rule, then a newline.
sub hrule {
    my $rule = '\bigbreak\null\vskip 1ex\hrule';
    print $rule . "\n";
}
__END__
=head1 NAME
xml2tex - Converts XML documentation into TeX code, suitable for processing with TeX or pdfTeX.
=head1 SYNOPSIS
kali$ xml2tex mydoc.xml > mydoc.tex
kali$ pdftex mydoc.tex && acroread mydoc.pdf
kali$ tex mydoc.tex && dvips -o mydoc.ps mydoc
=head1 ABSTRACT
This program understands a small but useful set of XML tags and generates TeX formatting code from those structural tags. The TeX code may be processed by pdfTeX to produce PDF files, or by TeX to produce DVI (and, via dvips, PostScript) files.
=head2 NOTE
You must enclose your whole document in a single pair of starting and ending tags, placed after the xml version tag.
The code tag generates TeX code that will produce a monospaced font and will respect the formatting you provide with whitespace--but NOT FOR TABS! Tabs don't work. They look like spaces in the output. For example,
# this is a comment
sub foo {
my $bar = "open";
}
=head1 more supported tags: EMPTY-ELEMENT TAGS
When a tag doesn't enclose anything, it's called an empty-element tag. You have to put a slash _after_ the tag name, like this:
It is