#! /usr/bin/perl -w # xml2tex: a perl script to process XML documents and generate TeX code # Copyright (C) 1999 Ed Cashin # # version 1.1 # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # use strict; use XML::Parser; use Time::localtime; use Getopt::Long; my ($help_wanted); my $options = GetOptions("help" => \$help_wanted); if ($help_wanted || ! 
@ARGV) { print "run perldoc $0 for usage\n"; } my @files = @ARGV; my $parser = new XML::Parser(ErrorContext => 2, Handlers => { 'Init' => \&inithandler, 'Start' => \&starthandler, 'End' => \&endhandler, 'Char' => \&charhandler, 'Final' => \&finalhandler, }); foreach my $filename (@files) { $parser->parsefile($filename); } sub inithandler { print <<'ENDORAMA'; \input eplain \def\normalparskip{1 ex plus .5ex} \def\itemskip{1 em} \parskip=\normalparskip \parindent=0in \newdimen\tmphsize \font\tenvtt=cmvtt10 at 10 truept \font\twelvevtt=cmvtt10 at 12 truept \font\tenrm=cmr10 at 10 truept \font\tenbx=cmbx10 at 10 truept \font\twelvessbx=cmssbx10 at 12 truept \font\fifteenssbx=cmssbx10 at 15 truept ENDORAMA } sub starthandler { my ($p, $el, @attrlist) = @_; if ($el =~ /\btitle\b/) { &starttitle($p, $el); } elsif ($el =~ /\bsubtitle\b/) { &startsubtitle($p, $el); } elsif ($el =~ /\bit\b/) { &startitalic($p, $el); } elsif ($el =~ /\bbf\b/) { &startbold($p, $el); } elsif ($el =~ /\bsect\b/) { &startsect($p, $el); } elsif ($el =~ /\bcode\b/) { &startcode($p, $el); } elsif ($el =~ /\bitemize\b/) { &startitemize($p, $el); } elsif ($el =~ /\bitem\b/) { &startitem($p, $el, \@attrlist); } #----------now the empty XML tags elsif ($el =~ /\btoday\b/) { &fancy_today($p, $el); } elsif ($el =~ /\bp\b/) { &par($p, $el); } elsif ($el =~ /\bhr\b/) { &hrule($p, $el); } } sub charhandler { my ($p, $data) = @_; print &tex_escaped($data); } sub tex_escaped { my $text = shift; study $text; #----------handle quotes while ($text =~ s/\"/``/ && $text =~ s/\"/''/) { } #----------escape special chars #-----first are extra-special characters: do \, {, and } first #-----so that our own TeX code doesn't get messed up # $text =~ s/\\/\$\\backslash\$/g; # \ $text =~ s/\\/dollabackaslashadolla/g; # \ is a special case, since using # $\backslash$ would conflict with the # rule for escaping '$' $text =~ s/\$/\\\$/g; # $ $text =~ s/\{/\$\\lbrace\$/g; # { $text =~ s/\}/\$\\rbrace\$/g; # } $text =~ 
s/&/\\\&/g; # & $text =~ s/\^/\\char\'136\\relax /g; # ^ $text =~ s/~/\\char\'176\\relax /g; # ~ $text =~ s/\|/\$|\$/g; # | $text =~ s/_/\\\_/g; # _ $text =~ s/\#/\\char\'043\\relax /g; # # $text =~ s/%/\\\%/g; # % $text =~ s//\$>\$/g; # > #---------reset the backslash $text =~ s/dollabackaslashadolla/\$\\backslash\$/g; #old # #----------escape dollar signs before using them to get '<' and '>' # $text =~ s/\$/\\\$/g; # #----------handle '<' and '>' # $text =~ s/>/\$>\$/g; # $text =~ s/mday . " " . $months[localtime->mon] . ", " . (localtime->year + 1900); } sub par { print "\\par "; } sub hrule { print "\\bigbreak\\null\\vskip 1ex\\hrule\n"; } __END__ =head1 NAME xml2tex - Converts XML documentation into TeX code, suitable for processing with TeX or pdfTeX. =head1 SYNOPSIS kali$ xml2tex mydoc.xml > mydoc.tex kali$ pdftex mydoc.tex && acroread mydoc.pdf kali$ tex mydoc.tex && dvips -o mydoc.ps mydoc =head1 ABSTRACT This program understands a small but useful number of XML tags. It can generate TeX formatting code based on the structural tags. This TeX code may be processed by pdfTeX to produce pdf-format files or by TeX to produce dvi (and postscript) files. =head2 NOTE You must supply a starting and ending tag around your whole document after the xml version tag: Hi there! XML is case sensitive. =head1 SUPPORTED TAGS =item HEAD the tag encloses a header for the document: currently only a title and subtitles. =item TITLE the tag encloses the document's title =item SUBTITLE =item ITALICS =item BOLD FACE =item PREFORMATTED CODE The code tag generates TeX code that will produce a monospaced font and will respect the formatting you provide with whitespace--but NOT FOR TABS! Tabs don't work. They look like spaces in the output. For example, # this is a comment sub foo { my $bar = "open"; } =head1 more supported tags: EMPTY-ELEMENT TAGS When the tag doesn\'t enclose something it\'s called an empty-element tag. You have to put a slash _after_ the tag like this: It is . 
=over 4

=item PARAGRAPH

    <p/>

Shows that this is the beginning of a new paragraph.

=item TODAY

    <today/>

Inserts a string for today's date, e.g., "21 May, 1999".

=back

=head1 BUGS

It's not really a bug, but a lot of the kinds of things you'd want in
documentation turn out to be enough like XML code to mess up the parser.
Things that look like "<tag>", for example.  You can use an XML CDATA
section to quote weird stuff:

    <![CDATA[ text with <angle brackets> & ampersands ]]>

See http://www.w3.org/XML/

I'm sure there are bugs.  Please let me know about them.