CFGROB@weizmann.bitnet (01/05/91)
I wrote this script to be able to print some bulletins and other
running formatted texts in TeX. I thought it was a good example of how
powerful the text manipulation facilities of Perl are, and I thought
there might be general interest in the script.
#########################################################################
## The program takes a formatted text file and translates it into TeX
## format. It will try to interpret the intentions of the format as
## much as possible and translate these into the corresponding TeX
## sequences.
##
## There are still several formatting items that could be added,
## e.g. recognizing columns and removing page numbering.
##
## Dov Grobgeld
## Department of Chemical Physics
## The Weizmann Institute of Science
## Rehovot Israel
## Bitnet: CFGROB@WEIZMANN
##
## Version 0.1
## 3 January 1991
##
## This program is donated to the public domain. If modified I would
## be pleased to receive information thereof, so I can include new
## features in my next version.
###########################################################################
use strict;
use warnings;

# Usage:  perl thisscript textfile > texfile
#
# Converts a plain, manually formatted text file into plain TeX, written to
# standard output.  Each input line is classified as an empty line, a
# horizontal rule, a centered line, a short "index" line or the start of a
# paragraph, and the matching TeX is emitted.
#
# The obsolete `$* = 1` (global multi-line matching) has been dropped: the
# variable no longer exists in Perl 5, and every anchored pattern below is
# applied to a single line, so it never influenced the result.

my $TAB_WIDTH = 8;    # distance between tab stops in the input text

# Replace every tab by the number of spaces needed to reach the next tab stop.
sub expand_tabs {
    my ($text) = @_;
    1 while $text =~ s/\A([^\t]*)\t/$1 . ' ' x ($TAB_WIDTH - length($1) % $TAB_WIDTH)/e;
    return $text;
}

# Measure one (tab-expanded, newline-terminated) input line.
# Returns a list of three values:
#   - the length of the whole line, including indentation and the newline,
#   - the number of leading spaces (the indentation),
#   - the line with leading and trailing spaces removed (newline kept).
sub measure_line {
    my ($line) = @_;
    my $length = length $line;
    my $indent = $line =~ s/\A( +)// ? length($1) : 0;
    $line =~ s/ +\n\z/\n/;
    return ($length, $indent, $line);
}

# True if the (already stripped) line consists of one single character
# repeated up to the newline, e.g. "-----\n".
# A very naive test, could be made much more flexible...
sub is_rule_line {
    my ($text) = @_;
    return $text =~ /\A([^\n])\1*\n\z/ ? 1 : 0;
}

# 1. Read the file and find the longest line.
#    The information is used for identifying centered and short lines.
#    Lengths are taken after tab expansion so that they are comparable with
#    the per-line lengths computed in step 2.
my $infile = $ARGV[0];
die "Usage: $0 textfile > texfile\n" unless defined $infile;

open(my $in, '<', $infile) or die "Cannot open '$infile': $!\n";
my @lines;
my $maxline = 0;
while (my $line = <$in>) {
    # A final line without a line feed is treated like any other line.
    $line .= "\n" unless $line =~ /\n\z/;
    $line = expand_tabs($line);
    $maxline = length($line) if length($line) > $maxline;
    push @lines, $line;
}
close($in);

# 2. Go through the lines again and try to interpret the format of the
#    text author.

# Print some TeX commands in the output file
# Redefine the paragraph skip
print '\parskip=0pt plus 2pt', "\n";
# Define some macros
print '\def\emptylineskip{\vskip10pt plus 2pt minus 2pt}', "\n";
print '\def\horizontalline{\vskip2mm\hrule\vskip2mm}', "\n";
print "\n";

my $next = 0;    # index in @lines of the line currently being looked at
while ($next < @lines) {
    # Get line length and amount of indentation
    my ($length, $indent, $text) = measure_line($lines[$next]);
    my $par;

    # An empty line, jump an emptylineskip
    if ($text eq "\n") {
        $par = "\\emptylineskip\n";
        $next++;
    }
    # A repeated character during the whole line is converted to a
    # horizontal line.
    elsif (is_rule_line($text)) {
        $par = "\\horizontalline\n";
        $next++;
    }
    # A centered line: indented at least one tab stop, and the right margin
    # does not exceed the left margin by a third of the text width or more.
    elsif (($indent >= 8) && (($maxline - $length) - $indent < $maxline / 3)) {
        chomp $text;    # Take off the line feed
        $par = "\\centerline\{" . $text . "\}\n";
        $next++;
    }
    # An index line (shorter than half of the longest line in the file)
    elsif ($length < $maxline / 2) {
        $text = "\\noindent " . $text if $indent == 0;
        $par = $text . "\n";    # Add an extra line to make into a paragraph
        $next++;
    }
    # A paragraph
    else {
        # Check if the paragraph has the form of an item
        if (   $text =~ s/\A\s*(\S+)\s\s//     # First word is separated by two
                                               # or more spaces from the second
            || $text =~ s/\A\s*(\w+\.)//       # First word is followed by a
                                               # full stop
            || $text =~ s/\A\s*(\w+\))//)      # First word is followed by a
                                               # closing bracket
        {
            $par = '\\item{' . $1 . '}';
        }
        else {
            $par = '';
        }

        my $lines_in_par = 0;
        # Length of the longest line seen so far, starting with the first
        # line of the paragraph.
        my $longest = $length;

        while (1) {
            $par .= $text;    # Add line to paragraph
            $lines_in_par++;
            $next++;

            # We reached the end of the file
            last if $next >= @lines;

            # Save old line information, then measure the candidate line
            my ($prev_length, $prev_indent) = ($length, $indent);
            ($length, $indent, $text) = measure_line($lines[$next]);
            $longest = $length if $length > $longest;

            # The candidate line is empty: it ends the paragraph and is
            # swallowed (the blank line printed after the paragraph already
            # separates it from what follows).
            if ($text eq "\n") {
                $next++;
                last;
            }
            # The line just added is not the first line in the paragraph and
            # the indentation of the candidate line differs from it.
            last if ($lines_in_par > 1) && ($prev_indent != $indent);
            # The line just added is shorter than half of the longest line.
            last if $prev_length < $longest / 2;
            # The candidate line is composed of only one repeated character.
            last if is_rule_line($text);
        }
        $par .= "\n";
    }

    # Try to compensate for multiple spaces. Really should try to figure
    # out if the text file has columns... I'll save that for later...
    # Only runs of two or more spaces are replaced, and a space is kept
    # after \quad so that it is not glued to the following word.
    $par =~ s/ {2,}/\\quad /g;

    # protect certain characters
    # NOTE: backslash, braces and ^ in the input are passed through as is.
    $par =~ s/\#/\\#/g;
    $par =~ s/\&/\\&/g;
    $par =~ s/\%/\\%/g;
    $par =~ s/\$/\\\$/g;
    $par =~ s/\_/\\_/g;
    $par =~ s/\~/\$\\ast\$/g;
    $par =~ s/\.\.\./\{\\dots\}/g;

    # Replace "..." with ``...'' (quotes are paired in order of appearance)
    1 while ($par =~ s/"/\`\`/) && ($par =~ s/"/\'\'/);

    print $par;
}
print "\n\\end"