CFGROB@weizmann.bitnet (01/05/91)
I wrote this script to be able to print some bulletins and other running
formatted texts in TeX. I thought it is a good example of how powerful the
text manipulation facilities of Perl are, and I thought there might be a
general interest in the script.

#########################################################################
##  The program takes a formatted text file and translates it into TeX
##  format. It will try to interpret the intentions of the format as
##  much as possible and translate these into the corresponding TeX
##  sequences.
##
##  There are still several formatting items that could be added,
##  e.g. recognizing columns and removing page numbering.
##
##  Dov Grobgeld
##  Department of Chemical Physics
##  The Weizmann Institute of Science
##  Rehovot Israel
##  Bitnet: CFGROB@WEIZMANN
##
##  Version 0.1
##  3 January 1991
##
##  This program is donated to the public domain. If modified I would
##  be pleased to receive information thereof, so I can include new
##  features in my next version.
###########################################################################
#
# Usage:  perl text2tex.pl textfile > texfile
#
# The input is read once into memory, classified line by line with a few
# layout heuristics (empty line, horizontal rule, centered line, short
# "index" line, paragraph / item) and written to standard output as plain
# TeX.

use strict;
use warnings;

# Tab stops are assumed to be every 8 columns.
my $TAB_WIDTH = 8;

# Replacement text for every character that is special to plain TeX.
# All of them are substituted in a single pass, so text produced by one
# replacement (e.g. the '$' in '$\backslash$') is never escaped again.
my %TEX_FOR = (
    '\\' => '$\\backslash$',
    '{'  => '$\\{$',
    '}'  => '$\\}$',
    '^'  => '\\^{}',
    '#'  => '\\#',
    '&'  => '\\&',
    '%'  => '\\%',
    '$'  => '\\$',
    '_'  => '\\_',
    '~'  => '$\\ast$',
);

# Replace every tab in $text by the number of blanks needed to reach the
# next tab stop and return the result.
sub expand_tabs {
    my ($text) = @_;
    1 while $text =~ s/\A([^\t]*)\t/$1 . ' ' x ($TAB_WIDTH - length($1) % $TAB_WIDTH)/e;
    return $text;
}

# Read the file at $path and return its lines with the line ending and
# trailing blanks removed and tabs expanded. Dies if the file cannot be
# opened.
sub read_lines {
    my ($path) = @_;
    open my $in, '<', $path or die "Cannot open '$path': $!\n";
    my @lines;
    while (my $raw = <$in>) {
        # Trailing blanks carry no layout information and would otherwise
        # be turned into \quad further down.
        $raw =~ s/\s+\z//;
        push @lines, expand_tabs($raw);
    }
    close $in;
    return @lines;
}

# True if $text (without indentation and line ending) consists of one
# punctuation character repeated at least three times, e.g. "-----".
# Still a very naive test, but it no longer turns a line holding a single
# letter or digit into a rule.
sub is_rule_line {
    my ($text) = @_;
    return $text =~ /\A([^A-Za-z0-9\s])\1{2,}\z/ ? 1 : 0;
}

# Return $text with everything that is special to TeX protected.
sub escape_tex {
    my ($text) = @_;

    # Protect the special characters first, so that the backslashes
    # inserted by the later steps are left alone.
    $text =~ s/([\\{}\^\#&%\$_~])/$TEX_FOR{$1}/g;

    # Try to compensate for multiple spaces. Really should try to figure
    # out if the text file has columns... The blank after \quad keeps the
    # control word separate from the word that follows it.
    $text =~ s/  /\\quad /g;

    $text =~ s/\.\.\./{\\dots}/g;

    # Replace "..." with ``...'': quotes are paired in order of appearance.
    1 while $text =~ s/"/``/ && $text =~ s/"/''/;

    return $text;
}

# Translate the normalized input lines (see read_lines) into a list of
# TeX fragments, one per recognized layout element.
sub convert {
    my (@lines) = @_;

    # The longest line is the reference for recognizing centered and
    # short lines.
    my $widest = 0;
    for my $text (@lines) {
        $widest = length $text if length $text > $widest;
    }

    my @out;
    my $i = 0;
    while ($i < @lines) {
        my $text   = $lines[ $i++ ];
        my $width  = length $text;                            # includes indentation
        my $indent = ($text =~ s/\A( +)//) ? length($1) : 0;

        # An empty line, jump an emptylineskip
        if ($text eq '') {
            push @out, "\\emptylineskip\n";
        }
        # A repeated character during the whole line is converted to a
        # horizontal line.
        elsif (is_rule_line($text)) {
            push @out, "\\horizontalline\n";
        }
        # A centered line: clearly indented, and the right margin is not
        # much larger than the left one.
        elsif ($indent >= 8 && ($widest - $width) - $indent < $widest / 3) {
            push @out, '\\centerline{' . escape_tex($text) . "}\n";
        }
        # An index line: short, becomes a paragraph of its own.
        elsif ($width < $widest / 2) {
            my $prefix = $indent == 0 ? '\\noindent ' : '';
            push @out, $prefix . escape_tex($text) . "\n\n";
        }
        # A paragraph
        else {
            # Check if the paragraph has the form of an item:
            #   - first word is separated by two spaces from the second,
            #   - first word is followed by a full stop, or
            #   - first word is followed by a closing bracket.
            my $label = '';
            if (   $text =~ s/\A\s*(\S+)\s\s//
                || $text =~ s/\A\s*(\w+\.)//
                || $text =~ s/\A\s*(\w+\))// )
            {
                $label = '\\item{' . escape_tex($1) . '}';
            }

            my $body        = $text . "\n";
            my $count       = 1;         # lines collected so far
            my $longest     = 0;         # longest continuation line seen
            my $prev_indent = $indent;
            my $prev_width  = $width;

            # Collect continuation lines until one of them ends the
            # paragraph. A line that ends it is left for the main loop,
            # except for an empty line, which is consumed.
            while ($i < @lines) {
                my $next        = $lines[$i];
                my $next_width  = length $next;
                my $next_indent = ($next =~ s/\A(\s+)//) ? length($1) : 0;
                $longest = $next_width if $next_width > $longest;

                # The current line is empty
                if ($next eq '') {
                    $i++;
                    last;
                }
                # Not the first continuation line, and its indentation
                # differs from the last line of the paragraph
                last if $count > 1 && $prev_indent != $next_indent;
                # The last line of the paragraph is shorter than half of
                # the longest line seen
                last if $prev_width < $longest / 2;
                # The current line is a horizontal rule
                last if is_rule_line($next);

                $body .= $next . "\n";
                $count++;
                $i++;
                $prev_indent = $next_indent;
                $prev_width  = $next_width;
            }

            # The extra newline gives the blank line that ends a TeX
            # paragraph.
            push @out, $label . escape_tex($body) . "\n";
        }
    }
    return @out;
}

my $infile = $ARGV[0];
die "Usage: $0 textfile > texfile\n" unless defined $infile;

my @lines = read_lines($infile);

# Print some TeX commands in the output file
# Redefine the paragraph skip
print '\parskip=0pt plus 2pt', "\n";
# Define some macros
print '\def\emptylineskip{\vskip10pt plus 2pt minus 2pt}', "\n";
print '\def\horizontalline{\vskip2mm\hrule\vskip2mm}', "\n";
print "\n";

print convert(@lines);

print "\n\\end\n";