rich@Rice.edu (Carey Richard Murphey) (07/26/90)
Here's a handy utility for those who use medline (a.k.a. BRS/OnSite).
The following perl program converts output from BRS/OnSite database
into a LaTeX style (bibtex) bibliography. For those of you who use
BRS/OnSite, you can use this to download bibliographic references by
capturing the output to your vt100 compatible terminal and runing it
through this filter.
I'm a novice to perl, so any pointers or improvements (even on style)
are welcome. Hope someone finds it useful... Rich@Rice.edu
#!/usr/local/bin/perl -- # -*-Perl-*-
# this perl script converts medline vt100 output into bibtex format.
################################################################
$* = 1; # treat strings as a multi-line buffers.
while ( !eof (STDIN) ) {
$data = <STDIN>;
# remove all the vt100 escape sequences.
# \033 is escape ^[
# data starts on line 3 and extends through line 18
################################################################
print STDERR "removing vt100 escape sequences.\n";
$data =~ s/\033\[\dm//g; # font change
$data =~ s/\033\[2J//g; # clear whole screen
$data =~ s/\033\[0[12]\;\d+H/\001/g; # movement to top of screen
$data =~ s/\033\[19\;\d+H/\002/g; # movement to bottom of screen
$data =~ s/\033\[2\d\;\d+H/\002/g; # movement to bottom of screen
$data =~ s/\033\[\d+\;002H/\n/g; # new line of data
$data =~ s/\033\[\d+\;001H/\n/g; # user prompt
$data =~ s/\033\[\d+\;\d+H/ /g; # cursor movement
$data =~ s/\007//g; # delete bells
# remove all medline headers, commands and queries to the user.
################################################################
print STDERR "removing miscellaneous junk.\n";
$data =~ s/\001SCREEN +\d+ +OF +\d+ *//g;
$data =~ s/\001MESH *//g;
$data =~ s/\001ANSWER +\d+ *//g;
$data =~ s/\002PRESS ENTER FOR NEXT SCREEN, ENTER A SCREEN NUMBER, N FOR NEXT DOCUMENT, *//g;
$data =~ s/\002 *OR A COMMAND--> *//g;
$data =~ s/[\001\002] *//g;
$data =~ s/ +\n +/\n/g; # eliminate trailing and leading spaces
$data =~ s/\n{2,}/\n/g; # crush out blank lines
$data =~ s/\n-END OF DISPLAY REQUEST-.*/\n/g;
$data =~ s/\nSEARCH +\d+.*/\n/g;
$data =~ s/\nPRESS ENTER FOR NEXT.*/\n/g;
$data =~ s/\nANSWER +\d+.*/\n/g;
$data =~ s/\nENTER TI (TITLE ONLY), S.*/\n/g;
$data =~ s/\nENTER DOCUMENT NUMBERS.*/\n/g;
$data =~ s/\nENTER SCREEN NUMBER.*/\n/g;
$data =~ s/\nENTER SEARCH TERMS.*/\n/g;
$data =~ s/\n{2,}/\n/g; # crush out blank lines
################################################################
@line = split (/\n/, $data); # split the data into lines
$data = 0;
$* = 0;
# open (FOO, ">foo");
# foreach $n (@line) { print FOO $n, "\n"; }
# close FOO;
print STDERR "extracting bibliographic data.\n";
# take each abstract and put the authors, tittle, etc. into separate arrays.
$abnum = -1;
for ($n = 0; $n <= $#line; $n ++) {
if (@line[$n] =~ s/^ *AU //) {
$abnum ++;
print STDERR $abnum, " ";
@author[$abnum] = @line[$n];
}
if (@line[$n] =~ s/^ *TI //) {
@title[$abnum] = @line[$n];
}
if (@line[$n] =~ s/^ *SO //) {
@source[$abnum] = @line[$n];
}
if (@line[$n] =~ s/^ *AB //) {
@abstract[$abnum] = @line[$n];
}
}
print STDERR "\n";
@line = (0);
print STDERR "reformatting references.\n";
for ($n = 0; $n <= $abnum; $n ++) {
print STDERR $n, " ";
# form a keyword from the author's first name and date of the article
$keyn = "???" . $n;
if (@author[$n] =~ /\w+/) {
$keyn = $&; }
if (@source[$n] =~ /19(\d\d)\b/) {
$keyn .= "-" . $1; }
# convert the author list to bibtex format
@authors = split(/ /, @author[$n]);
$authorlist = "";
foreach $name (@authors) {
$name =~ s/(\w+)-([\w\.\-]+)/\2 \1/;
$name =~ s/-/./;
$name =~ s/\b([A-Z])([A-Z]) /\1.\2. /; # add periods after initials
$authorlist .= $name . " and ";
}
$authorlist =~ s/ and $//;
$authorlist =~ s/\s*and\s+and\s*/ and /g; # why do we need this?
# split the source into journal name, volume, etc.
@so = split(/\./, @source[$n]);
# print the bibtex entry
print "\n@ARTICLE{", $keyn, ",\n";
print "\tAUTHOR = {", $authorlist, "},\n";
print "\tTITLE = {", @title[$n], "},\n";
# journal name
@so[0] =~ s/-/. /g;
@so[0] .= ".";
print "\tJOURNAL = {", @so[0], "},\n";
# volume
if (@so[2] =~ /\s*([\d]+)/) {
print "\tVOLUME = ", $1, ",\n"; }
# number
if (@so[2] =~ /\(([\d]+)\)/) {
print "\tNUMBER = ", $1, ",\n"; }
# pages
if (@so[3] =~ /P ([\d\-]+)/) {
print "\tPAGES = {", $1, "},\n"; }
if (@so[1] =~ /[a-zA-Z]{3}/) {
print "\tMONTH = ", $&, ",\n"; }
if (@so[1] =~ /19(\d\d)\b/) {
print "\tYEAR = ", $&, "\n"; }
else {
print "\tNOYEAR = 0\n"; }
# bibtex does not handle long abstracts, so we provide it as a comment.
if (length @abstract[$n]) {
print "% ", @abstract[$n], "\n"; }
print "}\n";
}
}
print STDERR "\n";
exit 0
--
Rich@Rice.edu