rich@Rice.edu (Carey Richard Murphey) (07/26/90)
#!/usr/local/bin/perl --
# -*-Perl-*-
#
# Original posting notes (Rich@Rice.edu):
#
#   Here's a handy utility for those who use medline (a.k.a. BRS/OnSite).
#   The following perl program converts output from BRS/OnSite database
#   into a LaTeX style (bibtex) bibliography.  For those of you who use
#   BRS/OnSite, you can use this to download bibliographic references by
#   capturing the output to your vt100 compatible terminal and running it
#   through this filter.  I'm a novice to perl, so any pointers or
#   improvements (even on style) are welcome.  Hope someone finds it
#   useful...
#
# This perl script converts medline vt100 output into bibtex format.
#
# Usage: the captured terminal session is read from STDIN, the BibTeX
# entries are written to STDOUT and progress messages go to STDERR.
################################################################

use strict;
use warnings;

# Replace VT100 control sequences with layout markers.
#
# Takes the raw captured text and returns a copy in which
#   - cursor moves to rows 01/02 (top of screen) become \001,
#   - cursor moves to rows 19 and 20-29 (bottom of screen) become \002,
#   - cursor moves to column 002 or 001 become a newline,
#   - every other cursor move becomes a single space,
#   - font changes, clear-screen sequences and bells are deleted.
# \033 is the escape character (^[).
sub strip_vt100 {
    my ($text) = @_;

    $text =~ s/\033\[\dm//g;                # font change
    $text =~ s/\033\[2J//g;                 # clear whole screen
    $text =~ s/\033\[0[12]\;\d+H/\001/g;    # movement to top of screen
    $text =~ s/\033\[19\;\d+H/\002/g;       # movement to bottom of screen
    $text =~ s/\033\[2\d\;\d+H/\002/g;      # movement to bottom of screen
    $text =~ s/\033\[\d+\;002H/\n/g;        # new line of data
    $text =~ s/\033\[\d+\;001H/\n/g;        # user prompt
    $text =~ s/\033\[\d+\;\d+H/ /g;         # cursor movement
    $text =~ s/\007//g;                     # delete bells

    return $text;
}

# Remove medline headers, commands and queries to the user.
#
# Expects text already processed by strip_vt100 (it relies on the \001 and
# \002 markers) and returns the text with the screen decoration, prompts
# and blank lines removed.
sub strip_medline_chrome {
    my ($text) = @_;

    $text =~ s/\001SCREEN +\d+ +OF +\d+ *//g;
    $text =~ s/\001MESH *//g;
    $text =~ s/\001ANSWER +\d+ *//g;
    $text =~ s/\002PRESS ENTER FOR NEXT SCREEN, ENTER A SCREEN NUMBER, N FOR NEXT DOCUMENT, *//g;
    $text =~ s/\002 *OR A COMMAND--> *//g;
    $text =~ s/[\001\002] *//g;
    $text =~ s/ +\n +/\n/g;                 # eliminate trailing and leading spaces
    $text =~ s/\n{2,}/\n/g;                 # crush out blank lines
    $text =~ s/\n-END OF DISPLAY REQUEST-.*/\n/g;
    $text =~ s/\nSEARCH +\d+.*/\n/g;
    $text =~ s/\nPRESS ENTER FOR NEXT.*/\n/g;
    $text =~ s/\nANSWER +\d+.*/\n/g;
    # The parentheses are part of the prompt text, so they must be escaped;
    # unescaped they form a capture group and the prompt never matches.
    $text =~ s/\nENTER TI \(TITLE ONLY\), S.*/\n/g;
    $text =~ s/\nENTER DOCUMENT NUMBERS.*/\n/g;
    $text =~ s/\nENTER SCREEN NUMBER.*/\n/g;
    $text =~ s/\nENTER SEARCH TERMS.*/\n/g;
    $text =~ s/\n{2,}/\n/g;                 # crush out blank lines

    return $text;
}

# Split cleaned text into bibliographic records.
#
# Every "AU" line starts a new record; "TI", "SO" and "AB" lines fill in
# the title, source and abstract of the most recent record (a repeated
# field overwrites the earlier value).  Returns a list of hash references
# with the keys author, title, source and abstract (only those seen).
# The index of each new record is reported on STDERR.
sub extract_records {
    my ($text) = @_;

    my %key_for = (
        AU => 'author',
        TI => 'title',
        SO => 'source',
        AB => 'abstract',
    );

    my @records;
    for my $line (split /\n/, $text) {
        next unless $line =~ /^ *(AU|TI|SO|AB) (.*)/;
        my ($tag, $rest) = ($1, $2);

        if ($tag eq 'AU') {
            push @records, {};
            print STDERR $#records, " ";
        }

        # A field seen before the first AU line belongs to no record.
        next unless @records;
        $records[-1]{ $key_for{$tag} } = $rest;
    }

    return @records;
}

# Convert a medline author field to a BibTeX author list.
#
# Names are separated by blanks in the input; each "Surname-XY" becomes
# "X.Y. Surname" and the names are joined with " and ".
sub format_authors {
    my ($author_field) = @_;

    my @names;
    # split ' ' skips runs of blanks, so no empty names are produced and no
    # " and  and " clean-up is needed afterwards.
    for my $name (split ' ', $author_field) {
        $name =~ s/(\w+)-([\w\.\-]+)/$2 $1/;        # initials before surname
        $name =~ s/-/./;
        $name =~ s/\b([A-Z])([A-Z]) /$1.$2. /;      # add periods after initials
        push @names, $name;
    }

    return join ' and ', @names;
}

# Build the BibTeX text for one record.
#
#   $n      - zero-based position of the record, used for the fallback key
#   $record - hash reference as produced by extract_records
#
# The citation key is the first word of the author field followed by
# "-" and the two-digit year when a 19xx/20xx year is found in the
# source; without an author it is "???" followed by $n.  The source field
# is split on periods into journal, date, volume(number) and pages.
# Returns the entry as a string.  A non-empty abstract follows the closing
# brace as a "% " line: BibTeX does not handle long abstracts, and "%" is
# not a comment character inside an entry.
sub format_entry {
    my ($n, $record) = @_;

    my %field = (author => '', title => '', source => '', abstract => '', %$record);
    my $year_re = qr/((?:19|20)(\d\d))\b/;

    my $key = "???" . $n;
    if ($field{author} =~ /(\w+)/) {
        $key = $1;
    }
    if ($field{source} =~ $year_re) {
        $key .= "-" . $2;
    }

    # Pad so that a short or empty source never yields undefined parts.
    my @so = split /\./, $field{source};
    push @so, '' while @so < 4;
    my ($journal, $date, $volume, $pages) = @so;

    $journal =~ s/-/. /g;
    $journal .= ".";

    # Single quotes: "@ARTICLE" in double quotes would be interpolated as
    # an (empty) array.
    my $entry = join '',
        "\n", '@ARTICLE{', $key, ",\n",
        "\tAUTHOR = {", format_authors($field{author}), "},\n",
        "\tTITLE = {", $field{title}, "},\n",
        "\tJOURNAL = {", $journal, "},\n";

    if ($volume =~ /\s*(\d+)/) {
        $entry .= "\tVOLUME = " . $1 . ",\n";
    }
    if ($volume =~ /\((\d+)\)/) {
        $entry .= "\tNUMBER = " . $1 . ",\n";
    }
    if ($pages =~ /P ([\d\-]+)/) {
        $entry .= "\tPAGES = {" . $1 . "},\n";
    }
    if ($date =~ /([a-zA-Z]{3})/) {
        $entry .= "\tMONTH = " . $1 . ",\n";
    }
    if ($date =~ $year_re) {
        $entry .= "\tYEAR = " . $1 . "\n";
    }
    else {
        $entry .= "\tNOYEAR = 0\n";
    }
    $entry .= "}\n";

    if (length $field{abstract}) {
        $entry .= "% " . $field{abstract} . "\n";
    }

    return $entry;
}

# Convert a whole captured session to BibTeX.
#
# Takes the raw capture as one string, reports progress on STDERR and
# returns the concatenated BibTeX entries (empty string when no record
# was found).
sub medline_to_bibtex {
    my ($raw) = @_;

    print STDERR "removing vt100 escape sequences.\n";
    my $text = strip_vt100($raw);

    print STDERR "removing miscellaneous junk.\n";
    $text = strip_medline_chrome($text);

    print STDERR "extracting bibliographic data.\n";
    my @records = extract_records($text);
    print STDERR "\n";

    print STDERR "reformatting references.\n";
    my $bibtex = '';
    for my $n (0 .. $#records) {
        print STDERR $n, " ";
        $bibtex .= format_entry($n, $records[$n]);
    }

    return $bibtex;
}

# Script entry point: filter STDIN to STDOUT.
#
# The input is read in one piece so that a reference whose fields are
# spread over several physical lines of the capture stays together.
sub main {
    my $raw = do { local $/; <STDIN> };

    if (defined $raw && length $raw) {
        print medline_to_bibtex($raw);
    }
    print STDERR "\n";

    return 0;
}

# Run only when executed as a script, so the helpers can also be loaded
# with require.
main() unless caller;

1;

# --
# Rich@Rice.edu