[sci.bio] Open Reading Frame -> PostScript filter

craigw@aecom.YU.EDU (Craig Werner) (08/31/88)

The following C program, when compiled, takes as input a DNA sequence
(straight text, A C G T/U) and outputs a PostScript file which, when sent
to the appropriate printer, prints out an Open Reading Frame diagram in
both orientations.
	As an added bonus, the intermediate PostScript output is at least
marginally human-readable, so one can actually pick out the open reading
frames from the PostScript code for use with a program like 'subseq.'
My only faux pas in this version is that the numbers for the opposite
strand (reverse orientation) are with respect to the forward strand.  I plan
to correct that some day, but since all other programs only work with
open reading frames in the forward strand, I would have to 'revcomp' it
anyway.

	Oh, by the way, the Plasmid Description Language, announced about
a month ago, is available by mail for interested parties.  I'll post
more information in a few days.

---------cut here ----------------
/* ------------------------------------------------------------ */
/* orfps.c							*/
/* An Open Reading Frame --> PostScript converter		*/
/* copyright 1988 by Craig Werner				*/
/* 			Dept. of Microbiology and Immunology 	*/
/*		and the Medical Scientist Training Program	*/
/* 			Albert Einstein College of Medicine	*/
/*			craigw@aecom.YU.EDU 			*/
/* Some rights reserved						*/
/* ------------------------------------------------------------ */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define		MAXSEQ	32767

/* Declarations of the routines defined later in this file, so that the
 * calls in main are checked against their parameter lists. */
int getseq(char *a, char *ifname);
int writeps(char *a, char *ofname);
int findstop(char *a, char *ofname);
int footer(char *ofname);

/*
 * main -- command-line driver.
 *
 *   argv[1]  input file holding the sequence text (required)
 *   argv[2]  output file for the PostScript (optional; when absent an
 *            empty name is passed on, which the output routines treat
 *            as "write to standard output")
 *
 * Exit status: 1 on a usage error, 2 when the input and output names are
 * identical, 0 on success.  The called routines exit with their own
 * codes when a file cannot be opened.
 */
int main(int argc, char *argv[])
{
	char a[MAXSEQ];
	static char no_name[] = "";
	char *ifname;
	char *ofname;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s in-file out-file\n", argv[0]);
		fprintf(stderr, "If no out-file is specified, result sent to standard output.\n");
		exit(1);
	}

	/* Use the argument strings in place: copying them into small fixed
	 * buffers would overflow on any path longer than the buffer. */
	ifname = argv[1];
	if (argc > 2) {
		ofname = argv[2];
		if (strcmp(ifname, ofname) == 0) {
			fprintf(stderr, "In-file and out-file names must be different\n");
			exit(2);
		}
	} else {
		ofname = no_name;
	}

/* PROGRAM REALLY BEGINS BELOW */

	getseq(a, ifname);
	writeps(a, ofname);
	findstop(a, ofname);
	footer(ofname);
	return 0;
}


/*
 * getseq -- read a sequence from a text file into a buffer.
 *
 *   a       destination buffer; must have room for MAXSEQ bytes.  The
 *           whole buffer is zeroed first, so the result is always
 *           NUL-terminated and zero-padded to MAXSEQ.
 *   ifname  name of the file to read.
 *
 * Every character is upper-cased.  Only alphanumeric and punctuation
 * characters are stored (whitespace and control characters are dropped),
 * and 'U' is stored as 'T'.  At most MAXSEQ - 1 characters are kept; if
 * the input holds more, the rest is ignored and a warning is printed.
 *
 * Exits with status 3 if the file cannot be opened.  Returns 0.
 */
int getseq(char *a, char *ifname)
{
	int c;			/* int, not char, so EOF stays distinct from data */
	int n = 0;
	int truncated = 0;
	FILE *ifp;

	if ((ifp = fopen(ifname, "r")) == NULL) {
		fprintf(stderr, "Unable to open file %s\n", ifname);
		exit(3);
	}

	memset(a, 0, MAXSEQ);

	while ((c = fgetc(ifp)) != EOF) {
		c = toupper(c);
		if (!isalnum(c) && !ispunct(c))
			continue;
		if (n >= MAXSEQ - 1) {	/* keep the last byte as terminator */
			truncated = 1;
			break;
		}
		a[n++] = (c == 'U') ? 'T' : (char) c;
	}
	fclose(ifp);

	if (truncated)
		fprintf(stderr, "Sequence in %s truncated to %d characters\n",
			ifname, MAXSEQ - 1);
	return 0;
}

/*
 * open_output -- select the stream one section of the output is written to.
 * An empty ofname selects standard output; otherwise the named file is
 * opened with the given fopen mode.  Exits with status 4 if the file
 * cannot be opened.
 */
static FILE *open_output(char *ofname, const char *mode)
{
	FILE *ofp;

	if (ofname[0] == '\0')
		return stdout;
	if ((ofp = fopen(ofname, mode)) == NULL) {
		fprintf(stderr, "Unable to open file %s\n", ofname);
		exit(4);
	}
	return ofp;
}

/*
 * close_output -- finish with a stream obtained from open_output.
 * A named file is closed so that its contents are on disk before the next
 * section reopens it; standard output is only flushed.  Exits with status
 * 4 if the pending data cannot be written.
 */
static void close_output(FILE *ofp)
{
	int failed;

	if (ofp == stdout)
		failed = (fflush(ofp) == EOF);
	else
		failed = (fclose(ofp) == EOF);
	if (failed) {
		fprintf(stderr, "Error writing output\n");
		exit(4);
	}
}

/*
 * findstop -- append one "<frame> <index> STOP" line per stop codon.
 *
 *   a       NUL-terminated sequence.
 *   ofname  output file name (appended to), or "" for standard output.
 *
 * Forward strand: frames 1..3 report the 0-based index of the first base
 * of each TAA, TAG or TGA triplet.
 * Reverse strand: frames -1..-3 report the index of the last base of each
 * TTA, CTA or TCA triplet (the reverse complements of the stop codons);
 * indices are still counted along the forward strand.
 *
 * Returns 0.
 */
int findstop(char *a, char *ofname)
{
	int len = (int) strlen(a);
	int f, j;
	FILE *ofp = open_output(ofname, "a");

	for (f = 0; f < 3; f++) {
		/* j + 2 < len keeps the whole triplet inside the string */
		for (j = f; j + 2 < len; j += 3) {
			if (a[j] != 'T')
				continue;
			if ((a[j+1] == 'A' && (a[j+2] == 'A' || a[j+2] == 'G')) ||
			    (a[j+1] == 'G' && a[j+2] == 'A'))
				fprintf(ofp, "%d %d STOP\n", f + 1, j);
		}
	}

	for (f = 3; f < 6; f++) {
		/* The triplet ends at j, so the smallest usable j is 2.  That
		 * index belongs to the same frame as j = 5; start that frame
		 * there so the triplet at indices 0..2 is examined too. */
		for (j = (f == 5) ? 2 : f; j < len; j += 3) {
			if (a[j] != 'A')
				continue;
			if ((a[j-1] == 'T' && (a[j-2] == 'T' || a[j-2] == 'C')) ||
			    (a[j-1] == 'C' && a[j-2] == 'T'))
				fprintf(ofp, "%d %d STOP\n", 2 - f, j);
		}
	}

	close_output(ofp);
	return 0;
}

/*
 * writeps -- write the PostScript prologue.
 *
 *   a       NUL-terminated sequence; only its length is used, to define
 *           the horizontal distance per base ("width").
 *   ofname  output file name (created or truncated), or "" for standard
 *           output.
 *
 * The prologue defines the page geometry, draws the horizontal frame
 * lines with their labels, and defines the STOP procedure that the lines
 * written by findstop invoke.  Returns 0.
 */
int writeps(char *a, char *ofname)
{
	unsigned long len = (unsigned long) strlen(a);
	FILE *ofp = open_output(ofname, "w");

	/* An empty sequence would otherwise emit a division by zero into the
	 * PostScript; no STOP line can follow in that case, so any non-zero
	 * divisor is harmless. */
	if (len == 0)
		len = 1;

	fprintf(ofp,"%%!\n");
	fprintf(ofp, "\n");

	fprintf(ofp, "/inch {72 mul} def \n");
	fprintf(ofp, "\n");

	fprintf(ofp, "0.5 setlinewidth\n");
	fprintf(ofp, "/PS 10 def \n");
	fprintf(ofp, "/Times-Roman findfont PS scalefont setfont \n");
	fprintf(ofp, "\n");

	fprintf(ofp, "/LM 0.5 inch def \n");
	fprintf(ofp, "/RM 8.0 inch def \n");
	fprintf(ofp, "/Y 9 inch def \n");
	fprintf(ofp, "/dL 0.25 inch def \n");
	fprintf(ofp, "/width RM LM sub %lu div def\n", len);
	fprintf(ofp, "\n");


	fprintf(ofp, "/str 3 string def \n");
	fprintf(ofp, "1 1 4 { /n exch def \n");
	fprintf(ofp, "		LM Y dL n mul add moveto \n");
	fprintf(ofp, "		RM 0 rlineto stroke \n");
	fprintf(ofp, "		LM Y dL n mul sub moveto  \n");
	fprintf(ofp, "		RM 0 rlineto stroke \n");
	fprintf(ofp, "	      } for \n");
	fprintf(ofp, "1 1 3 { /n exch def  \n");
	fprintf(ofp, "		LM PS sub 		Y dL n mul add dL 2 div add PS 3 div sub moveto \n");
	fprintf(ofp, "		n str cvs show \n");
	fprintf(ofp, "		LM PS PS add sub  	Y dL n mul sub dL 2 div sub PS 3 div sub moveto \n");
	fprintf(ofp, "		(\\261) show n str cvs show \n");
	fprintf(ofp, "		} for \n");
	fprintf(ofp, " \n");

	fprintf(ofp, "/STOP {\n");
	fprintf(ofp, " /site exch def \n");
	fprintf(ofp, " /frame exch def \n");
	fprintf(ofp, " LM site width mul add	Y frame dL mul add moveto \n");
	fprintf(ofp, " 0 frame frame abs div dL mul rlineto stroke \n");
	fprintf(ofp, "} def \n");

	close_output(ofp);
	return 0;
}

/*
 * footer -- append the closing PostScript lines.
 *
 *   ofname  output file name, or "" for standard output.
 *
 * The file is opened for appending: opening it for writing would discard
 * the prologue and STOP lines already in it.  Returns 0.
 */
int footer(char *ofname)
{
	FILE *ofp = open_output(ofname, "a");

	fprintf(ofp, "%%footer\n");
	fprintf(ofp, "showpage\n");

	close_output(ofp);
	return 0;
}
/* -------------------- End of orfps.c ----------------------------- */
----------- cut here too ---------

-- 
---------
	Craig Werner 	(Lognames: werner, craigw)
       "This is no social crisis, just another tricky day for you."