craigw@aecom.YU.EDU (Craig Werner) (08/31/88)
The following C program, when compiled, takes as input a DNA sequence (straight text, A C G T/U) and outputs a PostScript file, which when sent to the appropriate printer, prints out an Open Reading Frame diagram in both orientations. As an added bonus, the intermediate PostScript output is at least marginally human-readable, so one can actually pick out the open reading frames from the PostScript code for use with a program like 'subseq.' My only faux pas in this version is that the numbers for the opposite strand (reverse orientation) are with respect to the forward strand. I plan to correct that some day, but since all other programs only work with open reading frames in the forward strand, I would have to 'revcomp' it anyway. Oh, by the way, the Plasmid Description Language announced about a month ago, is available by mail for interested parties. I'll post more information in a few days. ---------cut here ---------------- /* ------------------------------------------------------------ */ /* orfps.c */ /* An Open Reading Frame --> PostScript converter */ /* copyright 1988 by Craig Werner */ /* Dept. of Microbiology and Immunology */ /* and the Medical Scientist Training Program */ /* Albert Einstein College of Medicine */ /* craigw@aecom.YU.EDU */ /* Some rights reserved */ /* ------------------------------------------------------------ */ #include <stdio.h> #include <ctype.h> #define MAXSEQ 32767 main(argc, argv) int argc; char *argv[]; { char a[MAXSEQ]; char ifname[16], ofname[16]; if (argc < 2) { fprintf(stderr, "Usage: %s in-file out-file\n", argv[0]); fprintf(stderr, "If no out-file is specified, result sent to standard output.\n"); exit(1); } strcpy (ifname, argv[1]); if (argc > 2) { strcpy (ofname, argv[2]); if (!strcmp(ifname, ofname)) { fprintf(stderr, "In-file and out-file names must be different\n"); exit(2); } } else strcpy(ofname, "\0"); /* PROGRAM REALLY BEGINS BELOW */ getseq(a, ifname); writeps(a, ofname); findstop(a, ofname); footer(ofname); } getseq(a, ifname) char *a; char *ifname; { char c; int i,j,k; FILE *ifp; if ((ifp = fopen(ifname, "r")) == NULL) { fprintf(stderr, "Unable to open file %s\n", ifname); exit (3); } for (i=0; i < MAXSEQ; i++) a[i] = 0; i=0; while ( (c=toupper(fgetc(ifp))) != EOF ) if (isalnum(c) || ispunct(c)) if (c == 'U') a[i++] = 'T'; else a[i++] = c; } findstop(a, ofname) char *a; char *ofname; { int j,f; FILE *ofp; if (strlen(ofname)) { if ((ofp = fopen(ofname, "a")) == NULL) { printf(stderr, "Unable to open file %s\n", ofname); exit(4); } } else ofp = stdout; for (f=0 ; f<3; f++) for (j = f ; a[j]; j = j+3) if ( (a[j] == 'T') ) if ( (a[j+1] == 'A') && ((a[j+2] == 'A') || (a[j+2] == 'G')) ) fprintf(ofp, "%d %d STOP\n",f+1,j); else if ( ( a[j+1] == 'G') && ( a[j+2] == 'A') ) fprintf(ofp, "%d %d STOP\n",f+1,j); for (f=3 ; f<6; f++) for (j = f ; a[j]; j = j+3) if ( a[j] == 'A' ) if ( (a[j-1] == 'T') && ((a[j-2] == 'T') || (a[j-2] == 'C')) ) fprintf(ofp, "%d %d STOP\n",2-f,j); else if ( ( a[j-1] == 'C') && ( a[j-2] == 'T') ) fprintf(ofp, "%d %d STOP\n",2-f,j); } writeps(a, ofname) char *a; char *ofname; { FILE *ofp; if (strlen(ofname)) { if ((ofp = fopen(ofname, "w")) == NULL) { printf(stderr, "Unable to open file %s\n", ofname); exit(4); } } else ofp = stdout; fprintf(ofp,"%%!\n"); fprintf(ofp, "\n"); fprintf(ofp, "/inch {72 mul} def \n"); fprintf(ofp, "\n"); fprintf(ofp, "0.5 setlinewidth\n"); fprintf(ofp, "/PS 10 def \n"); fprintf(ofp, "/Times-Roman findfont PS scalefont setfont \n"); fprintf(ofp, "\n"); fprintf(ofp, "/LM 0.5 inch def \n"); fprintf(ofp, "/RM 8.0 inch def \n"); fprintf(ofp, "/Y 9 inch def \n"); fprintf(ofp, "/dL 0.25 inch def \n"); fprintf(ofp, "/width RM LM sub %d div def\n", strlen(a)); fprintf(ofp, "\n"); fprintf(ofp, "/str 3 string def \n"); fprintf(ofp, "1 1 4 { /n exch def \n"); fprintf(ofp, " LM Y dL n mul add moveto \n"); fprintf(ofp, " RM 0 rlineto stroke \n"); fprintf(ofp, " LM Y dL n mul sub moveto \n"); fprintf(ofp, " RM 0 rlineto stroke \n"); fprintf(ofp, " } for \n"); fprintf(ofp, "1 1 3 { /n exch def \n"); fprintf(ofp, " LM PS sub Y dL n mul add dL 2 div add PS 3 div sub moveto \n"); fprintf(ofp, " n str cvs show \n"); fprintf(ofp, " LM PS PS add sub Y dL n mul sub dL 2 div sub PS 3 div sub moveto \n"); fprintf(ofp, " (\\261) show n str cvs show \n"); fprintf(ofp, " } for \n"); fprintf(ofp, " \n"); fprintf(ofp, "/STOP {\n"); fprintf(ofp, " /site exch def \n"); fprintf(ofp, " /frame exch def \n"); fprintf(ofp, " LM site width mul add Y frame dL mul add moveto \n"); fprintf(ofp, " 0 frame frame abs div dL mul rlineto stroke \n"); fprintf(ofp, "} def \n"); } footer(ofname) char *ofname; { FILE *ofp; if (strlen(ofname)) { if ((ofp = fopen(ofname, "w")) == NULL) { printf(stderr, "Unable to open file %s\n", ofname); exit(4); } } else ofp = stdout; fprintf(ofp, "%%footer\n", ofp); fprintf(ofp, "showpage\n", ofp); } /* -------------------- End of orfps.c ----------------------------- */ ----------- cut here too --------- -- --------- Craig Werner (Lognames: werner, craigw) "This is no social crisis, just another tricky day for you."