[comp.lang.postscript] NROFF to PS conversion

silvert@dalcs.UUCP (Bill Silvert) (02/12/88)

Some time back I posted a request for an nroff to postscript converter.
I got some suggestions and was able to dig up a few useful files.  I
also got a lot of echoes from others looking for the same.  So now
that I have kludged together a working system, I decided to post it in
case anyone else wants it, or in case anyone wants to improve it.

95% of the code was written by someone else, as you can see from the
internal documentation.  It didn't work for me until I hacked it a bit,
which is why I am reposting other people's code.

System design is simple and dirty.  psr pipes its input through rmbs,
which converts all the overstrikes output by nroff into flags -- control
codes governing underlining and boldface.  Then psr converts the file
into postscript.  Note that rmbs is effectively the same as ul, except
that ul does too much interpretation and corrupts the input file.  I
don't have a source license, so I couldn't hack ul.

Apologies to the original authors if they don't like what I did to their
code, but I asked around for current versions and this is what I ended
up with.  I am grateful for the code I got, and it was well-written and
easy to hack.
----------------------- cut here ------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	psr.c
#	rmbs.c
# This archive created: Wed Feb 10 14:39:41 1988
# By:	William Silvert (Modelling/Statistics Group, BIO)
export PATH; PATH=/bin:/usr/bin:$PATH
if test -f 'psr.c'
then
	echo shar: "will not over-write existing file 'psr.c'"
else
cat << \SHAR_EOF > 'psr.c'
/*
 * psr: convert NROFF output files to PostScript
 *      missing compressed & expanded modes, auto margins and graphics.
 *
 * Copyright (c) 1986, Eric Gisin, <egisin@waterloo.CSNET>
 *      This program may be redistributed in source form,
 *      provided no fee is charged and this copyright notice is preserved.
 *
 * Converted to NROFF filter by W. Silvert, Nov. 1987.
 *	There is a pipe for NROFF output through rmbs, which works like ul(1).
 */

static char SCCSID[] = "@(#)psr.c	Ver. 1.7, 88/02/09 13:42:38";

#include <stdio.h>
#include <ctype.h>

/* Position units: HRES per inch horizontally, VRES per inch vertically.
 * These match the Hscale (72/60) and Vscale (72/-216) factors in Head. */
#define HRES    60
#define VRES    (3*72)

/* Font flag helpers; each expands to an expression on an int variable
 * named `font' that must be in scope at the point of use. */
#define setfont(F)      font |= (F)
#define clrfont(F)      font &= ~(F)
#define tstfont(F)      (font&(F))
/* Flag bits.  The low two bits index the Courier variant in the F
 * procedure of Head (bit 0 bold, bit 1 oblique). */
#define BOLD	1
#define ITALIC	2
/* When set, the F procedure scales the font to 10 instead of 12. */
#define PITCH12 0x04
/* Tested by psr to draw underscores under text; nothing in the code
 * shown here ever sets it. */
#define UNDLINE 0x08

/* Shell pipeline every input is passed through before psr reads it. */
#define FILTER	"col|rmbs"

/*
 * PostScript prologue, written one string per line before any page output.
 * It defines the short operators used by the page bodies:
 *   M   move to a position given as (vertical, horizontal) in device units
 *   F   pick a Courier variant and size from a font flag word
 *   S   show a string
 *   P+  save the graphics/VM state at the start of a page
 *   P-  show the page and restore the saved state
 *   G   graphics placeholder; discards its two operands
 * The table ends with a NULL pointer.
 *
 * Each entry must be a single string literal on one source line; literals
 * that had been split by 80-column wrapping are rejoined here.
 */
char *  Head [] = {
        "%!PS-Adobe-1.0",
        "%%Creator: E. Gisin & W. Silvert",
        "%%For: NROFF -> PS processing",
        "%%DocumentFonts: Courier Courier-Bold Courier-Oblique Courier-BoldOblique",
        "%%Pages: (atend)",
        "",
        "/Hscale 72 60 div def",
        "/Vscale 72 -216 div def",
        "/Vlength 72 11 mul def",
        "/Xlate {Hscale mul exch Vscale mul Vlength add} def",
        "/M {Xlate moveto} def",
        "/F {",
        "       /f exch def",
        "       [/Courier /Courier-Bold /Courier-Oblique /Courier-BoldOblique]",
        "       f 16#3 and get findfont",
        "       f 16#4 and 0 eq {12} {10} ifelse",
        "       f 16#10 and 0 ne {2 sub} if",
        "       scalefont setfont",
        "       f 16#10 and 0 ne {0 f 16#20 and 0 ne {4} {-2} ifelse rmoveto} if",
        "} def",
        "/S {show} def",
        "/P+ {/Save save def} def",
        "/P- {gsave showpage grestore Save restore} def",
        "/G {pop pop} def",     /* Graphics, not implemented */
        "20 0 translate",	/* indent a bit over 3 characters */
        NULL};

/* Number of pages started so far; psr increments it, main reports it
 * in the trailer. */
int     page = 0;
/* Non-zero once a -w or -l option has been seen; psr then uses the
 * shorter page length. */
int     landscape = 0;
/* Hex digit table; not referenced by the code shown here. */
char    hexchar [] = "0123456789ABCDEF";
/* argv[0], and the command-line argument currently being processed. */
char	*progname, *filename;

/*
 * main: write the PostScript prologue, convert every input, then write
 * the trailer with the final page count.
 *
 * Arguments are processed in order.  -w or -l emits the landscape
 * transformation and sets the landscape flag for the inputs that follow;
 * any other option prints a usage message and exits with status 1.
 * Every other argument is taken as a file name and read through the
 * shell command "cat <file> | FILTER".  When no file is named, FILTER is
 * run on standard input instead.
 *
 * A file whose command line would not fit in the local buffer is
 * reported on stderr and skipped rather than overflowing the buffer.
 */
int
main(argc, argv)
        int     argc;
        char ** argv;
{
        int     i;
        int     n;
        int     nofiles = 1;
        FILE    *popen(), *f;
        char    bufferin[256];

        progname = argv[0];
        for (i = 0; Head[i]!=NULL; i ++)
                printf("%s\n", Head[i]);
        for (i = 1; i < argc; i ++) {
            filename = argv[i];
            if (filename[0]=='-')
                switch (filename[1]) {
                  case 'w':
                  case 'l':
                        printf("-20 0 translate "); /* reverse default */
                        printf("[0 -.727 0.727 0 3 737] concat\n");
                        printf("/Vlength 72 8.5 mul def\n");
                        landscape++;
                        break;
                  default:
                        fprintf(stderr, "Usage: %s [-w] file ...\n", progname);
                        exit(1);
                }
            else {
                nofiles = 0;
                /*
                 * NOTE(review): the file name is handed to the shell
                 * unquoted, so names containing blanks or shell
                 * metacharacters are interpreted by the shell.
                 */
                n = snprintf(bufferin, sizeof bufferin,
                                "cat %s | %s", filename, FILTER);
                if (n < 0 || n >= (int)sizeof bufferin) {
                        /* command would have been truncated: skip it */
                        fprintf(stderr, "%s: File name too long: %s\n",
                                        progname, filename);
                        continue;
                }
                f = popen(bufferin, "r");
                if (f==NULL) {
                        fprintf(stderr, "%s: Can't open input %s\n",
                                        progname, filename);
                        continue;
                }
                psr(f);
                pclose(f);
            }
        }
        if (nofiles) {
                f = popen(FILTER, "r");
                if (f==NULL) {
                        fprintf(stderr, "%s: Can't open pipe\n", progname);
                        exit(1);
                }
                psr(f);
                pclose(f);
        }
        /* trailer, followed by a control-D end-of-job marker */
        printf("%%%%Trailer\n%%%%Pages: %d\n\004", page);
        exit(0);
}

/*
 * psr: read filtered NROFF output from f and write the corresponding
 * PostScript page bodies to stdout.
 *
 * The current position is kept in device units (hpos, vpos).  Runs of
 * printable characters are emitted as "(text)S", preceded by a
 * "<vpos> <hpos> M" when the position changed and a "<font> F" when the
 * font flags changed.  Each page is bracketed by "%%Page: ? n" / "P+" and
 * "P-"; the global page counter is incremented for every page started.
 *
 * Control characters understood: BS, HT, LF, VT, FF, CR, SO/SI (bold
 * on/off), 034/035 (move up/down a third of a line) and 036/037 (italic
 * on/off).  ESC followed by anything is reported on stderr and ignored.
 *
 * The function returns 0 once getc() reports EOF (end of input or a read
 * error); any page still open at that point is closed with "P-", even
 * when the input does not end with a newline.
 */
int
psr(f)
        register FILE * f;
{
        register int c;
        register int i;
        int     started = 0;    /* any output on page? */
        int     font = 0, lastfont = -1;
        int     hpos = 0, hlast = -1, hspace = HRES/10, htab = HRES*8/10;
        int     hbeg = 0;
        /* vlast starts at an impossible value so it is never read unset */
        int     vpos = 0, vlast = -1, vspace = VRES/6, vtab = 0;
        int     vlength = VRES*11;

        if (landscape)
                vlength = (VRES*17)/2;

        for (;;) switch ((c = getc(f))) {
          case '\033':                  /* escape code */
                c = getc(f);
                if (c != EOF) {
                        fprintf(stderr,"Illegal ESC-%c combination\n",c);
                        break;
                }
                /* FALLTHROUGH: input ended right after the ESC */
          case EOF:
                /* close the page in progress and stop; testing the
                 * value returned by getc (not feof) also ends the loop
                 * on a read error */
                if (started)
                        fprintf(stdout, "P-\n");
                return 0;
          case 0x00:    /* tab terminator ? */
                break;
          case 0x08:    /* backspace */
                hpos -= hspace;
                break;
          case 0x09:    /* Hor tab */
                if (!htab)
                        break;
                /* advance to the first tab stop beyond hpos */
                for (i = hbeg; i<=hpos; i += htab)
                        ;
                hpos = i;
                break;
          case 0x0A:    /* line feed */
                hpos = hbeg;
                vpos += vspace;
                goto CheckPage;
          case 0x0B:    /* vert tab */
                if (!vtab)
                        break;
                for (i = 0; i<=vpos; i += vtab)
                        ;
                vpos = i;
                break;
          case 0x0C:    /* form feed */
                vpos = vlength;

CheckPage:
                if (vpos<vlength)
                        break;
                /* output page end */
                vpos %= vlength;
                if (started)
                        fprintf(stdout, "P-\n");
                started = 0;
                break;
          case 0x0D:    /* return */
                hpos = hbeg;
                break;
          case '\016':  /* so */
                setfont(BOLD);
                break;
          case '\017':  /* se */
                clrfont(BOLD);
                break;
          case '\034':          /* normally used for superscripts */
                vpos -= vspace/3;
                break;
          case '\035':          /* normally used for subscripts */
                vpos += vspace/3;
                break;
          case '\036':  /* us */
                setfont(ITALIC);
                break;
          case '\037':  /* ue */
                clrfont(ITALIC);
                break;
          case ' ':
                /* a blank only needs printing when it must be underlined */
                if (tstfont(UNDLINE))
                        goto Printing;
                hpos += hspace;
                break;
          default:
                if (!isprint(c&0x7F))
                        break;

Printing:
                /* output page begin */
                if (!started) {
                        fprintf(stdout, "%%%%Page: ? %d\nP+\n", ++page);
                        started = 1;
                        /* force a fresh font and position on the new page */
                        lastfont = -1;
                        hlast = -1;
                }
                /* output position */
                if (vpos!=vlast || hpos!=hlast) {
                        fprintf(stdout, "%d %d M ", vpos, hpos);
                        vlast = vpos; hlast = hpos;
                }
                /* output font information */
                if (font!=lastfont) {
                        fprintf(stdout, "%d F ", font);
                        lastfont = font;
                }
                /* output text, escaping PostScript string delimiters */
                putc('(', stdout);
                while (c==' ' || isprint(c&0x7F)) {
                        if (c=='\\' || c=='(' || c==')')
                                putc('\\', stdout);
                        putc(c, stdout);
                        hpos += hspace;
                        c = getc(f);
                }
                fputs(")S\n", stdout);
                /* hand the terminating character back to the main loop;
                 * at EOF the next getc simply reports EOF again */
                if (c != EOF)
                        ungetc(c, f);
                /* underline text */
                if (tstfont(UNDLINE)) {
                        i = hpos;
                        hpos = hlast;
                        fprintf(stdout, "%d %d M ", vpos, hpos);
                        fprintf(stdout, "%d F ", font&PITCH12);
                        lastfont = font&PITCH12;
                        putc('(', stdout);
                        while (hpos<i) {
                                putc('_', stdout);
                                hpos += hspace;
                        }
                        putc(')', stdout);
                        fputs("S\n", stdout);
                }
                hlast = hpos;
                break;
        }
}
SHAR_EOF
fi
if test -f 'rmbs.c'
then
	echo shar: "will not over-write existing file 'rmbs.c'"
else
cat << \SHAR_EOF > 'rmbs.c'
/*	rmbs.c	(V.1.0)
 *
 *	Rmbs converts the output of text formatters like proff or nroff
 *	(not tested) into a form that is less harmful to the mechanics
 *	of a modern matrix printer. These formatters highlight text by a
 *	technique called overstriking. Rmbs tries to detect the character
 *	sequences that are output for bold, bold and underlined and 
 *	underlined text and converts them to the corresponding control
 *	codes of the printer.
 *
 *	Proff outputs the following sequences:
 *	Bold:			'char' '\b' 'char'
 *	Underlined:		'_' '\b' 'char'
 *	Bold & Underlined:	'_' '\b' '_' '\b' 'char' '\b' 'char'
 *	This implies an ambiguity that cannot be easily resolved: What
 *	does '_' '\b' '_' stand for? Is it a bold or an underlined '_'?
 *	For ease of coding I've decided that it's a bold '_'.
 *
 *	Because the printer control codes are hardwired into the program, 
 *	you must build a different version of the program for every printer
 *	you have. As most people (including me) have only one printer, I
 *	didn't want to bother with a suitable data base of printer codes.
 *
 *	Although there's a high probability, that there's no true original
 *	idea in this program, you should give me credit, if you debug,
 *	modify, enhance, or give it away. Lightning shall strike your
 *	computer(s), if you sell this program or any close derivatives
 *	for profit.
 *
 *	Michael Doerr	(uucp: ...!seismo!unido!uklirb!mdoerr)
 *	University of Kaiserslautern (West Germany)
 */

/* This is the only printer specific part of the program. Change the
 * control codes according to your printer manual.
 * These codes are generic ones for driving a further filter:
 */
#define	BOLD_ON		"\016"
#define	BOLD_OFF	"\017"
#define	ULIN_ON		"\036"
#define	ULIN_OFF	"\037"


#include <stdio.h>

/* character attributes: bit flags stored in elem.s.attrib */
#define	NONE		0
#define	BOLD		1
#define	ULIN		2

typedef	int	WORD;
typedef	char	BYTE;
/* One output column: the character and its attribute bits.  The WORD
 * member overlays both, so the code clears a column with a single
 * store of 0 to .w. */
typedef	union	{
    WORD	w;
    struct	{
	BYTE    ch;
	BYTE    attrib;
    } s;
} elem;

/* Capacity of the line buffer, in columns. */
#define	line_len	300
/* The line currently being assembled; print_line zeroes each column
 * after writing it. */
elem	line[line_len];

main(argc, argv)
int	argc;
char	**argv;
{
    int     i, high;
    BYTE    ch;

    for (i = 0; i < line_len;)
	line[i++].w = 0;
    i = high = 0;

    while ((ch = getchar()) != EOF)
    {
	if (ch == '\n')
	{
	    print_line(high);
	    i = high = 0;
	    continue;
	}
	else if (ch == '\b')
	    i--;
	else if (line[i].s.ch == '\0')
	    line[i++].s.ch = ch;
	else if (line[i].s.ch == ch)
	    line[i++].s.attrib |= BOLD;
	else if (line[i].s.ch == '_')
	{
	    line[i].s.ch = ch;
	    line[i++].s.attrib |= ULIN;
	}
	else if (ch == '_')
	    line[i++].s.attrib |= ULIN;
	if (i < 0)
	    i = 0;
	else if (high < i)
	    high = i;
	if (i == line_len)
	    i = line_len-1;
    } /* while */
    if (high > 0)
	print_line(high);
} /* main */

/*
 * print_line: write the first `high' columns of the global line buffer
 * to stdout, followed by a newline.
 *
 * Whenever a column's attributes differ from those currently in effect,
 * change_state is called first to emit the switching codes.  Every
 * column is zeroed once written, and any attribute still active at the
 * end of the line is switched off before the newline.
 */
print_line(high)
int	high;
{
    int     col;
    BYTE    active = NONE;

    for (col = 0; col < high; col++)
    {
	BYTE wanted = line[col].s.attrib;

	if (wanted != active)
	{
	    change_state(active, wanted);
	    active = wanted;
	}
	putchar(line[col].s.ch);
	line[col].w = 0;
    }

    if (active != NONE)
	change_state(active, NONE);
    putchar('\n');
} /* print_line */

/* Write a control string to stdout.  This is a single expression, so it
 * is safe in any statement context and needs no scratch variable. */
#define	putstr(str)	fputs((str), stdout)

/*
 * change_state: emit the control codes needed to go from attribute set
 * `old' to attribute set `new'.
 *
 * Only attributes whose bit differs between the two sets produce output:
 * BOLD_ON/BOLD_OFF for the BOLD bit, then ULIN_ON/ULIN_OFF for the ULIN
 * bit.
 */
change_state(old, new)
BYTE	old, new;
{
    BYTE    changed = old ^ new;

    if (changed & BOLD)
    {
	if (new & BOLD)
	    putstr(BOLD_ON);
	else
	    putstr(BOLD_OFF);
    }
    if (changed & ULIN)
    {
	if (new & ULIN)
	    putstr(ULIN_ON);
	else
	    putstr(ULIN_OFF);
    }
}

SHAR_EOF
fi
exit 0
#	End of shell archive

-- 
Bill Silvert, Modelling/Statistics Group, Biological Sciences Branch
Bedford Institute of Oceanography, Dartmouth, NS, Canada B2Y 4A2
	UUCP: ...!{uunet,utai,watmath}!dalcs!biomel!bill
	CDN or BITNET: biomel@cs.dal.cdn