[net.sources] WordStar to nroff filter

david@varian.UUCP (03/14/84)

The following program was written several years ago here for the purpose
of moving a few large WordStar files to UNIX and nroff; it was used once
and the output fixed up by hand, and all further changes to those files
were done on UNIX.  The programmer who wrote it is no longer at Varian,
but I am in touch with him, and he gave his OK for posting to the net;
however, he didn't want his name on it, as it is not a finished product.
Folks are welcome to use it as a starting base, and if anyone improves it,
please send me the changes, and I'll see that the author gets them as well.

I believe that there is very little change, if any, in the WordStar file
format from Version 2.0 to the current 3.3; we actually use Version 3.0.

	David Brown	 (415) 945-2199
	Varian Instruments 2700 Mitchell Dr.  Walnut Creek, Ca. 94598
	{ihnp4,tektronix,hplabs,sytek,dual}!zehntel!varian!david
	{amd70,fortune}!varian!david
	...!decvax!sytek!zehntel!varian!david
	...!ucbvax!menlo70!sytek!zehntel!varian!david

/*
	Program:	ws_nroff		Revised:	01-Sep-81

	Purpose:  This program accepts a text file including formatting in-
		formation for "Wordstar", and transforms it to a text file
		including similar formatting information for "nroff".

	Input:	"Wordstar" V2.0 document file via standard input
		(Wordstar is a word processor which runs under CP/M.)

	Output:	"nroff" source file via standard output
		(nroff is a text processor which runs under UNIX V7.)

	WARNING!!!
	This program is incomplete - many WordStar functions are not
	handled but just translated to comments - see the code below.
	In addition, there are problems with un-hyphenating hyphenated
	words and with indentation. There may be a problem with some
	cases of underlining. Output generated from this program
	should be inspected and corrected before feeding to nroff.

	Author: the author wishes to remain anonymous due to the incompleteness
	of the program. However, he would appreciate seeing additions and
	fixes; if you improve this program, please send changes to David
	Brown at Varian: 
	USENET: {zehntel,fortune,amd70}!varian!david
	and I will forward them.
*/
#include	<stdio.h>
#include	<ctype.h>	/*  for "islower", "toupper"		*/
#define	capital(c)	((islower(c))? toupper(c) : (c))

#define	ON	1
#define	OFF	0
#define	LINLIM	(0x100-2)	/*  maximum input line length		*/
#define CHRMASK	'\177'		/*  for stripping flag bit		*/

#define	CR	'\015'		/*  carriage return character		*/
#define SOFTCR	'\215'		/*  discardable carriage return		*/
#define LF	'\012'		/*  line feed (newline) character	*/
#define SOFTLF	'\212'		/*  soft line feed character		*/
#define FF	'\014'		/*  form feed character			*/

#define SOFTSP	'\240'		/*  soft space character		*/
#define	SOFTHPH	'\036'		/*  ^^ is soft hyphen character		*/
#define	SOFTHPH2 '\037'		/*  ^_ is soft hyphen character		*/
#define	BOLDFCE	'\002'		/*  ^B is boldface toggle character	*/
#define	DSTRIKE	'\004'		/*  ^D is doublestrike toggle character	*/
#define	UNDERLN	'\023'		/*  ^S is underline toggle character	*/
#define CPMEOF	'\032'		/*  ^Z is WORDSTAR end-of-file char	*/
/*
	    reference numbers for WORDSTAR "dot" commands
		"MP" denotes "merge-print" commands
*/
/*
    Each code packs the two command letters into one integer: first letter
    in the high byte, second letter in the low byte.  Every expansion is
    wrapped in its own parentheses so that the macro behaves as a single
    value inside any expression; without them "x == DOT" parses as
    "(x == ('.'<<8)) | ' '", which is always non-zero.
*/
#define	AV	(('A'<<8) | 'V')	/*  ask for variable value	MP	*/
#define	BP	(('B'<<8) | 'P')	/*  bidirectional print			*/
#define	CP	(('C'<<8) | 'P')	/*  conditional page			*/
#define	CS	(('C'<<8) | 'S')	/*  clear screen		MP	*/
#define	CW	(('C'<<8) | 'W')	/*  character width			*/
#define	DF	(('D'<<8) | 'F')	/*  data file			MP	*/
#define	DM	(('D'<<8) | 'M')	/*  display message		MP	*/
#define	FI	(('F'<<8) | 'I')	/*  file insert			MP	*/
#define	FM	(('F'<<8) | 'M')	/*  footing margin			*/
#define	FO	(('F'<<8) | 'O')	/*  footing				*/
#define	HE	(('H'<<8) | 'E')	/*  heading				*/
#define	HM	(('H'<<8) | 'M')	/*  heading margin			*/
#define	IG	(('I'<<8) | 'G')	/*  ignore (unprinted comment)		*/
#define	DOT	(('.'<<8) | ' ')	/*  ignore (unprinted comment)		*/
#define	IJ	(('I'<<8) | 'J')	/*  interpret input as justified  MP	*/
#define	LH	(('L'<<8) | 'H')	/*  line height				*/
#define	LM	(('L'<<8) | 'M')	/*  left margin			MP	*/
#define	LS	(('L'<<8) | 'S')	/*  line spacing		MP	*/
#define	MB	(('M'<<8) | 'B')	/*  margin at bottom			*/
#define	MT	(('M'<<8) | 'T')	/*  margin at top			*/
#define	OJ	(('O'<<8) | 'J')	/*  output justification	MP	*/
#define	OP	(('O'<<8) | 'P')	/*  omit page number			*/
#define	PA	(('P'<<8) | 'A')	/*  new page				*/
#define	PC	(('P'<<8) | 'C')	/*  page number column			*/
#define	PF	(('P'<<8) | 'F')	/*  print-time line forming	MP	*/
#define	PL	(('P'<<8) | 'L')	/*  paper length			*/
#define	PN	(('P'<<8) | 'N')	/*  page number				*/
#define	PO	(('P'<<8) | 'O')	/*  page offset				*/
#define	RM	(('R'<<8) | 'M')	/*  right margin		MP	*/
#define	RP	(('R'<<8) | 'P')	/*  repeat			MP	*/
#define	RV	(('R'<<8) | 'V')	/*  read variables		MP	*/
#define	SR	(('S'<<8) | 'R')	/*  subscript/superscript roll		*/
#define	SV	(('S'<<8) | 'V')	/*  set variable		MP	*/
#define	UJ	(('U'<<8) | 'J')	/*  micro-justification on/off		*/

#define NOT_FOUND -1		/*  no-such-command error flag		*/
#define	LISTLEN	34		/*  length of command list		*/

/*  nroff font definitions						*/

#define	ROMAN	'R'
#define	BOLD	'B'
#define	ITALIC	'I'
/**/

main()
/*
    Write the default nroff page setup, then translate standard input one
    line at a time: getln() fetches a line, putln() converts and prints it.
    The loop stops when getln() returns a length of zero (end of input).
    Returns 0 so that the exit status is defined.
*/
{
    int		len;			/*  length of the line just read     */
    int		getln();
    char	line[LINLIM+2];		/*  room for the terminator that     */
					/*  stdterm() may append	     */

    printf(".pl 11i\n");		/*  default page length		*/
    printf(".po 0.8i\n");		/*  default page offset		*/
    printf(".ll 6.5i\n");		/*  default line length		*/
    printf(".pc #\n");			/*  interpret "#" as page no.	*/

    while ((len = getln(line, LINLIM)) != 0)
	putln (line, len);

    return (0);
}					/*  end of "main"  */
/**/

getln(line, maxlen)
/*
    Fill buffer with available characters until end-of-line or end-of-page or
    end-of-file, or until buffer full.

    line	receives the characters, followed by a terminating null;
		it must have room for maxlen+1 characters.
    maxlen	greatest number of characters to store.

    Returns the number of characters stored; zero means end of input.
    The line terminator (LF, soft LF or FF) is stored and counted.
    A complaint is written to stderr when the buffer fills before a
    terminator is seen.
*/
char	line[];
int	maxlen;
{
    register int i = 0;
    int		c;		/*  int, so EOF differs from every byte  */
    char	chr = 0;

    do
    {
	c = getchar();
	if (c == EOF)		/*  test before narrowing to char: a     */
	    break;		/*  0377 data byte must not look like EOF */
	line[i++] = chr = c;
    }
    while (chr != LF && chr != SOFTLF && chr != FF && i < maxlen);

    line[i] = '\0';
    if (i >= maxlen && chr != LF && chr != SOFTLF && chr != FF)
	fprintf (stderr, "getln:  line too long, %x hex\n", i);

    return (i);
}					/*  end of "getln"  */
/**/

putln(line, len)

char	line[];
int	len;				/*  line length up to terminal null  */
{
    static int	vertsp = 0,
		indent = 0,
		softflag =	OFF,
		underline =	OFF,
		boldface =	OFF,
		doublestrike =	OFF;
    static char	font =		ROMAN,
		carryover[LINLIM+2] = 0;

    int		tmp = 0;
    register	i;

    /*		Special actions for beginning of line			*/

    /*		skip control characters					*/

    for (i=0; line[i]==BOLDFCE || line[i] == DSTRIKE || line[i] == UNDERLN; i++)
	;

    /*		squeeze spaces out					*/

    for (tmp = i; line[tmp] == SOFTSP || line[tmp] == ' '; tmp++)
	;
    strcpy (line+i, line+tmp);
    if (indent != tmp-i)
    {
	indent = tmp - i;		/*  set new indentation level	*/
	printf ("'in %d\n", indent);
    }

    /*		check for special lines					*/

    switch (line[i] & CHRMASK)
    {
    case CR:					/*  blank line		    */
	if (line[i] == CR && line[i+1] == LF)	/*  count only hard CR & LF */
	    vertsp++;
	goto endputln;			/*  discard original line	    */
	break;

    case '.':				/*  control line (first char = dot) */
	if (indent != 0)
	{
	    printvsp (vertsp);		/*  issue vertical space collected  */
	    vertsp = 0;
	    printf("\\&");		/*  dot wasn't really first char,   */
	    break;			/*  so hide it from nroff	    */
	}
	stripflags(line);
	switch (cmd(line))		/*  which control command?  */
	{
	case CP:			/*  Conditional Page break  */
	    printvsp (vertsp);		/*  issue vertical space collected  */
	    vertsp = 0;
	    if (sscanf(line+3, "%d", &tmp) == 0)	/*  null argument  */
	    {
		stdterm (line, len);
		printf (".\\\" %s", line);
	    }
	    else					/*  valid argument  */
		printf (".if (\\.h - \\nl < %u) .bp\n", tmp);

	    goto endputln;		/*  discard original line  */

	case CS:			/*  Clear Screen  */
	    goto endputln;		/*  discard original line  */

	case CW:			/*  Character Width  */
	    if (sscanf(&line[3], "%d", &tmp) == 0)	/*  null argument  */
	    {
		stdterm (line, len);
		printf (".\\\" %s", line);
	    }
	    else					/*  valid argument  */
		printf (".ps %d\n", tmp*0.6);
	    goto endputln;		/*  discard original line  */

	case DM:			/*  Display Message on tty  */
	    stdterm (line, len);
	    printf (".tm %s", &line[i]);
	    goto endputln;

	case PA:			/*  Page Advance	*/
	    printvsp (vertsp);
	    vertsp = 0;
	    printf (".bp\n");
	    goto endputln;		/*  discard original line  */

	case DF:			/*  Data File	*/
	case FI:			/*  File Insert  */
	case FO:			/*  Footing Text  */
	case FM:			/*  Footing Margin  */
	case HE:	/*  heading				*/
	case HM:	/*  heading margin			*/
	case IG:	/*  ignore (unprinted comment)		*/
	case DOT:	/*  ignore (unprinted comment)		*/
	case IJ:	/*  interpret input as justified  MP	*/
	case LH:	/*  line height				*/
	case LM:	/*  left margin			MP	*/
	case LS:	/*  line spacing		MP	*/
	case MB:	/*  margin at bottom			*/
	case MT:	/*  margin at top			*/
	case OJ:	/*  output justification	MP	*/
	case OP:	/*  omit page number			*/
	case PC:	/*  page number column			*/
	case PF:	/*  print-time line forming	MP	*/
	case PL:	/*  paper length			*/
	case PN:	/*  page number				*/
	case PO:	/*  page offset				*/
	case RM:	/*  right margin		MP	*/
	case RP:	/*  repeat			MP	*/
	case RV:	/*  read variables		MP	*/
	case SR:	/*  subscript/superscript roll		*/
	case SV:	/*  set variable		MP	*/
	case UJ:	/*  micro-justification on/off		*/
	case NOT_FOUND:	/*  ".??" is unknown control command	*/
	default:
	    stdterm (line, len);
	    printf (".\\\" %s", line);	/*  print as comment  */
	    goto endputln;
	}				/*  end of "dot command" cases  */
	break;

    default:				/*  begining of normal text line  */
	break;
    }					/*  end of first character cases  */

    /*
	First character checks completed; prepare to check the rest of the line.
    */
    printvsp (vertsp);			/*  issue accumulated vertical space  */
    vertsp = 0;
    printf ("%s", carryover);		/*  first half of hyphenated word */
    carryover[0] = '\0';
    softflag = OFF;

    /*
	Massage the non-blank line
    */

    for (i=0; line[i]; i++)
    {
	switch (line[i] & CHRMASK)
	{
	case ' ':
	    if (softflag == ON && line[i] == SOFTSP)
	    {
		tmp = i;
		while (line[++i] == SOFTSP)	/*  discard soft spaces    */
		    ;
		strcpy (line+tmp, line+i);
		i = tmp-1;
	    }
	    break;

	case SOFTHPH:
	/*
	    This soft hyphen never ends a line.  Evidently it is left
	    over from a time when the word was broken at this point.
	    The soft hyphen is simply discarded.
	*/
	    strcpy (line+i, line+i+1);
	    i--;
	    break;

	case SOFTHPH2:
	/*
	    This soft hyphen always ends a line.  The first part of the
	    word must be saved to be installed after any leading blanks
	    on the next line.
	*/
	    line[i] = '\0';
	    do
		i--;
	    while (line[i] != ' ' && line[i] != '\t' && i > 0);
	    strcpy (carryover, line+i+1);
	    stdterm (line, i);
	    break;

	case CR:
	    if (line[i] == CR && line[i+1] == LF)
		vertsp++;
	    stdterm (line, i+1);
	    break;

	case UNDERLN:			/*  underline toggle		   */
	/*
	    Reverse the state of the underline switch.
	    Change to or from underline (italic) font, depending on present
	    state of underline switch.
	*/
	    underline = (underline) ? OFF : ON;
	    if (underline == ON) font = ITALIC;
	    else if (boldface == ON) font = BOLD;
	    else if (doublestrike == ON) font = BOLD;
	    else font = ROMAN;
	    /*
		Move first part of line out, inserting font setting.
	    */
	    line[i++] = 0;
	    stripflags (line);
	    printf ("%s\\f%c", line, font);
	    strcpy (line, line+i);
	    i = -1;
	    break;

	case BOLDFCE:			/*  boldface toggle  */
	/*
	    Reverse the state of the boldface switch.
	    Change to or from boldface font, depending on present state of
	    underline switch and boldface switch.
	*/
	    boldface = (boldface) ? OFF : ON;
	    if (underline == OFF && doublestrike == OFF)
	    {
		if (boldface == ON) font = BOLD;
		else if (doublestrike == ON) font = BOLD;
		else font = ROMAN;
		/*
		    Move first part of line out, inserting font setting.
		*/
		line[i++] = 0;
		stripflags (line);
		printf ("%s\\f%c", line, font);
		strcpy (line, line+i);
		i = -1;
	    }
	    else				/*  just delete control char  */
	    {
		strcpy (line+i, line+i+1);
		i--;
	    }
	    break;

	case DSTRIKE:			/*  doublestrike toggle  */
	/*
	    Reverse the state of the doublestrike switch.
	    Change to or from boldface font, depending on present state of
	    underline switch, boldface switch, and doublestrike switch.
	*/
	    doublestrike = (doublestrike) ? OFF : ON;
	    if (underline == OFF && boldface == OFF)
	    {
		if (doublestrike == ON) font = BOLD;
		else font = ROMAN;
		/*
		    Move first part of line out, inserting font setting.
		*/
		line[i++] = 0;
		stripflags (line);
		printf ("%s\\f%c", line, font);
		strcpy (line, line+i);
		i = -1;
	    }
	    else			/*  just delete control char	      */
	    {
		strcpy (line+i, line+i+1);
		i--;
	    }
	    break;

	case FF:			/*  page break			      */
	    stdterm (line, i);		/*  terminate line, dropping FF char  */
	    printf ("%s", line);	/*  print the line		      */
	    strcpy (line, ".bp\n");	/*  issue page-break command	      */
	    break;

	case CPMEOF:			/*  WORDSTAR end of file character    */
	case EOF:
	    line[i--] = 0;		/*  decrement index anticipating  */
	    break;			/*  automatic increment		  */

	default:			/*  just any old character	  */
	    break;
	}				/*  end of character switch	  */

	softflag = (line[i] & ~CHRMASK) ? ON : OFF;	/*  copy flag	  */
	line[i] &= CHRMASK;		/*  strip flag bit		  */

    }					/*  end of line massager	  */
    if (pagepos == 0)
	 if (topmargin >= 1)
	     for (i = topmargin; i; i--)
	     {
		if (i-headmargin == 1) printf (".tl '%s'\n", header);
		else putchar('\n');
		pagepos++;
	    }
    printf ("%s", line);
    pagepos = (++pagepos)%pagelen;
    endputln:;
}					/*  end of "putln"		  */
/**/

cmd(line)
/*
	See if first two characters following "." in line form a WORDSTAR
	"dot" command, and return the command identifier or "not found" flag.
*/
char	line[];
{
    unsigned		lowstop, highstop, target;
    register		position;
    static unsigned	dotcmd[LISTLEN]=
    {
    /*	Integers are  composed of characters in command.	*/
	AV,
	BP,
	CP,
	CS,
	CW,
	DF,
	DM,
	FI,
	FM,
	FO,
	HE,
	HM,
	IG,
	DOT,
	IJ,
	LH,
	LM,
	LS,
	MB,
	MT,
	OJ,
	OP,
	PA,
	PC,
	PF,
	PL,
	PN,
	PO,
	RM,
	RP,
	RV,
	SR,
	SV,
	UJ
    };
    lowstop = 0;
    highstop = LISTLEN-1;
    position = 0;
    
    if (line[0] == '.')			/*  make sure dot is present  */
    {
	target = (capital(line[1]) <<8) | capital(line[2]);
	while (dotcmd[position] != target && lowstop <= highstop)
	{
	    position = (lowstop + highstop)/2;
	    if (target < dotcmd[position])
		highstop = position - 1;
	    else
	   	lowstop = position + 1;
	}
	if (dotcmd[position] == DOT)		/*  anything close qualifies  */
	    target = dotcmd[position];		/*  as a comment ("..x")      */
    }
    else					/*  no initial dot	      */
	target = NOT_FOUND;
    
    return ((dotcmd[position] == target) ? dotcmd[position] : NOT_FOUND);
}						/*	end of "cmd"	*/





printvsp(vertsp)
/*
    Write the nroff request that stands for the line ends collected so far.
    Nothing is written for a count of zero; one line end is a plain break;
    two give one blank line; any other count n asks for n-1 blank lines.
*/
int	vertsp;				/*  vertical spaces accumulated  */
{
    if (vertsp == 1)
	printf (".br\n");		/*  break only, no blank line  */
    else if (vertsp == 2)
	printf (".sp\n");		/*  ".sp" alone means one line  */
    else if (vertsp != 0)
	printf (".sp %d\n", vertsp-1);	/*  explicit blank-line count  */
}					/*  end of "printvsp"  */



stdterm(line, len)
/*
    Apply standard terminator to line.
    Len is length of line.  Terminal null is not counted.
    Linebuf must have room for 0 to 2 characters beyond current end of line.

    A trailing line feed (hard or soft) and then a trailing carriage return
    (hard or soft) are dropped, and the line is ended with a single newline
    followed by a null.  A length of zero or less is treated as an empty
    line, so nothing in front of the buffer is ever examined or written.
*/
char	line[];
int	len;
{
    if (len < 0)
	len = 0;
    if (len > 0 && (line[len-1] == LF || line[len-1] == SOFTLF))
	len--;
    if (len > 0 && (line[len-1] == CR || line[len-1] == SOFTCR))
	len--;
    line[len++] = '\n';
    line[len] = 0;
}

stripflags(line)
/*
    Clear the flag bit of each character of line in place, working forward
    and stopping at the first character whose masked value is zero.
*/
char	line[];
{
    register char	*p = line;

    while ((*p &= CHRMASK) != 0)
	p++;
}