[mod.sources] hack to convert nroff underlines to LA50 esc. seq's

sources-request@panda.UUCP (01/08/86)

Mod.sources:  Volume 3, Issue 76
Submitted by: genrad!decvax!minow (Martin Minow)



# This is a shell archive.  Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file".  (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# readme.txt ulfix.c Makefile

echo x - readme.txt
cat > "readme.txt" << '//E*O*F readme.txt//'
ulfix is a public-domain program that converts nroff-style underlines
and superscripts to the escape sequences understood by Dec LA50
and VT200 series terminals.  It is a quick hack and shouldn't
be taken seriously.

Martin Minow
decvax!minow
//E*O*F readme.txt//

echo x - ulfix.c
cat > "ulfix.c" << '//E*O*F ulfix.c//'
/*
 * Copy stdin to stdout, converting sequences of _<BS>L... to
 * an escape sequence appropriate for display on a VTxx terminal
 * or LAxx printer.  Also hack nroff superscripts.
 */

/*)BUILD
 */

#ifdef DOCUMENTATION

Title	ulfix	Fix Overstrike Underlining
Index	ulfix	Fix overstrike underlining

Usage

	ulfix [-e] [-s] [-n] [input [output]]

Description

	Copy input to output, replacing overstrike underlining
	(such as generated by the Unix nroff formatter) by escape
	sequences appropriate for VT100 (family) terminals
	or LA50 printers.

	The -e option generates escape sequences in their
	eight-bit form.  This is the default on P/OS.

	The -s option generates escape sequences in the
	seven-bit form.  This may not work on P/OS.

	The -n option suppresses conversion of superscripts to
	their Multinational form.  This is needed for display on
	terminals, such as the VT100, which support underscore
	attributes but not Multinational.

	Note: the program assumes that Dec Multinational is mapped
	into the right-half of the 8-bit code matrix (this is normal
	for VT200 family terminals).

	Missing arguments are replaced by stdin and stdout. An argument
	of '-' may be used for explicit specification of stdin or stdout:

	    ulfix - foo.out

	reads stdin, writing all output to foo.out.

	The reverse linefeed and superscript escape sequences generated
	by nroff (<ESC>7, <ESC>8 and <ESC>9) are generally removed.
	The sequence <ESC>8 followed by a sequence of characters from
	the set "123oa." followed by <ESC>9 is replaced by the appropriate
	superscripted characters in the Dec Multinational (Latin 1) set.

Author

	Martin Minow

Bugs

	Ulfix should be more intelligent about escape sequences.
	Note that it somewhat duplicates functionality in the Unix col,
	colcrt, and more programs.

#endif

#include	<stdio.h>
#ifdef vms
#include	<ssdef.h>
#include	<stsdef.h>
#include	<errno.h>
#define	IO_SUCCESS	SS$_NORMAL
#define	IO_ERROR	errno
#endif
#ifndef	IO_SUCCESS
#define	IO_SUCCESS	0
#define	IO_ERROR	1
#endif
#define	FALSE	0
#define	TRUE	1
#ifndef	EOS
#define	EOS	'\0'
#endif
#define	ESC	'\033'
#ifdef decus
extern int	$$pos;			/* TRUE on P/OS			*/
#endif

/*
 * Shared state.  main() reads each input line into line[]; the three
 * filter routines rewrite it in place, using work[] as scratch space.
 */
char		line[513];
char		work[513];

/*
 * Option flags set by main().  eightbit doubles as the index into
 * csi[] below, so it must stay 0 (seven bit) or 1 (eight bit).
 */
int		eightbit = 0;		/* Assume seven bit		*/
int		nosuper = FALSE;	/* TRUE: leave superscripts	*/

/*
 * super[] lists the characters that have a Multinational superscript
 * form; replace[] holds, at the same index, the code that is either
 * or'ed with 0x80 (eight bit) or sent after <ESC>N (seven bit).
 */
char		super[] = "123oa.";
char		replace[] = { 0x39, 0x32, 0x33, 0x3A, 0x2A, 0x37	};

/*
 * Control sequence introducer: [0] seven-bit form, [1] eight-bit form.
 */
char		*csi[] = { "\033[", "\233" };

extern char	*strchr();

main(argc, argv)
int		argc;
char		*argv[];
{
	register char	*ap;

#ifdef vms
	argc = getredirection(argc, argv);
#endif
#ifdef decus
	if ($$pos)
	    eightbit = 1;
#endif
	while (argc > 1
	    && argv[1][0] == '-'
	    && argv[1][1] != EOS) {
	    for (ap = &argv[1][1]; *ap != EOS; ap++) {
		switch (*ap) {
		case 'e':
		case 'E':
		    eightbit = 1;
		    break;

		case 'n':
		case 'N':
		    nosuper = TRUE;
		    break;

		case 's':
		case 'S':
		    eightbit = 0;
		    break;

		default:
		    fprintf(stderr, "Unknown option '%c' ignored.\n", *ap);
		    break;
		}
	    }
	    argc--;
	    argv++;
	}
	if (argc > 1) {
	    if (strcmp(argv[1], "-") != 0) {
		if (freopen(argv[1], "r", stdin) == NULL) {
		    perror(argv[1]);
		    exit(IO_ERROR);
		}
	    }
	    argc--;
	    argv++;
	}
	if (argc > 1) {
	    if (strcmp(argv[1], "-") != 0) {
#ifdef vms
		if (freopen(argv[1], "w", stdout,
			"rfm=var", "rat=cr") == NULL) {
#else
		if (freopen(argv[1], "w", stdout) == NULL) {
#endif
		    perror(argv[1]);
		    exit(IO_ERROR);
		}
	    }
	    argc--;
	    argv++;
	}
	while (fgets(line, sizeof line, stdin) != NULL) {
	    if (!nosuper)
		mapsuperscript();
	    eatsuperscript();
	    eatoverstrike();
	    fputs(line, stdout);
	}
	fclose(stdin);
	fclose(stdout);
}

mapsuperscript()
/*
 * Convert <ESC>81<ESC>9 to Multinational superscript.
 *
 * Rewrites line[] in place, using work[] as scratch.  For every <ESC>8
 * in line[], the text up to the next <ESC> (or end of line) is scanned:
 *   - "_<BS>" pairs are copied through for eatoverstrike(),
 *   - characters found in super[] become the matching replace[] code,
 *     or'ed with 0x80 in eight-bit mode or prefixed by <ESC>N (SS2)
 *     in seven-bit mode,
 *   - anything else is copied unchanged.
 * The opening <ESC>8 and a directly following closing <ESC>9 are dropped.
 *
 * The seven-bit form turns one byte into three, so growth is budgeted
 * against the size of line[].  Once the budget is used up, remaining
 * superscript characters are copied unchanged rather than overrunning
 * the buffers.  work[] is assumed to be at least as large as line[].
 */
{
	register char	*lp;
	register char	*ep;
	register char	*wp;
	char		*xp;
	int		i;		/* Workaround Decus C bug	*/
	int		room;		/* Bytes line[] may still grow	*/

	for (lp = line; (lp = strchr(lp, ESC)) != NULL;) {
	    if (lp[1] != '8')
		lp++;
	    else {
		/*
		 * Find the end of the line by hand to learn its length.
		 * Dropping the <ESC>8 frees two bytes, so the line may
		 * grow by (capacity - (length - 2)) bytes in total.
		 */
		for (ep = lp; *ep != EOS; ep++)
		    ;
		room = (int) sizeof (line) - 1 - ((int) (ep - line) - 2);
		wp = work;
		for (ep = lp + 2; *ep != EOS && *ep != ESC; ep++) {
		    /*
		     * Check for underscore in superscript or
		     * one of the superscripted Multinationals.
		     * If it's something else, drop back to
		     * ASCII_G for want of any better ideas.
		     */
		    if (*ep == '_' && ep[1] == '\b') {
			*wp++ = *ep++;		/* Catch you later	*/
			*wp++ = *ep;		/* You too		*/
		    }
		    else if ((xp = strchr(super, *ep)) != NULL
		     && (eightbit || room >= 2)) {
			i = xp - &super[0];	/* Don't optimize this	*/
			i = replace[i];		/* -- Decus C bug --	*/
			if (eightbit)
			    *wp++ = i | 0x80;
			else {
			    *wp++ = ESC;	/* SS2, shift to Multi	*/
			    *wp++ = 'N';	/* for one character;	*/
			    *wp++ = i;		/* and here it is.	*/
			    room -= 2;		/* One byte became 3	*/
			}
		    }
		    else {			/* Garbage, or no room	*/
			*wp++ = *ep;
		    }
		}
		if (*ep == ESC && ep[1] == '9')
		    ep += 2;			/* Drop the closing	*/
		strcpy(wp, ep);			/* Append rest of line	*/
		strcpy(lp, work);		/* Replace from <ESC>8	*/
		lp += (wp - work);		/* Resume after result	*/
	    }
	}
}

eatsuperscript()
/*
 * Eat superscript escape sequences
 */
{
	register char	*lp;

	for (lp = line; (lp = strchr(lp, ESC)) != NULL;) {
	    switch (lp[1]) {
	    case '7':
	    case '8':
	    case '9':
		strcpy(lp, lp + 2);	/* Eat this		*/
		break;

	    default:			/* <ESC>N or stranger	*/
		lp++;			/* Skip over it		*/
		break;
	    }
	}
}

eatoverstrike()
/*
 * Replace overstrike underlining in line[] by escape sequences.
 *
 * A run of "_<BS>c" groups (where c may be preceded by the <ESC>N
 * written by mapsuperscript()) is collected into work[] without the
 * "_<BS>" pairs, then written back bracketed by the "underscore on"
 * and "underscore off" sequences.  csi[eightbit] selects the seven-
 * or eight-bit introducer.
 *
 * Every run makes the line longer.  If the rewritten line would no
 * longer fit in line[], conversion stops and the remainder of the
 * line is left as it was rather than overrunning the buffers.
 * work[] is assumed to be at least as large as line[].
 */
{
	register char	*lp;
	register char	*ep;
	register char	*wp;
	register char	*cp;
	int		need;		/* Length of rewritten line	*/

	for (lp = line; (lp = strchr(lp, '_')) != NULL;) {
	    if (lp[1] == '\b' && lp[2] != '\0') {
		wp = work;
		ep = lp + 2;
		if (*ep == ESC && ep[1] == 'N') {
		    *wp++ = *ep++;
		    *wp++ = *ep++;
		}
		if (*ep != EOS)
		    *wp++ = *ep++;
		while (ep[0] == '_' && ep[1] == '\b') {
		    ep += 2;
		    if (*ep == ESC && ep[1] == 'N') {
			*wp++ = *ep++;
			*wp++ = *ep++;
		    }
		    if (*ep != EOS)
			*wp++ = *ep++;
		}
		/*
		 * work..wp has the text to be underscored.
		 * lp -> start of underscored,
		 * ep -> start of text after sequence
		 * wp -> free space in work.
		 * Note:
		 * <ESC>[4m		turn  on underscore
		 * <ESC>[0m		turn off underscore
		 *
		 * Add up the rewritten line before touching anything:
		 * text before lp, the underscored text, "4m" and "0m",
		 * two introducers and the unchanged tail.
		 */
		need = (int) (lp - line) + (int) (wp - work) + 2 + 2;
		for (cp = csi[eightbit]; *cp != EOS; cp++)
		    need += 2;		/* Introducer is sent twice	*/
		for (cp = ep; *cp != EOS; cp++)
		    need++;
		if (need >= (int) sizeof (line))
		    break;		/* No room, leave the rest	*/
		sprintf(wp, "%s0m%s", csi[eightbit], ep);
		sprintf(lp, "%s4m%s", csi[eightbit], work);
	    }
	    else {
		lp++;		/* Random '_', skip over it	*/
	    }
	}
}
//E*O*F ulfix.c//

echo x - Makefile
cat > "Makefile" << '//E*O*F Makefile//'
# Unix makefile for ulfix
#
# The redefinition of strchr() is needed for Unix 4.2 bsd
# (and maybe some other Unices).
#
BSDDEFINE = -Dstrchr=index -Dstrrchr=rindex
#
# NOTE(review): BSDDEFINE reaches every compile and lint run through
# DEFINES below.  On a system whose C library supplies strchr() itself
# it can presumably be emptied on the command line (make BSDDEFINE=) --
# confirm for your make.
#
# On certain systems, such as Unix System III, you may need to define
# $(LINTFLAGS) in the make command line to set system-specific lint flags.
#
# DEFINES collects all -D arguments for cc and lint:
#
DEFINES = $(BSDDEFINE)

# Compiler flags: optimize, plus the -D arguments collected above.
CFLAGS = -O $(DEFINES)

#
# ** compile ulfix
#
# The rule below only links ulfix.o into the ulfix executable.  The
# ulfix.o dependency line at the end of this file has no commands of
# its own, so the compile step is presumably left to make's built-in
# .c.o rule.
#
SRCS = ulfix.c
OBJS = ulfix.o
ulfix: $(OBJS)
	$(CC) $(CFLAGS) $(OBJS) -o ulfix

#
# ** Run lint over the sources with the same -D arguments as the compile
#
lint:	$(SRCS)
	lint $(LINTFLAGS) $(DEFINES) $(SRCS)

#
# ** Remove unneeded files
#
clean:
	rm -f $(OBJS) ulfix

# Rebuild the object file whenever the source changes.
ulfix.o	:	ulfix.c

//E*O*F Makefile//

exit 0