[comp.os.vms] Untabify utility

STEINBERGER@KL.SRI.COM (Richard Steinberger) (05/17/88)

Has anyone written a good UNTABIFY utility?  It should be able to replace
tab characters with an appropriate number of spaces so the output text
looks "identical" to the input (it should not merely replace tabs with 8
spaces).  Yes, I know I could write it myself.  Yes, my version of emacs
(Unipress) will do this for me interactively.  I need a program that can
run in a command file.  Can anyone help?

-Ric Steinberger
steinberger@kl.sri.com

-------

LEICHTER@Venus.YCC.Yale.EDU ("Jerry Leichter ", LEICHTER-JERRY@CS.YALE.EDU) (05/17/88)

	Has anyone written a good UNTABIFY utility?  It should be able to
	replace tab characters with an appropriate number of spaces so the
	output text looks "identical" to the input (it should not merely
	replace tabs with 8 spaces).  Yes, I know I could write it myself.
	Yes, my version of emacs (Unipress) will do this for me interactively.
	I need a program that can run in a command file.  Can anyone help?

From the DECUS C library.  In the interests of completeness, I've included
both detab and entab.
							-- Jerry

--------------------------------detab.c-------------------------------------
/*
 *	detab <file >file
 */

/*)BUILD	$(TKBOPTIONS) = {
			TASK	= ...DET
		}
*/

#ifdef	DOCUMENTATION

title	detab	Replace tabs by blanks
index		Replace tabs by blanks

synopsis

	detab infile outfile

description

	Copies input to output, replacing each tab by a string
	of blanks (presupposing tabstops every 8 columns).
	Trailing blanks are not removed; they are copied through
	unchanged.  If the file arguments are missing, the
	standard input and output are used.

diagnostics

	None

author

	Martin Minow

bugs

	Tabs occur every eight columns only.

	On VMS, a file produced when an explicit output file spec is
	provided will be in "C format" (STREAM_LF records).  If you use
	redirection, the output will be variable-length records with
	implied carriage control - which is more easily dealt with in
	many cases.

#endif

#include <stdio.h>
#ifdef vms
#include		<ssdef.h>
#include		<stsdef.h>
#define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)
#define	IO_ERROR	SS$_ABORT
#endif
/*
 * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file
 */
#ifndef	IO_SUCCESS
#define	IO_SUCCESS	0
#endif
#ifndef	IO_ERROR
#define	IO_ERROR	1
#endif
#define	FALSE	0
#define	TRUE	1
#define	EOS	0
#define	BLANK	' '
#define	TAB	'\t'
#define	NEWLINE	'\n'
#define	FALSE	0
#define	TRUE	1

/*
 * Input buffer.  main() fills it with fgets(), so a line longer than
 * the buffer is handled as several consecutive chunks instead of
 * overrunning the array.
 */
char	line[513];

/*
 * detab [infile [outfile]]
 *
 * Copy the standard input to the standard output, replacing every tab
 * by the number of blanks needed to reach the next tab stop (tab stops
 * every 8 columns).
 *
 * argv[1], if present, is reopened as stdin; every later argument is
 * reopened as stdout.  A file that cannot be opened is reported with
 * perror() and the program exits with IO_ERROR.
 *
 * A final line that has no terminating newline is written with one.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	register int	i;
	register char	*cp;		/* Scans the current chunk	*/
	register int	col;		/* Output column, modulo 8	*/
	int		midline;	/* Nonzero inside a line	*/

#ifdef vms
	argc = getredirection(argc,argv);
#endif

	for (i = 1; i < argc; i++) {
	    if (i == 1) {
		if (freopen(argv[i], "r", stdin) == NULL) {
		    perror(argv[i]);
		    exit(IO_ERROR);
		}
	    }
	    else {
		if (freopen(argv[i], "w", stdout) == NULL) {
		    perror(argv[i]);
		    exit(IO_ERROR);
		}
	    }
	}
	col = 0;
	midline = 0;
	/*
	 * fgets() never stores more than sizeof line bytes, unlike the
	 * gets() call this replaces.  Because the column is carried in
	 * col rather than derived from a buffer offset, a line split
	 * over several chunks is still expanded correctly.
	 */
	while (fgets(line, sizeof line, stdin) != NULL) {
	    for (cp = line; *cp != EOS; cp++) {
		if (*cp == '\n') {
		    putchar('\n');
		    col = 0;
		    midline = 0;
		}
		else if (*cp == TAB) {
		    /*
		     * Found a tab: pad to the next multiple of 8.
		     * A tab at a tab stop yields a full 8 blanks.
		     */
		    i = 8 - col;
		    while (--i >= 0)
			putchar(BLANK);
		    col = 0;
		    midline = 1;
		}
		else {
		    putchar(*cp);
		    /* Only the column modulo 8 matters for tabs */
		    col = (col + 1) & 07;
		    midline = 1;
		}
	    }
	}
	/*
	 * Input ended in the middle of a line: terminate it, as the
	 * line-at-a-time version did.
	 */
	if (midline)
	    putchar('\n');
}
--------------------------------entab.c-------------------------------------
/*
 *	entab <file >file
 */

/*)BUILD	$(TKBOPTIONS) = {
			TASK	= ...ENT
		}
*/

#ifdef	DOCUMENTATION

title	entab	Replace blanks by tabs and blanks
index		Replace blanks by tabs and blanks

synopsis
	.s.nf
	entab [-t] infile outfile
	.s.f
description

	Copies input to output, replacing sequences of blanks
	and tabs by the minimum number of tabs and blanks required
	to give the same visual effect.

	Trailing blank/tabs are removed.

	<Return> overstrikes are handled.

	If -t is given, a single blank will be output as <TAB> if
	valid.  For example, if the string:

		1234567 8

	is given, the program will output a space following the '7'.
	If -t is given, the program will output a <TAB>.

diagnostics

	None

author

	Martin Minow

	(Taken from Kernighan and Plauger, Software Tools)

bugs

	Tabs occur every eight columns only.

	On VMS, a file produced when an explicit output file spec is
	provided will be in "C format" (STREAM_LF records).  If you use
	redirection, the output will be variable-length records with
	implied carriage control - which is more easily dealt with in
	many cases.

#endif

#include <stdio.h>
#include <stdlib.h>
#define	BLANK	' '
#define	TAB	'\t'
#define	RETURN	'\r'
#define	NEWLINE	'\n'
#define	FALSE	0
#define	TRUE	1

int	tflag = FALSE;

/*
 * entab [-t] [infile [outfile]]
 *
 * Copy the standard input to the standard output, rewriting each run
 * of blanks and tabs as tabs followed by blanks that reach the same
 * column (tab stops every 8 columns).  Blanks and tabs that precede a
 * <Return>, a newline or end of file are dropped.
 *
 * Arguments beginning with '-' are options: -t (either case) sets
 * tflag; anything else is reported on stderr and otherwise ignored.
 * The first non-option argument is reopened as stdin and every later
 * one as stdout.  A file that cannot be opened is reported with
 * perror() and the program exits with EXIT_FAILURE, instead of
 * carrying on with an unusable stream.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	register int	c;
	register int	col;		/* Column already written	*/
	register int	newcol;		/* Column after blanks/tabs	*/
	int		lastc;		/* Character read before c	*/
	int		gotinput;	/* Nonzero once stdin reopened	*/

#ifdef vms
	argc = getredirection(argc,argv);
#endif

	gotinput = FALSE;
	for (c = 1; c < argc; c++) {
	    if (argv[c][0] == '-') {
		/*
		 * Compare directly rather than through tolower(),
		 * which this file never declared.
		 */
		if (argv[c][1] == 't' || argv[c][1] == 'T')
		    tflag++;
		else {
		    fprintf(stderr, "Unknown option \"%s\"\n", argv[c]);
		}
	    }
	    else if (!gotinput) {
		if (freopen(argv[c], "r", stdin) == NULL) {
		    perror(argv[c]);
		    exit(EXIT_FAILURE);
		}
		gotinput++;
	    }
	    else {
		if (freopen(argv[c], "w", stdout) == NULL) {
		    perror(argv[c]);
		    exit(EXIT_FAILURE);
		}
	    }
	}
	c = EOF;
	col = 0;	/* Tab stops at 0, 8, 16, ...			*/
	for (;;) {
	    /*
	     * Swallow a run of blanks and tabs, only noting in newcol
	     * the column it reaches; nothing is written yet.
	     */
	    newcol = col;
	    for (;;) {
		lastc = c;
		switch (c = getchar()) {
		case BLANK:
		    newcol++;
		    continue;

		case TAB:
		    newcol = nexttabstop(newcol);
		    continue;

		default:
		    break;		/* Exits the switch only	*/
		}
		break;			/* Neither BLANK nor TAB	*/
	    }
	    if (c == EOF) {
		/* Terminate a last line that had no newline */
		if (newcol > 0) {
		    putchar(NEWLINE);
		}
		break;
	    }
	    else if (c == RETURN || c == NEWLINE) {
		/* Pending blanks/tabs are discarded here */
		col = 0;
		putchar(c);
	    }
	    else {
		/*
		 * If -t was not given and the last character was
		 * a blank and there's just one blank, don't do
		 * a tab here.
		 */
		if (tflag || lastc == TAB || (col + 1) < newcol) {
		    while (nexttabstop(col) <= newcol) {
			putchar(TAB);
			col = nexttabstop(col);
		    }
		}
		/* Blanks cover whatever distance tabs could not */
		while (col < newcol) {
		    putchar(BLANK);
		    col++;
		}
		putchar(c);
		col++;
	    }
	}
}


nexttabstop(col)
register int	col;
/*
 * Give the first tab stop strictly beyond col.  Tab stops sit at
 * multiples of 8, so columns 0..7 map to 8, 8..15 map to 16, and
 * so on; a column already on a tab stop moves to the following one.
 */
{
	/* Fill in the low three bits, then step onto the multiple of 8 */
	return ((col | 7) + 1);
}

-----------------------------getredirection.c---------------------------------
/*
 * getredirection() is intended to aid in porting C programs
 * to VMS (Vax-11 C) which does not support '>' and '<'
 * I/O redirection.  With suitable modification, it may
 * be useful for other portability problems as well.
 *
 * Modified, 24-Jan-86 by Jerry Leichter
 *	When creating a new output file, force the maximum record size to
 *	512; otherwise, it ends up as 0 (though the C I/O system won't write
 *	a record longer than 512 bytes anyway) which will cause problems if
 *	the file is later opened for APPEND - if the maximum record size is
 *	0, C will use the length of the longest record written to the file
 *	for its buffer!
 */

#ifdef	vms
#include	<stdio.h>
#include	<errno.h>

int
getredirection(argc, argv)
int		argc;
char		**argv;
/*
 * Process vms redirection arguments.  Exit if any error is seen.
 *
 * argv[1] .. argv[argc-1] are examined in order:
 *
 *	<file	reopens stdin for reading on file.
 *	>file	creates file and attaches it to stdout.
 *	>>file	appends to file if access(file, 2) reports it
 *		writable; otherwise handled exactly like >file.
 *
 * If getredirection() processes an argument, it is erased
 * from the vector: the remaining arguments are moved down to
 * close the gaps (their order is kept) and the slot after the
 * last one is set to NULL.  getredirection() returns a new
 * argc value, the number of entries left including argv[0].
 *
 * On any open/create failure the file name is passed to perror()
 * and the program exits with errno as its status.
 *
 * Warning: do not try to simplify the code for vms.  The code
 * presupposes that getredirection() is called before any data is
 * read from stdin or written to stdout.
 *
 * Normal usage is as follows:
 *
 *	main(argc, argv)
 *	int		argc;
 *	char		*argv[];
 *	{
 *		argc = getredirection(argc, argv);
 *	}
 */
{
	register char		*ap;	/* Argument pointer	*/
	int			i;	/* argv[] index		*/
	int			j;	/* Output index		*/
	int			file;	/* File descriptor 	*/

	for (j = i = 1; i < argc; i++) {   /* Do all arguments	*/
	    switch (*(ap = argv[i])) {
	    case '<':			/* <file		*/
		/* ++ap steps over the '<' to the file name */
		if (freopen(++ap, "r", stdin) == NULL) {
		    perror(ap);		/* Can't find file	*/
		    exit(errno);	/* Is a fatal error	*/
		}
		break;

	    case '>':			/* >file or >>file	*/
		if (*++ap == '>') {	/* >>file		*/
		    /*
		     * If the file exists, and is writable by us,
		     * call freopen to append to the file (using the
		     * file's current attributes).  Otherwise, create
		     * a new file with "vanilla" attributes as if
		     * the argument was given as ">filename".
		     * access(name, 2) returns 0 if we can write on
		     * the specified file.
		     */
		    if (access(++ap, 2) == 0) {
			if (freopen(ap, "a", stdout) != NULL)
			    break;	/* Exit case statement	*/
			perror(ap);	/* Error, can't append	*/
			exit(errno);	/* After access test	*/
		    }			/* If file accessible	*/
		}
		/*
		 * Reached for >file, and for >>file when the access
		 * test failed; ap now points at the file name.
		 *
		 * On vms, we want to create the file using "standard"
		 * record attributes.  creat(...) creates the file
		 * using the caller's default protection mask and
		 * "variable length, implied carriage return"
		 * attributes. dup2() associates the file with stdout.
		 */
		if ((file = creat(ap, 0, "rat=cr", "rfm=var", "mrs=512"))
			== -1
		 || dup2(file, fileno(stdout)) == -1) {
		    perror(ap);		/* Can't create file	*/
		    exit(errno);	/* is a fatal error	*/
		}			/* If '>' creation	*/
		break;			/* Exit case test	*/

	    default:
		argv[j++] = ap;		/* Not a redirector	*/
		break;			/* Exit case test	*/
	    }
	}				/* For all arguments	*/
	argv[j] = NULL;			/* Terminate argv[]	*/
	return (j);			/* Return new argc	*/
}
#else
getredirection(argc, argv)
int		argc;
char		*argv[];
/*
 * Stand-in for systems other than vms: no argument is
 * interpreted or removed, and argc is handed back as given.
 */
{
	(void) argv[0];			/* Only so argv is referenced	*/
	return (argc);
}
#endif

u3369429@ucsvc.unimelb.edu.au (Michael Bednarek) (05/18/88)

In article <8805170715.AA03292@ucbvax.Berkeley.EDU>, LEICHTER@Venus.YCC.Yale.EDU ("Jerry Leichter ", LEICHTER-JERRY@CS.YALE.EDU) writes:
> 
> 	Has anyone written a good UNTABIFY utility?
>       [...]
> From the DECUS C library.  In the interests of completeness, I've included
> both detab and entab.
> 							-- Jerry
> [...]

Thanks. But people should be aware of the special case of Fortran Carriage-
control files. You don't want to change the space in column one which is
followed by seven spaces into a TAB, do you?

A while ago, I published in this group DETAB and TAB_IT, command procedures
with embedded TPU code. They not only go to great lengths to handle Fortran
carriage-control files, they also handle tab stops other than eight. They
do not cater for asymmetric tab stops, though.

I noticed that TPU would go into infinite loops when I tried to use TAB_IT
on large (>300 blocks) files. Does anyone out there know anything about this?
--
Michael Bednarek, Institute of Applied Economic and Social Research (IAESR)
   //  Melbourne University,Parkville 3052, AUSTRALIA, Phone:+61 3 344 5744
 \X/   Domain:u3369429@{murdu.oz.au | ucsvc.dn.mu.oz.au} | mb@munnari.oz.au
       "bang":...UUNET!munnari!murdu!u3369429     PSI%23343000301::U3369429
"POST NO BILLS."

carl@CITHEX.CALTECH.EDU (Carl J Lydick) (05/22/88)

 > I noticed that TPU would go into infinite loops when I tried to use TAB_IT
 > on large (>300 blocks) files. Does anyone out there know anything about this?

I think so.  I think that what is happening is that TPU is NOT going  into  an
infinite loop, it's just getting VERY slow at doing what you've told it to do.
TPU has fairly severe memory management problems; the symptom  is  exponential
use  of  resources  (CPU  time  and  memory)  as  a  function of the number of
replacements  of  text  done  within  a  single  procedure.   The  first   512
replacements  may  take a fraction of a second.  The 50th 512 replacements may
take several CPU hours, assuming you haven't run out of  virtual  memory  yet.
The workaround for this is to have a separate procedure to do the replacement,
called from the procedure with the main loop in it.

LEICHTER@VENUS.YCC.YALE.EDU ("Jerry Leichter ", LEICHTER-JERRY@CS.YALE.EDU) (05/23/88)

	> 
	> 	Has anyone written a good UNTABIFY utility?
	>       [...]
	> From the DECUS C library.  In the interests of completeness, I've
	> included both detab and entab.
	> 							-- Jerry
	> [...]

	Thanks. But people should be aware of the special case of Fortran
	Carriage-control files. You don't want to change the space in column
	one which is followed by seven spaces into a TAB, do you? ...

Have you tried the programs I posted?  The VAX C library understands FORTRAN
carriage control format, and will expand the input lines into text streams
according to the FORTRAN definitions.
							-- Jerry