[net.sources] CORRECT source

rlr@avsdS.UUCP (Rhode L. Roberts) (05/15/85)

*** REPLACE THIS LINE WITH YOUR MESSAGE ***

*** REPLACE THIS LINE WITH YOUR MESSAGE ***

--------[ Delete from line one to and including this line. ]-----------
#
#	Here is the source for "CORRECT"
#	(Use csh to extract)
#       
#	This is the second posting of this source.
#	The first got munged in the mailer ( sorry ).
#
#	It was originally posted to the net about
#	two years ago.  I am not the author.
#	In the #define statements I commented out
#	the old stuff and reset them to point to
#	4.2 BSD directories.  It all seems to work
#	for me.
#
#	R. Roberts
#	Ampex Corp.
#	Redwood City, Ca.
#	!fortune!dsd!avsdS:rlr
#------------------------------------------------------------
#

cat > Makefile << 'graBITanSTACKit: Wed May 15 08:40:32 PDT 1985'
# Installation directories for the binary and the manual page.
BIN = /usr/local
MAN = /usr/man/manl
ROFF = nroff
SRCS = correct.c
CFLAGS = -O
# Build the executable (already stripped by -s).
correct:	$(SRCS)
	cc $(CFLAGS) $(SRCS) -s -o $@
# Depend on the binary so "make install" works from a clean tree.
install:	correct
	strip correct
	mv correct $(BIN)
	cp correct.1 $(MAN)
# Format the manual page on standard output.
manl:   correct.1
	$(ROFF) -man correct.1
'graBITanSTACKit: Wed May 15 08:40:32 PDT 1985'

cat > README << 'graBITanSTACKit: Wed May 15 08:40:34 PDT 1985'
This program interfaces spell, grep, and look to aid in
the correction of spelling errors in files.

The files that should have been placed in this directory
are: Makefile, README, correct.c, correct.1

Potential changes necessary for particular installations
might be:

(1) In correct.c the paths to the spell, ex, grep and look commands
	are for 4.1bsd.  You may have to alter the corresponding defines
	found at the beginning of the program.  The "ed" editor could
	probably be substituted for "ex".  Note that the version of grep
	used is the Berkeley version with the -w option (which recognizes
	only 'words').

(2) In Makefile, the binary is defaulted to /usr/local and
	the manual entry to /usr/man/manl.
'graBITanSTACKit: Wed May 15 08:40:34 PDT 1985'

cat > correct.1 << 'graBITanSTACKit: Wed May 15 08:40:36 PDT 1985'
.TH CORRECT 1 Local
.SH NAME
correct - correct spelling mistakes
.SH SYNOPSIS
\fBcorrect\fR [\fB-v\fR] [\fB-d \fIlocdict\fR] \fIfile\fR
.SH DESCRIPTION
.PP
Correct takes \fIfile\fR as input, runs \fIspell\fR
to find spelling errors and prompts for corrections.
If the \fB-d\fR flag is specified, the file \fIlocdict\fR will be used
as a local dictionary.  Correct will filter prompts for those words
contained in this file. If \fIlocdict\fR is not found, it will be
created.
The \fB-v\fR (verbose) flag indicates that a brief usage message should
be output at the start, and that a long form of the prompt should be
used.
.PP
The verbose prompt has the form:
.br
	word TAB (CR, ESC, CTRL D, ?prefix, or replacement): 
.br
the brief form is:
.br
	word (CR, ESC, ^D, ?, or fix):
.PP
\fBCR\fR (carriage return) indicates that the word is spelled correctly.
If the \fB-d\fR option is used, the word is added to the local
dictionary.  Without \fB-d\fR, CR and ESC are equivalent.
.PP
\fBESC\fR ignores the word.  The word is not placed in the local
dictionary.
.PP
\fBCTRL D\fR runs \fIgrep -w\fR to show the context in which the word
is used.
.PP
\fB?prefix\fR requests a list of all dictionary words with 'prefix'
(using
.IR look (1)).
The list is output in 5 column format.
.PP
\fBreplacement\fR is the correct spelling of the word.
.PP
After making corrections, you will be prompted whether
or not you want the corrections made.  A yes (or 'y') answer will
change all occurrences of the incorrect spellings to the new value
you entered.
.PP
If any words are to be added to the local dictionary, the user is
asked whether the dictionary should be updated.  (Note: if the
dictionary is added to by hand, be sure to sort it before using
correct, since checks for matches are terminated at the first
impossible match.)
.SH SEE ALSO
spell(1), grep(1), look(1), ex(1)
.SH AUTHOR
R. Alan Eustace (alan.ucf-cs@rand-relay or decvax!ucf-cs!alan).
.br
Dave Olson, Fortune Systems ({harpo|ihnp4}!fortune!olson)
added multi-column listing, numerous speedups, and removed fixed
limits on dictionary size and number of corrections/errors.
.SH BUGS
May dump core on 'errors' that are longer than 60 characters.
'graBITanSTACKit: Wed May 15 08:40:36 PDT 1985'

cat > correct.c << 'graBITanSTACKit: Wed May 15 08:40:38 PDT 1985'
/*
 *   correct.c -
 *       This program can be redistributed without charge
 *       with credit to the author.
 *
 *       R. Alan Eustace
 *          alan.ucf-cs@rand-relay
 *          decvax!ucf-cs!alan
 *		

 Modified to use a faster version of sort, to use a modified
 popen() which doesn't do a sh -c, and to malloc space for the local
 dictionary.  Also changed dictionary search to terminate as soon as
 we find a word greater than what we are looking for, which is much
 faster with larger dictionaries.  Now use t_brkc, t_eofc so returns
 don't have to be typed for most cases.  Added signal trapping, added -v
 (verbose option), added set report=500 to ex cmd's.
 Space for words/corrections/local dictionary is now malloced,
 not hardwired with fixed limits.
	fortune!olson (Dave Olson at Fortune Systems) 7 Nov. 83

 */
/* Full paths of the external programs that main() runs through pop(). */
#define	SPELL	"/usr/bin/spell"
#define	LOOK	"/usr/bin/look"
/*
 * Alternative locations for grep and ex, left here for reference:
 *
 * #define	GREP	"/usr/bin/grep"
 * #define	EX	"/usr/local/bin/ex"
 */
#define	GREP	"/usr/ucb/grep"
#define	EX	"/usr/ucb/ex"

#include <stdio.h>
#include <setjmp.h>
#include <sgtty.h>
#include <signal.h>

/* terminal special characters as fetched by setuptty(); cleanup() hands
   them back to the TIOCSETC ioctl before exiting */
struct tchars orig;
/*#define DEBUG*/
/* usage line printed on any command-line error */
char SYNTAX[] = "usage: correct [-v] [-d locdict] file";
/* size of the buffers that hold one word or one reply */
#define STRLEN 60
#define TRUE 	1
#define FALSE 	0

FILE *pop(),*fopen();
/* streams for the spell, look, grep and ex children, and the local
   dictionary file */
FILE *fpspell, *fplook, *fpgrep, *fpex, *pfLdfile;

/* printf formats for the long (-v) and short prompts; %s is the word */
char PROMPT_VERB[] = "%s\t(CR, ESC, CTRL D, ?prefix, or replacement): ";
char PROMPT_NOVERB[] = "%s (CR, ESC, ^D, ?, or fix): ";
/* local dictionary: malloc'ed array of malloc'ed words */
char **asLocdict;
/* number of words stored in asLocdict, i.e. the index of the next free slot */
int iasLast;
/* name of the file being checked */
char *psFile;
/* name of the local dictionary file, or NULL when -d was not given */
char *psLdfile;
char *index(), *malloc(), *realloc();

/* candidate delimiters for the ex commands; main() uses the first one
   that does not occur in the replacement text */
char sDelim[] = "/%$#*[],#!";

/* fortune!olson addition */
/* re-prompt point set in main(); trapit() and the '?' and CTRL D replies
   longjmp back to it */
jmp_buf env;
int trapit();

/*
 * One entry in the singly linked list of corrections built by
 * AddCorrect().  sfix is the head of the list; main() stops walking the
 * list at the node whose next pointer is NULL, so that last node is
 * never applied.
 */
struct Sfix {
	char *correct;		/* replacement text typed by the user */
	char *old;		/* word as reported by spell */
	struct Sfix *next;	/* following entry */
} *sfix;

main(argc, argv)
int argc;
char *argv[];
{
	int cnt, verbose=0;
	register i, ips, ias;
	char *cmd[5];
	char try[STRLEN], wordin[STRLEN];
	char bCorrflag ,bLocdictflag;
	char *prompt;
	char *pc;

	if (argc < 2)  {
		fprintf(stderr,"%s\n",SYNTAX);
		exit(1);
	}

	psFile = NULL;
	psLdfile = NULL;
	for(ips=1;ips<argc;ips++)  {
		if (argv[ips][0] == '-')  {
			switch (argv[ips][1])  {
			case 'v': 	/* fortune!olson */
				verbose = 1;
				break;
			case 'd':
				if (argc > ips)  {
					psLdfile = argv[++ips];
				}
				else   {
					fprintf(stderr,"%s\n",SYNTAX);
					exit(1);
				}
				break;
			default :
				fprintf(stderr,"%s\n",SYNTAX);
				exit(1);
			}
		}
		else {
			psFile = argv[ips];
		}
	}
	if (psFile == NULL) {
		fprintf(stderr,"%s\n",SYNTAX);
		exit(1);
	}
	if (psLdfile != NULL)
		InitasLocdict();

	bCorrflag = bLocdictflag = 0;

	/* fortune!olson */
	setuptty();
	if(verbose) {
		prompt = PROMPT_VERB;
		printf("CR (RETURN key) means this word is OK and should be added to the dictionary\n");
		printf("ESC means this word is OK, but don't add it to the dictionary\n");
		printf("CTRL D means show the context the word is in, then re-prompt\n");
		printf("?'prefix' means show dictionary words that start with 'prefix', then re-prompt\n");
		printf("'replacement' means the word(s) you type replace the word shown\n");
		printf("? and 'replacement' must be followed by the RETURN key\n");
		printf("\nRunning spell ....\n");
	}
	else
		prompt = PROMPT_NOVERB;
	setjmp(env);
	cmd[0] = SPELL;
	cmd[1] = psFile;
	cmd[2] = NULL;
	if ((fpspell = pop(cmd,"r")) == NULL)  {
		fprintf(stderr,"Cannot open %s\n",argv[1]);
		exit(1);
	}

	signal(SIGINT,trapit);
	while(fscanf(fpspell,"%s",wordin) != EOF)  {
		if(psLdfile!=NULL &&
			SearchasLocdict(wordin)==TRUE) continue;
		setjmp(env);
		printf(prompt,wordin);
		fflush(stdout);

		cnt = read(0,try,STRLEN);
		try[cnt] = '\0';
		switch(*try) {
		case '?':
			cmd[0] = LOOK;
			cmd[1] = try+1;
			cmd[2] = 0;
			if ((fplook = pop(cmd,"r")) == NULL)  {
				fprintf(stderr,"look command failure..strange \n");
				longjmp(env,1);
			}
			for(i=0;;i++)  {
				if(fscanf(fplook,"%s",try)==EOF)
					break;
				if(i%5 == 4)
					printf("%.15s\n",try);
				else
					printf("%-15s",try);
			}
			pclose(fplook);
			if(i%5)
				putchar('\n');
			longjmp(env,1);
			break;
		case '\0':  /* a CTRL D */
			cmd[0] = GREP;
			cmd[1] = "-w";	/* -w says match 'words' only, like
				\<word\> in the ex/vi editor */
			cmd[2] = wordin;
			cmd[3] = psFile;
			cmd[4] = NULL;
			putchar('\n');
			if ((fpgrep = pop(cmd,"r")) == NULL)  {
				fprintf(stderr,"grep command failure..strange \n");
				longjmp(env,1);
			}
			for(;;)  {
				if(fgets(try,100,fpgrep) == NULL)
					break;
				fputs(try,stdout);
			}
			pclose(fpgrep);
			longjmp(env,1);
			break;
		case '\n':
			AddtoasLocdict(wordin);
			bLocdictflag++;
			break;
		case '\33':
			putchar('\n');
			break;
		default:
			bCorrflag++;
			try[cnt-1] = '\0';
			AddCorrect(wordin,try);
		}
	}
	pclose(fpspell);
	AddCorrect((char *)NULL,(char *)NULL);	/* terminate the correction list */

	if (bCorrflag)  {
		printf("Corrections made?");
		scanf("%s",try);
		if (*try == 'y')  {
			register struct Sfix *fix;

			cmd[0] = EX;
			cmd[1] = "-";
			cmd[2] = psFile;
			cmd[3] = NULL;

			if ((fpex = pop(cmd,"w")) == NULL)  {
				fprintf(stderr,"ex command failure..strange \n");
				goto dorest;
			}
			fprintf(fpex,"set report=500\n");
			for(fix=sfix;fix->next; fix=fix->next)  {
				for(pc=sDelim;*pc!='\0';pc++)  {
					if(index(fix->correct,*pc)==NULL)
						break;
				}
#ifdef DEBUG
				printf("fin-char=%c, string=%s, old=%s.\n",*pc,fix->correct,fix->old);
#endif
				if (*pc == '\0') {
					fprintf(stderr,"Corrected word (%s) contains all known delimeters, skipped\n",fix->correct);
					continue;
				}
#ifdef DEBUG
				printf("g%c\\<%s\\>%cs%c%c%s%cg\n",*pc,
					fix->old,*pc,*pc,*pc,fix->correct,*pc);
#endif
#ifndef DEBUG
				fprintf(fpex,"g%c\\<%s\\>%cs%c%c%s%cg\n",*pc,
					fix->old,*pc,*pc,*pc,fix->correct,*pc);
#endif
			}
			fprintf(fpex,"x\n");
			pclose(fpex);
		}
	}

dorest:
	if (psLdfile != NULL && bLocdictflag)  {
		printf("Local Dictionary updated?");
		scanf("%s",try);
		if (*try == 'y')  {
			ssort(asLocdict, asLocdict+iasLast);
			if ((pfLdfile = fopen(psLdfile,"w"))==NULL)  {
				fprintf(stderr,"Local dictionary %s can't be written, not added to\n",psLdfile);
				cleanup(1);
			}

			for(ias=0;ias<=iasLast;ias++)
				fprintf(pfLdfile,"%s\n",asLocdict[ias]);
			fclose(pfLdfile);
		}
	}

	cleanup(0);
}


/* fprintf formats for the local dictionary's out-of-memory reports;
   AddtoasLocdict() supplies the current word index for %d */
char NOTENOUGHMEM[] =
 "Not enough room for all of dictionary, words after line %d skipped\n";
char NOMEM[] = "No memory for local dictionary, skipped\n";


/* revised to use malloc, so we have no artificial size limits on the
	dictionary.  fortune!olson */

/*
 * InitasLocdict - allocate the local dictionary table and load psLdfile.
 *
 * On return asLocdict holds every word read from the file and iasLast is
 * the number of words.  A missing file is not an error: the dictionary
 * simply starts empty.  If the table cannot be allocated, psLdfile is
 * cleared so that the rest of the program runs as if -d was not given.
 */
InitasLocdict()
{
	char buf[STRLEN];
	char fmt[16];		/* scanf format "%<STRLEN-1>s" */

	iasLast = 0;

	/* get initial space for BUFSIZ words; AddtoasLocdict() grows it */
	if((asLocdict = (char **)malloc(BUFSIZ*sizeof(char *))) == NULL) {
		fprintf(stderr,NOMEM);
		psLdfile = NULL;	/* "skipped": no table to search or add to */
		return;
	}

	if((pfLdfile = fopen(psLdfile,"r"))==NULL)  {
		fprintf(stderr,"Creating new local dictionary %s\n",psLdfile);
		return;
	}

	/* bound each word to the size of buf */
	sprintf(fmt,"%%%ds",STRLEN-1);
	while(fscanf(pfLdfile,fmt,buf) == 1)
		if(AddtoasLocdict(buf) == 0)
			break;

	fclose(pfLdfile);
}


/*
 * AddCorrect - append one correction to the list headed by sfix.
 *
 *	old	word as it appears in the file
 *	new	replacement text
 *
 * Both strings are copied.  A NULL argument is treated as an empty
 * string, so AddCorrect(NULL, NULL) appends the empty sentinel entry
 * that ends the list.  A node is linked in only once it is completely
 * built; if memory runs out a message is printed and the list is left
 * unchanged.
 */
AddCorrect(old, new)
char *new, *old;
{
	static struct Sfix *tail = NULL;	/* last node linked so far */
	struct Sfix *node;

	if(old == NULL)
		old = "";
	if(new == NULL)
		new = "";

	if((node = (struct Sfix *) malloc(sizeof(struct Sfix))) == NULL) {
		fprintf(stderr,"No room for more corrections\n");
		return;
	}
	node->old = malloc(strlen(old)+1);
	node->correct = malloc(strlen(new)+1);
	if(node->old == NULL || node->correct == NULL) {
		fprintf(stderr,"No room for more corrections\n");
		if(node->old != NULL)
			free(node->old);
		if(node->correct != NULL)
			free(node->correct);
		free((char *)node);
		return;
	}
	strcpy(node->old,old);
	strcpy(node->correct,new);
	node->next = NULL;

	if(sfix == NULL)	/* set up beginning of linked list */
		sfix = node;
	else
		tail->next = node;
	tail = node;
}
		

/*
 * AddtoasLocdict - append a copy of psWord to the local dictionary.
 *
 * The table grows by BUFSIZ slots whenever the last slot of the current
 * allocation is about to be used.
 *
 * Returns the new word count (always non-zero) on success, 0 on failure.
 * On failure the words already stored are kept.
 */
AddtoasLocdict(psWord)
register char *psWord;
{
	register int i = iasLast;
	char **grown;

	/* no table was ever allocated: nothing to add to */
	if(asLocdict == NULL)
		return 0;

	if((i%BUFSIZ) == BUFSIZ-1) {
		/* keep the old pointer until realloc is known to have worked,
			so a failure does not throw the dictionary away */
		grown = (char **)realloc((char *)asLocdict,
			(1+i+BUFSIZ)*sizeof(char *));
		if(grown == NULL) {
			fprintf(stderr,NOTENOUGHMEM,i);
			return 0;
		}
		asLocdict = grown;
	}
	if((asLocdict[i] = malloc(strlen(psWord)+1)) == NULL)  {
		fprintf(stderr,NOTENOUGHMEM,i);
		return 0;
	}
	strcpy(asLocdict[i],psWord);

	return ++iasLast;
}


/*
 * SearchasLocdict - look psWord up in the local dictionary.
 *
 * Returns TRUE if the word is present, FALSE otherwise.  The scan stops
 * at the first entry that compares greater than psWord, so it relies on
 * the entries being in strcmp() order.
 */
SearchasLocdict(psWord)
register char *psWord;
{
	register int ias, res;

	if(asLocdict == NULL)
		return FALSE;

	/* iasLast is the number of entries; slot iasLast itself is unset */
	for(ias=0;ias<iasLast;ias++)  {
		res = strcmp(asLocdict[ias],psWord);
		if(res==0)
			return TRUE;
		else if(res>0)	/* don't waste time searching the rest */
			return FALSE;
	}
	return FALSE;
}


/* based on @(#)popen.c	4.1 (Berkeley) 12/21/80 */
#ifndef FILE
#	include <stdio.h>
#endif
/* picks b when the enclosing function's `mode' starts with 'r', else a */
#define	tst(a,b)	(*mode == 'r'? (b) : (a))
/* indices of the read and write ends in the array filled in by pipe() */
#define	RDR	0
#define	WTR	1
/* child process id for each stream opened by pop(), indexed by the
   parent's file descriptor; pclose() waits for that process */
static	int	popen_pid[20];

/*
 * pop - start a command with a pipe to it, without going through sh.
 *
 *	cmd	NULL-terminated argument vector; cmd[0] is the program,
 *		located by execvp()
 *	mode	"r" to read the command's standard output,
 *		anything else to write to its standard input
 *
 * Returns a stream to be closed with pclose(), or NULL if the pipe,
 * the fork or the stream could not be set up.  No descriptors are left
 * open on failure.
 */
FILE *
pop(cmd,mode)
char	**cmd;
char	*mode;
{
	int p[2];
	register int myside, hisside, pid;
	FILE *fp;

	if(pipe(p) < 0)
		return NULL;
	myside = tst(p[WTR], p[RDR]);
	hisside = tst(p[RDR], p[WTR]);
	/* pclose() finds the child through popen_pid[descriptor] */
	if(myside >= (int)(sizeof popen_pid / sizeof popen_pid[0])) {
		close(myside);
		close(hisside);
		return NULL;
	}
	if((pid = fork()) == 0) {
		/* myside and hisside reverse roles in child */
		close(myside);
		/* if the pipe end already is fd 0/1, closing it would lose it */
		if(hisside != tst(0, 1)) {
			dup2(hisside, tst(0, 1));
			close(hisside);
		}
		execvp(*cmd,cmd);
		perror(*cmd);
		_exit(1);
	}
	if(pid == -1) {
		close(myside);
		close(hisside);
		return NULL;
	}
	popen_pid[myside] = pid;
	close(hisside);
	if((fp = fdopen(myside, mode)) == NULL)
		close(myside);
	return(fp);
}

/*
 * pclose - close a stream opened by pop() and wait for its command.
 *
 * SIGINT, SIGQUIT and SIGHUP are ignored while waiting and put back
 * afterwards.  Returns the status stored by wait(), or -1 if wait()
 * failed before the command was seen.
 */
pclose(ptr)
FILE *ptr;
{
	register int fd, child;
	register int (*oldhup)(), (*oldint)(), (*oldquit)();
	int status;

	fd = fileno(ptr);
	fclose(ptr);

	oldint = signal(SIGINT, SIG_IGN);
	oldquit = signal(SIGQUIT, SIG_IGN);
	oldhup = signal(SIGHUP, SIG_IGN);

	/* collect children until ours turns up or there are none left */
	for(;;) {
		child = wait(&status);
		if(child == popen_pid[fd] || child == -1)
			break;
	}
	if(child == -1)
		status = -1;

	signal(SIGINT, oldint);
	signal(SIGQUIT, oldquit);
	signal(SIGHUP, oldhup);
	return(status);
}

/* fortune!olson addition */
/*
 * trapit - SIGINT handler: ask whether to quit or carry on.
 *
 * ESC ends the program through cleanup(); any other key re-arms the
 * handler and jumps back to the setjmp(env) point.
 */
trapit()
{
	int reply;

	/* no further interrupts while the question is pending */
	signal(SIGINT,SIG_IGN);
	fputs("\nType ESC to quit, RETURN to continue: ",stdout);
	fflush(stdout);

	reply = getchar();
	if(reply == '\33') {
		putchar('\n');
		cleanup(SIGINT);
	}

	signal(SIGINT,trapit);
	longjmp(env,1);
}

setuptty()
{
	struct tchars tmp;

	ioctl(0,TIOCGETC,&orig);
	tmp = orig;
	tmp.t_eofc = '\004';
	tmp.t_brkc = '\033';
	ioctl(0,TIOCSETC,&tmp);
}
		


/*
 * cleanup - put back the terminal characters saved by setuptty() and
 * exit with status val.  Does not return.
 */
cleanup(val)
int val;
{
	/* TIOCSETC takes the address of the structure, as in setuptty() */
	ioctl(0,TIOCSETC,&orig);
	exit(val);
}


/* @(#)qsort.c	4.1 (Berkeley) 12/21/80 */
/*  Modified from the above: it no longer takes a comparison function
	(references to the function are replaced with strcmp), and
	references to the two functions that copied "size" bytes to change
	the order are changed to in-line code.  It is thus now necessary to
	declare the arguments to the function correctly, instead of having
	everything declared as char pointers!  Dave Olson */


/*
 * ssort - sort an array of string pointers in place, ascending by
 * strcmp().
 *
 *	a	first element of the range
 *	l	one past the last element (the range is [a, l))
 *
 * Only the pointers are moved.  Elements equal to the pivot are gathered
 * into one run between lp and hp; the function then calls itself for one
 * remaining side and loops for the other.  No value is returned.
 */
ssort(a, l)
char **a, **l;
{
	register char **i, **j;		/* scan up from a, down from l-1 */
	register char **lp, **hp;	/* ends of the run equal to the pivot */
	int c;				/* latest strcmp() result */
	char *tmpptr;			/* scratch for exchanges */
	unsigned n;			/* element count, then pivot offset */

start:
	/* zero or one element: nothing to do */
	if((n=l-a) <= 1)
		return;
	/* pivot is the middle element, the count being rounded up if odd */
	if(n%2)
		n++;
	n /= 2;
	hp = lp = a+n;
	i = a;
	j = l-1;

	for(;;) {
		if(i < lp) {
			if((c = strcmp(*i, *lp)) == 0) {
				/* equal to the pivot: exchange it into the slot
				   just below the run, extending the run down */
				tmpptr = *i;
				*i = *--lp;
				*lp = tmpptr;
				continue;
			}
			if(c < 0) {
				/* smaller than the pivot: already on the low side */
				i++;
				continue;
			}
			/* otherwise *i is larger: go find a partner from the top */
		}

loop:
		if(j > hp) {
			if((c = strcmp(*hp, *j)) == 0) {
				/* equal to the pivot: exchange it into the slot
				   just above the run, extending the run up */
				tmpptr = *++hp;
				*hp = *j;
				*j = tmpptr;
				goto loop;
			}
			if(c > 0) {
				/* *j is smaller than the pivot */
				if(i == lp) {
					/* the low side has nothing to trade:
					   three-way exchange that moves the
					   run up one slot and puts *j below it */
					tmpptr = *i;
					*i = *j;
					*j = *++hp;
					*hp = tmpptr;
					i = ++lp;
					goto loop;
				}
				/* trade it for the larger element at i */
				tmpptr = *i;
				*i = *j;
				*j = tmpptr;
				i++;
				j--;
				continue;
			}
			/* *j is larger than the pivot: already on the high side */
			j--;
			goto loop;
		}

		if(i == lp) {
			/* both scans have reached the run: recurse on the side
			   that is not the larger one, loop on the other */
			if(lp-a >= l-hp) {
				ssort(hp+1, l);
				l = lp;
			}
			else {
				ssort(a, lp);
				a = hp+1;
			}
			goto start;
		}

		/* the high side has nothing to trade for the larger element
		   at i: three-way exchange that moves the run down one slot
		   and puts *i above it */
		tmpptr = *j;
		*j = *i;
		*i = *--lp;
		*lp = tmpptr;
		j = --hp;
	}
}
'graBITanSTACKit: Wed May 15 08:40:38 PDT 1985'