[net.sources] cgrep - a context grepping tool

iannucci@sjuvax.UUCP (D. Iannucci) (04/22/86)

#!/bin/sh
# This is a shell archive.
# Feed to /bin/sh to create:
#	README
#	cgrep.c
#	cgrep.man
#
echo "Extracting README"
# The delimiter is quoted so the shell copies the text verbatim instead of
# performing parameter, command and backslash expansion on it.
cat > README <<'End-of-readme'

	      This program 'greps' out a set of contiguous lines from a file
based on a pattern lying within the lines, and directions indicating how much
of the context surrounding the pattern is desired.  Context can be specified
by a line count, paragraph boundaries, or a second pattern to search for.
The same rules may be used to print backward and forward from the initial
pattern.  This is my first posting of source code and any constructive
criticism (but not flames) is more than welcome.  Please report to me any
further bugs that you may find.

--
To iterate is human, to recurse, divine.

Dave Iannucci @ St. Joseph's University, Philadelphia [40 00' N 75 15' W]
{{ihnp4 | ucbvax}!allegra | {psuvax1}!burdvax | astrovax}!sjuvax!iannucci

End-of-readme
echo "Extracting cgrep.c"
# The delimiter is quoted so the C source is copied verbatim: with an
# unquoted delimiter the shell would expand dollar signs, backquotes and
# doubled backslashes found in the program text.
cat > cgrep.c <<'End-of-cgrep'
/*
**   Cgrep - a program to extract context from text files.
**	by David J. Iannucci @ Saint Joseph's University, Philadelphia
**						  iannucci@sjuvax.UUCP
**
**			Copyright (c) 1986
**
**	You may do anything you like with this program except:
**		1.  Use it to direct commercial advantage.
**		2.  Take credit for any code I have written.
**
**	Any changes made to this program must be documented so that I do not
**	receive credit or blame for having made them.
**
**
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Truth values returned by found() and emptyline() and stored in the flags. */
#define	FALSE	0
#define	TRUE	1
/*
** Context rules.  main() stores one of these in schback and in schforw;
** both start at 0, which means that no rule was given for that direction.
**	LINES	a line count		(-n, +n)
**	PARA	a paragraph boundary	(-p, +p)
**	PATN	a second pattern	(-/pat, +/pat)
*/
#define	LINES	1
#define	PARA	2
#define	PATN	3
#define LOOKBACK	100	/* Number of lines of look-back */
/* MAXLINE is the size handed to fgets() and the size of the pattern buffers */
#define MAXLINE		256 	/* Maximum length of a line */

/*
** strsave is declared here because main() calls it before its definition.
** progname is set from argv[0] in main() and prefixes every diagnostic.
*/
char *strsave(), *progname;
int ignorecase=FALSE;	/* set by -i; makes match() compare letters caselessly */
int patgotten=FALSE;	/* set once the search pattern has been taken from argv */
int filesread=0;	/* number of input files opened so far */
FILE *file;		/* current input stream; main() starts it at stdin */

main(argc, argv)
int argc;
char *argv[];
{

	char *buf[LOOKBACK], aline[MAXLINE+1];
	char pat[MAXLINE], patn_f[MAXLINE], patn_b[MAXLINE];
	int lines_f=0, lines_b=0, schback=0, schforw=0;
	int i=0, j=0, readsofar=0, lastone=(-1), mmode=FALSE;

	progname = *argv;
	file = stdin;

if ( argc < 2 )
{
    fprintf(stderr, 

"Usage: %s [-mi] [-n -p -/pat] [+n +p +/pat] pattern [ files... ]\n",

						progname);
						exit(1);
}

	while ( --argc > 0 )	{	/* Parse the command line */
		switch(argv[1][0])	{
			case '+': if ( strlen(argv[1]) == 1 )	{
					lines_f=1;
					schforw = LINES;
				  }
				  else switch (argv[1][1])   {
					  case 'p': schforw = PARA;
						    break;
					  case '/':
						strcpy(patn_f, argv[1]+2);
						schforw = PATN;
						break;
					  default:
						sscanf(argv[1],"+%d",&lines_f);
						schforw = LINES;
						break;
					}
				   break;

			case '-':  if ( strlen(argv[1]) == 1 )	{
					lines_b=1;
					schback = LINES;
				   }
				   else switch (argv[1][1])    {
					   case 'm': mmode=TRUE; break;
					   case 'p': schback = PARA; break;
					   case 'i': ignorecase=TRUE;break;
					   case '/': 
						strcpy(patn_b, argv[1]+2);
						schback = PATN;
						break;
					   default: 
					      sscanf(argv[1], "-%d", &lines_b);
					      schback = LINES;
					      lines_b=((lines_b > LOOKBACK) ?
							LOOKBACK : lines_b);
					      break;
					}
				        break;

			default:  if ( !strcmp(pat, "") )	{
					strcpy(pat, argv[1]);
					patgotten=TRUE;
					--argc;
				  }
				  break;
		}
	++argv;
	if ( patgotten )
		break;
	}

	/* end of command line parsing */

      do
      {
	if ( argc-- != 0 )
	{
		i=0;
		readsofar=0;
		++filesread;
		if ((file=fopen(argv[filesread], "r")) == NULL )
		{
		fprintf(stderr, "%s: cannot open %s\n", progname,
							     argv[filesread]);
		exit(1);
		}
	}

           while  ( fgets(aline, MAXLINE, file) != NULL )
	   {
		buf[i++%LOOKBACK] = strsave(aline);
		readsofar++;
		if ( schback == PARA )
			   if ( emptyline(aline) )
				   lastone = i-1;
		if ( schback == PATN )
			   if ( found(aline, patn_b) )
				   lastone = i-2;
		if ( found(aline, pat) )		{
			if ( filesread > 1 )
				printf("<%s>\n", argv[filesread]);
			if ( schback == PARA || schback == PATN )
				lines_b=((lastone == -1) ? 0 : i-2-lastone);
			lines_b=(( lines_b >= readsofar) ? i-1 : lines_b);
			for (j=(--i-lines_b); j <= i; j++)
				output(mmode, buf[nmod(j, LOOKBACK)]);
			j=0;
			if ( schforw )
			   while ( fgets(aline, MAXLINE, file) != NULL )  {
				if ( ( (schforw==LINES) && ++j > lines_f ) || 
				     ( (schforw==PARA) && emptyline(aline) ) )
					exit(0);
				output(mmode, aline);
				if ( schforw==PATN )
					if ( found(aline, patn_f) )
						exit(0);
			   }
			   exit(0);
		}
           }
	   if ( file != stdin )
	   	fclose(file);

      } while ( argc > 0 );

      fprintf(stderr, "%s: %s not found.\n", progname, pat);
      exit(1);

}

/*
** found - report whether string t occurs anywhere inside string s.
** (Almost the same as K&R's "index" function.)
** Characters are compared with match(), so the ignorecase flag applies.
** Every starting position in s is tried in turn; TRUE is returned at the
** first position where all of t matches, FALSE if there is none.  An
** empty s has no starting position and therefore never matches.
*/
int found(s, t)
char *s, *t;
{
	char *from, *sp, *tp;

	for (from = s; *from != '\0'; from++)	{
		sp = from;
		tp = t;
		while ( *tp != '\0' && match(*sp, *tp) )	{
			sp++;
			tp++;
		}
		if ( *tp == '\0' )
			return(TRUE);
	}
	return(FALSE);
}

/*
** output - write one line to the standard output.
** In mail mode the leading blanks and tabs are skipped and the rest of
** the line is written after a "> " prefix; otherwise the line is written
** exactly as given.  No newline is added in either case.
*/
int output(mailmode, line)
int mailmode;
char *line;
{
	char *text = line;

	if ( mailmode )
		for ( ; *text == ' ' || *text == '\t'; text++)
			;

	if ( mailmode )
		printf("> %s", text);
	else
		printf("%s", text);
}

/*
** emptyline - TRUE if s holds nothing but blanks, tabs and newlines
** (which includes the empty string), FALSE as soon as any other
** character is seen.
*/
int emptyline(s)
char *s;
{
	char *p;

	for (p = s; *p != '\0'; p++)
		switch (*p)	{
		case ' ':
		case '\t':
		case '\n':
			continue;	/* white space: look at the next one */
		default:
			return(FALSE);
		}
	return(TRUE);

}

/*
** nmod - a modulo m, folded into the range 0 .. m-1 for positive m.
** The remainder operator may give a negative result for a negative a,
** so such a result is moved up by m.  A negative multiple of m gives 0,
** which keeps the value usable as an index into an array of m elements.
*/
int nmod(a, m)
int a, m;
{
	int r;

	r = a % m;
	if ( r < 0 )
		r += m;
	return(r);
}

/*
** match - compare two characters, returning non-zero if they are equal.
** With the ignorecase flag set, an upper case letter also equals its
** lower case form and vice versa.
** The case tests are made on the values converted to unsigned char: the
** ctype functions are not defined for the negative values that a plain
** char with the high bit set can produce.
*/
int match(c, d)
int c, d;
{
	int uc, ud;

	if ( !ignorecase )
		return(c==d);

	uc = (unsigned char) c;
	ud = (unsigned char) d;
	if ( isupper(uc) && islower(ud) )
		return(uc==toupper(ud));
	if ( islower(uc) && isupper(ud) )
		return(uc==tolower(ud));
	return(c==d);
}

/*
** strsave - return a freshly allocated copy of the string s.
** If no memory can be had, a message naming the program, this source
** file and the line of the report is written to stderr and the program
** exits with status 2, so the function never returns a null pointer.
*/
char *strsave(s)
char *s;
{
	char *copy;

	copy = (char *) malloc(strlen(s) + 1);	/* room for the final NUL */
	if ( copy == (char *)0 )
	{
		fprintf(stderr,
		"%s: Malloc failed in %s:%d\n", progname,__FILE__, __LINE__);
		exit(2);
	}
	strcpy(copy, s);
	return copy;
}
End-of-cgrep
echo "Extracting cgrep.man"
# The delimiter is quoted so the manual page is copied verbatim; it contains
# dollar signs and backslash escapes that the shell must not interpret.
cat > cgrep.man <<'End-of-man'
.TH CGREP 1 "18 April 1986"
.UC 4
.SH NAME
cgrep \- search a file for a piece of context
.SH SYNOPSIS
.B cgrep
[-mi] [-n -p -/pat] [+n +p +/pat] pattern [ files... ]
.LP
.SH DESCRIPTION
.I Cgrep
takes a chunk of context out of a text file based on a pattern that lies
inside the context, and rules for specifying the amount of context. If no
rules are specified, then
.I cgrep 
works just like grep
except that it finds only the first occurrence of "pattern".  A backward
searching rule need not be matched by a forward searching rule, and vice
versa; however, no more than one rule "per direction" should be used.
.I Cgrep
does not recognize the grep family metacharacters.  It matches exactly what
it is given. The default input is the standard input.

When more than one file is searched, the name of the file in which the
context is found is enclosed in <> at the top of the text.

The following options are recognized:
.TP
.B \-n
Begin printing 
.I n
lines before the first occurrence of "pattern". If no
.I n
is specified, default is one.
.TP
.B \-p
Begin printing at the beginning of the paragraph 
containing the first occurrence of "pattern".  (assuming
that paragraphs are separated by a blank line)
.TP
.B \-/pat
Begin printing at the line containing "pat" nearest 
before the first occurrence of "pattern".
.TP
.B +n
Stop printing 
.I n
lines after the first occurrence of "pattern".  If no
.I n
is specified, default is one.
.TP
.B +p
Stop printing at the end of the paragraph containing the
first occurrence of "pattern".  (i.e. the next blank line)
.TP
.B +/pat
Stop printing at the first line past "pattern" which
contains "pat".
.TP
.B \-i
Ignore case.
.TP
.B \-m
Mail mode.  Delete all blank space at the beginning of the lines to be
printed and insert "> ".  This may be useful from within the editor
in replying to mail or news.
.SH EXAMPLE
$ cgrep -2 +/john harry file1

searches file1, and prints starting 2 lines before the first occurrence of
"harry" and ending when it finds a line containing "john"

$ cgrep -p +p george file2

searches file2 and prints the paragraph containing "george"
.SH BUGS
When using both -m and -i, use separate flags.

Results are unpredictable if more than one rule is given for the same search
direction, though the last one given will probably be used.

If the pattern is found in the first of multiple files, the filename is
not printed at the top.

If a backward pattern search is done on a non-existent pattern, the backward
search is ignored.

Line length is limited to 256 characters.

Send all comments, etc to iannucci@sjuvax.UUCP
End-of-man
echo "Done"