iannucci@sjuvax.UUCP (D. Iannucci) (04/22/86)
#!/bin/sh
# This is a shell archive.
# Feed to /bin/sh to create:
#	README
#	cgrep.c
#	cgrep.man
#
# The here-document delimiters are quoted so that the shell copies the
# archived text verbatim (no $, backquote or backslash processing).
echo "Extracting README"
cat > README <<'End-of-readme'
This program 'greps' out a set of contiguous lines from a file based on a
pattern lying within the lines, and directions indicating how much of the
context surrounding the pattern is desired. Context can be specified by a
line count, paragraph boundaries, or a second pattern to search for. The
same rules may be used to print backward and forward from the initial
pattern.

This is my first posting of source code and any constructive criticism
(but not flames) is more than welcome. Please report to me any further
bugs that you may find.

-- 
To iterate is human, to recurse, divine.
Dave Iannucci @ St. Joseph's University, Philadelphia [40 00' N 75 15' W]
{{ihnp4 | ucbvax}!allegra | {psuvax1}!burdvax | astrovax}!sjuvax!iannucci
End-of-readme
echo "Extracting cgrep.c"
cat > cgrep.c <<'End-of-cgrep'
/*
** Cgrep - a program to extract context from text files.
** by David J. Iannucci @ Saint Joseph's University, Philadelphia
** iannucci@sjuvax.UUCP
**
** Copyright (c) 1986
**
** You may do anything you like with this program except:
** 1. Use it to direct commercial advantage.
** 2. Take credit for any code I have written.
**
** Any changes made to this program must be documented so that I do not
** receive credit or blame for having made them.
**
** Changes made after the original posting (NOT by the original author):
**  - Added <stdlib.h>/<string.h> and ANSI prototypes; malloc() and the
**    string functions were previously used without a declaration.
**  - The pattern buffers are initialised and every copy into them is
**    length-checked; a missing pattern now prints the usage message.
**  - Invalid or negative line counts and unknown options print the
**    usage message instead of being treated as a count of zero.
**  - Look-back lines are freed when their ring slot is reused.
**  - Backward context is limited to the LOOKBACK-1 lines that are really
**    still held, so wrapped or duplicated lines are never printed.
**  - "-/pat" matching on the first line of a file is honoured, "-p"
**    inside the first paragraph prints from the top of the file, and the
**    backward-search state is reset for every input file.
**  - nmod() never returns m; match() no longer hands negative char
**    values to the <ctype.h> functions.
**
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define FALSE 0
#define TRUE 1

/* Kinds of rule that bound the context in either direction */
#define LINES 1 /* a fixed number of lines */
#define PARA 2  /* up to the paragraph boundary (blank line) */
#define PATN 3  /* up to a line containing a second pattern */

#define LOOKBACK 100 /* Number of lines of look-back */
#define MAXLINE 256 /* Maximum length of a line */

char *strsave(const char *s);
int found(const char *s, const char *t);
void output(int mailmode, const char *line);
int emptyline(const char *s);
int nmod(int a, int m);
int match(int c, int d);

char *progname;          /* argv[0], used in diagnostics */
int ignorecase=FALSE;    /* set by -i */
int patgotten=FALSE;     /* set once the main pattern has been read */
int filesread=0;         /* index (from 1) of the file being searched */
FILE *file;              /* current input; stdin when no files are named */

/* Print the usage message and exit with status 1. */
static void usage(void)
{
    fprintf(stderr,
        "Usage: %s [-mi] [-n -p -/pat] [+n +p +/pat] pattern [ files... ]\n",
        progname);
    exit(1);
}

/*
** Copy a pattern given on the command line into dst, which must have
** room for MAXLINE bytes.  A pattern that does not fit is rejected: it
** could not match anyway, because input lines are shorter than MAXLINE.
*/
static void savepat(char *dst, const char *src)
{
    if ( strlen(src) >= MAXLINE ) {
        fprintf(stderr, "%s: pattern too long (limit is %d characters)\n",
            progname, MAXLINE-1);
        exit(1);
    }
    strcpy(dst, src);
}

/*
** Parse the line count of a "-n" / "+n" option; s points just past the
** sign.  Anything that is not a non-negative number is a usage error.
*/
static int getcount(const char *s)
{
    int n = 0;

    if ( sscanf(s, "%d", &n) != 1 || n < 0 )
        usage();
    return n;
}

/*
** Parse the options and the pattern, then search each named file (or the
** standard input) for the first line containing the pattern and print it
** together with the requested context.
**
** Exit status: 0 after printing a match, 1 on a usage error, an
** unreadable file or when the pattern is not found, 2 when out of memory.
*/
int main(int argc, char *argv[])
{
    char *buf[LOOKBACK];  /* ring of the most recent lines of this file */
    char aline[MAXLINE+1];
    char pat[MAXLINE], patn_f[MAXLINE], patn_b[MAXLINE];
    int lines_f=0, lines_b=0, schback=0, schforw=0;
    int mmode=FALSE;
    int i, j;
    int cur;        /* index (from 0) of the line just read */
    int back;       /* number of lines to print before the match */
    int parastart;  /* index of the first line of the current paragraph */
    int patstart;   /* index of the latest line containing patn_b, or -1 */

    progname = *argv;
    file = stdin;
    pat[0] = patn_f[0] = patn_b[0] = '\0';
    for ( i = 0; i < LOOKBACK; i++ )
        buf[i] = NULL;

    if ( argc < 2 )
        usage();

    while ( --argc > 0 ) { /* Parse the command line */
        char *arg = argv[1];

        switch ( arg[0] ) {
        case '+':
            if ( arg[1] == '\0' ) {
                lines_f = 1;
                schforw = LINES;
            }
            else switch ( arg[1] ) {
                case 'p': schforw = PARA; break;
                case '/': savepat(patn_f, arg+2);
                          schforw = PATN; break;
                default:  lines_f = getcount(arg+1);
                          schforw = LINES; break;
            }
            break;
        case '-':
            if ( arg[1] == '\0' ) {
                lines_b = 1;
                schback = LINES;
            }
            else switch ( arg[1] ) {
                case 'm': mmode = TRUE; break;
                case 'p': schback = PARA; break;
                case 'i': ignorecase = TRUE; break;
                case '/': savepat(patn_b, arg+2);
                          schback = PATN; break;
                default:  lines_b = getcount(arg+1);
                          schback = LINES; break;
            }
            break;
        default:
            /* First argument that is not an option: the pattern.  The
            ** extra decrement leaves argc equal to the number of file
            ** names, which follow at argv[1..argc] after the ++argv. */
            savepat(pat, arg);
            patgotten = TRUE;
            --argc;
            break;
        }
        ++argv;
        if ( patgotten )
            break;
    } /* end of command line parsing */

    if ( !patgotten )
        usage();

    do {
        if ( argc-- != 0 ) {
            ++filesread;
            if ( (file = fopen(argv[filesread], "r")) == NULL ) {
                fprintf(stderr, "%s: cannot open %s\n",
                    progname, argv[filesread]);
                exit(1);
            }
        }
        /* Backward-search state is per file. */
        cur = -1;
        parastart = 0;
        patstart = -1;

        while ( fgets(aline, MAXLINE, file) != NULL ) {
            ++cur;
            free(buf[cur % LOOKBACK]);  /* drop the line that falls out */
            buf[cur % LOOKBACK] = strsave(aline);

            if ( schback == PARA && emptyline(aline) )
                parastart = cur + 1;
            if ( schback == PATN && found(aline, patn_b) )
                patstart = cur;

            if ( !found(aline, pat) )
                continue;

            if ( filesread > 1 )
                printf("<%s>\n", argv[filesread]);

            if ( schback == PARA )
                back = cur - parastart;
            else if ( schback == PATN )
                /* no line contained patn_b: ignore the backward rule */
                back = (patstart < 0) ? 0 : cur - patstart;
            else
                back = lines_b;

            if ( back < 0 )           /* the matching line is itself blank */
                back = 0;
            if ( back > cur )         /* cannot start before the first line */
                back = cur;
            if ( back > LOOKBACK-1 )  /* the ring also holds the match */
                back = LOOKBACK-1;

            for ( j = cur - back; j <= cur; j++ )
                output(mmode, buf[nmod(j, LOOKBACK)]);

            j = 0;
            if ( schforw )
                while ( fgets(aline, MAXLINE, file) != NULL ) {
                    if ( (schforw == LINES && ++j > lines_f) ||
                         (schforw == PARA && emptyline(aline)) )
                        break;
                    output(mmode, aline);
                    /* the line that ends a +/pat search is printed too */
                    if ( schforw == PATN && found(aline, patn_f) )
                        break;
                }
            exit(0);  /* only the first occurrence is ever reported */
        }
        if ( file != stdin )
            fclose(file);
    } while ( argc > 0 );

    fprintf(stderr, "%s: %s not found.\n", progname, pat);
    return 1;
}

/*
** Return TRUE if string t occurs somewhere in string s, else FALSE.
** Characters are compared with match(), so -i is honoured.  An empty t
** is found in any non-empty s.
** (Almost the same as K&R's "index" function.)
*/
int found(const char *s, const char *t)
{
    int i, j, k;

    for ( i = 0; s[i] != '\0'; i++ ) {
        for ( j = i, k = 0; t[k] != '\0' && match(s[j], t[k]); j++, k++ )
            ;
        if ( t[k] == '\0' )
            return(TRUE);
    }
    return(FALSE);
}

/*
** Print one line on the standard output.  In mail mode the leading
** blanks and tabs are removed and "> " is put in front of the line.
*/
void output(int mailmode, const char *line)
{
    if ( mailmode ) {
        while ( *line == ' ' || *line == '\t' )
            line++;
        printf("> %s", line);
    }
    else
        printf("%s", line);
}

/* Return TRUE if s holds nothing but blanks, tabs and newlines. */
int emptyline(const char *s)
{
    while ( *s ) {
        if ( *s != ' ' && *s != '\t' && *s != '\n' )
            return(FALSE);
        ++s;
    }
    return(TRUE);
}

/*
** Return a modulo m as a value in the range 0..m-1, also for negative a
** (the result of the plain % operator may be negative).  m must be > 0.
*/
int nmod(int a, int m)
{
    int r = a % m;

    return ( r < 0 ) ? r + m : r;
}

/*
** Return non-zero if characters c and d are equal; when ignorecase is
** set, letters that differ only in case are equal as well.
*/
int match(int c, int d)
{
    if ( c == d )
        return(TRUE);
    if ( !ignorecase )
        return(FALSE);
    /* <ctype.h> functions require an unsigned char value (or EOF) */
    return(tolower((unsigned char) c) == tolower((unsigned char) d));
}

/*
** Return a freshly malloc'ed copy of string s; the caller frees it.
** Prints a message and exits with status 2 if memory runs out.
*/
char *strsave(const char *s)
{
    char *p;

    if ( (p = malloc(strlen(s)+1)) == NULL ) {
        fprintf(stderr, "%s: Malloc failed in %s:%d\n",
            progname, __FILE__, __LINE__);
        exit(2);
    }
    strcpy(p, s);
    return p;
}
End-of-cgrep
echo "Extracting cgrep.man"
cat > cgrep.man <<'End-of-man'
.TH CGREP 1 "18 April 1986"
.UC 4
.SH NAME
cgrep \- search a file for a piece of context
.SH SYNOPSIS
.B cgrep
[-mi] [-n -p -/pat] [+n +p +/pat] pattern [ files... ]
.LP
.SH DESCRIPTION
.I Cgrep
takes a chunk of context out of a text file based on a pattern that
lies inside the context, and rules for specifying the amount of context.
If no rules are specified, then
.I cgrep
works just like grep except that it finds only the first occurrence of
"pattern". A backward searching rule need not be matched by a forward
searching rule, and vice versa, however, no more than one rule
"per direction" should be used.
.I Cgrep
does not recognize the grep family metacharacters. It matches exactly
what it is given. The default input is the standard input. When more
than one file is searched, the name of the file in which the context is
found is enclosed in <> at the top of the text.

The following options are recognized:
.TP
.B \-n
Begin printing
.I n
lines before the first occurrence of "pattern". If no
.I n
is specified, default is one.
.TP
.B \-p
Begin printing at the beginning of the paragraph containing the first
occurrence of "pattern". (assuming that paragraphs are separated by a
blank line)
.TP
.B \-/pat
Begin printing at the line containing "pat" nearest before the first
occurrence of "pattern".
.TP
.B +n
Stop printing
.I n
lines after the first occurrence of "pattern". If no
.I n
is specified, default is one.
.TP
.B \+p
Stop printing at the end of the paragraph containing the first
occurrence of "pattern". (i.e. the next blank line)
.TP
.B +/pat
Stop printing at the first line past "pattern" which contains "pat".
.TP
.B \-i
Ignore case.
.TP
.B \-m
Mail mode. Delete all blank space at the beginning of the lines to be
printed and insert "> ". This may be useful from within the editor in
replying to mail or news.
.TP
.SH
.SH EXAMPLE
$ cgrep -2 +/john harry file1

searches file1, and prints starting 2 lines before the first occurrence
of "harry" and ending when it finds a line containing "john"

$ cgrep -p +p george file2

searches file2 and prints the paragraph containing "george"
.SH BUGS
When using both -m and -i, use separate flags.

Results are unpredictable if multiple rules for searching the same
direction are given, though it will probably use the latter.

If the pattern is found in the first of multiple files, the filename is
not printed at the top.

If a backward pattern search is done on a non-existent pattern, the
backward search is ignored.

Line length is limited to 256 characters.

At most 99 lines before the matching line can be printed by a backward
rule.

Send all comments, etc to iannucci@sjuvax.UUCP
End-of-man
echo "Done"