[net.sources] slice utility

garyp@cognos.UUCP (Gary Puckering) (12/17/86)

Slice splits up a file into lots of little files.  It reads its input a
line at a time, and starts a new output file when

*	the input line matches a pattern, or
*	there have been n lines written to the current output file.

You can use it to split a mailbox or an archive of news articles into
one article per file, for example.  In fact, you can do this with about
5 lines of awk, but you run into problems with long lines (and speed,
if it bothers you!).

Slice was originally contributed by Russell Quin as the program
"mailsplit".  Unlike mailsplit, however, slice allows multiple output
formats to be specified (rather than multiple input files).  This makes
it possible to deposit the pieces (slices!) into files named whatever
you want.  For example:

     slice <article -x '^--* [Cc]ut' README article.sh

will deposit everything up to the cut line into README and everything
after it into article.sh (the -x option causes the matched line to be
excluded).

There are even options to make slicing mailboxes and files containing
shell scripts easier (-m and -s).

There are some good examples in the man page.

Source, Makefile and manual entry enclosed.  To install, do the
following:

1:	Edit the Makefile: you'll need to alter the "R=/usr/local" if 
	you don't want slice to live in /usr/local/usr/bin.

2:	make slice

3:	have a play with it & satisfy yourself that it behaves reasonably

4:	make install

Make "install" will do a "$(MAKE) $(CLEAN)" afterwards.  If you don't 
want to remove the binary, say

        CLEAN="" make install

at step 4.


--------------------- cut here ----------------------------------------
#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	Makefile
#	opts.h
#	slice.c
# This archive created: Tue Dec 16 17:04:43 1986
export PATH; PATH=/bin:$PATH
echo shar: extracting "'Makefile'" '(1221 characters)'
if test -f 'Makefile'
then
	echo shar: over-writing existing file "'Makefile'"
fi
sed 's/^X//' << \SHAR_EOF > 'Makefile'
X# Makefile for slice
X#
X# Originally contributed as mailsplit, written by:
X#   R E Quin, October 1986 University of Warwick (UK) Computer Science
X#   warwick!req     +44 203 523193
X#
X# Modified and recontributed by:
X#   Gary Puckering        3755 Riverside Dr.
X#   Cognos Incorporated   Ottawa, Ontario
X#   (613) 738-1440        CANADA  K1G 3N3
X#
X# This makefile is intended for the sys5 Augmented make.
X# 
XMAKE=make
XCLEAN=clean
XCC=cc
XHACKS=
XCFLAGS=-O $(HACKS)
X# R is the root of the filesystem -- i.e. where to install things.
X# The binary is installed in $(DESTDIR), the manual page in $(MANDIR).
X# Keep the values free of trailing blanks: some versions of make treat
X# them as part of the value.
XR=/usr/local
XDESTDIR=$R/usr/bin
XMANDIR=$R/usr/man/man1
XPROG=slice
X
X# PROG is what to make; DESTDIR is where to put it.
X# HACKS are for -DBUGFIX style things.
X
X# R is intended to be the root of the filesystem if it isn't "/"
X
X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say
X# CLEAN=  make -e install
X# if you don't want to remove the garbage at the end, for example.
X# This is useful primarily for testing the install: entry!
X
Xall: $(PROG)
X
Xslice: slice.o
X	$(CC) -o $(PROG) slice.o
X
X# slice.c includes opts.h, so the object is out of date when it changes.
Xslice.o: slice.c opts.h
X
Xinstall: slice
X	/bin/mv $(PROG) $(DESTDIR)
X	/bin/cp slice.1 $(MANDIR)
X	$(MAKE) $(CLEAN)
X
Xclean:
X	rm -rf core *.o $(PROG) a.out
SHAR_EOF
if test 1221 -ne "`wc -c 'Makefile'`"
then
	echo shar: error transmitting "'Makefile'" '(should have been 1221 characters)'
fi
echo shar: extracting "'opts.h'" '(769 characters)'
if test -f 'opts.h'
then
	echo shar: over-writing existing file "'opts.h'"
fi
sed 's/^X//' << \SHAR_EOF > 'opts.h'
X
X#define FALSE 0
X#define TRUE 1
Xtypedef int bool;
X
X#define EXIT_SYNTAX 1	/* syntax error parsing commandline options */
X#define EXIT_SEMANT 2	/* options are correct but meaningless */
X#define EXIT_RUNERR 3	/* error opening a file, for example */
X#define EXIT_INTERN 4	/* internal error -- bug!! */
X
X/*
X * nextstr -- fetch the string that belongs to a flag.
X * On entry array[0] points at the flag letter.  If text follows the
X * letter in the same argument, s is set to that text; otherwise s is
X * set to the next argument, which is consumed (count is decremented
X * and array advanced).  If there is neither, `failure' is executed.
X */
X#define nextstr(s,count,array,failure)	\
X	{if (((count)<2) && !((array)[0][1])) {failure;}\
X	else {if ((array)[0][1]) { s = &((array)[0][1]); } \
X	      else {s = array[1]; --count; array++;}}}
X
X#define DFLTNAME "slice"	/* input filename (for stdin) */
X#define BUFLEN BUFSIZ	/* the maximum length of an input line (incl. "\n\0") */
X#define MAXFILENAMELEN BUFSIZ	/* longer than the longest possible file name */
X/*
X * o/p file name format.  This must be "%03d", not "%03.d": an empty
X * precision means precision 0, which cancels the 0 flag (blank padding)
X * and prints no digits at all for the number 0.
X */
X#define DFLTOUTNAME	"%s:%03d"
X
SHAR_EOF
if test 769 -ne "`wc -c 'opts.h'`"
then
	echo shar: error transmitting "'opts.h'" '(should have been 769 characters)'
fi
echo shar: extracting "'slice.c'" '(8964 characters)'
if test -f 'slice.c'
then
	echo shar: over-writing existing file "'slice.c'"
fi
sed 's/^X//' << \SHAR_EOF > 'slice.c'
X/* slice -- split files at lines that match a pattern */
X#include <stdio.h>
X#include <ctype.h>
X
X#include "opts.h"				/* defines nextstr() etc */
X
Xchar *progname = "slice";		/* for error messages */
Xchar *pattern = (char *) NULL;	/* reg expr used to split file */
Xchar **format;					/* ptr for format strings */
Xint  n_format;					/* number of format strings */
Xchar *defaultfmt[] = {DFLTOUTNAME};	/* default format string */
Xint  filenumber = 0;
Xint  every_n_lines = 0;			/* split every n lines */
Xbool exclude = FALSE;			/* exclude matched line from o/p files */
Xbool split_after = FALSE;		/* split after matched line */
X
Xusage(status)
X     int status;	/* exit if status != 0 */
X{
X     fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i<n>] [-m|-s|-n<n>] [-e expression | expression] [format...]\n", progname);
X     if (status)
X	  exit(status);
X}
X
Xmain(argc, argv)
X     char *argv[];
X{
X     /* split files at points that match a given pattern */
X     /* initialise things */
X     bool donefiles = FALSE;
X     char *buffer;
X	 char *infile = (char *) NULL;
X
X     int getnum();		/* does more checking than atoi */
X     char *rmpath();    /* removes leading pathname from a filename */
X
X     /* now remove possible leading pathname
X      * (e.g. /usr/bin/slice is to report it's errors as slice
X      */
X     progname = rmpath(argv[0]);
X
X
X	while (--argc) {
X	  if (**++argv == '-') {
X		switch(*++*argv) {
X			case 'a': {				/* split after pattern */
X				split_after = TRUE;
X				break;
X			}
X			case 'e': {				/* pattern (expression) */
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Pattern after -e missing or null\n");
X					usage(1);
X				}
X				pattern = *argv;
X				break;
X			}
X			case 'm': {				/* mailbox pattern */
X				pattern = "^From ";
X				break; 
X			}
X			case 's': {				/* shell pattern */
X				pattern = "^#! *\/bin\/sh";
X				break; 
X			}
X			case 'n': {				/* -n n_lines -- split every n lines */
X				nextstr(buffer,argc,argv,usage(2));
X				every_n_lines = getnum(buffer);
X				if (every_n_lines <= 0) {
X					error("-n: number must be at least 1\n");
X					exit(EXIT_SYNTAX);
X				}
X				break;
X			} 
X			case 'f': {
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Filename after -f missing or null\n");
X					usage(1);
X				}
X				infile = *argv;
X				break;
X			}				
X		    case 'i': {	/* -i initial_number */
X				nextstr(buffer,argc,argv,usage(2));
X				filenumber = getnum(buffer);
X				if (filenumber < 0) {
X			    	error("-i must be followed by a positive number\n");
X				    exit(EXIT_SYNTAX);
X				 }
X				filenumber--;	/* needs to be one less to start with */
X				break;
X		    }
X			case 'x': { /* exclude matched lines */
X				exclude = TRUE;
X				break;
X			}
X		    default: {
X				error("Unknown flag -%c\n", **argv);
X				usage(1);
X		    }
X		}			/* end switch */
X	  } else {	
X		if (!pattern) pattern = *argv;	/* first non-flag is pattern */
X		else break;						/* break while loop */
X	  }			/* end if */
X     }		/* end while */
X
X	 if (!argc) {
X		format = defaultfmt;
X		n_format = 1; }
X	 else {
X		format = argv;
X		n_format = argc;
X	 }
X
X#ifdef DEBUG
X	printf("argc=%d\n",argc);
X	printf("format='%s'\n",*format);
X	printf("pattern='%s'\n",pattern);
X#endif
X
X	 if (!infile) split(stdin, DFLTNAME, pattern);
X	 else        fsplit(infile, pattern);
X
X     exit(0);
X}
X
Xfsplit(name, pat)
X     char *name;
X     char *pat;
X{
X     FILE *fd;
X
X     if (!name || !*name) {
X	  error("Can't split a file with an empty name\n");
X	  usage(2);
X     }
X
X     if ( (fd = fopen(name, "r")) == NULL) {
X	  error("Can't open %s\n", name);
X	  return;
X     }
X
X     (void) split(fd, name, pat);
X
X     if (fclose(fd) == EOF) {	/* something's gone wrong */
X	  error("Can't close %s -- giving up\n", name);
X	  exit(EXIT_RUNERR);
X     }
X}
X
Xchar buffer[BUFLEN];
X
Xint
Xsplit(input, name, pattern)
X     FILE *input;		/* fd of input file */
X     char *name;		/* input filename */
X     char *pattern;		/* pattern used to split file */
X{
X     /* do the real work here. Oh dear, I don't know how... */
X     /* we are always called with an open file. */
X
X     extern char *re_comp();     /* compile string into automaton */
X     extern int   re_exec();     /* try to match string */
X#define REMATCH 1
X#define RENOMATCH 0
X#define REFAULT -1
X
X     char *errmessage;
X     FILE *output = NULL;
X     char fnambuf[MAXFILENAMELEN + 2];  /* +1 for null, +1 for overflow */
X     int reg_status = 0;				/* regular expression status */
X     int line = 0;
X
X	 if (split_after && exclude) {
X	  error("Can't specify both -a and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && exclude) {
X	  error("Can't specify both -n and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && split_after) {
X	  error("Can't specify both -n and -a\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && pattern) {
X	  error("Can't specify both -n and pattern\n");
X	  usage(2);
X	 }
X
X     if (!every_n_lines && (!pattern || !*pattern)) {
X	  error("Can't match an empty pattern\n");
X	  usage(2);
X     }
X
X     if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) {
X	  error("Error in pattern <%s>: %s\n", pattern, errmessage);
X	  exit(EXIT_RUNERR);
X     }
X     /* errmessage is NULL here */
X
X     /* the -2 to fgets is because of the null and \n appended */
X     while (fgets(buffer, BUFLEN - 2, input) != NULL) {
X	  if (!output ||	/* first line */
X	     (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */
X	     (!every_n_lines &&
X	     ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */
X	       /* don't look at 1st line of file, to avoid an infinite */
X	       /* recursion... */
X
X			if (output && split_after) {
X				fputs(buffer, output);
X			}
X
X			if (n_format && mkname(fnambuf, name)) {;
X				/* check for output file = input file */
X				if (strcmp(fnambuf,name)==0) {
X					error("Output file same as input file\n");
X					exit(EXIT_RUNERR);
X				}
X				/* start a new file */
X				if (output && output != stdout) {
X					if (fclose(output) == EOF) {
X						error("Can't close output file\n");
X						exit(EXIT_RUNERR);
X					}
X					output = NULL;
X				}
X				line = 0;
X				if (fnambuf[0]=='+' && fnambuf[1]==NULL) {
X					output = stdout;
X				} else {
X					if ((output = fopen(fnambuf, "a")) == NULL) {
X						error("Can't open output file %s\n", fnambuf);
X						exit(EXIT_RUNERR);
X					}
X				}
X				/* if matched lines are excluded, skip the fputs */
X				if (exclude && reg_status == REMATCH) continue;
X
X				/* if file is to be split after pattern, put already done */
X				if (split_after && reg_status == REMATCH) continue;
X			} else {
X				error("Insufficient formats -- last file contains remainder\n");
X				}
X	  } else if (reg_status == REFAULT) {
X	       /* the re_exec failed */
X	       error("Internal error trying to match <%s> to <%s>\n",
X			      pattern, buffer);
X	       exit(EXIT_INTERN);
X	  }
X	  fputs(buffer, output);
X      }
X      return (filenumber == -1);	/* exit status for main */
X}
X
Xbool
Xmkname(fnambuf, name)
X	 char *fnambuf;
X	 char *name;
X{
X     int i, s = -1, d = -1;
X	 static bool new_format = TRUE;
X	 static bool perpetual = FALSE;
X	 static bool d_before_s = FALSE;
X
X	 if (new_format) {
X		 if (!n_format) {
X			error("Internal error: mkname called but formats have run out\n");
X			exit(EXIT_INTERN);
X		 }
X	     i = bfsearch(*format, "%",0);
X	     s = bfsearch(*format, "%s",0);
X	     if (i>=0 && i==s) d = bfsearch(*format, "%",++i);
X	     else 			   d = i;
X		 if (d<0) perpetual = FALSE;
X		 else     perpetual = TRUE;
X		 if (d<s || s<0) d_before_s = TRUE;
X		 else            d_before_s = FALSE;
X		 new_format = FALSE;
X	 }
X
X	 if (perpetual) ++filenumber;
X
X     if (d_before_s)
X          sprintf(fnambuf, *format, filenumber, rmpath(name));
X     else 
X          sprintf(fnambuf, *format, rmpath(name), filenumber);
X      
X	 if (!perpetual) {
X		new_format = TRUE;
X		--n_format;
X		if (n_format) {
X			++format; 
X			filenumber=0;
X		}
X	}
X}
X
Xerror(fmt, a1, a2, a3, a4)
X     char *fmt;
X{
X     fputs(progname, stderr);
X     fputs(": ", stderr);
X     fprintf(stderr, fmt, a1, a2, a3, a4);
X}
X
X/* getnum(s) returns the value of the unsigned decimal number in s.
X * An empty (or NULL) string, or one containing anything other than the
X * digits 0-9 -- a sign, blanks, trailing garbage -- yields -1.
X */
Xint
Xgetnum(s)
X    char *s;
X{
X    register char *p;
X
X    if (s == (char *) NULL || *s == '\0') {
X        return -1;
X    }
X    for (p = s; *p; p++) {
X        /* compared directly: isdigit() is not defined for negative chars */
X        if (*p < '0' || *p > '9') {
X            return -1;
X        }
X    }
X    return atoi(s);
X}
X
X
X/* Remove the leading pathname from a filename.
X * Returns a pointer into fullname, just past its last '/'.  When there
X * is no '/', or nothing follows the last one, fullname itself is
X * returned.  A NULL fullname is returned unchanged.
X */
X
Xchar *
Xrmpath(fullname)
X    char *fullname;
X{
X    register char *p;
X    char *q = (char *) NULL;    /* character after the last '/' seen */
X
X    for (p = fullname; p && *p; p++) {
X        /*
X         * p itself must not be advanced here: the loop does that, and
X         * stepping twice would jump over the NUL after a trailing '/'.
X         */
X        if (*p == '/')
X            q = p + 1;
X    }
X    if (q && *q) {
X        return(q);
X    }
X    return(fullname);
X}
X
X
X/* Find substring within string */
X/* Brute force algorithm */
X/* Returns the index of the first occurrence of key in string at or
X * after position start, or -1 if there is none.  Also returns -1 when
X * either string is empty, or when start is negative or lies beyond the
X * end of string.
X */
X
Xint
Xbfsearch(string,key,start)
X
X  char  string[],
X        key[];
X  int   start;
X{
X    int i, j;
X
X    if (string[0] == '\0' || key[0] == '\0' || start < 0) return(-1);
X
X    /* never index past the terminating NUL when start is too large */
X    for (i = 0; i < start; i++) {
X        if (string[i] == '\0') return(-1);
X    }
X
X    for (i = start; string[i] != '\0'; i++) {
X        /* the comparison stops at the NUL of either string */
X        for (j = 0; key[j] != '\0' && string[i + j] == key[j]; j++)
X            ;
X        if (key[j] == '\0') return(i);
X    }
X    return(-1);
X}
X
SHAR_EOF
if test 8964 -ne "`wc -c 'slice.c'`"
then
	echo shar: error transmitting "'slice.c'" '(should have been 8964 characters)'
fi
#	End of shell archive
exit 0
-- 
Gary Puckering        3755 Riverside Dr.
Cognos Incorporated   Ottawa, Ontario
(613) 738-1440        CANADA  K1G 3N3

garyp@cognos.UUCP (Gary Puckering) (01/20/87)

--

Some time ago, I posted the utility "slice" to the net.  Apparently,
at least one site did not receive the source.  This was my first major
posting to the net, so I probably messed up (though it's hard to believe
I forgot the source!).  Anyway, I'm reposting it for the benefit of
those who didn't get a useful version the first time.

For those who missed the first posting, a brief description of slice
follows:

-----------------------------------------------------------------------

Slice splits up a file into lots of little files.  It reads its input a
line at a time, and starts a new output file when

*	the input line matches a pattern, or
*	there have been n lines written to the current output file.

You can use it to split a mailbox or an archive of news articles into
one article per file, for example.  In fact, you can do this with about
5 lines of awk, but you run into problems with long lines (and speed,
if it bothers you!).

Slice was originally contributed by Russell Quin as the program
"mailsplit".  Unlike mailsplit, however, slice allows multiple output
formats to be specified (rather than multiple input files).  This makes
it possible to deposit the pieces (slices!) into files named whatever
you want.  For example:

     slice <article -x '^--* [Cc]ut' README article.sh

will deposit everything up to the cut line into README and everything
after it into article.sh (the -x option causes the matched line to be
excluded).

There are even options to make slicing mailboxes and files containing
shell scripts easier (-m and -s).

There are some good examples in the man page.

Source, Makefile and manual entry enclosed.  To install, do the
following:

1:	Edit the Makefile: you'll need to alter the "R=/usr/local" if 
	you don't want slice to live in /usr/local/usr/bin.

2:	make slice

3:	have a play with it & satisfy yourself that it behaves reasonably

4:	make install

Make "install" will do a "$(MAKE) $(CLEAN)" afterwards.  If you don't 
want to remove the binary, say

        CLEAN="" make install

at step 4.


--------------------- cut here ----------------------------------------
#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	slice.1
#	Makefile
#	opts.h
#	slice.c
# This archive created: Tue Jan 20 09:39:14 1987
export PATH; PATH=/bin:$PATH
echo shar: extracting "'slice.1'" '(5375 characters)'
if test -f 'slice.1'
then
	echo shar: over-writing existing file "'slice.1'"
fi
sed 's/^X//' << \SHAR_EOF > 'slice.1'
X.TH SLICE 1L "1986 December 11" "Cognos Inc."
X.SH NAME
Xslice \- split a file into pieces, by pattern
X.SH SYNOPSIS
X.B slice
X[ \fB\-f \fIfilename\fP ]
X[ \fB\-i\fP\fIn\fP ]
X[ \fB\-a\fP ]
X[ \fB\-x\fP ]
X[ \fB\-m\fP | \fB\-s\fP | \fB\-n\fP\fIn\fP ]
X[ \fIformat\fP .\|.\|. ]
X.LP
X.sp
X.B slice
X[ \fB\-f \fIfilename\fP ]
X[ \fB\-i\fP\fIn\fP ]
X[ \fB\-a\fP ]
X[ \fB\-x\fP ]
X\fB\-e \fIexpression\fP 
X[ \fIformat\fP .\|.\|. ]
X.LP
X.sp
X.B slice
X[ \fB\-f \fIfilename\fP ]
X[ \fB\-i\fP\fIn\fP ]
X[ \fB\-a\fP ]
X[ \fB\-x\fP ]
X\fIexpression\fP
X[ \fIformat\fP .\|.\|. ]
X.SH DESCRIPTION
X.I Slice
Xsplits large files into smaller ones.  The output files are named
Xaccording to the \fIformat\fR strings provided.  The input file is split
Xwhenever a pattern is matched or every \fIn\fR lines, depending on the
Xoptions selected.  The syntax is similar to \fIgrep\fR, except for the
X\fB-f\fR option, which has a different meaning.
XBecause some of the options are mutually exclusive,
Xthere are three forms of the command.
X.LP
XThe options allowed for \fIslice\fR are:
X.IP "\fB-f\fR \fIfilename\fR"
XInput for \fIslice\fR is taken from the named file rather than \fIstdin\fR.
X.IP \-\fBi\fP\fIn\fP
XThe starting number for numbering output files generated by formats
Xcontaining %d (see \fIformat\fR below).  The default starting number is
Xone.
X.IP \fB-a\fR
XCauses the file to be split after the line matching the pattern, rather
Xthan before (as is normally the case).
X.IP \fB-x\fR
XCauses the matched line to be excluded from the output files.  Handy
Xfor eliminating cut lines, etc.
X.IP \fB-m\fR
XUses the pattern '^From ' to split the file.  This is convenient for breaking
Xup a mailbox file.
X.IP \fB-s\fR
XUses the pattern '^#!\ */bin/sh' to split the file.  This is ideal for
Xbreaking up mail or news article containing a Bourne shell script.
X.IP \-\fBn\fIn\fR
XSplit the file every \fIn\fR lines.  In this case, no pattern matching 
Xis performed.  This is the behaviour of \fIsplit (1)\fR,
Xexcept that the default output filename format for
X\fIslice\fR is different.
X.IP "\fB-e\fR \fIexpression\fR"
XUses the pattern specified by \fIexpression\fR to split the file.
XThe matched line is put in the new output
Xfile.  The pattern may contain newlines (which match themselves).  See
X.I ed (1)
Xfor details of the regular expressions.
X.IP \fIformat\fR
XAll three forms of the command allow the specification of zero or more
X\fIformat\fR strings as non-flag parameters.
XThe format strings are formed after the fashion of
X.I printf 
Xformats and are used to generate output filenames.
XYou can provide as many as you like.  Every time a split is required,
Xthe next format will be selected.  If \fIslice\fR runs out of
Xformats, a warning will be issued and the last file will contain the
Xremainder of the input file.
X.IP
XA %s in the string will be replaced by the input filename (less pathname).
XIf the input file is \fIstdin\fR, the name \fIslice\fR will be used
Xas a default.
X.IP
XA %d in a format string will be used to generate a unique number.  
XOnce \fIslice\fR encounters a format containing %d, it will continue
Xusing it until the input file is exhausted.
X.IP
XThe %d is replaced with a number starting at one for the first file, and 
Xincremented by one for each split.  The \-\fBi\fP option can be used
Xto start at a different number.
XThe default format is
X.sp
X\ \ \ \ %s:%03d
X.sp
Xwhich results in files having names like \fIfilename\fR:001,
X\fIfilename\fR:002, \fIfilename\fR:003 and so on.
XThe default format was chosen because the resulting files are listed 
Xin numerical order by
X.I ls
Xor by
X.I echo *
Xwhich is sometimes useful.
X.LP
XThe special format string '+' is used to designate that \fIstdout\fR is
Xto be used.  This is handy for piping one of the pieces to another
Xcommand.  Since \fIslice\fR appends to its output files, it's possible
Xfor the same filename to appear more than once as a format.
X.LP
X.SH EXAMPLES
XSplit up a mail folder into files slice:001, slice:002, etc.:
X.sp
X	slice -m <folder
X.sp
XSplit a news article containing a shell script into article.hdr and
Xarticle.sh:
X.sp
X	slice -s -f article '%s.hdr' '%s.sh'
X.sp
XSplit stdin at every line of dashes into the files 0, 1, 2, etc.:
X.sp
X	cat anyfile | slice -i0 '^--* *$' '%d'
X.sp
XSplit a news article containing a shell script into its header portion
Xand script portion.  Pipe the latter to \fIsh\fR to unshar it:
X.sp
X	slice -s <article README + | sh
X.sp
XPipe the middle portion of a file to sh, keeping the head and tail in a
Xfile called README (exclude cut lines):
X.sp
X	slice -f myfile -x '^--* [Cc]ut ' README + README | sh
X.sp
XKeep the middle portion of a file, discarding the head and tail:
X.sp
X	slice -f myfile '^--* [Cc]ut ' /dev/null middle /dev/null
X.sp
X.SH BUGS
XWatch out for filename expansion by the shell.  This could cause
X\fIslice\fR to interpret the extra filenames as output formats causing
Xslices to be appended to existing files.
X.SH DIAGNOSTICS
XMostly straight-forward.
X``Internal Error'' indicates a bug in \fIslice\fR, and should be reported.
XExit status 1 indicates an error parsing options \- for example, if an unknown
Xflag was used.
XExit status 2 indicates a meaningless combination was detected and rejected
X(this is rare in practice).
XExit status 3 indicates a run-time problem \- for example, if a file couldn't
Xbe opened.
X.SH "SEE ALSO"
X.I ed (1),
X.I mail (1),
X.I ls (1),
X.I split (1),
X.I mailsplit (l),
X.I printf (3).
SHAR_EOF
if test 5375 -ne "`wc -c 'slice.1'`"
then
	echo shar: error transmitting "'slice.1'" '(should have been 5375 characters)'
fi
echo shar: extracting "'Makefile'" '(1229 characters)'
if test -f 'Makefile'
then
	echo shar: over-writing existing file "'Makefile'"
fi
sed 's/^X//' << \SHAR_EOF > 'Makefile'
X# Makefile for slice
X#
X# Originally contributed as mailsplit, written by:
X#   R E Quin, October 1986 University of Warwick (UK) Computer Science
X#   warwick!req     +44 203 523193
X#
X# Modified and recontributed by:
X#   Gary Puckering        3755 Riverside Dr.
X#   Cognos Incorporated   Ottawa, Ontario
X#   (613) 738-1440        CANADA  K1G 3N3
X#
X# This makefile is intended for the sys5 Augmented make.
X# 
XMAKE=make
XCLEAN=clean
XCC=cc
XHACKS=
XCFLAGS=-O $(HACKS)
X# R is the root of the filesystem -- i.e. where to install things.
X# The binary is installed in $(DESTDIR), the manual page in $(MANDIR).
X# Keep the values free of trailing blanks: some versions of make treat
X# them as part of the value, which would turn $(PROG).1 into "slice .1".
XR=/usr/local
XDESTDIR=$R/usr/bin
XMANDIR=$R/man/manl
XPROG=slice
X
X# PROG is what to make; DESTDIR is where to put it.
X# HACKS are for -DBUGFIX style things.
X
X# R is intended to be the root of the filesystem if it isn't "/"
X
X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say
X# CLEAN=  make -e install
X# if you don't want to remove the garbage at the end, for example.
X# This is useful primarily for testing the install: entry!
X
Xall: $(PROG)
X
Xslice: slice.o
X	$(CC) -o $(PROG) slice.o
X
X# slice.c includes opts.h, so the object is out of date when it changes.
Xslice.o: slice.c opts.h
X
Xinstall: slice
X	/bin/mv $(PROG) $(DESTDIR)
X	/bin/cp $(PROG).1 $(MANDIR)/$(PROG).l
X	$(MAKE) $(CLEAN)
X
Xclean:
X	rm -rf core *.o $(PROG) a.out
SHAR_EOF
if test 1229 -ne "`wc -c 'Makefile'`"
then
	echo shar: error transmitting "'Makefile'" '(should have been 1229 characters)'
fi
echo shar: extracting "'opts.h'" '(769 characters)'
if test -f 'opts.h'
then
	echo shar: over-writing existing file "'opts.h'"
fi
sed 's/^X//' << \SHAR_EOF > 'opts.h'
X
X#define FALSE 0
X#define TRUE 1
Xtypedef int bool;
X
X#define EXIT_SYNTAX 1	/* syntax error parsing commandline options */
X#define EXIT_SEMANT 2	/* options are correct but meaningless */
X#define EXIT_RUNERR 3	/* error opening a file, for example */
X#define EXIT_INTERN 4	/* internal error -- bug!! */
X
X/*
X * nextstr -- fetch the string that belongs to a flag.
X * On entry array[0] points at the flag letter.  If text follows the
X * letter in the same argument, s is set to that text; otherwise s is
X * set to the next argument, which is consumed (count is decremented
X * and array advanced).  If there is neither, `failure' is executed.
X */
X#define nextstr(s,count,array,failure)	\
X	{if (((count)<2) && !((array)[0][1])) {failure;}\
X	else {if ((array)[0][1]) { s = &((array)[0][1]); } \
X	      else {s = array[1]; --count; array++;}}}
X
X#define DFLTNAME "slice"	/* input filename (for stdin) */
X#define BUFLEN BUFSIZ	/* the maximum length of an input line (incl. "\n\0") */
X#define MAXFILENAMELEN BUFSIZ	/* longer than the longest possible file name */
X/*
X * o/p file name format.  This must be "%03d", not "%03.d": an empty
X * precision means precision 0, which cancels the 0 flag (blank padding)
X * and prints no digits at all for the number 0.
X */
X#define DFLTOUTNAME	"%s:%03d"
X
SHAR_EOF
if test 769 -ne "`wc -c 'opts.h'`"
then
	echo shar: error transmitting "'opts.h'" '(should have been 769 characters)'
fi
echo shar: extracting "'slice.c'" '(8964 characters)'
if test -f 'slice.c'
then
	echo shar: over-writing existing file "'slice.c'"
fi
sed 's/^X//' << \SHAR_EOF > 'slice.c'
X/* slice -- split files at lines that match a pattern */
X#include <stdio.h>
X#include <ctype.h>
X
X#include "opts.h"				/* defines nextstr() etc */
X
Xchar *progname = "slice";		/* for error messages */
Xchar *pattern = (char *) NULL;	/* reg expr used to split file */
Xchar **format;					/* ptr for format strings */
Xint  n_format;					/* number of format strings */
Xchar *defaultfmt[] = {DFLTOUTNAME};	/* default format string */
Xint  filenumber = 0;
Xint  every_n_lines = 0;			/* split every n lines */
Xbool exclude = FALSE;			/* exclude matched line from o/p files */
Xbool split_after = FALSE;		/* split after matched line */
X
Xusage(status)
X     int status;	/* exit if status != 0 */
X{
X     fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i<n>] [-m|-s|-n<n>] [-e expression | expression] [format...]\n", progname);
X     if (status)
X	  exit(status);
X}
X
Xmain(argc, argv)
X     char *argv[];
X{
X     /* split files at points that match a given pattern */
X     /* initialise things */
X     bool donefiles = FALSE;
X     char *buffer;
X	 char *infile = (char *) NULL;
X
X     int getnum();		/* does more checking than atoi */
X     char *rmpath();    /* removes leading pathname from a filename */
X
X     /* now remove possible leading pathname
X      * (e.g. /usr/bin/slice is to report it's errors as slice
X      */
X     progname = rmpath(argv[0]);
X
X
X	while (--argc) {
X	  if (**++argv == '-') {
X		switch(*++*argv) {
X			case 'a': {				/* split after pattern */
X				split_after = TRUE;
X				break;
X			}
X			case 'e': {				/* pattern (expression) */
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Pattern after -e missing or null\n");
X					usage(1);
X				}
X				pattern = *argv;
X				break;
X			}
X			case 'm': {				/* mailbox pattern */
X				pattern = "^From ";
X				break; 
X			}
X			case 's': {				/* shell pattern */
X				pattern = "^#! *\/bin\/sh";
X				break; 
X			}
X			case 'n': {				/* -n n_lines -- split every n lines */
X				nextstr(buffer,argc,argv,usage(2));
X				every_n_lines = getnum(buffer);
X				if (every_n_lines <= 0) {
X					error("-n: number must be at least 1\n");
X					exit(EXIT_SYNTAX);
X				}
X				break;
X			} 
X			case 'f': {
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Filename after -f missing or null\n");
X					usage(1);
X				}
X				infile = *argv;
X				break;
X			}				
X		    case 'i': {	/* -i initial_number */
X				nextstr(buffer,argc,argv,usage(2));
X				filenumber = getnum(buffer);
X				if (filenumber < 0) {
X			    	error("-i must be followed by a positive number\n");
X				    exit(EXIT_SYNTAX);
X				 }
X				filenumber--;	/* needs to be one less to start with */
X				break;
X		    }
X			case 'x': { /* exclude matched lines */
X				exclude = TRUE;
X				break;
X			}
X		    default: {
X				error("Unknown flag -%c\n", **argv);
X				usage(1);
X		    }
X		}			/* end switch */
X	  } else {	
X		if (!pattern) pattern = *argv;	/* first non-flag is pattern */
X		else break;						/* break while loop */
X	  }			/* end if */
X     }		/* end while */
X
X	 if (!argc) {
X		format = defaultfmt;
X		n_format = 1; }
X	 else {
X		format = argv;
X		n_format = argc;
X	 }
X
X#ifdef DEBUG
X	printf("argc=%d\n",argc);
X	printf("format='%s'\n",*format);
X	printf("pattern='%s'\n",pattern);
X#endif
X
X	 if (!infile) split(stdin, DFLTNAME, pattern);
X	 else        fsplit(infile, pattern);
X
X     exit(0);
X}
X
Xfsplit(name, pat)
X     char *name;
X     char *pat;
X{
X     FILE *fd;
X
X     if (!name || !*name) {
X	  error("Can't split a file with an empty name\n");
X	  usage(2);
X     }
X
X     if ( (fd = fopen(name, "r")) == NULL) {
X	  error("Can't open %s\n", name);
X	  return;
X     }
X
X     (void) split(fd, name, pat);
X
X     if (fclose(fd) == EOF) {	/* something's gone wrong */
X	  error("Can't close %s -- giving up\n", name);
X	  exit(EXIT_RUNERR);
X     }
X}
X
Xchar buffer[BUFLEN];
X
Xint
Xsplit(input, name, pattern)
X     FILE *input;		/* fd of input file */
X     char *name;		/* input filename */
X     char *pattern;		/* pattern used to split file */
X{
X     /* do the real work here. Oh dear, I don't know how... */
X     /* we are always called with an open file. */
X
X     extern char *re_comp();     /* compile string into automaton */
X     extern int   re_exec();     /* try to match string */
X#define REMATCH 1
X#define RENOMATCH 0
X#define REFAULT -1
X
X     char *errmessage;
X     FILE *output = NULL;
X     char fnambuf[MAXFILENAMELEN + 2];  /* +1 for null, +1 for overflow */
X     int reg_status = 0;				/* regular expression status */
X     int line = 0;
X
X	 if (split_after && exclude) {
X	  error("Can't specify both -a and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && exclude) {
X	  error("Can't specify both -n and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && split_after) {
X	  error("Can't specify both -n and -a\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && pattern) {
X	  error("Can't specify both -n and pattern\n");
X	  usage(2);
X	 }
X
X     if (!every_n_lines && (!pattern || !*pattern)) {
X	  error("Can't match an empty pattern\n");
X	  usage(2);
X     }
X
X     if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) {
X	  error("Error in pattern <%s>: %s\n", pattern, errmessage);
X	  exit(EXIT_RUNERR);
X     }
X     /* errmessage is NULL here */
X
X     /* the -2 to fgets is because of the null and \n appended */
X     while (fgets(buffer, BUFLEN - 2, input) != NULL) {
X	  if (!output ||	/* first line */
X	     (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */
X	     (!every_n_lines &&
X	     ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */
X	       /* don't look at 1st line of file, to avoid an infinite */
X	       /* recursion... */
X
X			if (output && split_after) {
X				fputs(buffer, output);
X			}
X
X			if (n_format && mkname(fnambuf, name)) {;
X				/* check for output file = input file */
X				if (strcmp(fnambuf,name)==0) {
X					error("Output file same as input file\n");
X					exit(EXIT_RUNERR);
X				}
X				/* start a new file */
X				if (output && output != stdout) {
X					if (fclose(output) == EOF) {
X						error("Can't close output file\n");
X						exit(EXIT_RUNERR);
X					}
X					output = NULL;
X				}
X				line = 0;
X				if (fnambuf[0]=='+' && fnambuf[1]==NULL) {
X					output = stdout;
X				} else {
X					if ((output = fopen(fnambuf, "a")) == NULL) {
X						error("Can't open output file %s\n", fnambuf);
X						exit(EXIT_RUNERR);
X					}
X				}
X				/* if matched lines are excluded, skip the fputs */
X				if (exclude && reg_status == REMATCH) continue;
X
X				/* if file is to be split after pattern, put already done */
X				if (split_after && reg_status == REMATCH) continue;
X			} else {
X				error("Insufficient formats -- last file contains remainder\n");
X				}
X	  } else if (reg_status == REFAULT) {
X	       /* the re_exec failed */
X	       error("Internal error trying to match <%s> to <%s>\n",
X			      pattern, buffer);
X	       exit(EXIT_INTERN);
X	  }
X	  fputs(buffer, output);
X      }
X      return (filenumber == -1);	/* exit status for main */
X}
X
Xbool
Xmkname(fnambuf, name)
X	 char *fnambuf;
X	 char *name;
X{
X     int i, s = -1, d = -1;
X	 static bool new_format = TRUE;
X	 static bool perpetual = FALSE;
X	 static bool d_before_s = FALSE;
X
X	 if (new_format) {
X		 if (!n_format) {
X			error("Internal error: mkname called but formats have run out\n");
X			exit(EXIT_INTERN);
X		 }
X	     i = bfsearch(*format, "%",0);
X	     s = bfsearch(*format, "%s",0);
X	     if (i>=0 && i==s) d = bfsearch(*format, "%",++i);
X	     else 			   d = i;
X		 if (d<0) perpetual = FALSE;
X		 else     perpetual = TRUE;
X		 if (d<s || s<0) d_before_s = TRUE;
X		 else            d_before_s = FALSE;
X		 new_format = FALSE;
X	 }
X
X	 if (perpetual) ++filenumber;
X
X     if (d_before_s)
X          sprintf(fnambuf, *format, filenumber, rmpath(name));
X     else 
X          sprintf(fnambuf, *format, rmpath(name), filenumber);
X      
X	 if (!perpetual) {
X		new_format = TRUE;
X		--n_format;
X		if (n_format) {
X			++format; 
X			filenumber=0;
X		}
X	}
X}
X
Xerror(fmt, a1, a2, a3, a4)
X     char *fmt;
X{
X     fputs(progname, stderr);
X     fputs(": ", stderr);
X     fprintf(stderr, fmt, a1, a2, a3, a4);
X}
X
X/* getnum(s) returns the value of the unsigned decimal number in s.
X * An empty (or NULL) string, or one containing anything other than the
X * digits 0-9 -- a sign, blanks, trailing garbage -- yields -1.
X */
Xint
Xgetnum(s)
X    char *s;
X{
X    register char *p;
X
X    if (s == (char *) NULL || *s == '\0') {
X        return -1;
X    }
X    for (p = s; *p; p++) {
X        /* compared directly: isdigit() is not defined for negative chars */
X        if (*p < '0' || *p > '9') {
X            return -1;
X        }
X    }
X    return atoi(s);
X}
X
X
X/* Remove the leading pathname from a filename.
X * Returns a pointer into fullname, just past its last '/'.  When there
X * is no '/', or nothing follows the last one, fullname itself is
X * returned.  A NULL fullname is returned unchanged.
X */
X
Xchar *
Xrmpath(fullname)
X    char *fullname;
X{
X    register char *p;
X    char *q = (char *) NULL;    /* character after the last '/' seen */
X
X    for (p = fullname; p && *p; p++) {
X        /*
X         * p itself must not be advanced here: the loop does that, and
X         * stepping twice would jump over the NUL after a trailing '/'.
X         */
X        if (*p == '/')
X            q = p + 1;
X    }
X    if (q && *q) {
X        return(q);
X    }
X    return(fullname);
X}
X
X
X/* Find substring within string */
X/* Brute force algorithm */
X/* Returns the index of the first occurrence of key in string at or
X * after position start, or -1 if there is none.  Also returns -1 when
X * either string is empty, or when start is negative or lies beyond the
X * end of string.
X */
X
Xint
Xbfsearch(string,key,start)
X
X  char  string[],
X        key[];
X  int   start;
X{
X    int i, j;
X
X    if (string[0] == '\0' || key[0] == '\0' || start < 0) return(-1);
X
X    /* never index past the terminating NUL when start is too large */
X    for (i = 0; i < start; i++) {
X        if (string[i] == '\0') return(-1);
X    }
X
X    for (i = start; string[i] != '\0'; i++) {
X        /* the comparison stops at the NUL of either string */
X        for (j = 0; key[j] != '\0' && string[i + j] == key[j]; j++)
X            ;
X        if (key[j] == '\0') return(i);
X    }
X    return(-1);
X}
X
SHAR_EOF
if test 8964 -ne "`wc -c 'slice.c'`"
then
	echo shar: error transmitting "'slice.c'" '(should have been 8964 characters)'
fi
#	End of shell archive
exit 0
-- 
Gary Puckering        3755 Riverside Dr.
Cognos Incorporated   Ottawa, Ontario
(613) 738-1440        CANADA  K1G 3N3

tes@whuts.UUCP (01/26/87)

<*>
Slice does not compile/link,
it appears to be looking for
re_* functions.

How do I obtain same,  these
presumably are on the developer's
system, and he assumed that they
were universal.  (Surprise!!)
-- 
    -----                   Terry Sterkel
  -====----            AT&T Bell Laboratories
  ---------    {harvard|allegra|ulysses|ihnp4}!whuts!tes
    -----         [opinions are obviously only my own]