garyp@cognos.UUCP (Gary Puckering) (12/17/86)
Slice splits up a file into lots of little files. It reads its input a line at a time, and starts a new output file when * the input line matches a pattern, or * there have been n lines written to the current output file. You can use it to split a mailbox or an archive of news articles into one article per file, for example. In fact, you can do this with about 5 lines of awk, but you run into problems with long lines (and speed, if it bothers you!). Slice was originally contributed by Russell Quinn as the program "mailsplit". Unlike mailsplit, however, slice allows multiple output formats to be specified (rather than multiple input files). This makes it possible to deposit the pieces (slices!) into files named whatever you want. For example: slice <article -x '^--* [Cc]ut' README article.sh will deposit everything up to the cut line into README and everything after it into article.sh (the -x option causes the matched line to be excluded). There are even options to make slicing mailboxes and files containing shell scripts easier (-m and -s). There are some good examples in the man page. Source, Makefile and manual entry enclosed. To install, do the following: 1: Edit the Makefile: you'll need to alter the "R=/usr/local" if you don't want slice to live in /usr/local/usr/bin. 2: make slice 3: have a play with it & satisfy yourself that it behaves reasonably 4: make install Make "install" will do a "$(MAKE) $(CLEAN)" afterwards. If you don't want to remove the binary, say CLEAN="" make install at step 4. --------------------- cut here ---------------------------------------- #!/bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #!/bin/sh line. # 2. Save the resulting text in a file. # 3. 
Execute the file with /bin/sh (not csh) to create the files: # Makefile # opts.h # slice.c # This archive created: Tue Dec 16 17:04:43 1986 export PATH; PATH=/bin:$PATH echo shar: extracting "'Makefile'" '(1221 characters)' if test -f 'Makefile' then echo shar: over-writing existing file "'Makefile'" fi sed 's/^X//' << \SHAR_EOF > 'Makefile' X# Makefile for slice X# X# Originally contributed at mailsplit, written by: X# R E Quin, October 1986 University of Warwick (UK) Computer Science X# warwick!req +44 203 523193 X# X# Modified and recontributed by: X# Gary Puckering 3755 Riverside Dr. X# Cognos Incorporated Ottawa, Ontario X# (613) 738-1440 CANADA K1G 3N3 X# X# This makefile is intended for the sys5 Augmented make. X# XMAKE=make XCLEAN=clean XCC=cc XHACKS= XCFLAGS=-O $(HACKS) X# R is the root of the filesystem -- i.e. where to install things. X# The binaries are installed in $R/$(DESTDIR). XR=/usr/local XDESTDIR=$R/usr/bin XMANDIR=$R/usr/man/man1 XPROG=slice X X# PROG is what to make; DESTDIR is where to put it. X# HACKS are for -DBUGFIX style things. X X# R is intended to be the root of the filesystem if it isn't "/" X X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say X# CLEAN= make -e install X# if you don't want to remove the garbage at the end, for example. X# This is useful primarily for testing the install: entry! 
X Xall: $(PROG) X Xslice: opts.h slice.o X $(CC) -o $(PROG) slice.o X Xinstall: slice X /bin/mv $(PROG) $(DESTDIR) X /bin/cp slice.1 $(MANDIR) X $(MAKE) $(CLEAN) X Xclean: X rm -rf core *.o $(PROG) a.out SHAR_EOF if test 1221 -ne "`wc -c 'Makefile'`" then echo shar: error transmitting "'Makefile'" '(should have been 1221 characters)' fi echo shar: extracting "'opts.h'" '(769 characters)' if test -f 'opts.h' then echo shar: over-writing existing file "'opts.h'" fi sed 's/^X//' << \SHAR_EOF > 'opts.h' X X#define FALSE 0 X#define TRUE 1 Xtypedef int bool; X X#define EXIT_SYNTAX 1 /* syntax error parsing commandline options */ X#define EXIT_SEMANT 2 /* options are correct but meaningless */ X#define EXIT_RUNERR 3 /* error opening a file, for example */ X#define EXIT_INTERN 4 /* internal error -- bug!! */ X X#define nextstr(s,count,array,failure) \ X {if (((count)<2) && !((array)[0][1])) {failure;}\ X else {if ((array)[0][1]) { s = &((array)[0][1]); } \ X else {s = array[1]; --count; array++;}}} X X#define DFLTNAME "slice" /* input filename (for stdin) */ X#define BUFLEN BUFSIZ /* the maximum length of an input line (incl. 
"\n\0") */ X#define MAXFILENAMELEN BUFSIZ /* longer than the longest possible file name */ X#define DFLTOUTNAME "%s:%03.d" /* o/p file name format */ X SHAR_EOF if test 769 -ne "`wc -c 'opts.h'`" then echo shar: error transmitting "'opts.h'" '(should have been 769 characters)' fi echo shar: extracting "'slice.c'" '(8964 characters)' if test -f 'slice.c' then echo shar: over-writing existing file "'slice.c'" fi sed 's/^X//' << \SHAR_EOF > 'slice.c' X/* slice -- split files at lines that match a pattern */ X#include <stdio.h> X#include <ctype.h> X X#include "opts.h" /* defines nextstr() etc */ X Xchar *progname = "slice"; /* for error messages */ Xchar *pattern = (char *) NULL; /* reg expr used to split file */ Xchar **format; /* ptr for format strings */ Xint n_format; /* number of format strings */ Xchar *defaultfmt[] = {DFLTOUTNAME}; /* default format string */ Xint filenumber = 0; Xint every_n_lines = 0; /* split every n lines */ Xbool exclude = FALSE; /* exclude matched line from o/p files */ Xbool split_after = FALSE; /* split after matched line */ X Xusage(status) X int status; /* exit if status != 0 */ X{ X fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i<n>] [-m|-s|-n<n>] [-e expression | expression] [format...]\n", progname); X if (status) X exit(status); X} X Xmain(argc, argv) X char *argv[]; X{ X /* split files at points that match a given pattern */ X /* initialise things */ X bool donefiles = FALSE; X char *buffer; X char *infile = (char *) NULL; X X int getnum(); /* does more checking than atoi */ X char *rmpath(); /* removes leading pathname from a filename */ X X /* now remove possible leading pathname X * (e.g. 
/usr/bin/slice is to report it's errors as slice X */ X progname = rmpath(argv[0]); X X X while (--argc) { X if (**++argv == '-') { X switch(*++*argv) { X case 'a': { /* split after pattern */ X split_after = TRUE; X break; X } X case 'e': { /* pattern (expression) */ X ++argv; argc--; X if (argc==0 || !**argv) { X error("Pattern after -e missing or null\n"); X usage(1); X } X pattern = *argv; X break; X } X case 'm': { /* mailbox pattern */ X pattern = "^From "; X break; X } X case 's': { /* shell pattern */ X pattern = "^#! *\/bin\/sh"; X break; X } X case 'n': { /* -n n_lines -- split every n lines */ X nextstr(buffer,argc,argv,usage(2)); X every_n_lines = getnum(buffer); X if (every_n_lines <= 0) { X error("-n: number must be at least 1\n"); X exit(EXIT_SYNTAX); X } X break; X } X case 'f': { X ++argv; argc--; X if (argc==0 || !**argv) { X error("Filename after -f missing or null\n"); X usage(1); X } X infile = *argv; X break; X } X case 'i': { /* -i initial_number */ X nextstr(buffer,argc,argv,usage(2)); X filenumber = getnum(buffer); X if (filenumber < 0) { X error("-i must be followed by a positive number\n"); X exit(EXIT_SYNTAX); X } X filenumber--; /* needs to be one less to start with */ X break; X } X case 'x': { /* exclude matched lines */ X exclude = TRUE; X break; X } X default: { X error("Unknown flag -%c\n", **argv); X usage(1); X } X } /* end switch */ X } else { X if (!pattern) pattern = *argv; /* first non-flag is pattern */ X else break; /* break while loop */ X } /* end if */ X } /* end while */ X X if (!argc) { X format = defaultfmt; X n_format = 1; } X else { X format = argv; X n_format = argc; X } X X#ifdef DEBUG X printf("argc=%d\n",argc); X printf("format='%s'\n",*format); X printf("pattern='%s'\n",pattern); X#endif X X if (!infile) split(stdin, DFLTNAME, pattern); X else fsplit(infile, pattern); X X exit(0); X} X Xfsplit(name, pat) X char *name; X char *pat; X{ X FILE *fd; X X if (!name || !*name) { X error("Can't split a file with an 
empty name\n"); X usage(2); X } X X if ( (fd = fopen(name, "r")) == NULL) { X error("Can't open %s\n", name); X return; X } X X (void) split(fd, name, pat); X X if (fclose(fd) == EOF) { /* something's gone wrong */ X error("Can't close %s -- giving up\n", name); X exit(EXIT_RUNERR); X } X} X Xchar buffer[BUFLEN]; X Xint Xsplit(input, name, pattern) X FILE *input; /* fd of input file */ X char *name; /* input filename */ X char *pattern; /* pattern used to split file */ X{ X /* do the real work here. Oh dear, I don't know how... */ X /* we are always called with an open file. */ X X extern char *re_comp(); /* compile string into automaton */ X extern int re_exec(); /* try to match string */ X#define REMATCH 1 X#define RENOMATCH 0 X#define REFAULT -1 X X char *errmessage; X FILE *output = NULL; X char fnambuf[MAXFILENAMELEN + 2]; /* +1 for null, +1 for overflow */ X int reg_status = 0; /* regular expression status */ X int line = 0; X X if (split_after && exclude) { X error("Can't specify both -a and -x\n"); X usage(2); X } X X if (every_n_lines && exclude) { X error("Can't specify both -n and -x\n"); X usage(2); X } X X if (every_n_lines && split_after) { X error("Can't specify both -n and -a\n"); X usage(2); X } X X if (every_n_lines && pattern) { X error("Can't specify both -n and pattern\n"); X usage(2); X } X X if (!every_n_lines && (!pattern || !*pattern)) { X error("Can't match an empty pattern\n"); X usage(2); X } X X if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) { X error("Error in pattern <%s>: %s\n", pattern, errmessage); X exit(EXIT_RUNERR); X } X /* errmessage is NULL here */ X X /* the -2 to fgets is because of the null and \n appended */ X while (fgets(buffer, BUFLEN - 2, input) != NULL) { X if (!output || /* first line */ X (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */ X (!every_n_lines && X ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */ X /* don't look at 1st line of file, to avoid an infinite 
*/ X /* recursion... */ X X if (output && split_after) { X fputs(buffer, output); X } X X if (n_format && mkname(fnambuf, name)) {; X /* check for output file = input file */ X if (strcmp(fnambuf,name)==0) { X error("Output file same as input file\n"); X exit(EXIT_RUNERR); X } X /* start a new file */ X if (output && output != stdout) { X if (fclose(output) == EOF) { X error("Can't close output file\n"); X exit(EXIT_RUNERR); X } X output = NULL; X } X line = 0; X if (fnambuf[0]=='+' && fnambuf[1]==NULL) { X output = stdout; X } else { X if ((output = fopen(fnambuf, "a")) == NULL) { X error("Can't open output file %s\n", fnambuf); X exit(EXIT_RUNERR); X } X } X /* if matched lines are excluded, skip the fputs */ X if (exclude && reg_status == REMATCH) continue; X X /* if file is to be split after pattern, put already done */ X if (split_after && reg_status == REMATCH) continue; X } else { X error("Insufficient formats -- last file contains remainder\n"); X } X } else if (reg_status == REFAULT) { X /* the re_exec failed */ X error("Internal error trying to match <%s> to <%s>\n", X pattern, buffer); X exit(EXIT_INTERN); X } X fputs(buffer, output); X } X return (filenumber == -1); /* exit status for main */ X} X Xbool Xmkname(fnambuf, name) X char *fnambuf; X char *name; X{ X int i, s = -1, d = -1; X static bool new_format = TRUE; X static bool perpetual = FALSE; X static bool d_before_s = FALSE; X X if (new_format) { X if (!n_format) { X error("Internal error: mkname called but formats have run out\n"); X exit(EXIT_INTERN); X } X i = bfsearch(*format, "%",0); X s = bfsearch(*format, "%s",0); X if (i>=0 && i==s) d = bfsearch(*format, "%",++i); X else d = i; X if (d<0) perpetual = FALSE; X else perpetual = TRUE; X if (d<s || s<0) d_before_s = TRUE; X else d_before_s = FALSE; X new_format = FALSE; X } X X if (perpetual) ++filenumber; X X if (d_before_s) X sprintf(fnambuf, *format, filenumber, rmpath(name)); X else X sprintf(fnambuf, *format, rmpath(name), filenumber); X 
X if (!perpetual) { X new_format = TRUE; X --n_format; X if (n_format) { X ++format; X filenumber=0; X } X } X} X Xerror(fmt, a1, a2, a3, a4) X char *fmt; X{ X fputs(progname, stderr); X fputs(": ", stderr); X fprintf(stderr, fmt, a1, a2, a3, a4); X} X X/* getnum(s) returns the value of the unsigned int in s. If there's any X * trailing garbage, or the number isn't +ve, we return -1 X */ Xint Xgetnum(s) X char *s; X{ X register char *p; X X for (p = s; *p; p++) { X if (!isdigit(*p)) { X return -1; X } X } X return atoi(s); X} X X X/* Remove the leading pathname from a filename */ X Xchar * Xrmpath(fullname) X char *fullname; X{ X register char *p; X char *q = (char *) NULL; X X for (p = fullname; p && *p; p++) { X if (*p == '/') X q = ++p; X } X if (q && *q) { X return(q); X } X return(fullname); X} X X X/* Find substring within string */ X/* Brute force algorithm */ X Xint Xbfsearch(string,key,start) X X char string[], X key[]; X int start; X{ X int i=start,j=0; X X if (string[0]==NULL || key[0]==NULL) return(-1); X X do { X if (string[i] == key[j]) X {i++; j++;} X else X {i=i-j+1; j=0;}; X } X while (string[i]!=NULL && key[j]!=NULL); X X if (key[j]==NULL) return(i-j); X else return(-1); X} X SHAR_EOF if test 8964 -ne "`wc -c 'slice.c'`" then echo shar: error transmitting "'slice.c'" '(should have been 8964 characters)' fi # End of shell archive exit 0 -- Gary Puckering 3755 Riverside Dr. Cognos Incorporated Ottawa, Ontario (613) 738-1440 CANADA K1G 3N3
garyp@cognos.UUCP (Gary Puckering) (01/20/87)
-- Some time ago, I posted the utility "slice" to the net. Apparently, at least one site did not receive the source. This was my first major posting to the net, so I probably messed up (though it's hard to believe I forgot the source!). Anyway, I'm reposting it for the benefit of those who didn't get a useful version the first time. For those who missed the first posting, a brief description of slice follows: ----------------------------------------------------------------------- Slice splits up a file into lots of little files. It reads its input a line at a time, and starts a new output file when * the input line matches a pattern, or * there have been n lines written to the current output file. You can use it to split a mailbox or an archive of news articles into one article per file, for example. In fact, you can do this with about 5 lines of awk, but you run into problems with long lines (and speed, if it bothers you!). Slice was originally contributed by Russell Quinn as the program "mailsplit". Unlike mailsplit, however, slice allows multiple output formats to be specified (rather than multiple input files). This makes it possible to deposit the pieces (slices!) into files named whatever you want. For example: slice <article -x '^--* [Cc]ut' README article.sh will deposit everything up to the cut line into README and everything after it into article.sh (the -x option causes the matched line to be excluded). There are even options to make slicing mailboxes and files containing shell scripts easier (-m and -s). There are some good examples in the man page. Source, Makefile and manual entry enclosed. To install, do the following: 1: Edit the Makefile: you'll need to alter the "R=/usr/local" if you don't want slice to live in /usr/local/usr/bin. 2: make slice 3: have a play with it & satisfy yourself that it behaves reasonably 4: make install Make "install" will do a "$(MAKE) $(CLEAN)" afterwards. 
If you don't want to remove the binary, say CLEAN="" make install at step 4. --------------------- cut here ---------------------------------------- #!/bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #!/bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # slice.1 # Makefile # opts.h # slice.c # This archive created: Tue Jan 20 09:39:14 1987 export PATH; PATH=/bin:$PATH echo shar: extracting "'slice.1'" '(5375 characters)' if test -f 'slice.1' then echo shar: over-writing existing file "'slice.1'" fi sed 's/^X//' << \SHAR_EOF > 'slice.1' X.TH SLICE 1L "1986 December 11" "Cognos Inc." X.SH NAME Xslice \- split a file into pieces, by pattern X.SH SYNOPSIS X.B slice X[ \fB\-f \fIfilename\fP ] X[ \fB\-i\fP\fIn\fP ] X[ \fB\-a\fP ] X[ \fB\-x\fP ] X[ \fB\-m\fP | \fB\-s\fP | \fB\-n\fP\fIn\fP ] X[ \fIformat\fP .\|.\|. ] X.LP X.sp X.B slice X[ \fB\-f \fIfilename\fP ] X[ \fB\-i\fP\fIn\fP ] X[ \fB\-a\fP ] X[ \fB\-x\fP ] X\fB\-e \fIexpression\fP X[ \fIformat\fP .\|.\|. ] X.LP X.sp X.B slice X[ \fB\-f \fIfilename\fP ] X[ \fB\-i\fP\fIn\fP ] X[ \fB\-a\fP ] X[ \fB\-x\fP ] X\fIexpression\fP X[ \fIformat\fP .\|.\|. ] X.SH DESCRIPTION X.I Slice Xsplits large files into smaller ones. The output files are named Xaccording to the \fIformat\fR strings provided. The input file is split Xwhenever a pattern is matched or every \fIn\fR lines, depending on the Xoptions selected. The syntax is similar to \fIgrep\fR, except for the X\fB-f\fR option, which has a different meaning. XBecause some of the options are mutually exclusive, Xthere are three forms of the command. X.LP XThe options allowed for \fIslice\fR are: X.IP "\fB-f\fR \fIfilename\fR" XInput for \fIslice\fR is taken from the named file rather than \fIstdin\fR. X.IP \-\fBi\fP\fIn\fP XThe starting number for numbering output files generated by formats Xcontaining %d (see \fIformat\fR below). The default starting number is Xone. 
X.IP \fB-a\fR XCauses the file to be split after the line matching the pattern, rather Xthan before (as is normally the case). X.IP \fB-x\fR XCauses the matched line to be excluded from the output files. Handy Xform eliminating cut lines, etc. X.IP \fB-m\fR XUses the pattern '^From ' to split the file. This is convenient for breaking Xup a mailbox file. X.IP \fB-s\fR XUses the pattern '^#!\ */bin/sh' to split the file. This is ideal for Xbreaking up mail or news article containing a Bourne shell script. X.IP \-\fBn\fIn\fR XSplit the file every \fIn\fR lines. In this case, no pattern matching Xis performed. This is the behaviour of \fIsplit (1)\fR, Xexcept that the default output filename format for X\fIslice\fR is different. X.IP "\fB-e\fR \fIexpression\fR" XUses the pattern specified by \fIexpression\fR to split the file. XThe matched line is put in the new output Xfile. The pattern may contain newlines (which match themselves). See X.I ed (1) Xfor details of the regular expressions. X.IP \fIformat\fR XAll three forms of the command allow the specification of zero or more X\fIformat\fR strings as non-flag parameters. XThe format strings are formed after the fashion of X.I printf Xformats and are used to generate output filenames. XYou can provide as many as you like. Every time a split is required, Xthe next format will be selected. If \fIslice\fR runs out of Xformats, a warning will be issued and the last file will contain the Xremainder of the input file. X.IP XA %s in the string will be replaced by the input filename (less pathname). XIf the input file is \fIstdin\fR, the name \fIslice:\fR will be used Xas a default. X.IP XA %d in a format string will be used to generate a unique number. XOnce \fIslice\fR encounters a format containing %d, it will continue Xusing it until the input file is exhausted. X.IP XThe %d is replaced with a number starting at one for the first file, and Xincremented by one for each split. 
The \-\fBi\fP option can be used Xto start at a different number. XThe default format is X.sp X\ \ \ \ %s:%03d X.sp Xwhich results in files having names like \fIfilename\fR:01, X\fIfilename\fR:02, \fIfilename\fR:03 and so on. XThe default format was chosen because the resulting files are listed Xin numerical order by X.I ls Xor by X.I echo * Xwhich is sometimes useful. X.LP XThe special format string '+' is used to designate that \fIstdout\fR is Xto be used. This is handy for piping one of the pieces to another Xcommand. Since \fIslice\fR appends to its output files, its possible Xfor the same filename to appear more than once as a format. X.LP X.SH EXAMPLES XSplit up a mail folder into files slice:001, slice:002, etc.: X.sp X slice -m <folder X.sp XSplit a news article containing a shell script into article.hdr and Xarticle.sh: X.sp X slice -s -f article '%s.hdr' '%s.sh' X.sp XSplit stdin at every line of dashes into the files 0, 1, 2, etc.: X.sp X cat anyfile | slice -i0 '^--* *$' '%d' X.sp XSplit a news article containing a shell script into its header portion Xand script portion. Pipe the latter to \fIsh\fR to unshar it: X.sp X slice -s <article README + | sh X.sp XPipe the middle portion of a file to sh, keeping the head and tail in a Xfile called README (exclude cut lines): X.sp X slice -f myfile -x '^--* [Cc]ut ' README + README | sh X.sp XKeep the middle portion of a file, discarding the head and tail: X.sp X slice -f myfile '^--* [Cc]ut ' /dev/null middle /dev/null X.sp X.SH BUGS XWatch out for filename expansion by the shell. This could cause X\fIslice\fR to interpret the extra filenames as output formats causing Xslices to be appended to existing files. X.SH DIAGNOSTICS XMostly straight-forward. X``Internal Error'' indicates a bug in \fIslice\fR, and should be reported. XExit staus 1 indicates an error parsing options \- for example, if an unknown Xflag was ued. 
XExit status 2 indicates a meaningless combination was detected and rejected X(this is rare in practice). XExit status 3 indicates a run-time problem \- for example, if a file couldn't Xbe opened. X.SH "SEE ALSO" X.I ed (1), X.I mail (1), X.I ls (1), X.I split (1), X.I mailsplit (l), X.I printf (3). SHAR_EOF if test 5375 -ne "`wc -c 'slice.1'`" then echo shar: error transmitting "'slice.1'" '(should have been 5375 characters)' fi echo shar: extracting "'Makefile'" '(1229 characters)' if test -f 'Makefile' then echo shar: over-writing existing file "'Makefile'" fi sed 's/^X//' << \SHAR_EOF > 'Makefile' X# Makefile for slice X# X# Originally contributed at mailsplit, written by: X# R E Quin, October 1986 University of Warwick (UK) Computer Science X# warwick!req +44 203 523193 X# X# Modified and recontributed by: X# Gary Puckering 3755 Riverside Dr. X# Cognos Incorporated Ottawa, Ontario X# (613) 738-1440 CANADA K1G 3N3 X# X# This makefile is intended for the sys5 Augmented make. X# XMAKE=make XCLEAN=clean XCC=cc XHACKS= XCFLAGS=-O $(HACKS) X# R is the root of the filesystem -- i.e. where to install things. X# The binaries are installed in $R/$(DESTDIR). XR=/usr/local XDESTDIR=$R/usr/bin XMANDIR=$R/man/manl XPROG=slice X X# PROG is what to make; DESTDIR is where to put it. X# HACKS are for -DBUGFIX style things. X X# R is intended to be the root of the filesystem if it isn't "/" X X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say X# CLEAN= make -e install X# if you don't want to remove the garbage at the end, for example. X# This is useful primarily for testing the install: entry! 
X Xall: $(PROG) X Xslice: opts.h slice.o X $(CC) -o $(PROG) slice.o X Xinstall: slice X /bin/mv $(PROG) $(DESTDIR) X /bin/cp $(PROG).1 $(MANDIR)/$(PROG).l X $(MAKE) $(CLEAN) X Xclean: X rm -rf core *.o $(PROG) a.out SHAR_EOF if test 1229 -ne "`wc -c 'Makefile'`" then echo shar: error transmitting "'Makefile'" '(should have been 1229 characters)' fi echo shar: extracting "'opts.h'" '(769 characters)' if test -f 'opts.h' then echo shar: over-writing existing file "'opts.h'" fi sed 's/^X//' << \SHAR_EOF > 'opts.h' X X#define FALSE 0 X#define TRUE 1 Xtypedef int bool; X X#define EXIT_SYNTAX 1 /* syntax error parsing commandline options */ X#define EXIT_SEMANT 2 /* options are correct but meaningless */ X#define EXIT_RUNERR 3 /* error opening a file, for example */ X#define EXIT_INTERN 4 /* internal error -- bug!! */ X X#define nextstr(s,count,array,failure) \ X {if (((count)<2) && !((array)[0][1])) {failure;}\ X else {if ((array)[0][1]) { s = &((array)[0][1]); } \ X else {s = array[1]; --count; array++;}}} X X#define DFLTNAME "slice" /* input filename (for stdin) */ X#define BUFLEN BUFSIZ /* the maximum length of an input line (incl. 
"\n\0") */ X#define MAXFILENAMELEN BUFSIZ /* longer than the longest possible file name */ X#define DFLTOUTNAME "%s:%03.d" /* o/p file name format */ X SHAR_EOF if test 769 -ne "`wc -c 'opts.h'`" then echo shar: error transmitting "'opts.h'" '(should have been 769 characters)' fi echo shar: extracting "'slice.c'" '(8964 characters)' if test -f 'slice.c' then echo shar: over-writing existing file "'slice.c'" fi sed 's/^X//' << \SHAR_EOF > 'slice.c' X/* slice -- split files at lines that match a pattern */ X#include <stdio.h> X#include <ctype.h> X X#include "opts.h" /* defines nextstr() etc */ X Xchar *progname = "slice"; /* for error messages */ Xchar *pattern = (char *) NULL; /* reg expr used to split file */ Xchar **format; /* ptr for format strings */ Xint n_format; /* number of format strings */ Xchar *defaultfmt[] = {DFLTOUTNAME}; /* default format string */ Xint filenumber = 0; Xint every_n_lines = 0; /* split every n lines */ Xbool exclude = FALSE; /* exclude matched line from o/p files */ Xbool split_after = FALSE; /* split after matched line */ X Xusage(status) X int status; /* exit if status != 0 */ X{ X fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i<n>] [-m|-s|-n<n>] [-e expression | expression] [format...]\n", progname); X if (status) X exit(status); X} X Xmain(argc, argv) X char *argv[]; X{ X /* split files at points that match a given pattern */ X /* initialise things */ X bool donefiles = FALSE; X char *buffer; X char *infile = (char *) NULL; X X int getnum(); /* does more checking than atoi */ X char *rmpath(); /* removes leading pathname from a filename */ X X /* now remove possible leading pathname X * (e.g. 
/usr/bin/slice is to report it's errors as slice X */ X progname = rmpath(argv[0]); X X X while (--argc) { X if (**++argv == '-') { X switch(*++*argv) { X case 'a': { /* split after pattern */ X split_after = TRUE; X break; X } X case 'e': { /* pattern (expression) */ X ++argv; argc--; X if (argc==0 || !**argv) { X error("Pattern after -e missing or null\n"); X usage(1); X } X pattern = *argv; X break; X } X case 'm': { /* mailbox pattern */ X pattern = "^From "; X break; X } X case 's': { /* shell pattern */ X pattern = "^#! *\/bin\/sh"; X break; X } X case 'n': { /* -n n_lines -- split every n lines */ X nextstr(buffer,argc,argv,usage(2)); X every_n_lines = getnum(buffer); X if (every_n_lines <= 0) { X error("-n: number must be at least 1\n"); X exit(EXIT_SYNTAX); X } X break; X } X case 'f': { X ++argv; argc--; X if (argc==0 || !**argv) { X error("Filename after -f missing or null\n"); X usage(1); X } X infile = *argv; X break; X } X case 'i': { /* -i initial_number */ X nextstr(buffer,argc,argv,usage(2)); X filenumber = getnum(buffer); X if (filenumber < 0) { X error("-i must be followed by a positive number\n"); X exit(EXIT_SYNTAX); X } X filenumber--; /* needs to be one less to start with */ X break; X } X case 'x': { /* exclude matched lines */ X exclude = TRUE; X break; X } X default: { X error("Unknown flag -%c\n", **argv); X usage(1); X } X } /* end switch */ X } else { X if (!pattern) pattern = *argv; /* first non-flag is pattern */ X else break; /* break while loop */ X } /* end if */ X } /* end while */ X X if (!argc) { X format = defaultfmt; X n_format = 1; } X else { X format = argv; X n_format = argc; X } X X#ifdef DEBUG X printf("argc=%d\n",argc); X printf("format='%s'\n",*format); X printf("pattern='%s'\n",pattern); X#endif X X if (!infile) split(stdin, DFLTNAME, pattern); X else fsplit(infile, pattern); X X exit(0); X} X Xfsplit(name, pat) X char *name; X char *pat; X{ X FILE *fd; X X if (!name || !*name) { X error("Can't split a file with an 
empty name\n"); X usage(2); X } X X if ( (fd = fopen(name, "r")) == NULL) { X error("Can't open %s\n", name); X return; X } X X (void) split(fd, name, pat); X X if (fclose(fd) == EOF) { /* something's gone wrong */ X error("Can't close %s -- giving up\n", name); X exit(EXIT_RUNERR); X } X} X Xchar buffer[BUFLEN]; X Xint Xsplit(input, name, pattern) X FILE *input; /* fd of input file */ X char *name; /* input filename */ X char *pattern; /* pattern used to split file */ X{ X /* do the real work here. Oh dear, I don't know how... */ X /* we are always called with an open file. */ X X extern char *re_comp(); /* compile string into automaton */ X extern int re_exec(); /* try to match string */ X#define REMATCH 1 X#define RENOMATCH 0 X#define REFAULT -1 X X char *errmessage; X FILE *output = NULL; X char fnambuf[MAXFILENAMELEN + 2]; /* +1 for null, +1 for overflow */ X int reg_status = 0; /* regular expression status */ X int line = 0; X X if (split_after && exclude) { X error("Can't specify both -a and -x\n"); X usage(2); X } X X if (every_n_lines && exclude) { X error("Can't specify both -n and -x\n"); X usage(2); X } X X if (every_n_lines && split_after) { X error("Can't specify both -n and -a\n"); X usage(2); X } X X if (every_n_lines && pattern) { X error("Can't specify both -n and pattern\n"); X usage(2); X } X X if (!every_n_lines && (!pattern || !*pattern)) { X error("Can't match an empty pattern\n"); X usage(2); X } X X if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) { X error("Error in pattern <%s>: %s\n", pattern, errmessage); X exit(EXIT_RUNERR); X } X /* errmessage is NULL here */ X X /* the -2 to fgets is because of the null and \n appended */ X while (fgets(buffer, BUFLEN - 2, input) != NULL) { X if (!output || /* first line */ X (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */ X (!every_n_lines && X ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */ X /* don't look at 1st line of file, to avoid an infinite 
*/ X /* recursion... */ X X if (output && split_after) { X fputs(buffer, output); X } X X if (n_format && mkname(fnambuf, name)) {; X /* check for output file = input file */ X if (strcmp(fnambuf,name)==0) { X error("Output file same as input file\n"); X exit(EXIT_RUNERR); X } X /* start a new file */ X if (output && output != stdout) { X if (fclose(output) == EOF) { X error("Can't close output file\n"); X exit(EXIT_RUNERR); X } X output = NULL; X } X line = 0; X if (fnambuf[0]=='+' && fnambuf[1]==NULL) { X output = stdout; X } else { X if ((output = fopen(fnambuf, "a")) == NULL) { X error("Can't open output file %s\n", fnambuf); X exit(EXIT_RUNERR); X } X } X /* if matched lines are excluded, skip the fputs */ X if (exclude && reg_status == REMATCH) continue; X X /* if file is to be split after pattern, put already done */ X if (split_after && reg_status == REMATCH) continue; X } else { X error("Insufficient formats -- last file contains remainder\n"); X } X } else if (reg_status == REFAULT) { X /* the re_exec failed */ X error("Internal error trying to match <%s> to <%s>\n", X pattern, buffer); X exit(EXIT_INTERN); X } X fputs(buffer, output); X } X return (filenumber == -1); /* exit status for main */ X} X Xbool Xmkname(fnambuf, name) X char *fnambuf; X char *name; X{ X int i, s = -1, d = -1; X static bool new_format = TRUE; X static bool perpetual = FALSE; X static bool d_before_s = FALSE; X X if (new_format) { X if (!n_format) { X error("Internal error: mkname called but formats have run out\n"); X exit(EXIT_INTERN); X } X i = bfsearch(*format, "%",0); X s = bfsearch(*format, "%s",0); X if (i>=0 && i==s) d = bfsearch(*format, "%",++i); X else d = i; X if (d<0) perpetual = FALSE; X else perpetual = TRUE; X if (d<s || s<0) d_before_s = TRUE; X else d_before_s = FALSE; X new_format = FALSE; X } X X if (perpetual) ++filenumber; X X if (d_before_s) X sprintf(fnambuf, *format, filenumber, rmpath(name)); X else X sprintf(fnambuf, *format, rmpath(name), filenumber); X 
X if (!perpetual) { X new_format = TRUE; X --n_format; X if (n_format) { X ++format; X filenumber=0; X } X } X} X Xerror(fmt, a1, a2, a3, a4) X char *fmt; X{ X fputs(progname, stderr); X fputs(": ", stderr); X fprintf(stderr, fmt, a1, a2, a3, a4); X} X X/* getnum(s) returns the value of the unsigned int in s. If there's any X * trailing garbage, or the number isn't +ve, we return -1 X */ Xint Xgetnum(s) X char *s; X{ X register char *p; X X for (p = s; *p; p++) { X if (!isdigit(*p)) { X return -1; X } X } X return atoi(s); X} X X X/* Remove the leading pathname from a filename */ X Xchar * Xrmpath(fullname) X char *fullname; X{ X register char *p; X char *q = (char *) NULL; X X for (p = fullname; p && *p; p++) { X if (*p == '/') X q = ++p; X } X if (q && *q) { X return(q); X } X return(fullname); X} X X X/* Find substring within string */ X/* Brute force algorithm */ X Xint Xbfsearch(string,key,start) X X char string[], X key[]; X int start; X{ X int i=start,j=0; X X if (string[0]==NULL || key[0]==NULL) return(-1); X X do { X if (string[i] == key[j]) X {i++; j++;} X else X {i=i-j+1; j=0;}; X } X while (string[i]!=NULL && key[j]!=NULL); X X if (key[j]==NULL) return(i-j); X else return(-1); X} X SHAR_EOF if test 8964 -ne "`wc -c 'slice.c'`" then echo shar: error transmitting "'slice.c'" '(should have been 8964 characters)' fi # End of shell archive exit 0 -- Gary Puckering 3755 Riverside Dr. Cognos Incorporated Ottawa, Ontario (613) 738-1440 CANADA K1G 3N3
tes@whuts.UUCP (01/26/87)
<*> Slice does not compile/link; it appears to be looking for the re_* functions (re_comp and re_exec). How do I obtain them? Presumably they are on the developer's system, and he assumed that they were universal. (Surprise!!) -- ----- Terry Sterkel -====---- AT&T Bell Laboratories --------- {harvard|allegra|ulysses|ihnp4}!whuts!tes ----- [opinions are obviously only my own]