stevesu@copper.UUCP (04/11/87)
Here's something I came across in my bin directory. According to
the modification time on the file, I must have written it back in
1984. It's just like the "standard" wc (4.[23]bsd, anyway) with
the following two improvements:
1. It doesn't count what you don't ask for. Therefore,
wc -l is faster and wc -c is _m_u_c_h faster than when it has
to count words (which is harder). It also seems to be
considerably faster than /usr/ucb/wc.
2. It has the -v (verbose) and -p (count pages) options that
some old version of wc (4.1? 2.9?) that I got used to had.
3. (Three! Three improvements! _N_obody expects...) It prints
the fields in the order you ask for (i.e. wc -cwl gives
you the reverse of the usual order). (This isn't
terribly important, and I've never made use of it, but
for some reason I wrote it that way.)
There is also a -s flag that lets you set the page size used in
calculating page counts with -p.
Here is a timing comparison (on a 780 running Ultrix):
$ cd /usr/dict
$ time /usr/ucb/wc words web*
24259 24259 198596 words
234936 234936 2486813 web2
76205 121847 1012730 web2a
335400 381042 3698139 total
3:29.2 real 56.9 user 10.8 sys
$ time wc.new words web* > /dev/null
1:44.1 real 26.7 user 11.7 sys
$ time wc.new -w words web* > /dev/null
1:50.0 real 26.1 user 11.9 sys
$ time wc.new -l words web* > /dev/null
1:08.5 real 14.6 user 11.4 sys
$ time wc.new -c words web* > /dev/null
25.8 real 0.2 user 9.7 sys
Of course, if all you really care about is the character count,
an ls -l is faster still (although it will give you a different
answer if the file contains bad blocks, but I digress).
The word-counting algorithm probably isn't the one I would have
chosen, but it matches the one that /usr/ucb/wc uses.
If you're picky about plug compatibility, you should note that
the error handling is a bit different than the standard version.
(There is, regrettably, no "usage:" message.)
Following my signature are the source and man page.
Steve Summit
stevesu@copper.tek.com
cat > wc.c <<\%EOF%
/*
* wc [ -lwcp ] [ -spagesize ] [ -v ] [ files... ]
*
* Count lines, words, characters, and pages.
*
* Runs faster by doing less work if it doesn't have to count
* all quantities.
*
* Use this program as you wish, but please leave this header intact.
*
* Steve Summit 12/4/84
*/
#include <stdio.h>
/* Boolean values for the int flags used throughout this file. */
#define TRUE 1
#define FALSE 0
/*
 * Running totals over every file counted so far.  wc() adds each
 * file's counts to these, and main() prints them on the "total" line
 * when more than one file was named.
 */
long int totchars = 0;
long int totwords = 0;
long int totlines = 0;
long int totpages = 0;
/* Bit values for `count': the quantities wc() has to scan for. */
#define LINES 04
#define WORDS 02
#define CHARS 01
/* Set of quantities to count; everything unless options narrow it. */
int count = LINES | WORDS | CHARS;
/*
 * Columns to print, one letter per column ('l', 'w', 'c' or 'p'), in
 * the order they will appear.  Holds at most 9 letters plus the
 * terminating '\0'.
 */
char want[10] = "lwc";
/* Set by -v: print a line of column headings before any counts. */
int verbose = FALSE;
/* Number of lines per page used for the page count; changed by -s. */
int pagelen = 66;
/* Number of files that could not be opened or read; reported through the exit status. */
int errs = 0;
/* ASCII digit test and digit-to-value conversion, used when parsing -s. */
#define Isdigit(c) ((c) >= '0' && (c) <= '9')
#define Ctod(c) ((c) - '0')
/*
 * Append(mask, letter) -- select a directly counted column.
 *
 * `mask' is one of LINES, WORDS or CHARS and `letter' is the matching
 * one-character string ("l", "w" or "c").  The first selection made on
 * the command line (deflt still TRUE) replaces the default "lwc" set;
 * later selections are added to it, in the order given.
 *
 * Both macros expect a local int `deflt' to be in scope at the point of
 * use, and both are wrapped in do { } while(0) so that they behave as a
 * single statement.
 *
 * Overflow guard: `want' starts out zero-filled and is only ever
 * extended one letter at a time, so its next-to-last byte is non-zero
 * exactly when the array is full.  Once that happens further letters
 * are ignored instead of being written past the end of the array.
 */
#define Append(mask, letter) do { \
	if(deflt) \
		{ \
		count = (mask); \
		(void)strcpy(want, letter); \
		deflt = FALSE; \
		} \
	else	{ \
		count |= (mask); \
		if(want[sizeof(want) - 2] == '\0') \
			(void)strcat(want, letter); \
		} \
	} while(0)
/*
 * Append2(letter) -- select a column that is derived from the line
 * count (used for "p", pages).
 *
 * Pages are computed from the number of lines, so LINES is always
 * added to `count'; without it a request such as -cp would skip line
 * counting altogether and report 0 pages.
 */
#define Append2(letter) do { \
	if(deflt) \
		{ \
		count = LINES; \
		(void)strcpy(want, letter); \
		deflt = FALSE; \
		} \
	else	{ \
		count |= LINES; \
		if(want[sizeof(want) - 2] == '\0') \
			(void)strcat(want, letter); \
		} \
	} while(0)
/*
 * Name used as the prefix of diagnostics.  main() replaces this with
 * the last path component of argv[0] when one is available.
 */
char *progname = "wc";
/* Old-style declarations of the string routines that return pointers. */
extern char *rindex();
extern char *strcat();
extern char *strcpy();
/*
 * main -- parse the options, print the optional heading line, then
 * count standard input (when no file is named) or each named file in
 * turn, followed by a "total" line if more than one file was named.
 *
 * Option letters may be bundled (-lw) or given separately (-l -w);
 * option parsing stops at the first argument not starting with '-'.
 * An unknown letter is reported and otherwise ignored.
 *
 * The exit status is the number of files that could not be opened or
 * read, limited to 255 so that a large error count cannot wrap around
 * to 0 and be mistaken for success.
 */
main(argc, argv)
int argc;
char *argv[];
{
	int fd;
	int deflt = TRUE;	/* TRUE until -l, -w, -c or -p replaces the default columns */
	int argi;
	char *p;
	int totals;
	int size;

	/* Issue diagnostics under the name we were invoked by, minus any directory. */
	if(argc > 0)
		{
		p = rindex(argv[0], '/');
		if(p != NULL)
			progname = p + 1;
		else	progname = argv[0];
		}

	for(argi = 1; argi < argc && argv[argi][0] == '-'; argi++)
		{
		for(p = &argv[argi][1]; *p != '\0'; p++)
			{
			switch(*p)
				{
				case 'l':
					Append(LINES, "l");
					break;
				case 'w':
					Append(WORDS, "w");
					break;
				case 'c':
					Append(CHARS, "c");
					break;
				case 'p':
					Append2("p");
					break;
				case 'v':
					verbose = TRUE;
					/* -v on its own also turns on the pages column. */
					if(deflt)
						(void)strcpy(want, "lwcp");
					break;
				case 's':
					/*
					 * The page size is the run of digits that
					 * directly follows the 's'.  A missing or zero
					 * size is rejected and the previous page
					 * length kept, because wc() divides by it.
					 */
					size = 0;
					while(Isdigit(*(p + 1)))
						size = 10 * size + Ctod(*++p);
					if(size > 0)
						pagelen = size;
					else	fprintf(stderr,
							"%s: -s needs a page size greater than zero\n",
							progname);
					break;
				default:
					fprintf(stderr, "%s: unknown option -%c\n",
							progname, *p);
				}
			}
		}

	/* Heading line: one title per selected column, in output order. */
	if(verbose)
		{
		for(p = want; *p != '\0'; p++)
			{
			switch(*p)
				{
				case 'l':
					printf(" lines");
					break;
				case 'w':
					printf(" words");
					break;
				case 'c':
					printf(" chars");
					break;
				case 'p':
					printf(" pages");
					break;
				}
			}
		putchar('\n');
		}

	if(argi >= argc)
		wc("", 0);	/* no file arguments: count standard input, print no name */
	else	{
		/* A grand total is only worth printing for two or more files. */
		totals = (argi + 1) < argc;
		for(; argi < argc; argi++)
			{
			if((fd = open(argv[argi], 0)) < 0)
				{
				fprintf(stderr, "%s: can't open %s\n", progname,
						argv[argi]);
				perror("");
				errs++;
				continue;
				}
			wc(argv[argi], fd);
			(void)close(fd);
			}
		if(totals)
			{
			printit(totlines, totwords, totchars, totpages);
			printf(" total\n");
			}
		}

	/* Only the low 8 bits of the status reach the parent; keep failure non-zero. */
	exit(errs > 255 ? 255 : errs);
}
#define Set(flag) flag++
#define Clear(flag) flag = FALSE
#define Checkline() if(*p == '\n') \
lines++
#define Checkword() if(' ' < *p && *p < '\177') \
{ \
if(!inword) \
{ \
words++; \
Set(inword); \
} \
continue; \
}
#define Checkword2() else if(*p != ' ' && *p != '\t') \
continue; \
Clear(inword)
#define Checkword3() if(*p == ' ' || *p == '\n' || *p == '\t') \
Clear(inword)
#define Dochars() chars += r
/*
 * wc -- count one file and print its line of results.
 *
 *	name	file name to print after the counts; "" means standard
 *		input, for which no name is printed
 *	fd	open descriptor to read from
 *
 * Only the quantities selected in the global `count' are scanned for.
 * Each combination gets its own read loop so that, for example,
 * counting characters alone never looks at the data.  The file's
 * counts are printed via printit() and added to the running totals.
 * A read error is reported, counted in `errs', and whatever was
 * counted up to that point is still printed.
 */
wc(name, fd)
char *name;
int fd;
{
	char buf[BUFSIZ];
	register char *bufend;
	int r = 0;		/* defined even if no case below ever reads */
	long int lines, words, chars, pages;
	register char *p;
	register int inword;

	lines = words = chars = pages = 0;
	Clear(inword);

	switch(count)
		{
		case LINES:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					Checkline();
				}
			break;
		case WORDS:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					{
					Checkword();
					Checkword3();
					}
				}
			break;
		case CHARS:
			/* Nothing to scan: the read sizes are the answer. */
			while((r = read(fd, buf, BUFSIZ)) > 0)
				Dochars();
			break;
		case LINES|CHARS:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				Dochars();
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					Checkline();
				}
			break;
		case LINES|WORDS:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					{
					/* Order matters: Checkword2 completes Checkline's if. */
					Checkword();
					Checkline();
					Checkword2();
					}
				}
			break;
		case WORDS|CHARS:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				Dochars();
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					{
					Checkword();
					Checkword3();
					}
				}
			break;
		case LINES|WORDS|CHARS:
			while((r = read(fd, buf, BUFSIZ)) > 0)
				{
				Dochars();
				bufend = buf + r;
				for(p = buf; p < bufend; p++)
					{
					/* Order matters: Checkword2 completes Checkline's if. */
					Checkword();
					Checkline();
					Checkword2();
					}
				}
			break;
		}

	if(r < 0)
		{
		fprintf(stderr, "%s: %s: read error\n", progname,
				*name != '\0' ? name : "standard input");
		perror("");
		errs++;
		}

	/*
	 * Pages = lines / pagelen, rounded up.  A non-positive page length
	 * would make the division undefined, so report 0 pages instead.
	 */
	if(pagelen > 0)
		pages = lines / pagelen + (lines % pagelen != 0 ? 1 : 0);

	printit(lines, words, chars, pages);
	if(*name != '\0')
		printf(" %s", name);
	putchar('\n');

	totlines += lines;
	totwords += words;
	totchars += chars;
	totpages += pages;
}
/*
 * printit -- print one row of counts, without a trailing newline.
 *
 * Walks the global `want' string and, for each column letter in it,
 * prints the matching argument ('l' lines, 'w' words, 'c' chars,
 * 'p' pages) as a space followed by a 7-wide field.  Any other letter
 * produces no output.
 */
printit(lines, words, chars, pages)
long int lines, words, chars, pages;
{
	register char *col;
	long int value;

	for(col = want; *col != '\0'; col++)
		{
		if(*col == 'l')
			value = lines;
		else if(*col == 'w')
			value = words;
		else if(*col == 'c')
			value = chars;
		else if(*col == 'p')
			value = pages;
		else	continue;	/* not a column letter: print nothing */

		printf(" %7ld", value);
		}
}
%EOF%
cat > wc.1 <<\%EOF%
.TH WC 1
.SH NAME
wc \- word count
.SH SYNOPSIS
.B wc
[
.B \-lwcp
]
[
.B \-s\c
.I pagesize
]
[
.B \-v
]
[ name ... ]
.SH DESCRIPTION
.PP
.I Wc
counts lines, words, characters and (optionally) pages
in the named files, or in the standard input if no name appears.
A word is a maximal string of characters delimited by spaces, tabs or newlines.
.PP
If an option argument (one beginning with ``\-'') containing any of the letters ``lwcp'' is present,
the specified counts (lines, words, characters, or pages)
are selected by the letters
.BR l ,
.BR w ,
.BR c ,
or
.BR p .
The default is
.B \-lwc
unless
.B \-v
is specified.
.PP
The
.B \-s
option specifies that pages are
.I pagesize
lines long instead of the default 66.
.PP
The
.B \-v
option asks for a verbose output format, with column headers and
including pages by default.
.SH BUGS
%EOF%