[comp.os.minix] ascii

nfs@notecnirp.Princeton.EDU (Norbert Schlenker) (12/12/89)

Ascii(1) fails for files with no terminating \n on the last line.  The
problem manifests itself as the loss of the final line for short files,
and as a repetition of part of the file for longer ones.  It also fails
silently for files with lines exceeding 30000 characters (admittedly
not an oft-encountered situation).

After trying in vain to patch the 1.3 version to remedy these problems,
I rewrote the program using <stdio.h>.  It's shorter and much easier to
understand.  And just for fun, I re-rewrote the new one to use input
buffering similar to that used in the original, but keeping <stdio.h>
on the output side (because it should be faster that way).

Running a 400K source archive through each version, I got the following
timings:
				real / user / system
  Original 1.3 version		66.0 /  3.1 / 32.8
  Using 1.3 <stdio.h>		23.0 / 16.5 /  2.0
  Using my <stdio.h>		12.0 /  3.3 /  1.8
  Output via <stdio.h>		13.0 /  2.3 /  1.0

Enclosed are the two new versions.

------------------------------ Cut here -------------------------------
echo x - stdio_ascii.c
sed '/^X/s///' > stdio_ascii.c << '/'
X/* ascii - list lines with/without ASCII characters */
X
X#include <stdio.h>
X#include <string.h>
X
X/* Holds the line currently being examined, including its trailing '\n'
X * when there is one.  main() refuses input whose lines fill this buffer.
X */
Xstatic char line[BUFSIZ];	/* line buffer */
X
Xint main(argc, argv)
Xint argc;
Xchar **argv;
X{
X  int nflag = 0;		/* 1 if invoked with -n */
X  int ascii_line;		/* set to 1 if line is all ASCII */
X  int ascii_file = 1;		/* set to 0 if file is not all ASCII */
X  register int c;		/* input character */
X  register char *lp;		/* line pointer */
X
X  --argc; ++argv;  
X  if (argc > 0 && strcmp(*argv, "-n") == 0) {
X	nflag = 1;
X	--argc; ++argv;
X  }
X  switch (argc) {
X  case 0:
X	break;
X  case 1:
X	if (freopen(*argv, "r", stdin) == NULL) {
X		fputs("ascii: cannot open ", stderr);
X		fputs(*argv, stderr);
X		putc('\n', stderr);
X		exit(1);
X	}
X	break;
X  default:
X	fputs("Usage: ascii [-n] file\n", stderr);
X	exit(1);
X  }
X
X  while (!feof(stdin)) {
X	lp = line;
X	ascii_line = 1;
X	while ((c = getchar()) != EOF && lp != &line[BUFSIZ]) {
X		*lp++ = c;
X		if (c == '\n') break;
X		if ((c & 0x80) != 0)
X			ascii_line = 0;
X	}
X	if (lp == &line[BUFSIZ]) {
X		fputs("ascii: line too long\n", stderr);
X		exit(1);
X	}
X	if (ascii_line != nflag)
X		fwrite(line, lp - line, 1, stdout);
X	if (!ascii_line)
X		ascii_file = 0;
X  }
X
X  exit(ascii_file == 0);
X}
/
echo x - fast_ascii.c
sed '/^X/s///' > fast_ascii.c << '/'
X/* ascii - list lines with/without ASCII characters */
X
X#define _POSIX_SOURCE
X
X#include <sys/types.h>
X#include <fcntl.h>
X#include <unistd.h>
X#include <stdio.h>
X#include <string.h>
X
X/* Number of bytes requested from each read().  NOTE: the expansion is not
X * parenthesized, so take care when using it inside a larger expression.
X */
X#define BUFSIZE 4*1024
X
X/* buf receives one read() worth of input; the extra byte leaves room for
X * the '\n' sentinel that main() stores just past the data.  carry keeps
X * the start of a line that continues in the next read().
X */
Xstatic char buf[BUFSIZE+1];	/* input buffer - +1 for sentinel */
Xstatic char carry[BUFSIZE];	/* buffer for partial line carryover */
X
Xint main(argc, argv)
Xint argc;
Xchar **argv;
X{
X  int nflag = 0;		/* 1 if invoked with -n */
X  int ascii_line;		/* set to 1 if line is all ASCII */
X  int ascii_file = 1;		/* set to 0 if file is not all ASCII */
X  int count;			/* count of characters in buf */
X  char *start;			/* points to beginning of line */
X  register char *end;		/* points to end of line */
X  char *sentinel;		/* points past last character in buffer */
X  int carry_count;		/* size of carry over */
X
X  --argc; ++argv;  
X  if (argc > 0 && strcmp(*argv, "-n") == 0) {
X	nflag = 1;
X	--argc; ++argv;
X  }
X  switch (argc) {
X  case 0:
X	break;
X  case 1:
X	close(0);
X	if (open(*argv, O_RDONLY) != 0) {
X		std_err("ascii: cannot open ");
X		std_err(*argv);
X		std_err("\n");
X		exit(1);
X	}
X	break;
X  default:
X	std_err("Usage: ascii [-n] file\n");
X	exit(1);
X  }
X
X  if ((count = read(0, buf, BUFSIZE)) <= 0)
X	exit(0);
X  *(sentinel = &buf[count]) = '\n';
X  start = buf;
X  ascii_line = 1;
X  carry_count = 0;
X  while (1) {
X	for (end = start; *end != '\n'; ++end)
X		if ((*end & 0x80) != 0) {
X			ascii_line = 0;
X			ascii_file = 0;
X			end = (char *) memchr(end, '\n', BUFSIZE);
X			break;
X		}
X	if (end != sentinel) {
X		++end;
X		if (ascii_line != nflag) {
X			if (carry_count != 0)
X				fwrite(carry, carry_count, 1, stdout);
X			fwrite(start, end - start, 1, stdout);
X		}
X		carry_count = 0;
X		start = end;
X		ascii_line = 1;
X	} else {
X		if (carry_count != 0) {
X			std_err("ascii: line too long\n");
X			exit(1);
X		}
X		if (ascii_line != nflag) {
X			carry_count = end - start;
X			memcpy(carry, start, carry_count);
X		}
X		if ((count = read(0, buf, BUFSIZE)) <= 0)
X			break;
X		*(sentinel = &buf[count]) = '\n';
X		start = buf;
X	}
X  }
X  if (ascii_line != nflag && carry_count != 0)
X	fwrite(carry, carry_count, 1, stdout);
X
X  exit(ascii_file == 0);
X}
/