nfs@notecnirp.Princeton.EDU (Norbert Schlenker) (12/12/89)
Ascii(1) fails for files with no terminating \n on the last line. The
problem manifests itself as the loss of the final line for short files,
and as a repetition of part of the file for longer ones. It also fails
silently for files with lines exceeding 30000 characters (admittedly not
an oft encountered situation).

After trying in vain to patch the 1.3 version to remedy these problems, I
rewrote the program using <stdio.h>. It's shorter and much easier to
understand. And just for fun, I re-rewrote the new one to use input
buffering similar to that used in the original, but keeping <stdio.h> on
the output side (because it should be faster that way).

Running a 400K source archive through each version, I got the following
timings:

                          real / user / system
    Original 1.3 version  66.0 /  3.1 / 32.8
    Using 1.3 <stdio.h>   23.0 / 16.5 /  2.0
    Using my <stdio.h>    12.0 /  3.3 /  1.8
    Output via <stdio.h>  13.0 /  2.3 /  1.0

Enclosed are the two new versions.

------------------------------ Cut here -------------------------------
echo x - stdio_ascii.c
sed '/^X/s///' > stdio_ascii.c << '/'
X/* ascii - list lines with/without ASCII characters
X *
X * Usage: ascii [-n] [file]
X *
X * Reads the named file (or standard input when no file is given) one
X * line at a time.  Without -n, the lines made up solely of 7-bit
X * characters are copied to standard output; with -n, the lines holding
X * at least one character with the 0x80 bit set are copied instead.
X *
X * Exit status: 0 if no character with the 0x80 bit set was seen,
X * 1 if one was seen or if an error was reported.
X *
X * A line, including its newline, may not exceed BUFSIZ characters.
X */
X
X#include <stdio.h>
X#include <string.h>
X
Xstatic char line[BUFSIZ];       /* line buffer */
X
Xint main(argc, argv)
Xint argc;
Xchar **argv;
X{
X    int nflag = 0;              /* 1 if invoked with -n */
X    int ascii_line;             /* set to 1 if line is all ASCII */
X    int ascii_file = 1;         /* set to 0 if file is not all ASCII */
X    register int c;             /* input character */
X    register char *lp;          /* line pointer */
X
X    --argc; ++argv;
X    if (argc > 0 && strcmp(*argv, "-n") == 0) {
X        nflag = 1;
X        --argc; ++argv;
X    }
X    switch (argc) {
X    case 0:
X        break;
X    case 1:
X        if (freopen(*argv, "r", stdin) == NULL) {
X            fputs("ascii: cannot open ", stderr);
X            fputs(*argv, stderr);
X            putc('\n', stderr);
X            return 1;
X        }
X        break;
X    default:
X        fputs("Usage: ascii [-n] [file]\n", stderr);
X        return 1;
X    }
X
X    /* The loop is driven by the value getchar() returned rather than by
X     * feof(): after a read error getchar() yields EOF while feof() stays
X     * false, which would make a feof()-controlled loop spin forever.
X     */
X    do {
X        lp = line;
X        ascii_line = 1;
X        c = EOF;
X        /* Test for room before reading, so that no character is
X         * consumed when the buffer is already full.
X         */
X        while (lp != &line[BUFSIZ] && (c = getchar()) != EOF) {
X            *lp++ = c;
X            if (c == '\n') break;
X            if ((c & 0x80) != 0)
X                ascii_line = 0;
X        }
X        if (lp == &line[BUFSIZ] && c != '\n') {
X            /* The buffer is full and holds no newline.  That is still
X             * acceptable when the input ends right here (an unterminated
X             * last line of exactly BUFSIZ characters); anything else
X             * means the line does not fit.
X             */
X            if ((c = getchar()) != EOF) {
X                fputs("ascii: line too long\n", stderr);
X                return 1;
X            }
X        }
X        if (ascii_line != nflag)
X            fwrite(line, 1, lp - line, stdout);
X        if (!ascii_line)
X            ascii_file = 0;
X    } while (c != EOF);
X
X    /* Returning from main flushes stdout exactly as exit() would. */
X    return ascii_file == 0;
X}
/
echo x - fast_ascii.c
sed '/^X/s///' > fast_ascii.c << '/'
X/* ascii - list lines with/without ASCII characters
X *
X * Usage: ascii [-n] [file]
X *
X * Same behaviour as the <stdio.h> version, but the input is fetched
X * with read() in blocks of BUFSIZE characters and scanned in place; a
X * '\n' sentinel stored just past the data ends every scan.  Output
X * still goes through <stdio.h>.
X *
X * Without -n, the lines made up solely of 7-bit characters are copied
X * to standard output; with -n, the lines holding at least one character
X * with the 0x80 bit set are copied instead.
X *
X * Exit status: 0 if no character with the 0x80 bit set was seen,
X * 1 if one was seen or if an error was reported.
X *
X * The part of a line that lies in earlier blocks is saved in carry[],
X * so at most BUFSIZE characters of a line may precede the block in which
X * the line ends.  A longer line is only an error if it has to be printed.
X */
X
X#define _POSIX_SOURCE
X
X#include <sys/types.h>
X#include <fcntl.h>
X#include <unistd.h>
X#include <stdio.h>
X#include <string.h>
X
X#define BUFSIZE (4*1024)
X
Xstatic char buf[BUFSIZE+1];     /* input buffer - +1 for sentinel */
Xstatic char carry[BUFSIZE];     /* buffer for partial line carryover */
X
Xint main(argc, argv)
Xint argc;
Xchar **argv;
X{
X    int nflag = 0;              /* 1 if invoked with -n */
X    int ascii_line;             /* set to 1 if line is all ASCII */
X    int ascii_file = 1;         /* set to 0 if file is not all ASCII */
X    int count;                  /* count of characters in buf */
X    char *start;                /* points to beginning of line */
X    register char *end;         /* points to end of line */
X    char *sentinel;             /* points past last character in buffer */
X    int carry_count;            /* size of carry over */
X    int too_long;               /* 1 if part of the line did not fit in carry */
X    int len;                    /* length of the partial line in buf */
X
X    --argc; ++argv;
X    if (argc > 0 && strcmp(*argv, "-n") == 0) {
X        nflag = 1;
X        --argc; ++argv;
X    }
X    switch (argc) {
X    case 0:
X        break;
X    case 1:
X        /* With descriptor 0 closed, a successful open() must return 0,
X         * which makes the file the standard input.
X         */
X        close(0);
X        if (open(*argv, O_RDONLY) != 0) {
X            fputs("ascii: cannot open ", stderr);
X            fputs(*argv, stderr);
X            putc('\n', stderr);
X            return 1;
X        }
X        break;
X    default:
X        fputs("Usage: ascii [-n] [file]\n", stderr);
X        return 1;
X    }
X
X    if ((count = read(0, buf, BUFSIZE)) <= 0)
X        return 0;
X    *(sentinel = &buf[count]) = '\n';
X    start = buf;
X    ascii_line = 1;
X    carry_count = 0;
X    too_long = 0;
X    while (1) {
X        /* Scan to the next newline, which may be the sentinel. */
X        for (end = start; *end != '\n'; ++end)
X            if ((*end & 0x80) != 0) {
X                ascii_line = 0;
X                ascii_file = 0;
X                /* The verdict on this line is in; skip to its end.  The
X                 * length stops at the sentinel, so the search stays
X                 * inside buf and always succeeds.
X                 */
X                end = (char *) memchr(end, '\n',
X                                      (size_t) (sentinel - end + 1));
X                break;
X            }
X        if (end != sentinel) {
X            /* A complete line ends in this block. */
X            ++end;
X            if (ascii_line != nflag) {
X                if (too_long) {
X                    fputs("ascii: line too long\n", stderr);
X                    return 1;
X                }
X                if (carry_count != 0)
X                    fwrite(carry, 1, carry_count, stdout);
X                fwrite(start, 1, end - start, stdout);
X            }
X            carry_count = 0;
X            too_long = 0;
X            start = end;
X            ascii_line = 1;
X        } else {
X            /* The block ends in the middle of a line. */
X            len = end - start;
X            if (ascii_line == 0 && nflag == 0) {
X                /* Already known to be suppressed: nothing to keep. */
X                carry_count = 0;
X            } else if (too_long || len > BUFSIZE - carry_count) {
X                /* No room left.  Only complain if the line turns out
X                 * to be one that has to be printed.
X                 */
X                too_long = 1;
X            } else {
X                /* Keep the partial line even when it would not be
X                 * printed as it stands: under -n an all-ASCII start
X                 * may still be followed by an 8-bit character in the
X                 * next block.  Appending (rather than overwriting)
X                 * copes with lines delivered in several short reads.
X                 */
X                memcpy(carry + carry_count, start, len);
X                carry_count += len;
X            }
X            if ((count = read(0, buf, BUFSIZE)) <= 0)
X                break;
X            *(sentinel = &buf[count]) = '\n';
X            start = buf;
X        }
X    }
X    /* Whatever is still carried is an unterminated last line. */
X    if (ascii_line != nflag) {
X        if (too_long) {
X            fputs("ascii: line too long\n", stderr);
X            return 1;
X        }
X        if (carry_count != 0)
X            fwrite(carry, 1, carry_count, stdout);
X    }
X
X    /* Returning from main flushes stdout exactly as exit() would. */
X    return ascii_file == 0;
X}
/