sources-request@panda.UUCP (05/28/86)
Mod.sources: Volume 5, Issue 16 Submitted by: talcott!topaz!lll-crg!csustan!casey [ I haven't tried this, but I bet you odd-address-machine people will be happy to see this fix - moderator ] Hi, I've just recently porteed bm to a PDP-11 runnung under the August seismo distribution of BSD2.9. It turns out this wasn't an easy process at all and I was more that 3 hours into it before I found out why bm ran correctly, but almost 4 times slower than fgrep! The problem was bm doing I/O on odd addresses and/or going for odd length transfers. The problem rests partially with the PDP-11 architecture and partially with the implementation of copyout in the kernel. I'll be fixing copyout (and copyin of course), but the PDP's architectual problems remain. And as the PDP isn't the only machine that has biases about odd addresses, I thought I'd forward on the necessary changes to you. Hope these are useful! Leith (Casey) Leedom lll-crg.arpa!csustan!casey Computer Science Department work: (209) 667-3185 California State University, Stanislaus home: (209) 634-2775 Turlock, CA 95380 P.S. I'm only forwarding bm.c, Execute.c, and MoveResidue.c as they're the only one's that needed changing. ---- cut here ---- cut here ---- cut here ---- cut here ---- cut here ---- #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create: # Execute.c # MoveResidue.c # bm.c # This archive created: Fri May 23 05:39:21 1986 export PATH; PATH=/bin:/usr/bin:$PATH if test -f 'Execute.c' then echo shar: "will not over-write existing file 'Execute.c'" else cat << \SHAR_EOF > 'Execute.c' #include <stdio.h> #include "bm.h" #include "Extern.h" Execute(DescVec, NPats, TextFile, Buffer) struct PattDesc *DescVec[]; /* pointers to status vectors for the different * patterns, including skip tables, position in buffer, etc. */ int NPats; /* number of patterns */ char Buffer[]; /* holds text from file */ int TextFile; /* file to search */ { long BuffPos; /* offset of first char in Buffer in TextFile */ int NRead, /* number of chars read from file */ NWanted, /* number of chars wanted */ NAvail, /* number of chars actually read */ BuffSize, /* number of chars in buffer */ BuffEx, /* flag to indicate that buffer has been searched */ ResSize, /* number of characters in the last, incomplete line */ Found, /* flag indicates whether pattern found * completely and all matches printed */ Valid; /* was the match "valid", i.e. if -x used, * did the whole line match? */ char *BuffEnd; /* pointer to last char of last complete line */ /* * In order to optimize I/O for some machines which impose a severe * penalty for I/O on an odd address, we play a nasty game. First, we * assume that the Buffer which is passed to us has an even address. * Then whenever we move a buffer residual back to the beginning of * the Buffer for the next read cycle, we actually move it to Buffer + * Odd(Residual) where Odd() is 1 if Residual is odd, zero otherwise. * This causes the the next read to go down on an even address. We * keep track of the beginning of data in the Buffer with BuffStart. */ char *BuffStart; /* misc working variables */ #ifdef notdef int i; #else !notdef register struct PattDesc *Desc; struct PattDesc **Vec, **LastPatt = DescVec+NPats; #endif notdef /* initialize */ ResSize = 0; Found = 0; BuffPos = 0; BuffStart = Buffer; #ifdef notdef for (i=0; i < NPats; i++) { DescVec[i] -> Success = 0; DescVec[i] -> Start = Buffer; } /* for */ #else !notdef for (Vec=DescVec; Vec < LastPatt; Vec++) { Desc = *Vec; Desc->Success = 0; Desc->Start = Buffer; } #endif notdef /* now do the searching */ do { /* first, read a bufferfull and set up the variables */ /* * Some systems *even* get upset when you ask for an odd read * length - ARGH! */ NWanted = (int)((unsigned)(MAXBUFF - ResSize) & ~1); NRead = 0; do { /* * BuffStart+ResSize+BRead is even first time through * this loop - afterwards, no guaranties, but for * files this loop never goes more than once ... * Can't do any better. */ NAvail = read(TextFile,BuffStart + ResSize + NRead, NWanted); if (NAvail == -1) { fprintf(stderr, "bm: error reading from input file\n"); exit(2); } /* if */ NRead += NAvail; NWanted -= NAvail; } while (NAvail && NWanted); BuffEx = 0; BuffSize = ResSize + NRead; BuffEnd = BuffStart + BuffSize - 1; /* locate the end of the last complete line */ while (*BuffEnd != '\n' && BuffEnd >= BuffStart) --BuffEnd; if (BuffEnd < BuffStart) BuffEnd = BuffStart + BuffSize - 1; while (!BuffEx) { /* work through one buffer full */ BuffEx = 1; /* set it provisionally, then clear * it if we find the buffer non-empty */ #ifdef notdef for (i=0; i< NPats; i++) { if (!DescVec[i]->Success) /* if the pattern has not been found */ DescVec[i]-> Success = Search(DescVec[i]->Pattern, DescVec[i]->PatLen, BuffStart, BuffEnd, DescVec[i]->Skip1, DescVec[i]->Skip2, DescVec[i]); if (DescVec[i]->Success){ /* if a match occurred */ BuffEx = 0; Valid = MatchFound(DescVec[i],BuffPos, BuffStart, BuffEnd); Found |= Valid; if ((sFlag || lFlag) && Found) return(0); } /* if */ } /* for */ #else !notdef for (Vec=DescVec; Vec < LastPatt; Vec++) { Desc = *Vec; if (!Desc->Success) /* if the pattern has not been found */ Desc-> Success = Search(Desc->Pattern, Desc->PatLen, BuffStart, BuffEnd, Desc->Skip1, Desc->Skip2, Desc); if (Desc->Success){ /* if a match occurred */ BuffEx = 0; Valid = MatchFound(Desc,BuffPos, BuffStart, BuffEnd); Found |= Valid; if ((sFlag || lFlag) && Found) return(0); } /* if */ } /* for */ #endif notdef } /* while */ if(NRead) { ResSize = MoveResidue(DescVec,NPats,Buffer,&BuffStart,BuffEnd); BuffPos += BuffSize - ResSize; } /* if */ } while (NRead); return(!Found); } /* Execute */ SHAR_EOF fi if test -f 'MoveResidue.c' then echo shar: "will not over-write existing file 'MoveResidue.c'" else cat << \SHAR_EOF > 'MoveResidue.c' #include "bm.h" /* Moves the text which has not been completely searched at the end of * the buffer to the beginning of the buffer * and returns number of bytes moved */ /* * In coordination with the I/O optimization code in Execute, if the Residual * is odd in length, we move it to Buffer+1, otherwise to Buffer+0, to make * sure that [at least] the next read goes down on an even address. A pointer * to a pointer to the current start of data in the buffer is passed in, that * pointer is updated and passed back. */ int MoveResidue(DescVec, NPats, Buffer, BuffStartP, BuffEnd) struct PattDesc *DescVec[]; int NPats; char *Buffer, **BuffStartP, *BuffEnd; { char *FirstStart, *f, *t, *Residue; int ResSize, i; FirstStart = BuffEnd; /* find the earliest point which has not been * completely searched */ for (i=0; i < NPats; i++) { FirstStart = min(FirstStart, DescVec[i]-> Start ); } /* for */ /* now scan to the beginning of the line containing the * unsearched text */ for (Residue = FirstStart; *Residue != '\n' && Residue >= *BuffStartP; Residue--); if (Residue < *BuffStartP) Residue = FirstStart; else ++Residue; ResSize = (BuffEnd - Residue + 1); /* now move the residue to the beginning of * the file */ t = *BuffStartP = ((unsigned) ResSize & 1) ? Buffer+1 : Buffer; f = Residue; for(i=ResSize;i;--i) *t++ = *f++; /* now fix the status vectors */ for (i=0; i < NPats; i++) { DescVec[i]->Start -= (Residue - *BuffStartP); } /* for */ return(ResSize); } /* MoveResidue */ SHAR_EOF fi if test -f 'bm.c' then echo shar: "will not over-write existing file 'bm.c'" else cat << \SHAR_EOF > 'bm.c' #include <stdio.h> #include <sys/types.h> /* XXXX ADDED FOR BSD2.9 */ #include <sys/file.h> #include <strings.h> #include "bm.h" #include "Extern.h" main(argc,argv) int argc; char *argv[]; { /* test driver for grep based on Boyer-Moore algorithm */ char BigBuff[MAXBUFF + 3]; /* * We leave one extra character at the beginning and end of the buffer, * but don't tell Execute about it. This is so when someone is * scanning the buffer and scans past the end (or beginning) * we are still technically in the buffer (picky, but there ARE * machines which would complain). We then leave an *additional* * free byte at the begining so we can pass an even address to Execute * (on some machines, odd address I/O can *COMPLETELY* destroy any * speed benefits of bm). */ int ret = 1, /* return code from Execute */ NotFound = 0, /* non-zero if file not readable */ NFiles, NPats; /* number of patterns to search for */ char i, *FlagPtr, **OptPtr; /* used to scan command line */ int TextFile /* file to search */; struct PattDesc *DescVec[MAXPATS]; --argc; OptPtr = argv + 1; while ( argc && **OptPtr == '-') { FlagPtr = *OptPtr + 1; while (*FlagPtr) { switch (*FlagPtr) { case 'c': cFlag = 1; break; case 'e': eFlag = 1; /* get the patterns from next arg */ NPats = MkDescVec(DescVec,*++OptPtr); --argc; break; case 'f': fFlag = 1; /* read the patterns from a file */ NPats = GetPatFile(*++OptPtr,DescVec); --argc; break; case 'l': lFlag = 1; break; case 'n': nFlag = 1; break; case 's': sFlag = 1; break; case 'x': xFlag = 1; break; case 'h': hFlag = 1; break; default: fprintf(stderr, "bm: invalid option: -%c \n", *FlagPtr); PutUsage(); exit(2); } /* switch */ ++FlagPtr; } /* while */ ++OptPtr; --argc; } /* while */ /* OptPtr now points to patterns */ if (!fFlag && !eFlag) { if (!argc) { fprintf(stderr,"bm: no pattern specified\n"); PutUsage(); exit(2); } else NPats = MkDescVec(DescVec,*OptPtr); ++OptPtr; --argc; } /* OptPtr now points to first file */ NFiles = argc; if (!NFiles) ret &= Execute(DescVec,NPats,0,BigBuff+2); else while (argc--) { if ((NFiles > 1) || lFlag) FileName = *OptPtr; if ((TextFile = open(*OptPtr,O_RDONLY,0)) < 0) { fprintf(stderr,"bm: can't open %s\n",*OptPtr); NotFound++; } else { ret &= Execute(DescVec,NPats,TextFile,BigBuff+2); if (sFlag && !ret) exit(0); close(TextFile); } /* if */ ++OptPtr; } /* while */ if (cFlag) printf("%d\n",MatchCount); exit(NotFound ? 2 : ret); } /* main */ SHAR_EOF fi exit 0 # End of shell archive