root@ozdaltx.UUCP (07/27/90)
Posting-number: Volume 14, Issue 35
Submitted-by: root@ozdaltx.UUCP
Archive-name: mkkey/part01

---- Cut Here and unpack ----
#!/bin/sh
# This is a shell archive (shar 3.11)
# made 05/24/1990 17:12 UTC by root@ozdaltx
# Source directory /tmp
#
# existing files WILL be overwritten
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#   1883 -rw-r--r-- README
#   1738 -rw-r--r-- bkey.c
#     45 -rwxr-xr-x makeit
#   1891 -rwxr-xr-x mkkey
#
# NOTE: the line structure of this archive was restored from a copy whose
# newlines had been collapsed.  Every payload line below starts with an "X"
# marker that the sed command strips on extraction.  Runs of blanks or tabs
# inside the payload could not be recovered exactly, so the size checks that
# follow each file may report a difference from the original byte counts.
#
# Usage: cut everything above the "#!/bin/sh" line and run the rest with sh
# in the directory where README, bkey.c, makeit and mkkey should be created.

# Probe whether this system's touch(1) advertises the -a/-m/-c options by
# looking for "[-amc]" in its usage message; only then are the original
# timestamps restored on the extracted files.
touch 2>&1 | fgrep '[-amc]' > /tmp/s3_touch$$
if [ -s /tmp/s3_touch$$ ]
then
    TOUCH=can
else
    TOUCH=cannot
fi
rm -f /tmp/s3_touch$$

# ============= README ==============
# The here-document delimiter is quoted, so the payload is taken literally
# (no parameter or command substitution happens inside it).
echo "x - extracting README (Text)"
sed 's/^X//' << 'SHAR_EOF' > README &&
XREADME
X
XMkkey - A group of programs to generate a list of key words
Xand their related files from text files.
X
XThis group of programs relies on certain (I assume) standard
X*NIX text processing programs:
X    hyphen - a program to extract and join hyphenated words. Could
X        be done with sed.
X    comm - Finds common words in two files. Probably awk could
X        accomplish this, but would be slower.
X    sort & uniq - should be available anywhere.
X    vi - Use your favorite editor here.
X
XThe following is used in the program:
X    tolower - converts all upper case to lower. Tr can do the same
X        thing.
X    bkey.c - The program that actually does the KEY file generation.
X        Makeit is the cc command. This one is for SCO XENIX.
X
XNot many comments are in the files - as they should be fairly self-
Xexplanitory.
XTwo files, apart from the text file(s), are needed; ignore and
Xinclude. These should be placed in the directory described by $KPATH.
XKPATH will also need to be changed to your preference. You will
Xprobably want to add changes to mkkey to remove the temporary files
Xafter each key update. For the ignore file, /usr/lib/eign would be a
Xgood place to start as it contains the common words; the, he, she,
Xthat... etc. Include builds itself. The program does copy KEY to
XKEY.O - just in case. I keep KEY.O around until I'm sure KEY is OK.
X
XThere are some sed lines that may confuse some people.
XThese are there to try to pear down the size of the ignore file.
X
XI'd like to know of changes and improvements. Especially in the area
Xof stripping ignore words to their base form. There are no
Xrestrictions on this collection of files.
X
XEnjoy....
X
XScotty
X------
XAIDS INFORMATION EXCHANGE BBS (214) 247-2367/247-5609
X               "Education is the best weapon"
X{mic,void,egsner}!ozdaltx!sysop || {uunet,smu,ames}!sulaco!ozdaltx!sysop
X-
SHAR_EOF
chmod 0644 README || echo "restore of README fails"
if [ "$TOUCH" = can ]
then
    touch -am 0524120890 README
fi
# Compare the extracted size with the size recorded when the archive was made.
set `wc -c README`;Wc_c=$1
if test "$Wc_c" != "1883"
then
    echo original size 1883, current size $Wc_c
fi

# ============= bkey.c ==============
echo "x - extracting bkey.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > bkey.c &&
X#include <stdio.h>
X#include <string.h>
X#define MAXLINES 2000
X
Xextern int fseek(), fscanf(), sscanf();
Xextern char *malloc();
Xextern char *fgets();
Xextern int strncmp();
X
Xmain(argc, argv)
Xint argc;
Xchar *argv[];
X{
X    FILE *fp, *fopen();
X    unsigned int i,j;
X    char *array[MAXLINES];
X    char a[2][90], b[2][90];
X    int c;
X    int len = 0;
X
X
X    j=i=0;
X    if(argc == 1){
X        printf("Usage: %s file\n",argv[0]);
X        exit(1);
X    }
X    if((fp = fopen(argv[1], "r")) == 0){
X        printf("%s: Can't open %s\n",argv[0],argv[1]);
X        exit(1);
X    }
X/* make a pass through the file to determine how many lines there are */
X    while((c=fgetc(fp)) != EOF){
X        if(c == '\n')
X            i++;
X    }
X    if(i > MAXLINES){
X        printf("%s: To many lines to read\n", argv[0]);
X        exit(1);
X    }
X    for(j=0; j <= i; j++){
X        if((array[j]=malloc(90)) == 0){
X            printf("Can't allocate memory\n");
X            exit(1);
X        }
X    }
X/* go back to the start of the file */
X    fseek(fp,0,0);
X/* start reading the file and placing each line into array */
X    j=0;
X    while((fgets(array[j],90,fp)) != 0){
X        len=strlen(array[j]);
X        array[j][len-1] = '\0'; /* zap the newline */
X        j++;
X    }
X    fclose(fp);
X    for(j=0; j < i; j++){
X        sscanf(array[j],"%s %[0-9A-Za-z. \t]",a[0],a[1]);
X        if(strlen(array[j]) > 65){
X            printf("%s\n", array[j]);
X            continue;
X        }
X        if(strncmp(array[j+1],a[0],strlen(a[0])) == 0){
X            sscanf(array[j+1],"%s%[0-9A-Za-z. \t]",b[0],b[1]);
X            if(strlen(array[j]) + strlen(b[1]) > 75){
X                printf("%s\n", array[j]);
X                printf("%s: %s\n",b[0],b[1]);
X            } else {
X                if(b[1][0] == ' '){
X                    printf("%s%s\n", array[j],b[1]);
X                } else {
X                    printf("%s %s\n", array[j],b[1]);
X                }
X            }
X            j++;
X        } else {
X            printf("%s\n", array[j]);
X
X        }
X
X    }
X    exit(0);
X}
SHAR_EOF
chmod 0644 bkey.c || echo "restore of bkey.c fails"
if [ "$TOUCH" = can ]
then
    touch -am 0524114390 bkey.c
fi
set `wc -c bkey.c`;Wc_c=$1
if test "$Wc_c" != "1738"
then
    echo original size 1738, current size $Wc_c
fi

# ============= makeit ==============
echo "x - extracting makeit (Text)"
sed 's/^X//' << 'SHAR_EOF' > makeit &&
Xcc -LARGE -Ml2e -F 6000 -s -O bkey.c -o bkey
SHAR_EOF
chmod 0755 makeit || echo "restore of makeit fails"
if [ "$TOUCH" = can ]
then
    touch -am 0524114390 makeit
fi
set `wc -c makeit`;Wc_c=$1
if test "$Wc_c" != "45"
then
    echo original size 45, current size $Wc_c
fi

# ============= mkkey ==============
# The embedded sed scripts rely on backslash-newline sequences, so the line
# breaks inside this payload are significant.
echo "x - extracting mkkey (Text)"
sed 's/^X//' << 'SHAR_EOF' > mkkey &&
XKPATH=/bbs/lib/key.d; export KPATH
Xcase $# in
X0)
X    echo "Usage : $0 sourcefile"
X    exit;;
Xesac
Xif test -f "$1"
Xthen
X    FILE=$1; export FILE
Xelse
X    echo "No file: $1"
X    exit
Xfi
Xgrep $1 KEY > /dev/null
Xcase $? in
X0) echo "$1 has already been processed"
X    exit;;
Xesac
Xecho "Processing $1, building key.raw
XFinding hyphanated words..."
Xhyphen $1 | tolower > hyph
Xsed '
X/'$1'/d
X/^$/d
Xs/-//' hyph > hy.inc
Xsed '
X/'$1'/d
X/^$/d
X:a
X/[\/-]/{
Xs//\
X/
Xb a
X}
Xs/-$//' hyph > hy.ign
Xsort -u hy.inc -o hy.inc
Xsort -u hy.ign -o hy.ign
Xtr ' ' '\012' < $1 hy.inc | tolower | sort -u > key.raw1
Xcat hy.ign key.raw1 | sort | uniq -u |
Xsed -n '
X/[ :;,._'\'')("!*?]\[{}+=\\#@$%&*<>^]/s///g
X/['\''\`][dst]$/s/['\''\`]//
Xs/ness$//
X/[^e][cdfrtslnp]ies$/s/ies$/y/
X/[\/-]/s//\
X/g
X/^$/d
X/^[0-9]*$/d
X/^[a-z]*[0-9]*$/p
X/^[a-z]*$/p' | sort -u > key.raw
Xecho "Removing common words from key.raw"
Xcomm -23 key.raw $KPATH/ignore | comm -23 - $KPATH/include |
X    sort -u > klist
X
Xif test -s klist
Xthen
X    echo "# put a % by each word going to the $KPATH/include list,
X# anything else will be sent to the $KPATH/ignore list." > ked
X    cat klist >> ked
X    vi ked
Xecho "C)ontinue or Q)uit?\c "
Xread cq
Xcase $cq in
X    [Cc]) ;;
X    [Qq]) exit;;
Xesac
Xecho "building inc and and ign"
Xsed '/^#/d
X/^%/{
Xs///w 'inc'
Xd
X} ' ked |
Xsed 's/[\/-]/\
X/g
Xs/+//g
Xs/\]//g' > ign
Xrm ked
Xfi
X
Xecho "Building ktemp"
Xif test -s inc
Xthen
X    echo "Adding inc to $KPATH/include list..."
X    cat inc >> $KPATH/include
X    sort -u $KPATH/include -o $KPATH/include
X    comm -12 key.raw $KPATH/include | sed 's/$/: '$1'/' > ktemp
Xelse
X    comm -12 key.raw $KPATH/include | sed 's/$/: '$1'/' > ktemp
Xfi
X
Xif test -s ign
Xthen
X    echo "Adding ign to $KPATH/ignore list..."
X    sort -u ign $KPATH/ignore -o $KPATH/ignore
Xfi
X
Xecho "Adding KEY to ktemp file"
X
Xif test -f KEY
Xthen
X    cp KEY KEY.O
Xfi
Xsort KEY ktemp -o ktemp
Xecho "Rebuilding KEY file"
Xbkey ktemp > KEY
SHAR_EOF
chmod 0755 mkkey || echo "restore of mkkey fails"
if [ "$TOUCH" = can ]
then
    touch -am 0524114290 mkkey
fi
set `wc -c mkkey`;Wc_c=$1
if test "$Wc_c" != "1891"
then
    echo original size 1891, current size $Wc_c
fi
exit 0