[comp.sources.unix] v19i073: NN, a Usenet news reader, Part12/15

rsalz@uunet.uu.net (Rich Salz) (06/27/89)

Submitted-by: storm@texas.dk (Kim F. Storm)
Posting-number: Volume 19, Issue 73
Archive-name: nn/part12

#!/bin/sh
# this is part 12 of a multipart archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file pack_date.c continued
#
# Unpack-order guard.  CurArch is the number of this part; s2_seq_.tmp
# holds (on its first line) the number of the part expected next.
# (The file is presumably created by part 1 -- see the message below.)
CurArch=12
if test ! -r s2_seq_.tmp
then echo "Please unpack part 1 first!"
     exit 1; fi
# The comparison runs in a subshell so that "read" can take its input from
# the sequence file; the subshell's exit status decides whether we go on.
( read Scheck
  if test "$Scheck" != $CurArch
  then echo "Please unpack part $Scheck next!"
       exit 1;
  else exit 0; fi
) < s2_seq_.tmp || exit 1
echo "x - Continuing file pack_date.c"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' >> pack_date.c
X *	as long as the "ordering" is ok.
X *
X *	The result is NOT a time_t value, i.e. ctime() will
X *	not produce the original Date string.
X *
X *	The date must have format:  [D]D Mmm YY hh:mm:ss GMT
X */
X
Xpack_date(destp, date)
Xtime_stamp *destp;
Xchar *date;
X{
X    time_stamp res;
X    register int min, hour, day, mon, year;
X
X    *destp = 0;
X    if (date == NULL) return;
X    
X    if ((day = next_int(&date)) == 0) return;
X    
X    switch (*date) {
X     case 'J':
X	if (date[1] == 'a') { mon = 0; break; }
X	if (date[2] == 'n') { mon = 5; break; }
X	mon = 6; break;
X     case 'F':
X	mon = 1; break;
X     case 'M':
X	if (date[2] == 'r') { mon = 2; break; }
X	mon = 4; break;
X     case 'A':
X	if (date[1] == 'p') { mon = 3; break; }
X	mon = 7; break;
X     case 'S':
X	mon = 8; break;
X     case 'O':
X	mon = 9; break;
X     case 'N':
X	mon = 10; break;
X     case 'D':
X	mon = 11; break;
X     default:
X	return;
X    }
X    
X    date += 4;
X    
X    year = next_int(&date);
X    hour = next_int(&date);
X    min = next_int(&date);
X    
X    year -= 87;	/* base is 1987 */
X    if (year < 0) year += 100;
X    
X    res = (year * 12 + mon) * 31 + day - 1;
X    res *= 24 * 60;
X    res += (hour * 60) + min;
X
X    *destp = res;
X}
X
X
Xstatic next_int(dp)
Xchar **dp;
X{
X    register char *str = *dp;
X    register i;
X    
X    i = 0;
X    while (*str && isdigit(*str))
X	i = (i * 10) + *str++ - '0';
X	
X    while (*str && (isspace(*str) || *str == ':')) str++;
X
X    *dp = str;
X    return i;
X}
X
X
X#ifdef DATE_TEST
X
X
Xmain()
X{
X    char buffer[128];
X    char *dp;
X    unsigned long t;
X    
X    while (fgets(buffer, 128, stdin)) {
X	dp = strchr(buffer, ':');
X	if (dp == NULL) continue;
X	dp++;
X	while (isspace(*dp)) dp++;
X	pack_date(&t, dp);
X	printf("%lu\t%s\n", t, dp);
X    }
X    
X    nn_exit(0);
X}
X
X#endif
NO_NEWS_IS_GOOD_NEWS
echo "File pack_date.c is complete"
chmod 0644 pack_date.c || echo "restore of pack_date.c fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c pack_date.c`;Sum=$1
if test "$Sum" != "1938"
then echo original size 1938, current size $Sum;fi
echo "x - extracting pack_name.c (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > pack_name.c &&
X/*
X * 	pack_name(packed, name, length)
X *	pack sender's name into something sensible, return in packed
X *
X */
X
X#include "config.h"
X
X
X/*
X * Address separator indices.  They are used as subscripts of the
X * separator[] array in pack_name() and are stored in the low byte of
X * the Class[] entry of the separator character.
X * (SEP_AMPERSAND denotes the '@' character, despite its name.)
X */
X
X#define	SEP_DOT		0	/* . */
X#define	SEP_PERCENT	1	/* % */
X#define	SEP_SCORE	2	/* _ */
X#define	SEP_AMPERSAND	3	/* @ */
X#define	SEP_BANG	4	/* ! */
X#define	SEP_MAXIMUM	5
X
X
X/*
X * Character class bits stored in Class[].  The low byte (0xff) of an
X * entry carries extra data: the closing character for CL_RANGE, or the
X * SEP_* index for separators.
X *
X * CL_SEP is deliberately an operator fragment ("| 0x4000 +"): it is
X * written between a class and a SEP_* index, so that e.g.
X *	CL_OK CL_SEP SEP_PERCENT
X * expands to
X *	0x0100 | 0x4000 + 1
X */
X
X#define CL_OK		0x0100		/* letter or digit */
X#define CL_SPACE	0x0200		/* cvt to space */
X#define CL_IGNORE	0x0400		/* ignore */
X#define CL_RANGE(c)	0x0800+c	/* space range, end with c */
X#define CL_HYPHEN	0x1000		/* convert to - */
X#define CL_STOP		0x2000		/* discard rest of name */
X#define	CL_SEP		| 0x4000 +    	/* address separator */
X
X/*
X * Class tests.  The argument is used unparenthesized as a Class[]
X * subscript, so it must be a simple expression in the range 0..127
X * (IGNORE tests the 0x80 bit before indexing).
X */
X
X#define	IS_OK(c)	(Class[c] & CL_OK)
X#define IS_SPACE(c)	(Class[c] & CL_SPACE)
X#define IGNORE(c)	(c & 0x80 || Class[c] & CL_IGNORE)
X#define BEGIN_RANGE(c)	(Class[c] & CL_RANGE(0))
X#define END_RANGE(c)	(Class[c] & 0xff)
X#define IS_HYPHEN(c)	(Class[c] & CL_HYPHEN)
X#define IS_STOP(c)	(Class[c] & CL_STOP)
X#define	IS_SEPARATOR(c)	(Class[c] & (0 CL_SEP 0))
X    
X/*
X * Character classification table, indexed by 7-bit character code.
X * Each entry is one CL_* class; separator characters additionally
X * carry the CL_SEP bit and their SEP_* index, and CL_RANGE entries
X * carry the character that closes the range.
X */
Xstatic short Class[128] = {
X	/* NUL */		CL_STOP ,
X	/* SOH */		CL_IGNORE ,
X	/* STX */		CL_IGNORE ,
X	/* ETX */		CL_IGNORE ,
X	/* EOT */		CL_IGNORE ,
X	/* ENQ */		CL_IGNORE ,
X	/* ACK */		CL_IGNORE ,
X	/* BEL */		CL_IGNORE ,
X	/* BS  */		CL_IGNORE ,
X	/* TAB */		CL_SPACE ,
X	/* NL  */		CL_IGNORE ,
X	/* VT  */		CL_IGNORE ,
X	/* FF  */		CL_IGNORE ,
X	/* CR  */		CL_IGNORE ,
X	/* SO  */		CL_IGNORE ,
X	/* SI  */		CL_IGNORE ,
X	/* DLE */		CL_IGNORE ,
X	/* DC1 */		CL_IGNORE ,
X	/* DC2 */		CL_IGNORE ,
X	/* DC3 */		CL_IGNORE ,
X	/* DC4 */		CL_IGNORE ,
X	/* NAK */		CL_IGNORE ,
X	/* SYN */		CL_IGNORE ,
X	/* ETB */		CL_IGNORE ,
X	/* CAN */		CL_IGNORE ,
X	/* EM  */		CL_IGNORE ,
X	/* SUB */		CL_IGNORE ,
X	/* ESC */		CL_IGNORE ,
X	/* FS  */		CL_IGNORE ,
X	/* GS  */		CL_IGNORE ,
X	/* RS  */		CL_IGNORE ,
X	/* US  */		CL_IGNORE ,
X	
X	/* space */		CL_SPACE ,
X	/*   !   */		CL_SPACE CL_SEP SEP_BANG,
X	/*   "   */		CL_RANGE( '"' ) ,
X	/*   #   */		CL_OK ,
X	/*   $   */		CL_OK ,
X	/*   %   */		CL_OK CL_SEP SEP_PERCENT,
X	/*   &   */		CL_OK ,
X	/*   '   */		CL_OK ,
X	/*   (   */		CL_RANGE( ')' ) ,
X	/*   )   */		CL_IGNORE ,
X	/*   *   */		CL_HYPHEN ,
X	/*   +   */		CL_HYPHEN ,
X	/*   ,   */		CL_STOP ,
X	/*   -   */		CL_HYPHEN ,
X	/*   .   */		CL_SPACE CL_SEP SEP_DOT,
X	/*   /   */		CL_OK ,
X	/*   0   */		CL_OK ,
X	/*   1   */		CL_OK ,
X	/*   2   */		CL_OK ,
X	/*   3   */		CL_OK ,
X	/*   4   */		CL_OK ,
X	/*   5   */		CL_OK ,
X	/*   6   */		CL_OK ,
X	/*   7   */		CL_OK ,
X	/*   8   */		CL_OK ,
X	/*   9   */		CL_OK ,
X	/*   :   */		CL_IGNORE ,
X	/*   ;   */		CL_STOP ,
X	/*   <   */		CL_IGNORE ,
X	/*   =   */		CL_HYPHEN ,
X	/*   >   */		CL_IGNORE ,
X	/*   ?   */		CL_IGNORE ,
X	/*   @   */		CL_OK CL_SEP SEP_AMPERSAND,
X	/*   A   */		CL_OK ,
X	/*   B   */		CL_OK ,
X	/*   C   */		CL_OK ,
X	/*   D   */		CL_OK ,
X	/*   E   */		CL_OK ,
X	/*   F   */		CL_OK ,
X	/*   G   */		CL_OK ,
X	/*   H   */		CL_OK ,
X	/*   I   */		CL_OK ,
X	/*   J   */		CL_OK ,
X	/*   K   */		CL_OK ,
X	/*   L   */		CL_OK ,
X	/*   M   */		CL_OK ,
X	/*   N   */		CL_OK ,
X	/*   O   */		CL_OK ,
X	/*   P   */		CL_OK ,
X	/*   Q   */		CL_OK ,
X	/*   R   */		CL_OK ,
X	/*   S   */		CL_OK ,
X	/*   T   */		CL_OK ,
X	/*   U   */		CL_OK ,
X	/*   V   */		CL_OK ,
X	/*   W   */		CL_OK ,
X	/*   X   */		CL_OK ,
X	/*   Y   */		CL_OK ,
X	/*   Z   */		CL_OK ,
X	/*   [   */		CL_OK ,
X	/*   \   */		CL_OK ,
X	/*   ]   */		CL_OK ,
X	/*   ^   */		CL_IGNORE ,
X	/*   _   */		CL_SPACE CL_SEP SEP_SCORE,
X	/*   `   */		CL_IGNORE ,
X	/*   a   */		CL_OK ,
X	/*   b   */		CL_OK ,
X	/*   c   */		CL_OK ,
X	/*   d   */		CL_OK ,
X	/*   e   */		CL_OK ,
X	/*   f   */		CL_OK ,
X	/*   g   */		CL_OK ,
X	/*   h   */		CL_OK ,
X	/*   i   */		CL_OK ,
X	/*   j   */		CL_OK ,
X	/*   k   */		CL_OK ,
X	/*   l   */		CL_OK ,
X	/*   m   */		CL_OK ,
X	/*   n   */		CL_OK ,
X	/*   o   */		CL_OK ,
X	/*   p   */		CL_OK ,
X	/*   q   */		CL_OK ,
X	/*   r   */		CL_OK ,
X	/*   s   */		CL_OK ,
X	/*   t   */		CL_OK ,
X	/*   u   */		CL_OK ,
X	/*   v   */		CL_OK ,
X	/*   w   */		CL_OK ,
X	/*   x   */		CL_OK ,
X	/*   y   */		CL_OK ,
X	/*   z   */		CL_OK ,
X	/*   {   */		CL_OK ,
X	/*   |   */		CL_OK ,
X	/*   }   */		CL_OK ,
X	/*   ~   */		CL_HYPHEN ,
X	/*  DEL  */		CL_IGNORE 
X} ;
X
X
Xpack_name(dest, source, length)
Xchar *dest, *source;
Xint length;
X{
X    register char *p, *q, *r, c;
X    register int n;
X    char namebuf[129], *name;
X    char *maxq;
X    int lname, lfirst, lmiddle, llast, sep, i;
X    int drop_space, prev_space;
X    char *separator[SEP_MAXIMUM];
X    
X    dest[0] = NUL;
X    
X    if (source == NULL || source[0] == NUL)
X	return 0;
X
X    p = source, q = namebuf, n = 0;
X    
Xnew_partition:
X    for (i = SEP_MAXIMUM; --i >= 0; separator[i] = NULL);
X    
X    while ( c = *p++ ) {
X	if (c == '<') {
X	    while (q > namebuf && q[-1] == SP) q--;
X	    if (q == namebuf) continue;
X	    break;
X	}
X	if (IGNORE(c)) continue;
X	if (q == namebuf && IS_SPACE(c)) continue;
X	if (c == '(') {
X	    if (*p == ')') {
X		p++;
X		continue;
X	    }
X	    if (n++ == 0) {
X		q = namebuf;
X		goto new_partition;
X	    }
X	    continue;
X	}
X	if (c == ')') {
X	    if (--n == 0) break;
X	    continue;
X	}
X	if (n > 1) continue;
X	*q++ = c;
X	if (IS_SEPARATOR(c)) {
X	    switch (sep = (Class[c] & 0xff)) {
X		
X	     case SEP_DOT:
X		if (separator[SEP_AMPERSAND] && q - namebuf <= length)
X		    break;
X		continue;
X
X	     case SEP_BANG:
X		if (separator[SEP_AMPERSAND]) continue;
X		break;
X		
X	     default:
X		if (separator[sep]) continue;
X		break;
X	    }
X	    
X	    separator[sep] = q - 1;
X	}
X    }
X
X    *q = NUL;
X    
X    if (namebuf[0] == NUL) return 0;
X    
X    name = namebuf;
X
X    if (name[0] == '"') {
X	name++;
X	if (q[-1] == '"') *--q = NUL;
X    }
X    
X    if (q - name <= length) goto name_ok;
X    
X    /* sorry for the many goto's -- the 3B2 C compiler does not */
X    /* make correct code for complicated logical expressions!!  */
X    /* not even without -O					*/
X
X    /* We must pack the name to make it fit */
X    
X    /* Name_of_person%... -> Name_of_person */
X
X    if (r = separator[SEP_PERCENT]) {
X	if (!(q = separator[SEP_SCORE]) || q > r ) 
X	    goto no_percent;
X	if ((q = separator[SEP_AMPERSAND]) && q < r)
X	    goto no_percent;
X	if ((q = separator[SEP_BANG]) && q < r)
X	    goto no_percent;
X	*r = NUL;
X	goto parse_name;
X    }
X
X no_percent:
X
X    /* name@site.domain -> name@site */
X 
X   if (r = separator[SEP_AMPERSAND]) {
X
X       if ((q = separator[SEP_PERCENT]) && q < r) {
X	   *r = NUL;
X	   if (r - name <= length) goto name_ok;
X
X	   *q = NUL;
X
X	   if (((p = separator[SEP_BANG]) && p < q)
X	     || ((p = strrchr(name, '!')) && p < q)) {
X	       name = p + 1;
X	   }
X
X	   if (strchr(name, '.')) 
X	       goto parse_name;
X	   
X	   goto name_ok;
X       }
X
X       if (q = separator[SEP_DOT]) {
X	   *q = NUL;
X	   goto name_ok;
X       }	
X	
X       *r = NUL;
X       if (r - name <= length) goto name_ok;
X       
X       if ((q = separator[SEP_BANG]) && q < r) {
X	   name = q + 1;
X	   goto name_ok;
X       }
X
X#ifdef NOTDEF
X       if (strchr(name, '!') == NULL) 
X	   goto parse_name; /* take the chance ... */
X#endif
X	goto name_ok;	/* can't do it any better */
X    }
X    
X    
X    /* Phase 1: Normalization (remove superfluous characters) */
X    
X parse_name:
X    
X    for (p = name, lname = 0, prev_space = 0; c = *p; p++) {
X
X/*	
X	if (IGNORE(c)) {
X	    *p = TAB;
X	    if (p == name) name++;
X	    continue;
X	}
X*/
X	
X	if (IS_OK(c)) {
X	    lname++;
X	    prev_space = 0;
X	    continue;
X	}
X	
X	if (IS_HYPHEN(c)) {
X	    if (p == name) {
X		name++;
X		continue;
X	    }
X	    if (prev_space)
X		*p = TAB;
X	    else {
X		*p = '-';
X		lname++;
X	    }
X	    continue;
X	}
X	
X	if (BEGIN_RANGE(c)) {
X	    
X	    if (p == name) {
X		name++;
X		continue;
X	    }
X	    
X	    c = END_RANGE(c);
X	    for (q = p+1; *q && *q != c; q++);
X	    if (*q) {
X		if (p[-1] != ' ') lname++;
X		while (p <= q) *p++ = ' ';
X		p--;
X		prev_space++;
X		continue;
X	    }
X	    c = ' ';
X	}
X	
X	if (IS_SPACE(c)) {
X	    *p = ' ';
X	    if (p == name) 
X		name++;
X	    else
X		if (!prev_space) {
X		    lname++;
X		    prev_space++;
X		}
X	    continue;
X	}
X	
X	if (IS_STOP(c)) {
X	    *p = NUL;
X	    break;
X	}
X    }
X drop_last_name:
X    while (p > name && (*--p == ' ' || *p == TAB)) *p = NUL;
X    
X    if (lname < length) goto name_ok;
X    
X    
X    /* Phase 2: Reduce middle names */
X    
X    for (r = p, llast = 0; r > name && *r != ' '; r--)
X	if (*r != TAB) llast++;
X    
X    /* r points to space before last name */
X    
X    if (strncmp(r, " Jr", 3) == 0 || strncmp(r, " II", 3) == 0) {
X	p = r+1;
X	lname -= llast;
X	goto drop_last_name;
X    }		
X    
X    if (r == name) goto phase6;	/* only last name */
X    
X    for (q = name, lfirst = 0; *q && *q != ' '; q++)
X	if (*q != TAB) lfirst++;
X    
X    /* q points at space after first name */
X    
X    for (p = q, lmiddle = 0; p < r; ) {
X	/* find next middle name */
X	while (p < r && (*p == ' ' || *p == TAB)) p++;
X	
X	if (p >= r) break; /* found last name */
X	
X	p++; /* skip first char of middle name */
X	for (;*p != ' '; p++) { /* remove rest */
X	    if (*p == TAB) continue;
X	    *p = TAB;
X	    lname--;
X	}
X	lmiddle += 2;	/* initial + space */
X    }
X    
X    if (lname < length) goto name_ok;
X    
X    /* If removing middle names is not enough, but reducing first name instead is, do it that way */
X    
X    if (lname - lmiddle >= length && lname - lfirst + 1 < length) goto phase4;
X    
X    
X    /* Phase 3: Remove middle names */
X    
X    for (p = q; p < r; p++) {
X	if (*p == TAB) continue;
X	if (*p == ' ') continue;
X	*p = TAB;
X	lname -= 2;
X    }
X    
X    if (lname < length) goto name_ok;
X    
X    
X    /* Phase 4: Reduce first name */
X    
X phase4:
X    for (p = name+1; p < q; p++) {
X	if (*p == TAB) continue;
X	if (*p == ' ') continue;
X	*p = TAB;
X	lname--;
X    }
X    
X    if (lname < length) goto name_ok;
X    
X    /* Phase 5: Remove first name */
X    
X    name = r+1;
X    lname--;
X    
X    if (lname < length) goto name_ok;
X    
X    /* Phase 6: Cut last name */
X phase6:
X    goto name_ok;
X    
X name_ok:
X
X    q = dest;
X    maxq = q + length;
X
X    drop_space = 1;
X    
X    for (p = name; *p && q < maxq ; p++) {
X	if (*p == TAB) continue;
X	
X	if ( *p == ' ' ) {
X	    if (!drop_space) {
X		drop_space = 1;
X		*q++ = ' ';
X	    }
X	    continue;
X	}
X	drop_space = 0;
X	*q++ = *p;
X    }
X    
X    *q = NUL;
X    
X    return strlen(dest);
X}    
X
NO_NEWS_IS_GOOD_NEWS
chmod 0644 pack_name.c || echo "restore of pack_name.c fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c pack_name.c`;Sum=$1
if test "$Sum" != "10093"
then echo original size 10093, current size $Sum;fi
echo "x - extracting pack_subject.c (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > pack_subject.c &&
X/*
X * pack subject by eliminating RE prefixes and - (nf) suffixes
X */
X
X#include "config.h"
X
Xpack_subject(dest, src, re_counter_ptr, max_length)
Xregister char *dest, *src;
Xint *re_counter_ptr, max_length;
X{
X    int re;
X    char *start_dest;
X    register char *max_dest;
X    
X    re = 0;
X
X    if (src) {
X	start_dest = dest;
X	max_dest = dest + max_length;
X    
X	while (*src) {
X	    if (isspace(*src)) {
X		src++;
X		continue;
X	    }
X	    
X	    /* count and remove 'Re: Re: ...' */
X
X	    if (*src != 'R' && *src != 'r') break;
X	    *dest++ = *src++;
X	    
X	    if (*src != 'e' && *src != 'E') break;
X	    *dest++ = *src++;
X
X	    if (*src == ':' || *src == ' ') {
X		src++;
X		dest = start_dest;
X		re++;
X		continue;
X	    }
X	    
X	    if (*src != '^') break;
X		
X	    src++;
X	    dest = start_dest;
X	    
X	    while (isdigit(*src)) *dest++ = *src++;
X	    if (dest == start_dest) 
X		re++;
X	    else {
X		*dest = NUL;
X		dest = start_dest;
X		re += atoi(dest);
X	    }
X	    if (*src == ':') src++;
X	}
X	
X	while (*src && dest < max_dest) {
X	    if (*src == '-' && strncmp("- (nf)", src, 5) == 0) break;
X	    *dest++ = *src++;
X	}
X    }
X    
X    *dest = NUL;
X    *re_counter_ptr = (char)re;
X    
X    return dest - start_dest;
X}
NO_NEWS_IS_GOOD_NEWS
chmod 0644 pack_subject.c || echo "restore of pack_subject.c fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c pack_subject.c`;Sum=$1
if test "$Sum" != "1207"
then echo original size 1207, current size $Sum;fi
echo "x - extracting patchlevel.h (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > patchlevel.h &&
X/*
X * Current patch level (initial level is zero)
X *
X * Modification history:
X *
X *	1988-07-20:  Beta-test release 6.0 	(Denmark)
X *	1988-11-01:  Distributed release 6.1 	(Europe)
X *	1989-03-21:  Distributed release 6.2beta (FTP)
X *	1989-05-30:  Distributed release 6.3	(World)
X */
X
X/* PATCHLEVEL also appears in the "Release ..." line of prefix.sh, */
X/* which includes this header */
X#define PATCHLEVEL 0
X
NO_NEWS_IS_GOOD_NEWS
chmod 0644 patchlevel.h || echo "restore of patchlevel.h fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c patchlevel.h`;Sum=$1
if test "$Sum" != "305"
then echo original size 305, current size $Sum;fi
echo "x - extracting prefix.sh (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > prefix.sh &&
XWARNING: DON'T CHANGE THE ORDER OR CONTENTS OF THE FOLLOWING LINES
X
X#include "config.h"
X#include "patchlevel.h"
X#include "update.h"
X
X--------CUT PREFIX HERE--------
X&!/bin/sh
X
X& Release RELEASE,VERSION,PATCHLEVEL, No. UPDATE
X
X& Do not edit this file directly.
X& It is generated from the corresponding .sh file.
X
X
XSPOOL=NEWS_DIRECTORY
X
XTMP=TMP_DIRECTORY
X
XLIB=LIB_DIRECTORY
X
XDB=DB_DIRECTORY
X
XINEWS=INEWS_PATH
X
XRECMAIL=REC_MAIL
X
X#ifdef APPEND_SIGNATURE
XAPPENDSIG=true
X#else
XAPPENDSIG=false
X#endif
X
XPG=PAGER
X
X#ifdef NNTP
X#undef NNTP
XNNTP=true
XACTIVE=$DB/ACTIVE
X#else
XNNTP=false
XACTIVE=NEWS_ACTIVE
X#endif
NO_NEWS_IS_GOOD_NEWS
chmod 0644 prefix.sh || echo "restore of prefix.sh fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c prefix.sh`;Sum=$1
if test "$Sum" != "600"
then echo original size 600, current size $Sum;fi
echo "x - extracting rc.c (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > rc.c &&
X/*
X * rc management routines
X */
X
X#include "config.h"
X#include "term.h"
X#include "debug.h"
X
Xexport int  keep_rc_backup = 1;	/* write rc.bak while reading rc */
Xexport int  no_update = 0;	/* when set, rc updates are suppressed */
Xexport int  use_newsrc = 0;	/* read and maintain .newsrc as well */
X
Xexport long unread_articles;	/* estimate of unread articles */
Xexport int  unread_groups;	/* number of groups counted in the above */
X
X
Xstatic FILE *rc = NULL;		/* rc_file descriptor */
X
X/* file names; RC and BAK are relative to nn_directory, */
X/* NEWSRC is relative to home_directory */
X
Xstatic char RC[] = "rc";
Xstatic char BAK[] = "rc.bak";
Xstatic char NEWSRC[] = ".newsrc";
X
Xstatic int  has_newsrc = 0;	/* set when .newsrc was opened for reading */
X
X/* RC lines have the format:  */
X/*	SUBSCR space LASTART space NAME */
X/* SUBSCR is a single character ('+' or '!'), LASTART is a fixed width */
X/* field of LASTARTZ digits, and NAME extends to the end of the line. */
X
X#define	SUBSCRZ			1
X#define	SUBSCRPOS		0
X#define SUBSCR(buf)		buf[0]
X
X/* NOTICE THAT LASTARTZ IS HARDCODED IN A printf FORMAT STRING LATER ON */
X
X#define LASTARTZ		6
X#define	LASTARTPOS		(SUBSCRPOS + SUBSCRZ + 1)
X#define	LASTART(buf)		atol(buf + LASTARTPOS)
X
X#define GROUPNAMEPOS		(LASTARTPOS + LASTARTZ + 1)
X#define	GROUPNAME(buf)		(buf + GROUPNAMEPOS)
X
X/* rc_offset value marking a group that has no line in the rc file yet */
X
X#define	NEW_OFFSET	((off_t)1)	/* append to rc_file when written */
X
X/*
X * read rc file info to group headers
X * master file has been read in
X */
X
X#define	G_OLD	G_NEW	/* inverse use during rc reading */
X#define G_RENUM	G_DONE
X
X
Xvisit_rc_file()
X{
X    FILE *bak, *newsrc;
X    register int c;
X    register group_header *gh;
X    register char *cp;
X    char line[512];
X    off_t rcpos;		/* position in rc */
X    int warn_duplicates = 0, mk_bak, rd_newsrc, bak_used;
X    time_t m_rc, m_newsrc;
X    char rc_path[FILENAME], bak_path[FILENAME];
X    
X    strcpy(rc_path, relative(nn_directory, RC));
X    strcpy(bak_path, relative(nn_directory, BAK));
X
X    rc = NULL;	/* open rc-file */
X    rewind_rc(rc_path, OPEN_READ);
X
X    m_rc = 0;
X    if (rc != NULL) {
X	fseek(rc, (off_t)0, 2);
X	if (ftell(rc) > (off_t)0) {
X	    fseek(rc, 0L, 0);
X    	    m_rc = m_time(rc);
X	}
X    }
X    
X    rd_newsrc = use_newsrc;
X    m_newsrc = 0;
X    mk_bak = keep_rc_backup;
X    bak_used = 0;
X    
X    if (m_rc == 0) {		/* rc empty (i.e. new or corrupted) */
X	if ((bak = open_file(bak_path, OPEN_READ)) != NULL) {
X	    m_rc = m_time(bak);
X	    if (m_rc >= m_newsrc) {
X		printf("\nRestoring %s from %s\n", RC, BAK);
X		rewind_rc(rc_path, OPEN_CREATE | MUST_EXIST);
X		while ((c = getc(bak)) != EOF) putc(c, rc);
X		rewind_rc(rc_path, OPEN_READ | MUST_EXIST);
X		fclose(bak);
X		mk_bak = 0;
X		bak_used = 1;
X	    }
X	} else {
X	    rd_newsrc = 1;
X	    display_help("welcome");
X	}
X    }
X
X    if (rd_newsrc) {
X	newsrc = open_file(relative(home_directory, NEWSRC), OPEN_READ);
X	if (newsrc != NULL) {
X	    m_newsrc = m_time(newsrc);
X	    has_newsrc = 1;
X	}
X    }
X    
X    if (has_newsrc) {
X	if (m_rc < m_newsrc) {
X	    if (bak_used) {
X		printf("\n%s is newer than %s -- use %s ? ",
X		       NEWSRC, BAK, NEWSRC);
X		fl;
X		if (!yes(0)) m_newsrc = 0;
X	    }
X
X	    if (m_newsrc != 0) {
X		
X		printf("\nReading from %s\n", NEWSRC);
X
X		if (m_rc != 0) {
X		    fclose(rc);
X		    rc = NULL;
X		    
X		    unlink(bak_path);
X		    if (link(rc_path, bak_path) < 0 || unlink(rc_path) < 0)
X			user_error("Cannot backup %s file\n", RC);
X		    
X		    mk_bak = 0;
X		}    
X		
X		rewind_rc(rc_path, OPEN_CREATE | MUST_EXIST);
X		read_newsrc(newsrc);
X		rewind_rc(rc_path, OPEN_READ | MUST_EXIST);
X	    }
X	}
X	
X	fclose(newsrc);
X    }
X    
X    if (no_update) mk_bak = 0;
X
X    bak = mk_bak ? open_file(bak_path, OPEN_CREATE | MUST_EXIST) : NULL;
X
X    for(;;) {
X	rcpos = ftell(rc);
X	
X	c = getc(rc);
X	
X	cp = line;
X	while (c != NL) {
X	    if (c == EOF) goto endloop;
X	    *cp++ = c;
X	    c = getc(rc);
X	}
X	*cp = NUL;
X	if (bak != NULL) {
X	    fputs(line, bak);
X	    fputc(NL, bak);
X	}
X	
X	if (SUBSCR(line) != '+' && SUBSCR(line) != '!') {
X	    /* unrecognized line */
X	    continue;
X	}
X	
X	if ((gh = lookup(GROUPNAME(line))) == NULL) continue;
X
X	if (gh->group_flag & G_OLD) {
X	    printf("Duplicated entry in rc file: %s\n", gh->group_name);
X	    warn_duplicates++;
X	}
X	gh->rc_offset = rcpos;
X	
X	gh->group_flag |= G_OLD;
X	if (SUBSCR(line) == '+')
X	    gh->group_flag |= G_SUBSCRIPTION;
X	
X	gh->last_article = LASTART(line);
X	
X	if (gh->last_article > gh->last_l_article)
X	    gh->group_flag |= G_RENUM;	/* mark for use below */
X	
X	if (gh->first_l_article > gh->last_article ||
X	    gh->last_article > gh->last_l_article)
X	    gh->last_article = gh->first_l_article - 1;
X	
X	if (gh->last_article < 0) gh->last_article = 0;
X    }	
X
Xendloop:
X    if (warn_duplicates) {
X	printf("You can repair this using \"nntidy\"\n");
X	any_key(0);
X    }    
X
X    rewind_rc(rc_path, OPEN_UPDATE | MUST_EXIST);
X
X    Loop_Groups_Header(gh) {
X	if (gh->group_flag & G_OLD) {
X	    if (gh->group_flag & G_RENUM) /* group is renumbered */
X		write_rc_entry(gh, 0);
X	    gh->group_flag &= ~(G_NEW | G_RENUM);
X	} else {
X	    gh->group_flag |= G_SUBSCRIPTION | G_NEW;
X	    gh->last_article = gh->first_l_article - 1;
X	    if (gh->last_article < 0) gh->last_article = 0;
X	    gh->rc_offset = NEW_OFFSET;
X	}
X	gh->first_article = gh->last_article;
X    }
X    
X    if (bak != NULL) fclose(bak);
X    
X    if (no_update) {
X	fclose(rc);
X	rc = NULL;
X    } else
X	fflush(rc);
X}
X
X
Xrewind_rc(path, mode)
Xchar *path;
X{
X    if (rc != NULL) fclose(rc);
X    rc = open_file(path, mode);
X}
X
X
Xrestore_bak()
X{
X    if (no_update) 
X	return 1;
X    
X    if (!keep_rc_backup) {
X	msg("No %s file ('backup' is not set)", BAK);
X	return 0;
X    }
X
X    prompt("Are you sure? ");
X    if (!yes(1)) return 0;
X    
X    fclose(rc);	/* cannot use close_rc() since it would update .newsrc */
X    rc = NULL;
X    
X    if (chdir(nn_directory) < 0) goto err;
X
X    if (unlink(RC) < 0) goto err;
X    if (link(BAK, RC) < 0) goto err;
X    if (unlink(BAK) < 0) goto err;
X    
X    return 1;
X
X err:
X    clrdisp();
X    printf("Restore of %s file failed\n\n", RC);
X    printf("Check state of %s and %s files\n", RC, BAK);
X    nn_exit(1);
X    /*NOTREACHED*/
X}
X
Xupdate_rc(gh)
Xregister group_header *gh;
X{
X    add_unread(gh, -1);
X
X    if (no_update || gh->group_flag & G_RC_UPDATED) return;
X
X    gh->last_article = gh->last_l_article;
X
X#ifdef RC_TEST    
X    if (Debug & RC_TEST) 
X	fprintf(stderr, "upd_rc(%s) pos=%ld, artno=%ld\n",
X		gh->group_name, gh->rc_offset, gh->last_article);
X#endif
X
X    write_rc_entry(gh, 0);
X    
X    if (gh->group_flag & G_READ) return;
X    
X    gh->group_flag |= G_READ;
X
X    if ((gh->group_flag & G_SUBSCRIPTION) == 0) return;
X}
X
X
Xrestore_rc(gh, count)
Xregister group_header *gh;
Xlong count;
X{
X    if (no_update || (count == 0 && (gh->group_flag & G_RC_UPDATED) == 0))
X	return 0;
X
X    if (gh->group_flag & G_READ || count > 0) {
X	add_unread(gh, -1);
X    
X	if (count > 0) {
X	    gh->last_article = gh->last_l_article - count;
X	    if (gh->last_article < gh->first_l_article)
X		gh->last_article = gh->first_l_article - 1;
X	    gh->first_article = gh->last_article;
X	} else
X	    gh->last_article = gh->first_article;
X
X#ifdef RC_TEST    
X	if (Debug & RC_TEST) 
X	    fprintf(stderr, "restore_rc(%s) pos=%ld, artno=%ld\n",
X		    gh->group_name, gh->rc_offset, gh->last_article);
X#endif
X
X	write_rc_entry(gh, 0);
X	
X	gh->group_flag &= ~(G_READ|G_RC_UPDATED);
X	
X	add_unread(gh, 1);
X	
X	return 1;
X    }
X    return 0;
X}
X
X
Xclose_rc()
X{
X    off_t endrc;
X    
X    if (rc == NULL) return;
X    
X    if (use_newsrc) {
X	write_newsrc();
X
X	fflush(rc);
X	fseek(rc, 0L, 2);	/* touch rc file */
X	if ((endrc = ftell(rc)) == 0)
X	    fprintf(rc, "#\n");
X	else {
X	    fflush(rc);
X	    fseek(rc, endrc - 1, 0);
X	    fputc(NL, rc);
X	}
X    }
X    
X    fclose(rc);
X    rc = NULL;
X}
X
X
X
Xcount_unread_articles(trace)
Xint trace;
X{
X    register group_header *gh;
X    long n;
X    
X    unread_articles = 0;
X    unread_groups = 0;
X    
X    Loop_Groups_Header(gh) {
X	gh->group_flag &= ~G_UNREAD_COUNT;
X
X	if ((gh->group_flag & G_SUBSCRIPTION) == 0) continue;
X
X	if (gh->last_l_article > gh->last_article) {
X	    n = unread_articles;
X	    add_unread(gh, 1);
X	    if (trace)
X		printf("%s: %d\n", gh->group_name, unread_articles - n);
X	}
X    }
X}
X
X
Xprt_unread(format)
Xregister char *format;
X{
X    if (format == NULL) {
X	printf("No News (is good news)\n");
X	return;
X    }
X    
X    while (*format) {
X	if (*format != '%') {
X	    putchar(*format++);
X	    continue;
X	}
X	format++;
X	switch (*format++) {
X	 case 'u':
X	    printf("%ld unread article%s", 
X		   unread_articles, 
X		   unread_articles == 1 ? "" : "s");
X	    continue;
X	 case 'g':
X	    printf("%d group%s",
X		   unread_groups, 
X		   unread_groups == 1 ? "" : "s");
X	    continue;
X	 case 'i':
X	    printf(unread_articles == 1 ? "is" : "are");
X	    continue;
X	 case 'U':
X	    printf("%ld", unread_articles);
X	    continue;
X	 case 'G':
X	    printf("%d", unread_groups);
X	    continue;
X	}
X    }
X}
X
X
Xadd_unread(gh, mode)
Xgroup_header *gh;
Xint mode;	/* +1 => add, -1 => subtract */
X{
X    long art;
X    int was_unread;
X    
X    art = gh->last_l_article - gh->last_article;
X    was_unread = (gh->group_flag & G_UNREAD_COUNT);
X    
X    if (mode > 0) {
X	if (was_unread) return 0;
X	unread_articles += art;
X	unread_groups++;
X	gh->group_flag |= G_UNREAD_COUNT;
X    } else {
X	if (!was_unread) return 0;
X	unread_articles -= art;
X	unread_groups--;
X	gh->group_flag &= ~G_UNREAD_COUNT;
X    }    
X	
X    return was_unread;
X}
X
X
X/*
X * write one line on rc_file
X */
X
Xwrite_rc_entry(gh, new)
Xgroup_header *gh;
Xint new;	/* 0 => old, 1 => quick append, 2 => normal append */
X{
X    if (gh->rc_offset == NEW_OFFSET) new = 2;
X    
X    if (new) {
X	if (new == 2) fseek(rc, (off_t)0, 2);
X	gh->rc_offset = ftell(rc);
X    } else
X	if (fseek(rc, gh->rc_offset, 0) < 0)
X	    user_error("Seek error on %s file", RC);
X
X    /*
X     * the 'last article' is not updated in the rc file
X     * when a group is unsubscribed; if it is later resubscribed,
X     * the present articles will still be unread (if they exist)
X     */
X
X    /* update article number */
X	
X    fprintf(rc, "%c %06ld",	/* MUST CHANGE IF LASTARTZ CHANGES */
X	    (gh->group_flag & G_SUBSCRIPTION) ? '+' : '!',
X	    (long)(gh->last_article));
X
X    if (new) {
X	fputc(' ', rc);
X	fputs(gh->group_name, rc);
X	fputc(NL, rc);
X    }
X
X    fflush(rc);
X}
X
X
X/*
X * Old-style .newsrc support
X */
X
Xstatic read_newsrc(newsrc)
XFILE *newsrc;
X{
X    copy_newsrc(newsrc, (FILE *)NULL);
X}
X
Xstatic write_newsrc()
X{
X    char newsrc_path[FILENAME], bak_path[FILENAME];
X    FILE *newsrc, *bak;
X    
X    strcpy(newsrc_path, relative(home_directory, NEWSRC));
X    sprintf(bak_path, "%s.bak", newsrc_path);
X    
X    if (has_newsrc) {
X	unlink(bak_path);
X	if (link(newsrc_path, bak_path) < 0 || unlink(newsrc_path) < 0)
X	    user_error("Cannot backup %s file\n", newsrc_path);
X
X	bak = open_file(bak_path, OPEN_READ | MUST_EXIST);
X    } else
X	bak = NULL;
X
X    if (file_exist(newsrc_path, (char *)NULL)) {
X	/* This is real paranoia ... don't let people lose their .newsrc */
X	/* This should not happen - but it has been seen */
X	log_entry('E', "failed to backup %s", newsrc_path);
X	fprintf(stderr, "PROBLEM... YOUR %s WAS NOT UPDATED\n", NEWSRC);
X	if (bak != NULL) fclose(bak);
X	return;
X    }
X    
X    newsrc = open_file(newsrc_path, OPEN_CREATE | MUST_EXIST);
X    copy_newsrc(bak, newsrc);
X    if (bak != NULL) fclose(bak);
X    fclose(newsrc);
X}
X
X/*
X *	copy_newsrc(old_rc, new_rc)
X *
X *	This function works in two modes, selected by new_rc:
X *
X *	new_rc == NULL	(reading)  Every line of old_rc naming a known
X *			group is converted to an entry which is appended
X *			to the rc file.  Afterwards the rc information in
X *			ALL group headers is cleared.
X *
X *	new_rc != NULL	(writing)  old_rc (which may be NULL) is copied
X *			to new_rc; lines for known, subscribed groups are
X *			regenerated from the group headers, all other
X *			lines are copied unchanged, and repeated lines
X *			for a known group are dropped.  Groups not found
X *			in old_rc are then passed to write_newsrc_entry().
X *
X *	Always returns 1.
X */
X
Xstatic copy_newsrc(old_rc, new_rc)
XFILE *old_rc, *new_rc;
X{
X    char buf[2048];
X    char *sub, *last, subscr;
X    long atol();
X    register group_header *gh;
X    
X    /* G_DONE marks the groups that have been seen in old_rc */
X    Loop_Groups_Header(gh)
X	gh->group_flag &= ~G_DONE;
X
X    if (old_rc != NULL) {
X	/* NEWSRC lines have the following format 		*/
X	/*	NAME(n)SUBSCR(1) space NUM[,NUM][-NUM]... 	*/
X
X	while (fgets(buf, 2048, old_rc) != NULL) {
X	    /* sub is set to the subscription mark: ':' or '!' */
X	    subscr = 0;
X	    if (sub = strchr(buf, ':'))
X		subscr = 1;
X	    else 
X		sub = strchr(buf, '!');
X	    
X	    if (sub == NULL) {
X		if (new_rc != NULL) goto output_unchanged;
X		continue;
X	    }
X	    
X	    /* terminate the name for the lookup, then put the mark back */
X	    *sub = NUL;
X	    gh = lookup(buf);
X	    *sub++ = subscr ? ':' : '!';
X	    
X	    if (gh == NULL) {
X		if (new_rc != NULL) goto output_unchanged;
X		continue;
X	    }
X	    
X	    if (new_rc != NULL) {
X		if (gh->group_flag & G_DONE) continue;
X		gh->group_flag |= G_DONE;
X		if (!subscr) goto output_unchanged;
X		write_newsrc_entry(new_rc, gh, (*sub == NL) ? 1 : 0);
X		continue;
X	    }
X	    
X	    /* Notice: unread articles before the last read article are lost */
X	    
X	    if (*sub == NL)	/* new group */
X		continue;
X	    
X	    if (subscr) {
X		/* the last read article is the number following the */
X		/* last '-', ',' or space on the line */
X		last = strrchr(sub, '-');
X		if (last == NULL) last = strrchr(sub, ',');
X		if (last == NULL) last = strrchr(sub, ' ');
X		if (last == NULL) last = "0"; else last++;
X		
X		gh->last_article = atol(last);
X		gh->group_flag |= G_SUBSCRIPTION;
X	    } else
X		gh->last_article = 0;
X	    
X	    /* NEW_OFFSET makes write_rc_entry() append the entry */
X	    gh->rc_offset = NEW_OFFSET;
X	    
X	    write_rc_entry(gh, 0);
X	    continue;
X	    
X	 output_unchanged:
X	    fputs(buf, new_rc);
X	}
X    }    
X
X    Loop_Groups_Header(gh) {
X	if (new_rc != NULL) {
X	    if (gh->group_flag & G_DONE) continue;
X	    write_newsrc_entry(new_rc, gh, -1);
X	} else {
X	    /* NOTE(review): presumably the caller reads the rc file */
X	    /* again after this reset -- confirm */
X	    gh->rc_offset = 0;
X	    gh->last_article = 0;
X	    gh->group_flag &= G_MASTER_FLAGS;
X	}
X    }
X
X    return 1;
X}
X
Xwrite_newsrc_entry(newsrc, gh, also_new)
XFILE *newsrc;
Xregister group_header *gh;
Xint also_new;
X{
X    if ((gh->group_flag & G_READ) == 0 && (gh->group_flag & G_NEW)) {
X	if (also_new < 0) return;
X    } else
X	also_new = 0;
X	
X    fprintf(newsrc, "%s%c", gh->group_name,
X	    (gh->group_flag & G_SUBSCRIPTION) ? ':' : '!');
X
X    if (also_new) {
X	fputc(NL, newsrc);
X	return;
X    }
X    
X    if (gh->first_l_article > gh->last_article) 
X	fprintf(newsrc, " %s%d\n", 
X		gh->first_l_article > 2 ? "1-" : "",
X		gh->first_l_article - 1);
X    else
X	fprintf(newsrc, " %d-%d\n", gh->first_l_article, gh->last_article);
X}
NO_NEWS_IS_GOOD_NEWS
chmod 0644 rc.c || echo "restore of rc.c fails"
# Compare the extracted byte count with the size recorded in the archive;
# a mismatch is only reported, unpacking continues.
set `wc -c rc.c`;Sum=$1
if test "$Sum" != "13314"
then echo original size 13314, current size $Sum;fi
echo "x - extracting regexp.c (Text)"
sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > regexp.c &&
X/*
X * regexp.c - regular expression matching
X *
X * NOTICE: THIS CODE HAS BEEN MODIFIED TO FIT THE NN ENVIRONMENT.
X *
X * DESCRIPTION
X *
X *	This source was taken from the pax posting in comp.sources.unix.
X *
X *	Underneath the reformatting and comment blocks which were added to 
X *	make it consistent with the rest of the code, you will find a
X *	modified version of Henry Spencer's regular expression library.
X *	Henry's functions were modified to provide the minimal regular
X *	expression matching, as required by P1003.  Henry's code was
X *	copyrighted, and a copy of the copyright message and restrictions
X *	are provided, verbatim, below:
X *
X *	Copyright (c) 1986 by University of Toronto.
X *	Written by Henry Spencer.  Not derived from licensed software.
X *
X *	Permission is granted to anyone to use this software for any
X *	purpose on any computer system, and to redistribute it freely,
X *	subject to the following restrictions:
X *
X *	1. The author is not responsible for the consequences of use of
X *         this software, no matter how awful, even if they arise
X *	   from defects in it.
X *
X *	2. The origin of this software must not be misrepresented, either
X *	   by explicit claim or by omission.
X *
X *	3. Altered versions must be plainly marked as such, and must not
X *	   be misrepresented as being the original software.
X *
X * 	Beware that some of this code is subtly aware of the way operator
X * 	precedence is structured in regular expressions.  Serious changes in
X * 	regular-expression syntax might require a total rethink.
X *
X * AUTHORS
X *
X *     Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
X *     Henry Spencer, University of Toronto (henry@utzoo.edu)
X *
X * Sponsored by The USENIX Association for public distribution. 
X *
X * $Log:	regexp.c,v $
X * Revision 1.1  88/12/23  18:02:32  mark
X * Initial revision
X * 
X */
X
X#define NN
X
X/* Headers */
X
X#ifdef NN
X#include "config.h"
X#include "regexp.h"
X#else
X#include "pax.h"
X
X#ifndef lint
Xstatic char    *Ident = "$Id: regexp.c,v 1.1 88/12/23 18:02:32 mark Rel $";
X#endif
X#endif
X
X/*
X * The "internal use only" fields in regexp.h are present to pass info from
X * compile to execute that permits the execute phase to run lots faster on
X * simple cases.  They are:
X *
X * regstart	char that must begin a match; '\0' if none obvious
X * reganch	is the match anchored (at beginning-of-line only)?
X * regmust	string (pointer into program) that match must include, or NULL
X * regmlen	length of regmust string
X *
X * Regstart and reganch permit very fast decisions on suitable starting points
X * for a match, cutting down the work a lot.  Regmust permits fast rejection
X * of lines that cannot possibly match.  The regmust tests are costly enough
X * that regcomp() supplies a regmust only if the r.e. contains something
X * potentially expensive (at present, the only such thing detected is * or +
X * at the start of the r.e., which can involve a lot of backup).  Regmlen is
X * supplied because the test in regexec() needs it and regcomp() is computing
X * it anyway.
X */
X
X/*
X * Structure for regexp "program".  This is essentially a linear encoding
X * of a nondeterministic finite-state machine (aka syntax charts or
X * "railroad normal form" in parsing technology).  Each node is an opcode
X * plus a "nxt" pointer, possibly plus an operand.  "Nxt" pointers of
X * all nodes except BRANCH implement concatenation; a "nxt" pointer with
X * a BRANCH on both ends of it is connecting two alternatives.  (Here we
X * have one of the subtle syntax dependencies:  an individual BRANCH (as
X * opposed to a collection of them) is never concatenated with anything
X * because of operator precedence.)  The operand of some types of node is
X * a literal string; for others, it is a node leading into a sub-FSM.  In
X * particular, the operand of a BRANCH node is the first node of the branch.
X * (NB this is *not* a tree structure:  the tail of the branch connects
X * to the thing following the set of BRANCHes.)  The opcodes are:
X */
X
X/* definition	number	opnd?	meaning */
X#define	END	0		/* no	End of program. */
X#define	BOL	1		/* no	Match "" at beginning of line. */
X#define	EOL	2		/* no	Match "" at end of line. */
X#define	ANY	3		/* no	Match any one character. */
X#define	ANYOF	4		/* str	Match any character in this string. */
X#define	ANYBUT	5		/* str	Match any character not in this
X				 * string. */
X#define	BRANCH	6		/* node	Match this alternative, or the
X				 * nxt... */
X#define	BACK	7		/* no	Match "", "nxt" ptr points backward. */
X#define	EXACTLY	8		/* str	Match this string. */
X#define	NOTHING	9		/* no	Match empty string. */
X#define	STAR	10		/* node	Match this (simple) thing 0 or more
X				 * times. */
X#define	OPEN	20		/* no	Mark this point in input as start of
X				 * #n. */
X /* OPEN+1 is number 1, etc. */
X#define	CLOSE	30		/* no	Analogous to OPEN. */
X
X/*
X * Opcode notes:
X *
X * BRANCH	The set of branches constituting a single choice are hooked
X *		together with their "nxt" pointers, since precedence prevents
X *		anything being concatenated to any individual branch.  The
X *		"nxt" pointer of the last BRANCH in a choice points to the
X *		thing following the whole choice.  This is also where the
X *		final "nxt" pointer of each individual branch points; each
X *		branch starts with the operand node of a BRANCH node.
X *
X * BACK		Normal "nxt" pointers all implicitly point forward; BACK
X *		exists to make loop structures possible.
X *
X * STAR		Complex '*' is implemented as a circular BRANCH structure
X *		using BACK.  Simple cases (one character per match) are
X *		implemented with STAR for speed and to minimize recursive
X *		plunges.
X *
X * OPEN,CLOSE	...are numbered at compile time.
X */
X
X/*
X * A node is one char of opcode followed by two chars of "nxt" pointer.
X * "Nxt" pointers are stored as two 8-bit pieces, high order first.  The
X * value is a positive offset from the opcode of the node containing it.
X * An operand, if any, simply follows the node.  (Note that much of the
X * code generation knows about this implicit relationship.)
X *
X * Using two bytes for the "nxt" pointer is vast overkill for most things,
X * but allows patterns to get big without disasters.
X *
X * OP(p)	the opcode byte of the node at p
X * NEXT(p)	the node's "nxt" offset, assembled from the two bytes that
X *		follow the opcode (each masked to 8 bits)
X * OPERAND(p)	address of the first byte after the 3-byte node header
X */
X#define	OP(p)	(*(p))
X#define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
X#define	OPERAND(p)	((p) + 3)
X
X/*
X * Utility definitions.
X *
X * FAIL(m)	report m through regerror() and return NULL from the
X *		enclosing function.  It expands to a braced block rather
X *		than a single statement, so an "if (...) FAIL(m); else"
X *		needs explicit braces around the FAIL.
X * ISMULT(c)	true if c is a repetition operator; only '*' here.
X * META		the characters that end a run of ordinary characters
X *		when regatom() collects a literal string.
X * UCHARAT(p)	the char at p as an int, read through an unsigned char
X *		pointer, or masked with CHARBITS when that is defined.
X */
X
X#define	FAIL(m)	{ regerror(m); return(NULL); }
X#define	ISMULT(c)	((c) == '*')
X#define	META	"^$.[()|*\\"
X#ifndef CHARBITS
X#define	UCHARAT(p)	((int)*(unsigned char *)(p))
X#else
X#define	UCHARAT(p)	((int)*(p)&CHARBITS)
X#endif
X
X/*
X * Flags to be passed up and down.
X */
X#define	HASWIDTH	01	/* Known never to match null string. */
X#define	SIMPLE		02	/* Simple enough to be STAR operand. */
X#define	SPSTART		04	/* Starts with * */
X#define	WORST		0	/* Worst case. */
X
X/*
X * Global work variables for regcomp().
X */
Xstatic char    *regparse;	/* Input-scan pointer. */
Xstatic int      regnpar;	/* () count. */
Xstatic char     regdummy;	/* Stand-in code target for the sizing pass. */
Xstatic char    *regcode;	/* Code-emit pointer; &regdummy = don't. */
Xstatic long     regsize;	/* Code size. */
X
X/*
X * Forward declarations for regcomp()'s friends.
X */
X#ifndef STATIC
X#define	STATIC	static
X#endif
XSTATIC char    *reg();
XSTATIC char    *regbranch();
XSTATIC char    *regpiece();
XSTATIC char    *regatom();
XSTATIC char    *regnode();
XSTATIC char    *regnext();
XSTATIC void     regc();
XSTATIC void     reginsert();
XSTATIC void     regtail();
XSTATIC void     regoptail();
X#ifdef STRCSPN
XSTATIC int      strcspn();
X#endif
X
X/*
X - regcomp - compile a regular expression into internal code
X *
X * We can't allocate space until we know how big the compiled form will be,
X * but we can't compile it (and thus know how big it is) until we've got a
X * place to put the code.  So we cheat:  we compile it twice, once with code
X * generation turned off and size counting turned on, and once "for real".
X * This also means that we don't allocate space until we are sure that the
X * thing really will compile successfully, and we never have to move the
X * code and thus invalidate pointers into it.  (Note that it has to be in
X * one piece because free() must be able to free it all.)
X *
X * Beware that the optimization-preparation code in here knows about some
X * of the structure of the compiled regexp.
X *
X * Returns a pointer to a malloc()ed regexp holding the compiled program,
X * or NULL after the problem has been reported through regerror() (NULL
X * argument, syntax error, program too big, out of space).  Nothing stays
X * allocated when NULL is returned.
X */
Xregexp *regcomp(exp)
Xchar           *exp;
X{
X    register regexp *r;
X    register char  *scan;
X    register char  *longest;
X    register int    len;
X    int             flags;
X    extern char    *malloc();
X    extern void     free();
X
X    if (exp == NULL)
X	FAIL("NULL argument");
X
X    /* First pass: determine size, legality. */
X    regparse = exp;
X    regnpar = 1;
X    regsize = 0L;
X    regcode = &regdummy;
X    regc(MAGIC);
X    if (reg(0, &flags) == NULL)
X	return (NULL);
X
X    /* Small enough for pointer-storage convention? */
X    if (regsize >= 32767L)	/* Probably could be 65535L. */
X	FAIL("regexp too big");
X
X    /* Allocate space. */
X    r = (regexp *) malloc(sizeof(regexp) + (unsigned) regsize);
X    if (r == NULL)
X	FAIL("out of space");
X
X    /* Second pass: emit code. */
X    regparse = exp;
X    regnpar = 1;
X    regcode = r->program;
X    regc(MAGIC);
X    if (reg(0, &flags) == NULL) {
X	free((char *) r);	/* Don't leak the half-built program. */
X	return (NULL);
X    }
X
X    /* Dig out information for optimizations. */
X    r->regstart = '\0';		/* Worst-case defaults. */
X    r->reganch = 0;
X    r->regmust = NULL;
X    r->regmlen = 0;
X    scan = r->program + 1;	/* First BRANCH. */
X    if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
X	scan = OPERAND(scan);
X
X	/* Starting-point info. */
X	if (OP(scan) == EXACTLY)
X	    r->regstart = *OPERAND(scan);
X	else if (OP(scan) == BOL)
X	    r->reganch++;
X
X	/*
X	 * If there's something expensive in the r.e., find the longest
X	 * literal string that must appear and make it the regmust.  Resolve
X	 * ties in favor of later strings, since the regstart check works
X	 * with the beginning of the r.e. and avoiding duplication
X	 * strengthens checking.  Not a strong reason, but sufficient in the
X	 * absence of others. 
X	 */
X	if (flags & SPSTART) {
X	    longest = NULL;
X	    len = 0;
X	    for (; scan != NULL; scan = regnext(scan))
X		if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
X		    longest = OPERAND(scan);
X		    len = strlen(OPERAND(scan));
X		}
X	    r->regmust = longest;
X	    r->regmlen = len;
X	}
X    }
X    return (r);
X}
X
X/*
X - reg - regular expression, i.e. main body or parenthesized thing
X *
X * Caller must absorb opening parenthesis.
X *
X * Combining parenthesis handling with the base level of regular expression
X * is a trifle forced, but the need to tie the tails of the branches to what
X * follows makes it hard to avoid.
X *
X * When paren is nonzero the branches are bracketed by OPEN+n and CLOSE+n
X * nodes, n being the value of regnpar on entry; the group is rejected if
X * regnpar has already reached NSUBEXP.  When paren is zero the branch list
X * is terminated by an END node instead.
X *
X * *flagp receives HASWIDTH only if every branch has it, and SPSTART if
X * any branch has it.
X *
X * Returns the first node emitted (the OPEN node, or the first BRANCH when
X * not parenthesized), or NULL once an error has been reported.
X */
Xstatic char *reg(paren, flagp)
Xint             paren;		/* Parenthesized? */
Xint            *flagp;
X{
X    register char  *ret;
X    register char  *br;
X    register char  *ender;
X    register int    parno;
X    int             flags;
X
X    *flagp = HASWIDTH;		/* Tentatively. */
X
X    /* Make an OPEN node, if parenthesized. */
X    if (paren) {
X	if (regnpar >= NSUBEXP)
X	    FAIL("too many ()");
X	parno = regnpar;
X	regnpar++;
X	ret = regnode(OPEN + parno);
X    } else
X	ret = NULL;
X
X    /* Pick up the branches, linking them together. */
X    br = regbranch(&flags);
X    if (br == NULL)
X	return (NULL);
X    if (ret != NULL)
X	regtail(ret, br);	/* OPEN -> first. */
X    else
X	ret = br;
X    if (!(flags & HASWIDTH))
X	*flagp &= ~HASWIDTH;
X    *flagp |= flags & SPSTART;
X    while (*regparse == '|') {
X	regparse++;
X	br = regbranch(&flags);
X	if (br == NULL)
X	    return (NULL);
X	regtail(ret, br);	/* BRANCH -> BRANCH. */
X	if (!(flags & HASWIDTH))
X	    *flagp &= ~HASWIDTH;
X	*flagp |= flags & SPSTART;
X    }
X
X    /* Make a closing node, and hook it on the end. */
X    ender = regnode((paren) ? CLOSE + parno : END);	/* parno is only read when paren is set. */
X    regtail(ret, ender);
X
X    /* Hook the tails of the branches to the closing node. */
X    for (br = ret; br != NULL; br = regnext(br))
X	regoptail(br, ender);
X
X    /* Check for proper termination. */
X    if (paren && *regparse++ != ')') {
X	FAIL("unmatched ()");
X    } else if (!paren && *regparse != '\0') {
X	if (*regparse == ')') {
X	    FAIL("unmatched ()");
X	} else
X	    FAIL("junk on end");/* "Can't happen". */
X	/* NOTREACHED */
X    }
X    return (ret);
X}
X
X/*
X - regbranch - one alternative of an | operator
X *
X * Implements the concatenation operator.
X *
X * Emits a BRANCH node followed by the pieces of the alternative, each
X * piece being chained to the next with regtail().  Parsing stops at the
X * end of the pattern, at '|' or at ')', none of which is consumed.  An
X * empty alternative gets a NOTHING node as its body.
X *
X * *flagp receives HASWIDTH if any piece has it, and SPSTART if the first
X * piece has it.
X *
X * Returns the BRANCH node, or NULL if regpiece() failed.
X */
Xstatic char  *regbranch(flagp)
Xint            *flagp;
X{
X    register char  *ret;
X    register char  *chain;
X    register char  *latest;
X    int             flags;
X
X    *flagp = WORST;		/* Tentatively. */
X
X    ret = regnode(BRANCH);
X    chain = NULL;
X    while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
X	latest = regpiece(&flags);
X	if (latest == NULL)
X	    return (NULL);
X	*flagp |= flags & HASWIDTH;
X	if (chain == NULL)	/* First piece. */
X	    *flagp |= flags & SPSTART;
X	else
X	    regtail(chain, latest);
X	chain = latest;
X    }
X    if (chain == NULL)		/* Loop ran zero times. */
X	regnode(NOTHING);
X
X    return (ret);
X}
X
X/*
X - regpiece - something followed by possible [*]
X *
X * Note that the branching code sequence used for * is somewhat optimized:
X * it uses the same NOTHING node as both the endmarker for its branch
X * list and the body of the last branch.  It might seem that this node could
X * be dispensed with entirely, but the endmarker role is not redundant.
X *
X * Without a following '*' the atom and its flags are passed through
X * unchanged.  With one, the atom must have HASWIDTH, *flagp becomes
X * WORST | SPSTART, and a further '*' directly after it is rejected.
X *
X * Returns the first node of the piece, or NULL once an error has been
X * reported.
X */
Xstatic char *regpiece(flagp)
Xint            *flagp;
X{
X    register char  *ret;
X    register char   op;
X    register char  *nxt;
X    int             flags;
X
X    ret = regatom(&flags);
X    if (ret == NULL)
X	return (NULL);
X
X    op = *regparse;
X    if (!ISMULT(op)) {
X	*flagp = flags;
X	return (ret);
X    }
X    if (!(flags & HASWIDTH))
X	FAIL("* operand could be empty");
X    *flagp = (WORST | SPSTART);
X
X    if (op == '*' && (flags & SIMPLE))
X	reginsert(STAR, ret);
X    else if (op == '*') {
X	/* Emit x* as (x&|), where & means "self". */
X	reginsert(BRANCH, ret);	/* Either x */
X	regoptail(ret, regnode(BACK));	/* and loop */
X	regoptail(ret, ret);	/* back */
X	regtail(ret, regnode(BRANCH));	/* or */
X	regtail(ret, regnode(NOTHING));	/* null. */
X    } 
X    regparse++;
X    if (ISMULT(*regparse))
X	FAIL("nested *");
X
X    return (ret);
X}
X
X/*
X - regatom - the lowest level
X *
X * Optimization:  gobbles an entire sequence of ordinary characters so that
X * it can turn them into a single node, which is smaller to store and
X * faster to run.  Backslashed characters are exceptions, each becoming a
X * separate node; the code is simpler that way and it's not worth fixing.
X *
X * Flags stored in *flagp:
X *	^  $		WORST
X *	.  [...]  \c	HASWIDTH | SIMPLE
X *	(...)		HASWIDTH and SPSTART as reported by reg()
X *	literal run	HASWIDTH, plus SIMPLE when it is a single character
X *
X * Returns the node emitted for the atom, or NULL once an error has been
X * reported.
X */
Xstatic char *regatom(flagp)
Xint            *flagp;
X{
X    register char  *ret;
X    int             flags;
X
X    *flagp = WORST;		/* Tentatively. */
X
X    switch (*regparse++) {
X    case '^':
X	ret = regnode(BOL);
X	break;
X    case '$':
X	ret = regnode(EOL);
X	break;
X    case '.':
X	ret = regnode(ANY);
X	*flagp |= HASWIDTH | SIMPLE;
X	break;
X    case '[':{
X	    register int    class;
X	    register int    classend;
X
X	    if (*regparse == '^') {	/* Complement of range. */
X		ret = regnode(ANYBUT);
X		regparse++;
X	    } else
X		ret = regnode(ANYOF);
X	    if (*regparse == ']' || *regparse == '-')	/* Leading ] or - is taken literally. */
X		regc(*regparse++);
X	    while (*regparse != '\0' && *regparse != ']') {
X		if (*regparse == '-') {
X		    regparse++;
X		    if (*regparse == ']' || *regparse == '\0')
X			regc('-');	/* Trailing - is taken literally. */
X		    else {
X			class = UCHARAT(regparse - 2) + 1;	/* Low end itself was already emitted. */
X			classend = UCHARAT(regparse);
X			if (class > classend + 1)
X			    FAIL("invalid [] range");
X			for (; class <= classend; class++)
X			    regc(class);
X			regparse++;
X		    }
X		} else
X		    regc(*regparse++);
X	    }
X	    regc('\0');
X	    if (*regparse != ']')
X		FAIL("unmatched []");
X	    regparse++;
X	    *flagp |= HASWIDTH | SIMPLE;
X	}
X	break;
X    case '(':
X	ret = reg(1, &flags);
X	if (ret == NULL)
X	    return (NULL);
X	*flagp |= flags & (HASWIDTH | SPSTART);
X	break;
X    case '\0':
X    case '|':
X    case ')':
X	FAIL("internal urp");	/* Supposed to be caught earlier. */
X	break;
X    case '*':
X	FAIL("* follows nothing");
X	break;
X    case '\\':
X	if (*regparse == '\0')
X	    FAIL("trailing \\");
X	ret = regnode(EXACTLY);
X	regc(*regparse++);
X	regc('\0');
X	*flagp |= HASWIDTH | SIMPLE;
X	break;
X    default:{
X	    register int    len;
X	    register char   ender;
X
X	    regparse--;		/* Put back the character the switch consumed. */
X	    len = strcspn(regparse, META);
X	    if (len <= 0)
X		FAIL("internal disaster");
X	    ender = *(regparse + len);
X	    if (len > 1 && ISMULT(ender))
X		len--;		/* Back off clear of * operand. */
X	    *flagp |= HASWIDTH;
X	    if (len == 1)
X		*flagp |= SIMPLE;
X	    ret = regnode(EXACTLY);
X	    while (len > 0) {
X		regc(*regparse++);
X		len--;
X	    }
X	    regc('\0');
X	}
X	break;
X    }
X
X    return (ret);
X}
X
X/*
X - regnode - emit a node
X *
X * Appends a 3-byte node (opcode op, null "nxt" pointer) at the code-emit
X * pointer and returns its address.  During the sizing pass, when the emit
X * pointer is &regdummy, nothing is stored:  the size counter grows by 3
X * and &regdummy is returned.
X */
Xstatic char *regnode(op)
Xchar            op;
X{
X    register char  *node = regcode;
X
X    if (node == &regdummy) {
X	regsize += 3;		/* Just count the node. */
X    } else {
X	node[0] = op;
X	node[1] = '\0';		/* Null "nxt" pointer. */
X	node[2] = '\0';
X	regcode = node + 3;
X    }
X
X    return (node);
X}
X
X/*
X - regc - emit (if appropriate) a byte of code
X *
X * During the sizing pass (emit pointer is &regdummy) the byte is only
X * counted; otherwise it is stored and the emit pointer advanced.
X */
Xstatic void regc(b)
Xchar            b;
X{
X    if (regcode == &regdummy) {
X	regsize++;
X	return;
X    }
X    *regcode++ = b;
X}
X
X/*
X - reginsert - insert an operator in front of already-emitted operand
X *
X * Means relocating the operand:  everything from opnd up to the emit
X * pointer is moved up by the size of a node (3 bytes), and a node with
X * opcode op and a null "nxt" pointer is written where the operand used
X * to start.  During the sizing pass only the size counter is updated.
X */
Xstatic void reginsert(op, opnd)
Xchar            op;
Xchar           *opnd;
X{
X    register char  *from;
X    register char  *to;
X
X    if (regcode == &regdummy) {
X	regsize += 3;
X	return;
X    }
X
X    /* Copy backwards, the two regions overlap. */
X    from = regcode;
X    to = regcode + 3;
X    regcode = to;
X    while (from > opnd) {
X	--to;
X	--from;
X	*to = *from;
X    }
X
X    /* Op node, where operand used to be. */
X    opnd[0] = op;
X    opnd[1] = '\0';
X    opnd[2] = '\0';
X}
X
X/*
X - regtail - set the next-pointer at the end of a node chain
X *
X * Follows the chain starting at p with regnext() to its last node and
X * stores the distance between that node and val in its two "nxt" bytes,
X * high byte first.  For a BACK node the distance is measured from val up
X * to the node, otherwise from the node up to val.  Does nothing when p
X * is &regdummy (sizing pass).
X */
Xstatic void regtail(p, val)
Xchar           *p;
Xchar           *val;
X{
X    register char  *last;
X    register char  *following;
X    register int    offset;
X
X    if (p == &regdummy)
X	return;
X
X    /* Find last node. */
X    last = p;
X    while ((following = regnext(last)) != NULL)
X	last = following;
X
X    offset = (OP(last) == BACK) ? (last - val) : (val - last);
X    last[1] = (offset >> 8) & 0377;
X    last[2] = offset & 0377;
X}
X
X/*
X - regoptail - regtail on operand of first argument; nop if operandless
NO_NEWS_IS_GOOD_NEWS
echo "End of part 12"
echo "File regexp.c is continued in part 13"
echo "13" > s2_seq_.tmp
exit 0
---
Kim F. Storm        storm@texas.dk        Tel +45 429 174 00
Texas Instruments, Marielundvej 46E, DK-2730 Herlev, Denmark
	  No news is good news, but nn is better!

-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.