rsalz@uunet.uu.net (Rich Salz) (02/26/91)
Submitted-by: Paul Eggert <eggert@twinsun.com> Posting-number: Volume 24, Issue 22 Archive-name: gnudiff1.15/part07 #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of archive 7 (of 8)." # Contents: regex.c1 # Wrapped by eggert@ata on Mon Jan 7 11:25:31 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'regex.c1' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'regex.c1'\" else echo shar: Extracting \"'regex.c1'\" \(45472 characters\) sed "s/^X//" >'regex.c1' <<'END_OF_FILE' X/* Extended regular expression matching and search library. X Copyright (C) 1985, 1989-90 Free Software Foundation, Inc. X X This program is free software; you can redistribute it and/or modify X it under the terms of the GNU General Public License as published by X the Free Software Foundation; either version 1, or (at your option) X any later version. X X This program is distributed in the hope that it will be useful, X but WITHOUT ANY WARRANTY; without even the implied warranty of X MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the X GNU General Public License for more details. X X You should have received a copy of the GNU General Public License X along with this program; if not, write to the Free Software X Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ X X X/* To test, compile with -Dtest. This Dtestable feature turns this into X a self-contained program which reads a pattern, describes how it X compiles, then reads a string and searches for it. X X On the other hand, if you compile with both -Dtest and -Dcanned you X can run some tests we've already thought of. */ X X X#ifdef emacs X X/* The `emacs' switch turns on certain special matching commands X that make sense only in emacs. */ X X#include "config.h" X#include "lisp.h" X#include "buffer.h" X#include "syntax.h" X X#else /* not emacs */ X X#if defined (USG) || defined (STDC_HEADERS) X#ifndef BSTRING X#include <string.h> X#define bcopy(s,d,n) memcpy((d),(s),(n)) X#define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) X#define bzero(s,n) memset((s),0,(n)) X#endif X#endif X X#ifdef STDC_HEADERS X#include <stdlib.h> X#else Xchar *malloc (); Xchar *realloc (); X#endif X X/* Make alloca work the best possible way. */ X#ifdef __GNUC__ X#define alloca __builtin_alloca X#else X#ifdef sparc X#include <alloca.h> X#else Xchar *alloca (); X#endif X#endif X X X/* Define the syntax stuff, so we can do the \<, \>, etc. */ X X/* This must be nonzero for the wordchar and notwordchar pattern X commands in re_match_2. */ X#ifndef Sword X#define Sword 1 X#endif X X#define SYNTAX(c) re_syntax_table[c] X X X#ifdef SYNTAX_TABLE X Xchar *re_syntax_table; X X#else /* not SYNTAX_TABLE */ X Xstatic char re_syntax_table[256]; X X Xstatic void Xinit_syntax_once () X{ X register int c; X static int done = 0; X X if (done) X return; X X bzero (re_syntax_table, sizeof re_syntax_table); X X for (c = 'a'; c <= 'z'; c++) X re_syntax_table[c] = Sword; X X for (c = 'A'; c <= 'Z'; c++) X re_syntax_table[c] = Sword; X X for (c = '0'; c <= '9'; c++) X re_syntax_table[c] = Sword; X X done = 1; X} X X#endif /* SYNTAX_TABLE */ X#endif /* emacs */ X X/* We write fatal error messages on standard error. */ X#include <stdio.h> X X/* isalpha(3) etc. are used for the character classes. */ X#include <ctype.h> X/* Sequents are missing isgraph. */ X#ifndef isgraph X#define isgraph(c) (isprint((c)) && !isspace((c))) X#endif X X/* Get the interface, including the syntax bits. */ X#include "regex.h" X X X/* These are the command codes that appear in compiled regular X expressions, one per byte. Some command codes are followed by X argument bytes. A command code can specify any interpretation X whatsoever for its arguments. Zero-bytes may appear in the compiled X regular expression. X X The value of `exactn' is needed in search.c (search_buffer) in emacs. X So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of X `exactn' we use here must also be 1. */ X Xenum regexpcode X { X unused=0, X exactn=1, /* Followed by one byte giving n, then by n literal bytes. */ X begline, /* Fail unless at beginning of line. */ X endline, /* Fail unless at end of line. */ X jump, /* Followed by two bytes giving relative address to jump to. */ X on_failure_jump, /* Followed by two bytes giving relative address of X place to resume at in case of failure. */ X finalize_jump, /* Throw away latest failure point and then jump to X address. */ X maybe_finalize_jump, /* Like jump but finalize if safe to do so. X This is used to jump back to the beginning X of a repeat. If the command that follows X this jump is clearly incompatible with the X one at the beginning of the repeat, such that X we can be sure that there is no use backtracking X out of repetitions already completed, X then we finalize. */ X dummy_failure_jump, /* Jump, and push a dummy failure point. This X failure point will be thrown away if an attempt X is made to use it for a failure. A + construct X makes this before the first repeat. Also X use it as an intermediary kind of jump when X compiling an or construct. */ X succeed_n, /* Used like on_failure_jump except has to succeed n times; X then gets turned into an on_failure_jump. The relative X address following it is useless until then. The X address is followed by two bytes containing n. */ X jump_n, /* Similar to jump, but jump n times only; also the relative X address following is in turn followed by yet two more bytes X containing n. */ X set_number_at, /* Set the following relative location to the X subsequent number. */ X anychar, /* Matches any (more or less) one character. */ X charset, /* Matches any one char belonging to specified set. X First following byte is number of bitmap bytes. X Then come bytes for a bitmap saying which chars are in. X Bits in each byte are ordered low-bit-first. X A character is in the set if its bit is 1. X A character too large to have a bit in the map X is automatically not in the set. */ X charset_not, /* Same parameters as charset, but match any character X that is not one of those specified. */ X start_memory, /* Start remembering the text that is matched, for X storing in a memory register. Followed by one X byte containing the register number. Register numbers X must be in the range 0 through RE_NREGS. */ X stop_memory, /* Stop remembering the text that is matched X and store it in a memory register. Followed by X one byte containing the register number. Register X numbers must be in the range 0 through RE_NREGS. */ X duplicate, /* Match a duplicate of something remembered. X Followed by one byte containing the index of the memory X register. */ X before_dot, /* Succeeds if before point. */ X at_dot, /* Succeeds if at point. */ X after_dot, /* Succeeds if after point. */ X begbuf, /* Succeeds if at beginning of buffer. */ X endbuf, /* Succeeds if at end of buffer. */ X wordchar, /* Matches any word-constituent character. */ X notwordchar, /* Matches any char that is not a word-constituent. */ X wordbeg, /* Succeeds if at word beginning. */ X wordend, /* Succeeds if at word end. */ X wordbound, /* Succeeds if at a word boundary. */ X notwordbound,/* Succeeds if not at a word boundary. */ X syntaxspec, /* Matches any character whose syntax is specified. X followed by a byte which contains a syntax code, X e.g., Sword. */ X notsyntaxspec /* Matches any character whose syntax differs from X that specified. */ X }; X X X/* Number of failure points to allocate space for initially, X when matching. If this number is exceeded, more space is allocated, X so it is not a hard limit. */ X X#ifndef NFAILURES X#define NFAILURES 80 X#endif X X#ifdef CHAR_UNSIGNED X#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */ X#endif X#ifndef SIGN_EXTEND_CHAR X#define SIGN_EXTEND_CHAR(x) (x) X#endif X X X/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ X#define STORE_NUMBER(destination, number) \ X { (destination)[0] = (number) & 0377; \ X (destination)[1] = (number) >> 8; } X X/* Same as STORE_NUMBER, except increment the destination pointer to X the byte after where the number is stored. Watch out that values for X DESTINATION such as p + 1 won't work, whereas p will. */ X#define STORE_NUMBER_AND_INCR(destination, number) \ X { STORE_NUMBER(destination, number); \ X (destination) += 2; } X X X/* Put into DESTINATION a number stored in two contingous bytes starting X at SOURCE. */ X#define EXTRACT_NUMBER(destination, source) \ X { (destination) = *(source) & 0377; \ X (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; } X X/* Same as EXTRACT_NUMBER, except increment the pointer for source to X point to second byte of SOURCE. Note that SOURCE has to be a value X such as p, not, e.g., p + 1. */ X#define EXTRACT_NUMBER_AND_INCR(destination, source) \ X { EXTRACT_NUMBER (destination, source); \ X (source) += 2; } X X X/* Specify the precise syntax of regexps for compilation. This provides X for compatibility for various utilities which historically have X different, incompatible syntaxes. X X The argument SYNTAX is a bit-mask comprised of the various bits X defined in regex.h. */ X Xint Xre_set_syntax (syntax) X int syntax; X{ X int ret; X X ret = obscure_syntax; X obscure_syntax = syntax; X return ret; X} X X/* Set by re_set_syntax to the current regexp syntax to recognize. */ Xint obscure_syntax = 0; X X X X/* Macros for re_compile_pattern, which is found below these definitions. */ X X#define CHAR_CLASS_MAX_LENGTH 6 X X/* Fetch the next character in the uncompiled pattern, translating it if X necessary. */ X#define PATFETCH(c) \ X {if (p == pend) goto end_of_pattern; \ X c = * (unsigned char *) p++; \ X if (translate) c = translate[c]; } X X/* Fetch the next character in the uncompiled pattern, with no X translation. */ X#define PATFETCH_RAW(c) \ X {if (p == pend) goto end_of_pattern; \ X c = * (unsigned char *) p++; } X X#define PATUNFETCH p-- X X X/* If the buffer isn't allocated when it comes in, use this. */ X#define INIT_BUF_SIZE 28 X X/* Make sure we have at least N more bytes of space in buffer. */ X#define GET_BUFFER_SPACE(n) \ X { \ X while (b - bufp->buffer + (n) >= bufp->allocated) \ X EXTEND_BUFFER; \ X } X X/* Make sure we have one more byte of buffer space and then add CH to it. */ X#define BUFPUSH(ch) \ X { \ X GET_BUFFER_SPACE (1); \ X *b++ = (char) (ch); \ X } X X/* Extend the buffer by twice its current size via reallociation and X reset the pointers that pointed into the old allocation to point to X the correct places in the new allocation. If extending the buffer X results in it being larger than 1 << 16, then flag memory exhausted. */ X#define EXTEND_BUFFER \ X { char *old_buffer = bufp->buffer; \ X if (bufp->allocated == (1L<<16)) goto too_big; \ X bufp->allocated *= 2; \ X if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \ X bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \ X if (bufp->buffer == 0) \ X goto memory_exhausted; \ X b = (b - old_buffer) + bufp->buffer; \ X if (fixup_jump) \ X fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \ X if (laststart) \ X laststart = (laststart - old_buffer) + bufp->buffer; \ X begalt = (begalt - old_buffer) + bufp->buffer; \ X if (pending_exact) \ X pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ X } X X/* Set the bit for character C in a character set list. */ X#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) X X/* Get the next unsigned number in the uncompiled pattern. */ X#define GET_UNSIGNED_NUMBER(num) \ X { if (p != pend) \ X { \ X PATFETCH (c); \ X while (isdigit (c)) \ X { \ X if (num < 0) \ X num = 0; \ X num = num * 10 + c - '0'; \ X if (p == pend) \ X break; \ X PATFETCH (c); \ X } \ X } \ X } X X/* Subroutines for re_compile_pattern. */ Xstatic void store_jump (), insert_jump (), store_jump_n (), X insert_jump_n (), insert_op_2 (); X X X/* re_compile_pattern takes a regular-expression string X and converts it into a buffer full of byte commands for matching. X X PATTERN is the address of the pattern string X SIZE is the length of it. X BUFP is a struct re_pattern_buffer * which points to the info X on where to store the byte commands. X This structure contains a char * which points to the X actual space, which should have been obtained with malloc. X re_compile_pattern may use realloc to grow the buffer space. X X The number of bytes of commands can be found out by looking in X the `struct re_pattern_buffer' that bufp pointed to, after X re_compile_pattern returns. */ X Xchar * Xre_compile_pattern (pattern, size, bufp) X char *pattern; X int size; X struct re_pattern_buffer *bufp; X{ X register char *b = bufp->buffer; X register char *p = pattern; X char *pend = pattern + size; X register unsigned c, c1; X char *p1; X unsigned char *translate = (unsigned char *) bufp->translate; X X /* Address of the count-byte of the most recently inserted `exactn' X command. This makes it possible to tell whether a new exact-match X character can be added to that command or requires a new `exactn' X command. */ X X char *pending_exact = 0; X X /* Address of the place where a forward-jump should go to the end of X the containing expression. Each alternative of an `or', except the X last, ends with a forward-jump of this sort. */ X X char *fixup_jump = 0; X X /* Address of start of the most recently finished expression. X This tells postfix * where to find the start of its operand. */ X X char *laststart = 0; X X /* In processing a repeat, 1 means zero matches is allowed. */ X X char zero_times_ok; X X /* In processing a repeat, 1 means many matches is allowed. */ X X char many_times_ok; X X /* Address of beginning of regexp, or inside of last \(. */ X X char *begalt = b; X X /* In processing an interval, at least this many matches must be made. */ X int lower_bound; X X /* In processing an interval, at most this many matches can be made. */ X int upper_bound; X X /* Place in pattern (i.e., the {) to which to go back if the interval X is invalid. */ X char *beg_interval = 0; X X /* Stack of information saved by \( and restored by \). X Four stack elements are pushed by each \(: X First, the value of b. X Second, the value of fixup_jump. X Third, the value of regnum. X Fourth, the value of begalt. */ X X int stackb[40]; X int *stackp = stackb; X int *stacke = stackb + 40; X int *stackt; X X /* Counts \('s as they are encountered. Remembered for the matching \), X where it becomes the register number to put in the stop_memory X command. */ X X int regnum = 1; X X bufp->fastmap_accurate = 0; X X#ifndef emacs X#ifndef SYNTAX_TABLE X /* Initialize the syntax table. */ X init_syntax_once(); X#endif X#endif X X if (bufp->allocated == 0) X { X bufp->allocated = INIT_BUF_SIZE; X if (bufp->buffer) X /* EXTEND_BUFFER loses when bufp->allocated is 0. */ X bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE); X else X /* Caller did not allocate a buffer. Do it for them. */ X bufp->buffer = (char *) malloc (INIT_BUF_SIZE); X if (!bufp->buffer) goto memory_exhausted; X begalt = b = bufp->buffer; X } X X while (p != pend) X { X PATFETCH (c); X X switch (c) X { X case '$': X { X char *p1 = p; X /* When testing what follows the $, X look past the \-constructs that don't consume anything. */ X if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) X while (p1 != pend) X { X if (*p1 == '\\' && p1 + 1 != pend X && (p1[1] == '<' || p1[1] == '>' X || p1[1] == '`' || p1[1] == '\'' X#ifdef emacs X || p1[1] == '=' X#endif X || p1[1] == 'b' || p1[1] == 'B')) X p1 += 2; X else X break; X } X if (obscure_syntax & RE_TIGHT_VBAR) X { X if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend) X goto normal_char; X /* Make operand of last vbar end before this `$'. */ X if (fixup_jump) X store_jump (fixup_jump, jump, b); X fixup_jump = 0; X BUFPUSH (endline); X break; X } X /* $ means succeed if at end of line, but only in special contexts. X If validly in the middle of a pattern, it is a normal character. */ X X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend) X goto invalid_pattern; X if (p1 == pend || *p1 == '\n' X || (obscure_syntax & RE_CONTEXT_INDEP_OPS) X || (obscure_syntax & RE_NO_BK_PARENS X ? *p1 == ')' X : *p1 == '\\' && p1[1] == ')') X || (obscure_syntax & RE_NO_BK_VBAR X ? *p1 == '|' X : *p1 == '\\' && p1[1] == '|')) X { X BUFPUSH (endline); X break; X } X goto normal_char; X } X case '^': X /* ^ means succeed if at beg of line, but only if no preceding X pattern. */ X X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart) X goto invalid_pattern; X if (laststart && p - 2 >= pattern && p[-2] != '\n' X && !(obscure_syntax & RE_CONTEXT_INDEP_OPS)) X goto normal_char; X if (obscure_syntax & RE_TIGHT_VBAR) X { X if (p != pattern + 1 X && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) X goto normal_char; X BUFPUSH (begline); X begalt = b; X } X else X BUFPUSH (begline); X break; X X case '+': X case '?': X if ((obscure_syntax & RE_BK_PLUS_QM) X || (obscure_syntax & RE_LIMITED_OPS)) X goto normal_char; X handle_plus: X case '*': X /* If there is no previous pattern, char not special. */ X if (!laststart) X { X if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) X goto invalid_pattern; X else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) X goto normal_char; X } X /* If there is a sequence of repetition chars, X collapse it down to just one. */ X zero_times_ok = 0; X many_times_ok = 0; X while (1) X { X zero_times_ok |= c != '+'; X many_times_ok |= c != '?'; X if (p == pend) X break; X PATFETCH (c); X if (c == '*') X ; X else if (!(obscure_syntax & RE_BK_PLUS_QM) X && (c == '+' || c == '?')) X ; X else if ((obscure_syntax & RE_BK_PLUS_QM) X && c == '\\') X { X int c1; X PATFETCH (c1); X if (!(c1 == '+' || c1 == '?')) X { X PATUNFETCH; X PATUNFETCH; X break; X } X c = c1; X } X else X { X PATUNFETCH; X break; X } X } X X /* Star, etc. applied to an empty pattern is equivalent X to an empty pattern. */ X if (!laststart) X break; X X /* Now we know whether or not zero matches is allowed X and also whether or not two or more matches is allowed. */ X if (many_times_ok) X { X /* If more than one repetition is allowed, put in at the X end a backward relative jump from b to before the next X jump we're going to put in below (which jumps from X laststart to after this jump). */ X GET_BUFFER_SPACE (3); X store_jump (b, maybe_finalize_jump, laststart - 3); X b += 3; /* Because store_jump put stuff here. */ X } X /* On failure, jump from laststart to b + 3, which will be the X end of the buffer after this jump is inserted. */ X GET_BUFFER_SPACE (3); X insert_jump (on_failure_jump, laststart, b + 3, b); X pending_exact = 0; X b += 3; X if (!zero_times_ok) X { X /* At least one repetition is required, so insert a X dummy-failure before the initial on-failure-jump X instruction of the loop. This effects a skip over that X instruction the first time we hit that loop. */ X GET_BUFFER_SPACE (6); X insert_jump (dummy_failure_jump, laststart, laststart + 6, b); X b += 3; X } X break; X X case '.': X laststart = b; X BUFPUSH (anychar); X break; X X case '[': X if (p == pend) X goto invalid_pattern; X while (b - bufp->buffer X > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) X EXTEND_BUFFER; X X laststart = b; X if (*p == '^') X { X BUFPUSH (charset_not); X p++; X } X else X BUFPUSH (charset); X p1 = p; X X BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); X /* Clear the whole map */ X bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); X X if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not) X SET_LIST_BIT ('\n'); X X X /* Read in characters and ranges, setting map bits. */ X while (1) X { X PATFETCH (c); X X /* If set, \ escapes characters when inside [...]. */ X if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\') X { X PATFETCH(c1); X SET_LIST_BIT (c1); X continue; X } X if (c == ']') X { X if (p == p1 + 1) X { X /* If this is an empty bracket expression. */ X if ((obscure_syntax & RE_NO_EMPTY_BRACKETS) X && p == pend) X goto invalid_pattern; X } X else X /* Stop if this isn't merely a ] inside a bracket X expression, but rather the end of a bracket X expression. */ X break; X } X /* Get a range. */ X if (p[0] == '-' && p[1] != ']') X { X PATFETCH (c1); X PATFETCH (c1); X X if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1) X goto invalid_pattern; X X if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END) X && c1 == '-' && *p != ']') X goto invalid_pattern; X X while (c <= c1) X { X SET_LIST_BIT (c); X c++; X } X } X else if ((obscure_syntax & RE_CHAR_CLASSES) X && c == '[' && p[0] == ':') X { X /* Longest valid character class word has six characters. */ X char str[CHAR_CLASS_MAX_LENGTH]; X PATFETCH (c); X c1 = 0; X /* If no ] at end. */ X if (p == pend) X goto invalid_pattern; X while (1) X { X /* Don't translate the ``character class'' characters. */ X PATFETCH_RAW (c); X if (c == ':' || c == ']' || p == pend X || c1 == CHAR_CLASS_MAX_LENGTH) X break; X str[c1++] = c; X } X str[c1] = '\0'; X if (p == pend X || c == ']' /* End of the bracket expression. */ X || p[0] != ']' X || p + 1 == pend X || (strcmp (str, "alpha") != 0 X && strcmp (str, "upper") != 0 X && strcmp (str, "lower") != 0 X && strcmp (str, "digit") != 0 X && strcmp (str, "alnum") != 0 X && strcmp (str, "xdigit") != 0 X && strcmp (str, "space") != 0 X && strcmp (str, "print") != 0 X && strcmp (str, "punct") != 0 X && strcmp (str, "graph") != 0 X && strcmp (str, "cntrl") != 0)) X { X /* Undo the ending character, the letters, and leave X the leading : and [ (but set bits for them). */ X c1++; X while (c1--) X PATUNFETCH; X SET_LIST_BIT ('['); X SET_LIST_BIT (':'); X } X else X { X /* The ] at the end of the character class. */ X PATFETCH (c); X if (c != ']') X goto invalid_pattern; X for (c = 0; c < (1 << BYTEWIDTH); c++) X { X if ((strcmp (str, "alpha") == 0 && isalpha (c)) X || (strcmp (str, "upper") == 0 && isupper (c)) X || (strcmp (str, "lower") == 0 && islower (c)) X || (strcmp (str, "digit") == 0 && isdigit (c)) X || (strcmp (str, "alnum") == 0 && isalnum (c)) X || (strcmp (str, "xdigit") == 0 && isxdigit (c)) X || (strcmp (str, "space") == 0 && isspace (c)) X || (strcmp (str, "print") == 0 && isprint (c)) X || (strcmp (str, "punct") == 0 && ispunct (c)) X || (strcmp (str, "graph") == 0 && isgraph (c)) X || (strcmp (str, "cntrl") == 0 && iscntrl (c))) X SET_LIST_BIT (c); X } X } X } X else X SET_LIST_BIT (c); X } X X /* Discard any character set/class bitmap bytes that are all X 0 at the end of the map. Decrement the map-length byte too. */ X while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) X b[-1]--; X b += b[-1]; X break; X X case '(': X if (! (obscure_syntax & RE_NO_BK_PARENS)) X goto normal_char; X else X goto handle_open; X X case ')': X if (! (obscure_syntax & RE_NO_BK_PARENS)) X goto normal_char; X else X goto handle_close; X X case '\n': X if (! (obscure_syntax & RE_NEWLINE_OR)) X goto normal_char; X else X goto handle_bar; X X case '|': X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) X && (! laststart || p == pend)) X goto invalid_pattern; X else if (! (obscure_syntax & RE_NO_BK_VBAR)) X goto normal_char; X else X goto handle_bar; X X case '{': X if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES) X && (obscure_syntax & RE_INTERVALS))) X goto normal_char; X else X goto handle_interval; X X case '\\': X if (p == pend) goto invalid_pattern; X PATFETCH_RAW (c); X switch (c) X { X case '(': X if (obscure_syntax & RE_NO_BK_PARENS) X goto normal_backsl; X handle_open: X if (stackp == stacke) goto nesting_too_deep; X X /* Laststart should point to the start_memory that we are about X to push (unless the pattern has RE_NREGS or more ('s). */ X *stackp++ = b - bufp->buffer; X if (regnum < RE_NREGS) X { X BUFPUSH (start_memory); X BUFPUSH (regnum); X } X *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; X *stackp++ = regnum++; X *stackp++ = begalt - bufp->buffer; X fixup_jump = 0; X laststart = 0; X begalt = b; X break; X X case ')': X if (obscure_syntax & RE_NO_BK_PARENS) X goto normal_backsl; X handle_close: X if (stackp == stackb) goto unmatched_close; X begalt = *--stackp + bufp->buffer; X if (fixup_jump) X store_jump (fixup_jump, jump, b); X if (stackp[-1] < RE_NREGS) X { X BUFPUSH (stop_memory); X BUFPUSH (stackp[-1]); X } X stackp -= 2; X fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0; X laststart = *--stackp + bufp->buffer; X break; X X case '|': X if ((obscure_syntax & RE_LIMITED_OPS) X || (obscure_syntax & RE_NO_BK_VBAR)) X goto normal_backsl; X handle_bar: X if (obscure_syntax & RE_LIMITED_OPS) X goto normal_char; X /* Insert before the previous alternative a jump which X jumps to this alternative if the former fails. */ X GET_BUFFER_SPACE (6); X insert_jump (on_failure_jump, begalt, b + 6, b); X pending_exact = 0; X b += 3; X /* The alternative before the previous alternative has a X jump after it which gets executed if it gets matched. X Adjust that jump so it will jump to the previous X alternative's analogous jump (put in below, which in X turn will jump to the next (if any) alternative's such X jump, etc.). The last such jump jumps to the correct X final destination. */ X if (fixup_jump) X store_jump (fixup_jump, jump, b); X X /* Leave space for a jump after previous alternative---to be X filled in later. */ X fixup_jump = b; X b += 3; X X laststart = 0; X begalt = b; X break; X X case '{': X if (! (obscure_syntax & RE_INTERVALS) X /* Let \{ be a literal. */ X || ((obscure_syntax & RE_INTERVALS) X && (obscure_syntax & RE_NO_BK_CURLY_BRACES)) X /* If it's the string "\{". */ X || (p - 2 == pattern && p == pend)) X goto normal_backsl; X handle_interval: X beg_interval = p - 1; /* The {. */ X /* If there is no previous pattern, this isn't an interval. */ X if (!laststart) X { X if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) X goto invalid_pattern; X else X goto normal_backsl; X } X /* It also isn't an interval if not preceded by an re X matching a single character or subexpression, or if X the current type of intervals can't handle back X references and the previous thing is a back reference. */ X if (! (*laststart == anychar X || *laststart == charset X || *laststart == charset_not X || *laststart == start_memory X || (*laststart == exactn && laststart[1] == 1) X || (! (obscure_syntax & RE_NO_BK_REFS) X && *laststart == duplicate))) X { X if (obscure_syntax & RE_NO_BK_CURLY_BRACES) X goto normal_char; X X /* Posix extended syntax is handled in previous X statement; this is for Posix basic syntax. */ X if (obscure_syntax & RE_INTERVALS) X goto invalid_pattern; X X goto normal_backsl; X } X lower_bound = -1; /* So can see if are set. */ X upper_bound = -1; X GET_UNSIGNED_NUMBER (lower_bound); X if (c == ',') X { X GET_UNSIGNED_NUMBER (upper_bound); X if (upper_bound < 0) X upper_bound = RE_DUP_MAX; X } X if (upper_bound < 0) X upper_bound = lower_bound; X if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES)) X { X if (c != '\\') X goto invalid_pattern; X PATFETCH (c); X } X if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX X || lower_bound > upper_bound X || ((obscure_syntax & RE_NO_BK_CURLY_BRACES) X && p != pend && *p == '{')) X { X if (obscure_syntax & RE_NO_BK_CURLY_BRACES) X goto unfetch_interval; X else X goto invalid_pattern; X } X X /* If upper_bound is zero, don't want to succeed at all; X jump from laststart to b + 3, which will be the end of X the buffer after this jump is inserted. */ X X if (upper_bound == 0) X { X GET_BUFFER_SPACE (3); X insert_jump (jump, laststart, b + 3, b); X b += 3; X } X X /* Otherwise, after lower_bound number of succeeds, jump X to after the jump_n which will be inserted at the end X of the buffer, and insert that jump_n. */ X else X { /* Set to 5 if only one repetition is allowed and X hence no jump_n is inserted at the current end of X the buffer; then only space for the succeed_n is X needed. Otherwise, need space for both the X succeed_n and the jump_n. */ X X unsigned slots_needed = upper_bound == 1 ? 5 : 10; X X GET_BUFFER_SPACE (slots_needed); X /* Initialize the succeed_n to n, even though it will X be set by its attendant set_number_at, because X re_compile_fastmap will need to know it. Jump to X what the end of buffer will be after inserting X this succeed_n and possibly appending a jump_n. */ X insert_jump_n (succeed_n, laststart, b + slots_needed, X b, lower_bound); X b += 5; /* Just increment for the succeed_n here. */ X X /* More than one repetition is allowed, so put in at X the end of the buffer a backward jump from b to the X succeed_n we put in above. By the time we've gotten X to this jump when matching, we'll have matched once X already, so jump back only upper_bound - 1 times. */ X X if (upper_bound > 1) X { X store_jump_n (b, jump_n, laststart, upper_bound - 1); X b += 5; X /* When hit this when matching, reset the X preceding jump_n's n to upper_bound - 1. */ X BUFPUSH (set_number_at); X GET_BUFFER_SPACE (2); X STORE_NUMBER_AND_INCR (b, -5); X STORE_NUMBER_AND_INCR (b, upper_bound - 1); X } X /* When hit this when matching, set the succeed_n's n. */ X GET_BUFFER_SPACE (5); X insert_op_2 (set_number_at, laststart, b, 5, lower_bound); X b += 5; X } X pending_exact = 0; X beg_interval = 0; X break; X X X unfetch_interval: X /* If an invalid interval, match the characters as literals. */ X if (beg_interval) X p = beg_interval; X else X { X fprintf (stderr, X "regex: no interval beginning to which to backtrack.\n"); X exit (1); X } X X beg_interval = 0; X PATFETCH (c); /* normal_char expects char in `c'. */ X goto normal_char; X break; X X#ifdef emacs X case '=': X BUFPUSH (at_dot); X break; X X case 's': X laststart = b; X BUFPUSH (syntaxspec); X PATFETCH (c); X BUFPUSH (syntax_spec_code[c]); X break; X X case 'S': X laststart = b; X BUFPUSH (notsyntaxspec); X PATFETCH (c); X BUFPUSH (syntax_spec_code[c]); X break; X#endif /* emacs */ X X case 'w': X laststart = b; X BUFPUSH (wordchar); X break; X X case 'W': X laststart = b; X BUFPUSH (notwordchar); X break; X X case '<': X BUFPUSH (wordbeg); X break; X X case '>': X BUFPUSH (wordend); X break; X X case 'b': X BUFPUSH (wordbound); X break; X X case 'B': X BUFPUSH (notwordbound); X break; X X case '`': X BUFPUSH (begbuf); X break; X X case '\'': X BUFPUSH (endbuf); X break; X X case '1': X case '2': X case '3': X case '4': X case '5': X case '6': X case '7': X case '8': X case '9': X if (obscure_syntax & RE_NO_BK_REFS) X goto normal_char; X c1 = c - '0'; X if (c1 >= regnum) X { X if (obscure_syntax & RE_NO_EMPTY_BK_REF) X goto invalid_pattern; X else X goto normal_char; X } X /* Can't back reference to a subexpression if inside of it. */ X for (stackt = stackp - 2; stackt > stackb; stackt -= 4) X if (*stackt == c1) X goto normal_char; X laststart = b; X BUFPUSH (duplicate); X BUFPUSH (c1); X break; X X case '+': X case '?': X if (obscure_syntax & RE_BK_PLUS_QM) X goto handle_plus; X else X goto normal_backsl; X break; X X default: X normal_backsl: X /* You might think it would be useful for \ to mean X not to translate; but if we don't translate it X it will never match anything. */ X if (translate) c = translate[c]; X goto normal_char; X } X break; X X default: X normal_char: /* Expects the character in `c'. */ X if (!pending_exact || pending_exact + *pending_exact + 1 != b X || *pending_exact == 0177 || *p == '*' || *p == '^' X || ((obscure_syntax & RE_BK_PLUS_QM) X ? *p == '\\' && (p[1] == '+' || p[1] == '?') X : (*p == '+' || *p == '?')) X || ((obscure_syntax & RE_INTERVALS) X && ((obscure_syntax & RE_NO_BK_CURLY_BRACES) X ? *p == '{' X : (p[0] == '\\' && p[1] == '{')))) X { X laststart = b; X BUFPUSH (exactn); X pending_exact = b; X BUFPUSH (0); X } X BUFPUSH (c); X (*pending_exact)++; X } X } X X if (fixup_jump) X store_jump (fixup_jump, jump, b); X X if (stackp != stackb) goto unmatched_open; X X bufp->used = b - bufp->buffer; X return 0; X X invalid_pattern: X return "Invalid regular expression"; X X unmatched_open: X return "Unmatched \\("; X X unmatched_close: X return "Unmatched \\)"; X X end_of_pattern: X return "Premature end of regular expression"; X X nesting_too_deep: X return "Nesting too deep"; X X too_big: X return "Regular expression too big"; X X memory_exhausted: X return "Memory exhausted"; X} X X X/* Store a jump of the form <OPCODE> <relative address>. X Store in the location FROM a jump operation to jump to relative X address FROM - TO. OPCODE is the opcode to store. */ X Xstatic void Xstore_jump (from, opcode, to) X char *from, *to; X char opcode; X{ X from[0] = opcode; X STORE_NUMBER(from + 1, to - (from + 3)); X} X X X/* Open up space before char FROM, and insert there a jump to TO. X CURRENT_END gives the end of the storage not in use, so we know X how much data to copy up. OP is the opcode of the jump to insert. X X If you call this function, you must zero out pending_exact. */ X Xstatic void Xinsert_jump (op, from, to, current_end) X char op; X char *from, *to, *current_end; X{ X register char *pfrom = current_end; /* Copy from here... */ X register char *pto = current_end + 3; /* ...to here. */ X X while (pfrom != from) X *--pto = *--pfrom; X store_jump (from, op, to); X} X X X/* Store a jump of the form <opcode> <relative address> <n> . X X Store in the location FROM a jump operation to jump to relative X address FROM - TO. OPCODE is the opcode to store, N is a number the X jump uses, say, to decide how many times to jump. X X If you call this function, you must zero out pending_exact. */ X Xstatic void Xstore_jump_n (from, opcode, to, n) X char *from, *to; X char opcode; X unsigned n; X{ X from[0] = opcode; X STORE_NUMBER (from + 1, to - (from + 3)); X STORE_NUMBER (from + 3, n); X} X X X/* Similar to insert_jump, but handles a jump which needs an extra X number to handle minimum and maximum cases. Open up space at X location FROM, and insert there a jump to TO. CURRENT_END gives the X end of the storage in use, so we know how much data to copy up. OP is X the opcode of the jump to insert. X X If you call this function, you must zero out pending_exact. */ X Xstatic void Xinsert_jump_n (op, from, to, current_end, n) X char op; X char *from, *to, *current_end; X unsigned n; X{ X register char *pfrom = current_end; /* Copy from here... */ X register char *pto = current_end + 5; /* ...to here. */ X X while (pfrom != from) X *--pto = *--pfrom; X store_jump_n (from, op, to, n); X} X X X/* Open up space at location THERE, and insert operation OP followed by X NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so X we know how much data to copy up. X X If you call this function, you must zero out pending_exact. */ X Xstatic void Xinsert_op_2 (op, there, current_end, num_1, num_2) X char op; X char *there, *current_end; X int num_1, num_2; X{ X register char *pfrom = current_end; /* Copy from here... */ X register char *pto = current_end + 5; /* ...to here. */ X X while (pfrom != there) X *--pto = *--pfrom; X X there[0] = op; X STORE_NUMBER (there + 1, num_1); X STORE_NUMBER (there + 3, num_2); X} X X X X/* Given a pattern, compute a fastmap from it. The fastmap records X which of the (1 << BYTEWIDTH) possible characters can start a string X that matches the pattern. This fastmap is used by re_search to skip X quickly over totally implausible text. X X The caller must supply the address of a (1 << BYTEWIDTH)-byte data X area as bufp->fastmap. X The other components of bufp describe the pattern to be used. */ X Xvoid Xre_compile_fastmap (bufp) X struct re_pattern_buffer *bufp; X{ X unsigned char *pattern = (unsigned char *) bufp->buffer; X int size = bufp->used; X register char *fastmap = bufp->fastmap; X register unsigned char *p = pattern; X register unsigned char *pend = pattern + size; X register int j, k; X unsigned char *translate = (unsigned char *) bufp->translate; X X unsigned char *stackb[NFAILURES]; X unsigned char **stackp = stackb; X X unsigned is_a_succeed_n; X X bzero (fastmap, (1 << BYTEWIDTH)); X bufp->fastmap_accurate = 1; X bufp->can_be_null = 0; X X while (p) X { X is_a_succeed_n = 0; X if (p == pend) X { X bufp->can_be_null = 1; X break; X } X#ifdef SWITCH_ENUM_BUG X switch ((int) ((enum regexpcode) *p++)) X#else X switch ((enum regexpcode) *p++) X#endif X { X case exactn: X if (translate) X fastmap[translate[p[1]]] = 1; X else X fastmap[p[1]] = 1; X break; X X case begline: X case before_dot: X case at_dot: X case after_dot: X case begbuf: X case endbuf: X case wordbound: X case notwordbound: X case wordbeg: X case wordend: X continue; X X case endline: X if (translate) X fastmap[translate['\n']] = 1; X else X fastmap['\n'] = 1; X X if (bufp->can_be_null != 1) X bufp->can_be_null = 2; X break; X X case jump_n: X case finalize_jump: X case maybe_finalize_jump: X case jump: X case dummy_failure_jump: X EXTRACT_NUMBER_AND_INCR (j, p); X p += j; X if (j > 0) X continue; X /* Jump backward reached implies we just went through X the body of a loop and matched nothing. X Opcode jumped to should be an on_failure_jump. X Just treat it like an ordinary jump. X For a * loop, it has pushed its failure point already; X If so, discard that as redundant. */ X X if ((enum regexpcode) *p != on_failure_jump X && (enum regexpcode) *p != succeed_n) X continue; X p++; X EXTRACT_NUMBER_AND_INCR (j, p); X p += j; X if (stackp != stackb && *stackp == p) X stackp--; X continue; X X case on_failure_jump: X handle_on_failure_jump: X EXTRACT_NUMBER_AND_INCR (j, p); X *++stackp = p + j; X if (is_a_succeed_n) X EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ X continue; X X case succeed_n: X is_a_succeed_n = 1; X /* Get to the number of times to succeed. */ X p += 2; X /* Increment p past the n for when k != 0. */ X EXTRACT_NUMBER_AND_INCR (k, p); X if (k == 0) X { X p -= 4; X goto handle_on_failure_jump; X } X continue; X X case set_number_at: X p += 4; X continue; X X case start_memory: X case stop_memory: X p++; X continue; X X case duplicate: X bufp->can_be_null = 1; X fastmap['\n'] = 1; X case anychar: X for (j = 0; j < (1 << BYTEWIDTH); j++) X if (j != '\n') X fastmap[j] = 1; X if (bufp->can_be_null) X return; X /* Don't return; check the alternative paths X so we can set can_be_null if appropriate. */ X break; X X case wordchar: X for (j = 0; j < (1 << BYTEWIDTH); j++) X if (SYNTAX (j) == Sword) X fastmap[j] = 1; X break; X X case notwordchar: X for (j = 0; j < (1 << BYTEWIDTH); j++) X if (SYNTAX (j) != Sword) X fastmap[j] = 1; X break; X X#ifdef emacs X case syntaxspec: X k = *p++; X for (j = 0; j < (1 << BYTEWIDTH); j++) X if (SYNTAX (j) == (enum syntaxcode) k) X fastmap[j] = 1; X break; X X case notsyntaxspec: X k = *p++; X for (j = 0; j < (1 << BYTEWIDTH); j++) X if (SYNTAX (j) != (enum syntaxcode) k) X fastmap[j] = 1; X break; X#endif /* not emacs */ X X case charset: X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) X if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) X { X if (translate) X fastmap[translate[j]] = 1; X else X fastmap[j] = 1; X } X break; X X case charset_not: X /* Chars beyond end of map must be allowed */ X for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) X if (translate) X fastmap[translate[j]] = 1; X else X fastmap[j] = 1; X X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) X if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) X { X if (translate) X fastmap[translate[j]] = 1; X else X fastmap[j] = 1; X } X break; X } X X /* Get here means we have successfully found the possible starting X characters of one path of the pattern. We need not follow this X path any farther. Instead, look at the next alternative X remembered in the stack. */ X if (stackp != stackb) X p = *stackp--; X else X break; X } X} END_OF_FILE if test 45472 -ne `wc -c <'regex.c1'`; then echo shar: \"'regex.c1'\" unpacked with wrong size! fi # end of 'regex.c1' fi if test -r regex.c1 -a -r regex.c2 then echo shar: concatenating \"regex.c1\" and \"regex.c2\" into \"regex.c\" cat regex.c1 regex.c2 >regex.c || echo shar: \"regex.c\" creation failed! fi echo shar: End of archive 7 \(of 8\). cp /dev/null ark7isdone MISSING="" for I in 1 2 3 4 5 6 7 8 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 8 archives. rm -f ark[1-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0 exit 0 # Just in case... -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.