[comp.sources.misc] v14i074: strstr.c implementation

gwyn@smoke.brl.mil (Doug Gwyn) (08/31/90)

Posting-number: Volume 14, Issue 74
Submitted-by: gwyn@smoke.brl.mil (Doug Gwyn)
Archive-name: strstr/part01

#! /bin/sh
# This file was wrapped with "dummyshar".  "sh" this file to extract.
# Contents:  strstr.c
echo extracting 'strstr.c'
if test -f 'strstr.c' -a -z "$1"; then echo Not overwriting 'strstr.c'; else
sed 's/^X//' << \EOF > 'strstr.c'
X/*
X	strstr - public-domain implementation of standard C library function
X
X	last edit:	24-Aug-1990	D A Gwyn
X
X	This is an original implementation based on an idea by D M Sunday,
X	essentially the "quick search" algorithm described in CACM V33 N8.
X	Unlike Sunday's implementation, this one does not wander past the
X	ends of the strings (which can cause malfunctions under certain
X	circumstances), nor does it require the length of the searched
X	text to be determined in advance.  There are numerous other subtle
X	improvements too.  The code is intended to be fully portable, but in
X	environments that do not conform to the C standard, you should check
X	the sections below marked "configure as required".  There are also
X	a few compilation options, as follows:
X
X	#define ROBUST	to obtain sane behavior when invoked with a null
X			pointer argument, at a miniscule cost in speed
X	#define ZAP	to use memset() to zero the shift[] array; this may
X			be faster in some implementations, but could fail on
X			unusual architectures
X	#define DEBUG	to enable assertions (bug detection)
X	#define TEST	to enable the test program attached at the end
X*/
X#define ROBUST
X#define	ZAP
X
X#ifdef __STDC__
X
X#include	<stddef.h>		/* defines size_t and NULL */
X#include	<limits.h>		/* defines UCHAR_MAX */
X
X#ifdef ZAP
Xtypedef void	*pointer;
Xextern pointer	memset( pointer, int, size_t );
X#endif
X
X#else	/* normal UNIX-like C environment assumed; configure as required: */
X
Xtypedef unsigned	size_t; 	/* type of result of sizeof */
X
X#define	NULL		0		/* null pointer constant */
X
X#define	UCHAR_MAX	255		/* largest value of unsigned char */
X					/* 255 @ 8 bits, 65535 @ 16 bits */
X
X#ifdef ZAP
Xtypedef char	*pointer;
Xextern pointer	memset();
X#endif
X
X#define const	/* nothing */
X
X#endif	/* __STDC__ */
X
X#ifndef DEBUG
X#define	NDEBUG
X#endif
X#include	<assert.h>
X
Xtypedef const unsigned char	cuc;	/* char variety used in algorithm */
X
X#define EOS	'\0'			/* C string terminator */
X
Xchar *					/* returns -> leftmost occurrence,
X					   or null pointer if not present */
Xstrstr( s1, s2 )
X	const char	*s1;		/* -> string to be searched */
X	const char	*s2;		/* -> search-pattern string */
X	{
X	register cuc	*t;		/* -> text character being tested */
X	register cuc	*p;		/* -> pattern char being tested */
X	register cuc	*tx;		/* -> possible start of match */
X	register size_t	m;		/* length of pattern */
X#if UCHAR_MAX > 255			/* too large for auto allocation */
X	static				/* not malloc()ed; that can fail! */
X#endif					/* else allocate shift[] on stack */
X		size_t	shift[UCHAR_MAX + 1];	/* pattern shift table */
X
X#ifdef ROBUST				/* not required by C standard */
X	if ( s1 == NULL || s2 == NULL )
X		return NULL;		/* certainly, no match is found! */
X#endif
X
X	/* Precompute shift intervals based on the pattern;
X	   the length of the pattern is determined as a side effect: */
X
X#ifdef ZAP
X	(void)memset( (pointer)&shift[1], 0, UCHAR_MAX * sizeof(size_t) );
X#else
X	{
X	register unsigned char	c;
X
X	c = UCHAR_MAX;
X	do
X		shift[c] = 0;
X	while ( --c > 0 );
X	}
X#endif
X	/* Note: shift[0] is undefined at this point (fixed later). */
X
X	for ( m = 1, p = (cuc *)s2; *p != EOS; ++m, ++p )
X		shift[(cuc)*p] = m;
X
X	assert(s2[m - 1] == EOS);
X
X	{
X	register unsigned char	c;
X
X	c = UCHAR_MAX;
X	do
X		shift[c] = m - shift[c];
X	while ( --c > 0 );
X
X	/* Note: shift[0] is still undefined at this point. */
X	}
X
X	shift[0] = --m; 		/* shift[EOS]; important details! */
X
X	assert(s2[m] == EOS);
X
X	/* Try to find the pattern in the text string: */
X
X	for ( tx = (cuc *)s1; ; tx += shift[*t] )
X		{
X		for ( t = tx, p = (cuc *)s2; ; ++t, ++p )
X			{
X			if ( *p == EOS )       /* entire pattern matched */
X				return (char *)tx;
X
X			if ( *p != *t )
X				break;
X			}
X
X		do	{
X			assert(m > 0);
X			assert(t - tx < m);
X
X			if ( *t == EOS )
X				return NULL;	/* no match */
X			}
X		while ( ++t - tx != m );	/* < */
X		}
X	}
X
X#ifdef TEST
X
X#ifdef __STDC__
X
X#include	<stdlib.h>		/* defines exit, EXIT_* */
X
X#else	/* normal UNIX-like C environment assumed; configure as required: */
X
Xextern void	exit();
X#define	EXIT_SUCCESS	0
X#define	EXIT_FAILURE	1
X
X#endif	/* __STDC__ */
X
X#include	<stdio.h>
X
Xint
Xmain( argc, argv )
X	int		argc;
X	char		*argv[];
X	{
X	register char	*p;
X
X	if ( argc < 3 )
X		{
X		(void)fprintf( stderr, "usage: strstr text pattern\n" );
X		exit( EXIT_FAILURE );
X		}
X
X	if ( (p = strstr( argv[1], argv[2] )) == NULL )
X		(void)printf( "pattern not found\n" );
X	else
X		(void)printf( "pattern start: %s\n", p );
X
X	return EXIT_SUCCESS;
X	}
X
X#endif	/* TEST */
EOF
chars=`wc -c < 'strstr.c'`
if test $chars !=     4462; then echo 'strstr.c' is $chars characters, should be     4462 characters!; fi
fi
exit 0