[comp.sys.amiga] Here... have some free string functions

ins_adjb@jhunix.HCF.JHU.EDU (Daniel Jay Barrett) (06/08/88)

	Last night, while using Manx C, I needed some standard
string functions that are NOT included with Manx.  So, I wrote them
from scratch.  Here they are, to save YOU the trouble of writing
them someday.

	The functions are the UNIX-standard strtok(), strpbrk(),
strspn() and strcspn().  Don't blame me... I didn't make up the
names...!  See the "README" file for descriptions.  These functions
are VERY USEFUL!!!

	Oh yeah... they are hereby released into the Public Domain.

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	README
#	strcspn.c
#	strpbrk.c
#	strspn.c
#	strtok.c
# This archive created: Wed Jun  8 01:04:42 1988
export PATH; PATH=/bin:$PATH
if test -f 'README'
then
	echo shar: will not over-write existing file "'README'"
else
cat << \SHAR_EOF > 'README'
****************************************************************************
* STRING FUNCTIONS by Daniel J. Barrett.                                   *
* 		      barrett@cs.jhu.edu, ins_adjb@jhunix.UUCP             *
*                                                                          *
* THESE ROUTINES ARE IN THE PUBLIC DOMAIN.                                 *
****************************************************************************

	Manx `C' provides most of the standard "UNIX" string functions, such
as strcat(), strcmp(), and so on.

	Four of the functions that Manx does NOT provide are strspn(), 
strcspn(), strpbrk(), and strtok().  Here are my versions of these 4 missing 
functions, written from scratch.  I wrote several versions, and these were 
the fastest.  (Undoubtedly, assembler would be faster, but I don't know
assembler.  Feel free to "one-up" me.  :-))

	Note that some of these functions call built-in Manx functions like
index() and strchr().  These versions are FASTER than when I did everything 
"by hand".

	I don't own Lattice `C', but I suppose these routines will work
if you have the functions index() and strchr()... they would be easy to
write, anyway.

Here are brief descriptions of the 4 functions.

int strspn(string, character_set)
char *string, *character_set;

	Starting from the beginning of string "string", count how
	many of its characters are found in "character_set".  When
	you hit the first character NOT in "character_set", RETURN
	the number of characters found so far.  If either argument
	is NULL, strspn() returns 0.

	strspn("abcdefg", "abd")	returns 2.
	strspn("abcdefg", "xyyz")	returns 0.
	strspn("abcdefg", "dbc")	returns 0.
	strspn("abcdefg", "dxbgac")	returns 4.

int strcspn(string, character_set)
char *string, *character_set;

	This function is exactly the opposite of strspn().  Return the
	number of characters, starting from the beginning of the string,
	that are NOT found in "character_set".  Keep counting until you
	hit the first character in "string" that IS found in "character_set";
	then RETURN.  If either argument is NULL, strcspn() returns 0.

	strcspn("abcdefg", "abd")	returns 0.
	strcspn("abcdefg", "xyyz")	returns 7.
	strcspn("abcdefg", "dbc")	returns 1.
	strcspn("abcdefg", "dxbgac")	returns 0.

char *strpbrk(string, character_set)
char *string, *character_set;

	Return a pointer to the first character in "string" that
	appears in "character_set".  If either argument is NULL,
	strpbrk() returns NULL.

	strpbrk("abcdefg", "abd")	returns "abcdefg".
	strpbrk("abcdefg", "xyyz")	returns NULL.
	strpbrk("abcdefg", "dbc")	returns "bcdefg".
	strpbrk("abcdefg", "dxbgac")	returns "abcdefg".

char *strtok(string, character_set)
char *string, *character_set;

	This is a VERY USEFUL function.  The UNIX manual explains it best:

	`The strtok subroutine considers the string "string" to consist of
	a sequence of zero or more text tokens separated by spans of
	one or more characters from the separator string "character_set".  
	The first call (with pointer "string" specified) returns a pointer
	to the first character of the first token, and will have written
	a null character into "string" immediately following the
	returned token.  The function keeps track of its position in
	the string between separate calls, so that subsequent calls
	(which must be made with the first argument a NULL pointer)
	will work through the string "string" immediately following that
	token.  In this way, subsequent calls will work through the
	string "string" until no tokens remain.  The separator string 
	"character_set" may be different from call to call.  When no token
	remains in "string", a NULL pointer is returned.'

Here is an example program demonstrating strtok().

/******************************************************************/
#include <stdio.h>

extern char *strtok();
char tokesep[] = " \n\t\rx";
	
/* Read lines from standard input and print every token found on each
 * line, one per output line.  Tokens are separated by any of the
 * characters in "tokesep".
 */
main()
{
	char buf[BUFSIZ], *tokep;

	while (fgets(buf, sizeof(buf), stdin)) {
		/* Test the token BEFORE printing it: strtok() returns NULL
		 * on the very first call when the line holds no tokens
		 * (for example, a blank line).
		 */
		for (tokep = strtok(buf, tokesep);
		     tokep;
		     tokep = strtok((char *)NULL, tokesep))
			printf("Token is %s\n", tokep);
	}
	return(0);
}

/******************************************************************/
SHAR_EOF
fi # end of overwriting check
if test -f 'strcspn.c'
then
	echo shar: will not over-write existing file "'strcspn.c'"
else
cat << \SHAR_EOF > 'strcspn.c'
#define STRING_END	'\0'

/* Return the number of characters NOT from "charset" that are at the
 * BEGINNING of string "str"; that is, the length of the leading part
 * of "str" that contains no character from "charset".
 *
 * Returns 0 if either argument is a null pointer.
*/

int strcspn(str, charset)
register char *str, *charset;
{
	register char *s;
	extern char *strchr();	/* Declared so the result is a pointer, not int. */

	if (!str || !charset)
		return(0);

	/* The end of "str" is tested explicitly, so the result does not
	 * depend on whether strchr() matches the terminating null of
	 * "charset".
	 */
	for (s = str; *s != STRING_END && !strchr(charset, *s); s++)
		;
	return((int)(s - str));
}

SHAR_EOF
fi # end of overwriting check
if test -f 'strpbrk.c'
then
	echo shar: will not over-write existing file "'strpbrk.c'"
else
cat << \SHAR_EOF > 'strpbrk.c'
#define STRING_END	'\0'
#ifndef NULL
#define NULL	0L
#endif

/* Return a pointer to the first character in "str" that also appears
 * in "charset".
 *
 * Returns NULL if no character of "str" appears in "charset", or if
 * either argument is a null pointer.
 */

char *strpbrk(str, charset)
char *str, *charset;
{
	register char *s;
	extern char *index();

	if (!str || !charset)
		return(NULL);

	/* Stop at the end of "str" ourselves; the terminator is never
	 * handed to index().
	 */
	for (s = str; *s != STRING_END; s++)
		if (index(charset, *s))
			return(s);
	return(NULL);
}
SHAR_EOF
fi # end of overwriting check
if test -f 'strspn.c'
then
	echo shar: will not over-write existing file "'strspn.c'"
else
cat << \SHAR_EOF > 'strspn.c'
/* Return the number of characters from "charset" that are at the BEGINNING
 * of string "str"; that is, the length of the leading part of "str" that
 * consists only of characters found in "charset".
 *
 * Returns 0 if either argument is a null pointer.
*/

int strspn(str, charset)
register char *str, *charset;
{
	register char *s;
	extern char *index();	/* Declared so the result is a pointer, not int. */

	if (!str || !charset)
		return(0);

	/* The end of "str" must be tested explicitly: an index() that
	 * matches the terminating null of "charset" would otherwise keep
	 * the scan going past the end of "str".
	 */
	for (s = str; *s != '\0' && index(charset, *s); s++)
		;
	return((int)(s - str));
}
SHAR_EOF
fi # end of overwriting check
if test -f 'strtok.c'
then
	echo shar: will not over-write existing file "'strtok.c'"
else
cat << \SHAR_EOF > 'strtok.c'
#define STRING_END	'\0'
#ifndef NULL
#define NULL	0L
#endif

/* Split a string into tokens separated by characters from "separators".
 *
 * Pass the string as "buf" on the first call and a null pointer on each
 * later call to continue with the same string.  Each call skips leading
 * separators, overwrites the separator that ends the token with a null
 * character, and returns a pointer to the start of the token.
 *
 * Returns NULL when no token remains, or when "buf" is a null pointer
 * and there is no saved position to continue from.  The position is
 * kept in a static variable, so only one string can be tokenized at a
 * time.
 */

char *strtok(buf, separators)
char *buf, *separators;
{
	register char *token, *end;	/* Start and end of token. */
	extern char *strpbrk();
	extern int strspn();
	static char *fromLastTime;	/* Where the next call resumes. */

	token = buf ? buf : fromLastTime;
	if (!token)
		return(NULL);

	token += strspn(token, separators);	/* Find token! */
	if (*token == STRING_END) {
		/* Nothing left.  Forget the saved position so that a later
		 * continuation call cannot resume some earlier string.
		 */
		fromLastTime = NULL;
		return(NULL);
	}

	end = strpbrk(token, separators);
	if (end) {
		*end = STRING_END;		/* Cut it short! */
		fromLastTime = end + 1;
	} else {
		/* The token runs to the end of the string: there is no
		 * separator to overwrite and nothing more to scan.
		 */
		fromLastTime = NULL;
	}
	return(token);
}
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
exit 0

-- 
Dan Barrett	ins_adjb@jhunix.UUCP
		barrett@cs.jhu.edu