[net.sources] public-domain string

henry@utzoo.UUCP (Henry Spencer) (11/30/83)

The following (long) article is the source for a reimplementation of
the string(3) routines from System III.  This includes the V7 string
routines plus some useful extras.  If these things have changed any
in System V, I will update mine as soon as I see a System V manual.

Although these routines were written from a System III manual page,
and are believed identical in behavior to the System III routines
(except for having the signed-char bug I posted a while ago fixed),
they are *not* Bell code, they are not derived from Bell code, and
they were not written by inspection of Bell code.  They are hereby
placed into the public domain.  They may be used for any purpose on
any system without anyone's permission.

The routines are accompanied by a manual page (a minor variant of
the Bell one) and the source for a test program that gives them a
fairly thorough workout.  It was this test program that found the
signed-char bug in the V7 routines.

-----
cat <<'!' >index.c
/*
 * index - find first occurrence of a character in a string
 */

#define	NULL	0

char *				/* found char, or NULL if none */
index(s, c)
char *s;
char c;
{
	extern char *strchr();

	return(strchr(s, c));
}
!
cat <<'!' >rindex.c
/*
 * rindex - find last occurrence of a character in a string
 */

#define	NULL	0

char *				/* found char, or NULL if none */
rindex(s, c)
char *s;
char c;
{
	extern char *strrchr();

	return(strrchr(s, c));
}
!
cat <<'!' >strcat.c
/*
 * strcat - append string s2 to s1
 */
char *				/* s1 */
strcat(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;

	for (scan1 = s1; *scan1 != '\0'; scan1++)
		continue;
	scan2 = s2;
	while ((*scan1++ = *scan2++) != '\0')
		continue;
	return(s1);
}
!
cat <<'!' >strchr.c
/*
 * strchr - find first occurrence of a character in a string
 */

#define	NULL	0

char *				/* found char, or NULL if none */
strchr(s, c)
char *s;
register char c;
{
	register char *scan;

	/*
	 * The odd placement of the two tests is so NUL is findable.
	 */
	for (scan = s; *scan != c;)	/* ++ moved down for opt. */
		if (*scan++ == '\0')
			return(NULL);
	return(scan);
}
!
cat <<'!' >strcmp.c
/*
 * strcmp - compare string s1 to s2
 *
 * CHARBITS should be defined only if the compiler lacks "unsigned char".
 * It should be a mask, e.g. 0377 for an 8-bit machine.
 */

#ifndef CHARBITS
#	define	UNSCHAR(c)	((unsigned char)(c))
#else
#	define	UNSCHAR(c)	((c)&CHARBITS)
#endif

int				/* <0 for <, 0 for ==, >0 for > */
strcmp(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;

	scan1 = s1;
	scan2 = s2;
	while (*scan1 != '\0' && *scan1 == *scan2) {
		scan1++;
		scan2++;
	}

	/*
	 * The UNSCHAR invocations are needed because chars
	 * can be negative on some machines and on such machines the
	 * end-of-string NUL does not collate low against all chars.
	 */
	return(UNSCHAR(*scan1) - UNSCHAR(*scan2));
}
!
cat <<'!' >strcpy.c
/*
 * strcpy - copy string s2 to s1
 */
char *				/* s1 */
strcpy(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;

	scan1 = s1;
	scan2 = s2;
	while ((*scan1++ = *scan2++) != '\0')
		continue;
	return(s1);
}
!
cat <<'!' >strcspn.c
/*
 * strcspn - find length of initial segment of s1 consisting entirely
 * of characters not from s2
 */

int
strcspn(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;
	register int count;

	count = 0;
	for (scan1 = s1; *scan1 != '\0'; scan1++) {
		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
			if (*scan1 == *scan2++)
				return(count);
		count++;
	}
	return(count);
}
!
cat <<'!' >strlen.c
/*
 * strlen - length of string (not including NUL)
 */
int
strlen(s)
char *s;
{
	register char *scan;
	register int count;

	count = 0;
	scan = s;
	while (*scan++ != '\0')
		count++;
	return(count);
}
!
cat <<'!' >strncat.c
/*
 * strncat - append at most n characters of string s2 to s1
 */
char *				/* s1 */
strncat(s1, s2, n)
char *s1;
char *s2;
int n;
{
	register char *scan1;
	register char *scan2;
	register int count;

	for (scan1 = s1; *scan1 != '\0'; scan1++)
		continue;
	scan2 = s2;
	count = n;
	while (*scan2 != '\0' && --count >= 0)
		*scan1++ = *scan2++;
	*scan1++ = '\0';
	return(s1);
}
!
cat <<'!' >strncmp.c
/*
 * strncmp - compare at most n characters of string s1 to s2
 *
 * CHARBITS should be defined only if the compiler lacks "unsigned char".
 * It should be a mask, e.g. 0377 for an 8-bit machine.
 */

#ifndef CHARBITS
#	define	UNSCHAR(c)	((unsigned char)(c))
#else
#	define	UNSCHAR(c)	((c)&CHARBITS)
#endif

int				/* <0 for <, 0 for ==, >0 for > */
strncmp(s1, s2, n)
char *s1;
char *s2;
int n;
{
	register char *scan1;
	register char *scan2;
	register int count;

	scan1 = s1;
	scan2 = s2;
	count = n;
	while (--count >= 0 && *scan1 != '\0' && *scan1 == *scan2) {
		scan1++;
		scan2++;
	}
	if (count < 0)
		return(0);

	/*
	 * These UNSCHAR invocations are necessary because the end-of-string
	 * NUL does not collate low with respect to all characters if the
	 * machine has signed characters, hence the simple subtraction is
	 * not quite good enough.
	 */
	return(UNSCHAR(*scan1) - UNSCHAR(*scan2));
}
!
cat <<'!' >strncpy.c
/*
 * strncpy - copy at most n characters of string s2 to s1
 */
char *				/* s1 */
strncpy(s1, s2, n)
char *s1;
char *s2;
int n;
{
	register char *scan1;
	register char *scan2;
	register int count;

	scan1 = s1;
	scan2 = s2;
	count = n;
	while (--count >= 0 && (*scan1++ = *scan2++) != '\0')
		continue;
	while (--count >= 0)
		*scan1++ = '\0';
	return(s1);
}
!
cat <<'!' >strpbrk.c
/*
 * strpbrk - find first occurrence of any char from s2 in s1
 */

#define	NULL	0

char *				/* found char, or NULL if none */
strpbrk(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;

	for (scan1 = s1; *scan1 != '\0'; scan1++) {
		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
			if (*scan1 == *scan2++)
				return(scan1);
	}
	return(NULL);
}
!
cat <<'!' >strrchr.c
/*
 * strrchr - find last occurrence of a character in a string
 */

#define	NULL	0

char *				/* found char, or NULL if none */
strrchr(s, c)
char *s;
register char c;
{
	register char *scan;
	register char *place;

	place = NULL;
	for (scan = s; *scan != '\0'; scan++)
		if (*scan == c)
			place = scan;
	if (c == '\0')
		return(scan);
	return(place);
}
!
cat <<'!' >strspn.c
/*
 * strspn - find length of initial segment of s1 consisting entirely
 * of characters from s2
 */

int
strspn(s1, s2)
char *s1;
char *s2;
{
	register char *scan1;
	register char *scan2;
	register int count;

	count = 0;
	for (scan1 = s1; *scan1 != '\0'; scan1++) {
		for (scan2 = s2; *scan2 != '\0'; scan2++)
			if (*scan1 == *scan2)
				break;
		if (*scan2 == '\0')
			return(count);
		count++;
	}
	return(count);
}
!
cat <<'!' >strtok.c
/*
 * Get next token from string s1 (NULL on 2nd, 3rd, etc. calls),
 * where tokens are nonempty strings separated by runs of
 * chars from s2.  Writes NULs into s1 to end tokens.  s2 need not
 * remain constant from call to call.
 */

#define	NULL	0

static char *scanpoint = NULL;

char *				/* NULL if no token left */
strtok(s1, s2)
char *s1;
register char *s2;
{
	register char *scan;
	char *tok;
	register char *scan2;

	if (s1 == NULL && scanpoint == NULL)
		return(NULL);
	if (s1 != NULL)
		scan = s1;
	else
		scan = scanpoint;

	/*
	 * Scan leading delimiters.
	 */
	for (; *scan != '\0'; scan++) {
		for (scan2 = s2; *scan2 != '\0'; scan2++)
			if (*scan == *scan2)
				break;
		if (*scan2 == '\0')
			break;
	}
	if (*scan == '\0') {
		scanpoint = NULL;
		return(NULL);
	}

	tok = scan;

	/*
	 * Scan token.
	 */
	for (; *scan != '\0'; scan++) {
		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
			if (*scan == *scan2++) {
				scanpoint = scan+1;
				*scan = '\0';
				return(tok);
			}
	}

	/*
	 * Reached end of string.
	 */
	scanpoint = NULL;
	return(tok);
}
!
cat <<'!' >tester.c
/*
 * Test program for string(3) routines.
 * 
 * If compiled with -DV7, tests only those routines found in V7.
 *
 * Note that at least one Bell Labs implementation of the string
 * routines flunks a couple of these tests -- the ones which test
 * behavior on "negative" characters.
 */

#include <stdio.h>

extern char *strcat();
extern char *strncat();
extern int strcmp();
extern int strncmp();
extern char *strcpy();
extern char *strncpy();
extern int strlen();
#ifndef V7
extern char *strchr();
#endif
extern char *index();
#ifndef V7
extern char *strrchr();
#endif
extern char *rindex();
#ifndef V7
extern char *strpbrk();
extern int strspn();
extern int strcspn();
extern char *strtok();
#endif

#define	STREQ(a, b)	(strcmp((a), (b)) == 0)

char *it = "<UNSET>";		/* Routine name for message routines. */

/*
 - equal - complain if first two args don't strcmp as equal
 */
void
equal(a, b, number)
char *a;
char *b;
int number;			/* Test number for error message. */
{
	check(a != NULL && b != NULL && STREQ(a, b), number);
}

/*
 - check - complain if condition is not true
 */
void
check(thing, number)
int thing;
int number;			/* Test number for error message. */
{
	if (!thing)
		printf("%s flunked test %d\n", it, number);
}

/* ARGSUSED */
main(argc, argv)
int argc;
char *argv[];
{
	char one[50];
	char two[50];

	/*
	 * Test strcmp first because we use it to test other things.
	 */
	it = "strcmp";
	check(strcmp("", "") == 0, 1);		/* Trivial case. */
	check(strcmp("a", "a") == 0, 2);	/* Identity. */
	check(strcmp("abc", "abc") == 0, 3);	/* Multicharacter. */
	check(strcmp("abc", "abcd") < 0, 4);	/* Length mismatches. */
	check(strcmp("abcd", "abc") > 0, 5);
	check(strcmp("abcd", "abce") < 0, 6);	/* Honest miscompares. */
	check(strcmp("abce", "abcd") > 0, 7);
	check(strcmp("a\203", "a") > 0, 8);	/* Tricky if char signed. */

	/*
	 * Test strcpy next because we need it to set up other tests.
	 */
	it = "strcpy";
	check(strcpy(one, "abcd") == one, 1);	/* Returned value. */
	equal(one, "abcd", 2);			/* Basic test. */

	(void) strcpy(one, "x");
	equal(one, "x", 3);			/* Writeover. */
	equal(one+2, "cd", 4);			/* Wrote too much? */

	(void) strcpy(two, "hi there");
	(void) strcpy(one, two);
	equal(one, "hi there", 5);		/* Basic test encore. */
	equal(two, "hi there", 6);		/* Stomped on source? */

	(void) strcpy(one, "");
	equal(one, "", 7);			/* Boundary condition. */

	/*
	 * strcat
	 */
	it = "strcat";
	(void) strcpy(one, "ijk");
	check(strcat(one, "lmn") == one, 1);	/* Returned value. */
	equal(one, "ijklmn", 2);		/* Basic test. */

	(void) strcpy(one, "x");
	(void) strcat(one, "yz");
	equal(one, "xyz", 3);			/* Writeover. */
	equal(one+4, "mn", 4);			/* Wrote too much? */

	(void) strcpy(one, "gh");
	(void) strcpy(two, "ef");
	(void) strcat(one, two);
	equal(one, "ghef", 5);			/* Basic test encore. */
	equal(two, "ef", 6);			/* Stomped on source? */

	(void) strcpy(one, "");
	(void) strcat(one, "");
	equal(one, "", 7);			/* Boundary conditions. */
	(void) strcpy(one, "ab");
	(void) strcat(one, "");
	equal(one, "ab", 8);
	(void) strcpy(one, "");
	(void) strcat(one, "cd");
	equal(one, "cd", 9);

	/*
	 * strncat - first test it as strcat, with big counts, then
	 * test the count mechanism.
	 */
	it = "strncat";
	(void) strcpy(one, "ijk");
	check(strncat(one, "lmn", 99) == one, 1);	/* Returned value. */
	equal(one, "ijklmn", 2);		/* Basic test. */

	(void) strcpy(one, "x");
	(void) strncat(one, "yz", 99);
	equal(one, "xyz", 3);			/* Writeover. */
	equal(one+4, "mn", 4);			/* Wrote too much? */

	(void) strcpy(one, "gh");
	(void) strcpy(two, "ef");
	(void) strncat(one, two, 99);
	equal(one, "ghef", 5);			/* Basic test encore. */
	equal(two, "ef", 6);			/* Stomped on source? */

	(void) strcpy(one, "");
	(void) strncat(one, "", 99);
	equal(one, "", 7);			/* Boundary conditions. */
	(void) strcpy(one, "ab");
	(void) strncat(one, "", 99);
	equal(one, "ab", 8);
	(void) strcpy(one, "");
	(void) strncat(one, "cd", 99);
	equal(one, "cd", 9);

	(void) strcpy(one, "ab");
	(void) strncat(one, "cdef", 2);
	equal(one, "abcd", 10);			/* Count-limited. */

	(void) strncat(one, "gh", 0);
	equal(one, "abcd", 11);			/* Zero count. */

	(void) strncat(one, "gh", 2);
	equal(one, "abcdgh", 12);		/* Count and length equal. */

	/*
	 * strncmp - first test as strcmp with big counts, then test
	 * count code.
	 */
	it = "strncmp";
	check(strncmp("", "", 99) == 0, 1);	/* Trivial case. */
	check(strncmp("a", "a", 99) == 0, 2);	/* Identity. */
	check(strncmp("abc", "abc", 99) == 0, 3);	/* Multicharacter. */
	check(strncmp("abc", "abcd", 99) < 0, 4);	/* Length unequal. */
	check(strncmp("abcd", "abc", 99) > 0, 5);
	check(strncmp("abcd", "abce", 99) < 0, 6);	/* Honestly unequal. */
	check(strncmp("abce", "abcd", 99) > 0, 7);
	check(strncmp("a\203", "a", 2) > 0, 8);	/* Tricky if '\203' < 0 */
	check(strncmp("abce", "abcd", 3) == 0, 9);	/* Count limited. */
	check(strncmp("abce", "abc", 3) == 0, 10);	/* Count == length. */
	check(strncmp("abcd", "abce", 4) < 0, 11);	/* Nudging limit. */
	check(strncmp("abc", "def", 0) == 0, 12);	/* Zero count. */

	/*
	 * strncpy - testing is a bit different because of odd semantics
	 */
	it = "strncpy";
	check(strncpy(one, "abc", 4) == one, 1);	/* Returned value. */
	equal(one, "abc", 2);			/* Did the copy go right? */

	(void) strcpy(one, "abcdefgh");
	(void) strncpy(one, "xyz", 2);
	equal(one, "xycdefgh", 3);		/* Copy cut by count. */

	(void) strcpy(one, "abcdefgh");
	(void) strncpy(one, "xyz", 3);		/* Copy cut just before NUL. */
	equal(one, "xyzdefgh", 4);

	(void) strcpy(one, "abcdefgh");
	(void) strncpy(one, "xyz", 4);		/* Copy just includes NUL. */
	equal(one, "xyz", 5);
	equal(one+4, "efgh", 6);		/* Wrote too much? */

	(void) strcpy(one, "abcdefgh");
	(void) strncpy(one, "xyz", 5);		/* Copy includes padding. */
	equal(one, "xyz", 7);
	equal(one+4, "", 8);
	equal(one+5, "fgh", 9);

	(void) strcpy(one, "abc");
	(void) strncpy(one, "xyz", 0);		/* Zero-length copy. */
	equal(one, "abc", 10);	

	(void) strncpy(one, "", 2);		/* Zero-length source. */
	equal(one, "", 11);
	equal(one+1, "", 12);	
	equal(one+2, "c", 13);

	(void) strcpy(one, "hi there");
	(void) strncpy(two, one, 9);
	equal(two, "hi there", 14);		/* Just paranoia. */
	equal(one, "hi there", 15);		/* Stomped on source? */

	/*
	 * strlen
	 */
	it = "strlen";
	check(strlen("") == 0, 1);		/* Empty. */
	check(strlen("a") == 1, 2);		/* Single char. */
	check(strlen("abcd") == 4, 3);		/* Multiple chars. */

#ifndef V7
	/*
	 * strchr
	 */
	it = "strchr";
	check(strchr("abcd", 'z') == NULL, 1);	/* Not found. */
	(void) strcpy(one, "abcd");
	check(strchr(one, 'c') == one+2, 2);	/* Basic test. */
	check(strchr(one, 'd') == one+3, 3);	/* End of string. */
	check(strchr(one, 'a') == one, 4);	/* Beginning. */
	check(strchr(one, '\0') == one+4, 5);	/* Finding NUL. */
	(void) strcpy(one, "ababa");
	check(strchr(one, 'b') == one+1, 6);	/* Finding first. */
	(void) strcpy(one, "");
	check(strchr(one, 'b') == NULL, 7);	/* Empty string. */
	check(strchr(one, '\0') == one, 8);	/* NUL in empty string. */
#endif

	/*
	 * index - just like strchr
	 */
	it = "index";
	check(index("abcd", 'z') == NULL, 1);	/* Not found. */
	(void) strcpy(one, "abcd");
	check(index(one, 'c') == one+2, 2);	/* Basic test. */
	check(index(one, 'd') == one+3, 3);	/* End of string. */
	check(index(one, 'a') == one, 4);	/* Beginning. */
	check(index(one, '\0') == one+4, 5);	/* Finding NUL. */
	(void) strcpy(one, "ababa");
	check(index(one, 'b') == one+1, 6);	/* Finding first. */
	(void) strcpy(one, "");
	check(index(one, 'b') == NULL, 7);	/* Empty string. */
	check(index(one, '\0') == one, 8);	/* NUL in empty string. */

#ifndef V7
	/*
	 * strrchr
	 */
	it = "strrchr";
	check(strrchr("abcd", 'z') == NULL, 1);	/* Not found. */
	(void) strcpy(one, "abcd");
	check(strrchr(one, 'c') == one+2, 2);	/* Basic test. */
	check(strrchr(one, 'd') == one+3, 3);	/* End of string. */
	check(strrchr(one, 'a') == one, 4);	/* Beginning. */
	check(strrchr(one, '\0') == one+4, 5);	/* Finding NUL. */
	(void) strcpy(one, "ababa");
	check(strrchr(one, 'b') == one+3, 6);	/* Finding last. */
	(void) strcpy(one, "");
	check(strrchr(one, 'b') == NULL, 7);	/* Empty string. */
	check(strrchr(one, '\0') == one, 8);	/* NUL in empty string. */
#endif

	/*
	 * rindex - just like strrchr
	 */
	it = "rindex";
	check(rindex("abcd", 'z') == NULL, 1);	/* Not found. */
	(void) strcpy(one, "abcd");
	check(rindex(one, 'c') == one+2, 2);	/* Basic test. */
	check(rindex(one, 'd') == one+3, 3);	/* End of string. */
	check(rindex(one, 'a') == one, 4);	/* Beginning. */
	check(rindex(one, '\0') == one+4, 5);	/* Finding NUL. */
	(void) strcpy(one, "ababa");
	check(rindex(one, 'b') == one+3, 6);	/* Finding last. */
	(void) strcpy(one, "");
	check(rindex(one, 'b') == NULL, 7);	/* Empty string. */
	check(rindex(one, '\0') == one, 8);	/* NUL in empty string. */

#ifndef V7
	/*
	 * strpbrk - somewhat like strchr
	 */
	it = "strpbrk";
	check(strpbrk("abcd", "z") == NULL, 1);	/* Not found. */
	(void) strcpy(one, "abcd");
	check(strpbrk(one, "c") == one+2, 2);	/* Basic test. */
	check(strpbrk(one, "d") == one+3, 3);	/* End of string. */
	check(strpbrk(one, "a") == one, 4);	/* Beginning. */
	check(strpbrk(one, "") == NULL, 5);	/* Empty search list. */
	check(strpbrk(one, "cb") == one+1, 6);	/* Multiple search. */
	(void) strcpy(one, "abcabdea");
	check(strpbrk(one, "b") == one+1, 7);	/* Finding first. */
	check(strpbrk(one, "cb") == one+1, 8);	/* With multiple search. */
	check(strpbrk(one, "db") == one+1, 9);	/* Another variant. */
	(void) strcpy(one, "");
	check(strpbrk(one, "bc") == NULL, 10);	/* Empty string. */
	check(strpbrk(one, "") == NULL, 11);	/* Both strings empty. */

	/*
	 * strspn
	 */
	it = "strspn";
	check(strspn("abcba", "abc") == 5, 1);	/* Whole string. */
	check(strspn("abcba", "ab") == 2, 2);	/* Partial. */
	check(strspn("abc", "qx") == 0, 3);	/* None. */
	check(strspn("", "ab") == 0, 4);	/* Null string. */
	check(strspn("abc", "") == 0, 5);	/* Null search list. */

	/*
	 * strcspn
	 */
	it = "strcspn";
	check(strcspn("abcba", "qx") == 5, 1);	/* Whole string. */
	check(strcspn("abcba", "cx") == 2, 2);	/* Partial. */
	check(strcspn("abc", "abc") == 0, 3);	/* None. */
	check(strcspn("", "ab") == 0, 4);	/* Null string. */
	check(strcspn("abc", "") == 3, 5);	/* Null search list. */

	/*
	 * strtok - the hard one
	 */
	it = "strtok";
	(void) strcpy(one, "first, second, third");
	equal(strtok(one, ", "), "first", 1);	/* Basic test. */
	equal(one, "first", 2);
	equal(strtok((char *)NULL, ", "), "second", 3);
	equal(strtok((char *)NULL, ", "), "third", 4);
	check(strtok((char *)NULL, ", ") == NULL, 5);
	(void) strcpy(one, ", first, ");
	equal(strtok(one, ", "), "first", 6);	/* Extra delims, 1 tok. */
	check(strtok((char *)NULL, ", ") == NULL, 7);
	(void) strcpy(one, "1a, 1b; 2a, 2b");
	equal(strtok(one, ", "), "1a", 8);	/* Changing delim lists. */
	equal(strtok((char *)NULL, "; "), "1b", 9);
	equal(strtok((char *)NULL, ", "), "2a", 10);
	(void) strcpy(two, "x-y");
	equal(strtok(two, "-"), "x", 11);	/* New string before done. */
	equal(strtok((char *)NULL, "-"), "y", 12);
	check(strtok((char *)NULL, "-") == NULL, 13);
	(void) strcpy(one, "a,b, c,, ,d");
	equal(strtok(one, ", "), "a", 14);	/* Different separators. */
	equal(strtok((char *)NULL, ", "), "b", 15);
	equal(strtok((char *)NULL, " ,"), "c", 16);	/* Permute list too. */
	equal(strtok((char *)NULL, " ,"), "d", 17);
	check(strtok((char *)NULL, ", ") == NULL, 18);
	check(strtok((char *)NULL, ", ") == NULL, 19);	/* Persistence. */
	(void) strcpy(one, ", ");
	check(strtok(one, ", ") == NULL, 20);	/* No tokens. */
	(void) strcpy(one, "");
	check(strtok(one, ", ") == NULL, 21);	/* Empty string. */
	(void) strcpy(one, "abc");
	equal(strtok(one, ", "), "abc", 22);	/* No delimiters. */
	check(strtok((char *)NULL, ", ") == NULL, 23);
	(void) strcpy(one, "abc");
	equal(strtok(one, ""), "abc", 24);	/* Empty delimiter list. */
	check(strtok((char *)NULL, "") == NULL, 25);
	(void) strcpy(one, "abcdefgh");
	(void) strcpy(one, "a,b,c");
	equal(strtok(one, ","), "a", 26);	/* Basics again... */
	equal(strtok((char *)NULL, ","), "b", 27);
	equal(strtok((char *)NULL, ","), "c", 28);
	check(strtok((char *)NULL, ",") == NULL, 29);
	equal(one+6, "gh", 30);			/* Stomped past end? */
	equal(one, "a", 31);			/* Stomped old tokens? */
	equal(one+2, "b", 32);
	equal(one+4, "c", 33);
#endif

	exit(0);
}
!
cat <<'!' >Makefile
STRING = index.o rindex.o strcat.o strchr.o strcmp.o strcpy.o strcspn.o \
	strlen.o strncat.o strncmp.o strncpy.o strpbrk.o strrchr.o strspn.o \
	strtok.o 
CSTRING = index.c rindex.c strcat.c strchr.c strcmp.c strcpy.c strcspn.c \
	strlen.c strncat.c strncmp.c strncpy.c strpbrk.c strrchr.c strspn.c \
	strtok.c 
# CHARBITS must be defined when the compiler lacks "unsigned char".
# See the comments in strcmp.c and strncmp.c .
CFLAGS=-O -DCHARBITS=0377

doit:	tester
	: 'Testing new routines -- no output is good output.'
	tester

v7:	v7tester
	: 'Testing V7 routines -- no output is good output.'
	: 'V7 str[n]cmp flunks test #8 on signed-char cpus.'
	v7tester

tests:	doit v7

tester:	tester.c $(STRING)
	cc -n -O -Dvoid=int tester.c $(STRING) -o tester

v7tester:	tester.c
	cc -n -O -Dvoid=int -DV7 tester.c -o v7tester

lint:
	lint -hpan -Dvoid=int tester.c $(CSTRING)
	lint -ha -Dvoid=int -DV7 tester.c

print:
	( pr Makefile *.c ; nroff -man string.3 ) | lpr -t "String routines"

clean:
	rm -f tester v7tester a.out *.o
!
cat <<'!' >string.3
.TH STRING 3 reimplemented
.DA 29 Nov 1983
.SH NAME
strcat, strncat, strcmp, strncmp, strcpy, strncpy, strlen, strchr, index, strrchr, rindex, strpbrk, strspn, strcspn, strtok \- string operations
.SH SYNOPSIS
.nf
.ft B
char *strcat (s1, s2)
char *s1, *s2;

char *strncat (s1, s2, n)
char *s1, *s2;
int n;

int strcmp (s1, s2)
char *s1, *s2;

int strncmp (s1, s2, n)
char *s1, *s2;
int n;

char *strcpy (s1, s2)
char *s1, *s2;

char *strncpy (s1, s2, n)
char *s1, *s2;
int n;

int strlen (s)
char *s;

char *strchr (s, c)
char *s, c;

char *index (s, c)
char *s, c;

char *strrchr (s, c)
char *s, c;

char *rindex (s, c)
char *s, c;

char *strpbrk (s1, s2)
char *s1, *s2;

int strspn (s1, s2)
char *s1, *s2;

int strcspn (s1, s2)
char *s1, *s2;

char *strtok (s1, s2)
char *s1, *s2;
.SH DESCRIPTION
These functions operated on \fBNUL\fR-terminated strings.
They do not check for overflow of any receiving string.
.PP
.I Strcat
appends a copy of string
.I s2
to the end of string
.IR s1 .
.I Strncat
copies at most
.I n
characters.
Both return a pointer to the \fBNUL\fR-terminated result.
.PP
.I Strcmp
compares its arguments and returns an integer greater than,
equal to, or less than 0, according as
.I s1
is lexicographically greater than, equal to, or less than
.IR s2 .
.I Strncmp
makes the same comparison but looks at at most
.I n
characters.
.PP
.I Strcpy
copies string
.I s2
to
.IR s1 ,
stopping after the \fBNUL\fR character has been moved.
.I Strncpy
copies exactly
.I n
characters,
truncating or \fBNUL\fR-padding
.IR s2 ;
the target may not be \fBNUL\fR-terminated if the length of
.I s2
is
.I n
or more.
Both return
.IR s1 .
.PP
.I Strlen
returns the number of non-\fBNUL\fR characters in
.IR s .
.PP
.I Strchr
(\fIstrrchr\fR)
returns a pointer to the first (last) occurrence of character
.I c
in string
.IR s ,
or
.B NULL
if
.I c
does not occur in the string.
The \fBNUL\fR character terminating a string is considered to be part of the string.
.PP
.I Index
and
.I rindex
are older names for
.I strchr
and
.I strrchr
respectively.
.PP
.I Strpbrk
returns a pointer to the first occurrence in string
.I s1
of any character from string
.IR s2 ,
or
.B NULL
if no character from
.I s2
exists in
.IR s1 .
.PP
.I Strspn
(\fIstrcspn\fR)
returns the length of the initial segment of string
.I s1
which consists entirely of characters from (not from) string
.IR s2 .
.PP
.I Strtok
considers the string
.I s1
to consist of a sequence of zero or more text tokens separated by
spans of one or more characters from the separator string
.IR s2 .
The first call (with pointer
.I s1
specified) returns a pointer to the first character of the first
token,
and will have written a
.B NUL
character into
.I s1
immediately following the returned token.
Subsequent calls with
.B "(char *)NULL"
for the first argument will work through the string
.I s1
in this way until no tokens remain.
The separator string
.I s2
may be different from call to call.
When no token remains in
.IR s1 ,
a
.B NULL
is returned.
.SH HISTORY
Written by Henry Spencer, working from a Bell Labs manual page.
Behavior believed identical to the Bell version, except for a bug in
the Bell version which is fixed in this version:
the Bell code assumed that all characters collate
high with respect to the end-of-string \fBNUL\fR, which is not
true on
a machine with signed characters.
.SH BUGS
.I Strcmp
uses native character comparison, which is signed on PDP-11s,
unsigned on most other machines.
.PP
All string movement is performed character by character starting at
the left.
Thus overlapping moves toward the left will work as expected,
but overlapping moves to the right may yield surprises.
.PP
Since
.I strtok
uses a static variable to store its where-to-resume-scan pointer,
it must be used with caution in the presence of recursion or
reentrancy.
!
-----
-- 
				Henry Spencer @ U of Toronto Zoology
				{allegra,ihnp4,linus,decvax}!utzoo!henry

henry@utzoo.UUCP (Henry Spencer) (12/01/83)

For those among the audience interested in bit-twiddling efficiency,
I've just had a quick look at how the (pdp11) assembly code for my
string routines compares to that of the V7 routines.  Pretty close.
There are one or two cases where my versions actually do better,
most of the time the V7 routines are a bit better, and the differences
are never large.  Most of the cases where V7 comes out ahead are places
where the V7 code is running a scanning pointer past the beginning or
end of a string and then backing it up to the boundary it went past.
This is a slightly dubious practice if one is very concerned about
portability; on a segmented machine, a pointer that has been backed
up past the beginning of a segment can point heaven-knows-where, and
one might possibly run into problems trying to "backspace" it.  The
absence of this particular trick in my code means that there are a
few places where my routines don't get the full benefit of the 11's
autoincrement addressing modes.  That's about the only difference.
-- 
				Henry Spencer @ U of Toronto Zoology
				{allegra,ihnp4,linus,decvax}!utzoo!henry