[comp.sys.atari.st] String Routines

trb@stag.UUCP (06/06/87)

The following came in several days ago, but I just now found it in my mail.
Have fun with it.
   -Todd Burkey
   ..ihnp4!meccts!stag!trb


Received: by stag.UUCP (smail2.5)
	id AA06028; 4 Jun 87 04:18:21 PDT (Thu)
Date: Wed, 4 Jun 87 04:07:40 CDT
To: stag!trb
From: dal@syntel.UUCP (Dale Schumacher)
Reply-To: ..ihnp4!meccts!stag!syntel!dal
Subject: C String Functions (long!)
Message-Id: <0604870407400028@syntel.UUCP>

I can't post to newsgroups yet, but I do read this one (a little delayed),
so I'm having a friend post this for me.  Send responses directly to me
through e-mail, ..ihnp4!meccts!stag!syntel!dal

The following is a group of string routines which are (a completed and
tested) part of a set of standard library routines for use with Alcyon C
v4.14.  I'm re-writing the standard libraries (except floating point and
GEM routines) from scratch, first in C, then converting much of it to
assembly.  When they are completed and tested, I'll be releasing them,
with all source code, into the public domain.  I'm implementing several
Unix library routines which were not included in the Alcyon distribution
as well as a few routines which are compatible with Microsoft C v4.0 for
the IBM-PC (which I cross-develop software for).  As a hint of things to
come, here are a few bugs which my routines fix:
	- Mangling of the command line parameters
	- strchr() not finding the '\0' character
	- creat() making duplicate file names (Fcreat()'s problem actually)
	- malloc() requiring pre-defined heap space (my solution avoids
	  the Malloc() problem as well)
	- Lack of a system() command (mine uses _shell_p, the SHELL
	  environment variable and the PATH, if needed)
	- printf() problems of all descriptions (I also make the internal
	  formatting routine _printf() available for general use)
Anyhow, these routines are posted mostly in response to the C string
routines posted by RDROYA01@ULKYVX.BITNET and as an example of what will
soon be available... (WARNING: the routines run about 14K long)

--------------------------------CUT-HERE--------------------------------
/*
 *	STRING MANIPULATION:
 */

#include <stdio.h>

char *blkcpy(dest, source, len)		/* aka: movmem */
register char *dest;
register char *source;
register int len;
/*
 *	Move <len> bytes from <source> to <dest> and hand back the
 *	original <dest> pointer.  Exactly <len> bytes are transferred
 *	and no terminator is appended.  When <source> lies below <dest>
 *	the bytes are moved last-to-first, otherwise first-to-last, so
 *	a block can be shifted within itself in either direction.
 */
{
	register char *start = dest;

	if(dest > source) {
		/* walk down from the top of both blocks */
		for(dest += len, source += len; len != 0; --len)
			*--dest = *--source;
		return(start);
	}
	/* walk up from the bottom of both blocks */
	for(; len != 0; --len)
		*dest++ = *source++;
	return(start);
}

/*
 *	The same function optimized for 68000 assembly.
 *
asm(".globl _blkcpy					");
asm("_blkcpy:						");
asm(".globl _movmem					");
asm("_movmem:						");
asm("	move.l	4(a7),a1	* destination		");
asm("	move.l	8(a7),a0	* source		");
asm("	move.w	12(a7),d0	* number of bytes	");
asm("	cmp.l	a0,a1		* check copy direction	");
asm("	ble	movmem4					");
asm("	add.l	d0,a0		* move pointers to end	");
asm("	add.l	d0,a1					");
asm("	bra	movmem2					");
asm("movmem1:						");
asm("	move.b	-(a0),-(a1)	* (s < d) copy loop	");
asm("movmem2:						");
asm("	dbra	d0,movmem1				");
asm("	bra	movmem5					");
asm("movmem3:						");
asm("	move.b	(a0)+,(a1)+	* (s >= d) copy loop	");
asm("movmem4:						");
asm("	dbra	d0,movmem3				");
asm("movmem5:						");
asm("	move.l	4(a7),d0	* return dest pointer	");
asm("	rts						");
 */

char *blkfill(dest, data, len)		/* aka: fillmem */
register char *dest;
register char data;
register int len;
/*
 *	Store the byte <data> into each of the first <len> bytes of
 *	<dest>.  The original <dest> pointer is returned.
 */
{
	register char *cursor;

	for(cursor = dest; len != 0; --len)
		*cursor++ = data;
	return(dest);
}

int blkcmp(blk1, blk2, len)
register char *blk1;
register char *blk2;
register int len;
/*
 *	Lexicographically compare the first <len> bytes of two blocks.
 *	The result is the difference between the first pair of bytes
 *	that differ (taken as plain chars), which gives:
 *		negative	blk1 < blk2
 *		0		blk1 == blk2
 *		positive	blk1 > blk2
 *	A <len> of zero or less compares nothing and yields 0.
 */
{
	if(len <= 0)			/* empty blocks are equal */
		return(0);
	/* stop on the last byte to compare or on the first mismatch */
	while((--len) && (*blk1 == *blk2)) {
		++blk1;
		++blk2;
	}
	return(*blk1 - *blk2);
}

int blkicmp(blk1, blk2, len)
register char *blk1;
register char *blk2;
register int len;
/*
 *	Same as blkcmp() except the case of alphabetic characters is
 *	ignored: each byte is passed through tolower() before it is
 *	compared.  The result is the difference between the first pair
 *	of folded bytes that differ, or 0 if all <len> bytes match.
 *	A <len> of zero or less compares nothing and yields 0.
 */
{
	register char c1, c2;

	if(len <= 0)			/* empty blocks are equal */
		return(0);
	do {
		/*
		 * The pointers are stepped outside the tolower() calls so
		 * the argument has no side effects, in case tolower() is
		 * supplied as a macro.
		 */
		c1 = tolower(*blk1);
		c2 = tolower(*blk2);
		++blk1;
		++blk2;
	} while((c1 == c2) && (--len));
	return(c1 - c2);
}

int strlen(string)
register char *string;
/*
 *	Count the characters that precede the terminating '\0' of
 *	<string> and return that count.
 */
{
	register int count;

	for(count = 0; string[count] != '\0'; ++count)
		;
	return(count);
}

char *strcpy(dest, source)
register char *dest;
register char *source;
/*
 *	Copy the characters of <source>, up to and including its '\0',
 *	into <dest>.  The start of <dest> is returned.
 */
{
	register char *out = dest;

	for(;;) {
		*out = *source;
		if(*out == '\0')	/* terminator has been copied */
			break;
		++out;
		++source;
	}
	return(dest);
}

char *strncpy(dest, source, limit)
register char *dest;
register char *source;
register int limit;
/*
 *	Copy at most <limit> characters from <source> to <dest> and
 *	return the start of <dest>.  Copying stops as soon as the '\0'
 *	of <source> has been copied; nothing further is written to
 *	<dest> after it.  If <limit> characters are copied without
 *	reaching the '\0', <dest> is left unterminated.
 */
{
	register char *out = dest;

	while(limit != 0) {
		--limit;
		if((*out++ = *source++) == '\0')
			break;
	}
	return(dest);
}

char *strdup(string)
register char *string;
/*
 *	Obtain room for <string> and its '\0' from malloc(), copy
 *	<string> into it and return a pointer to the copy.  If malloc()
 *	returns a null pointer, nothing is copied and NULL is returned.
 */
{
	register char *copy;
	char *malloc();

	copy = malloc(strlen(string) + 1);	/* +1 for the terminator */
	if(copy == NULL)
		return(NULL);
	strcpy(copy, string);
	return(copy);
}

char *strcat(dest, source)
register char *dest;
register char *source;
/*
 *	Append <source> to <dest>.  Copying starts on top of the
 *	terminator of <dest> and includes the terminator of <source>.
 *	The start of the lengthened <dest> is returned.
 */
{
	register char *tail = dest;

	while(*tail != '\0')		/* locate the end of <dest> */
		++tail;
	do {
		*tail = *source++;
	} while(*tail++ != '\0');	/* the '\0' is copied too */
	return(dest);
}

char *strncat(dest, source, limit)
register char *dest;
register char *source;
register int limit;
/*
 *	Concatenate at most <limit> characters from <source> onto
 *	<dest>.  If <source> contains fewer than <limit> characters,
 *	all of <source> is appended.  A single terminating '\0' is
 *	always written directly after the last appended character, so
 *	<dest> needs room for its old contents, the appended characters
 *	and that one '\0' only.  A pointer to <dest> is returned.
 */
{
	register char *p = dest;

	while(*dest)			/* find the end of <dest> */
		++dest;
	/*
	 * Copy characters only; the terminator of <source> is not
	 * copied here, so <dest> is never advanced beyond the point
	 * where the final '\0' belongs.
	 */
	while((limit--) && *source)
		*dest++ = *source++;
	*dest = '\0';
	return(p);
}

char *strupr(string)
register char *string;
/*
 *	Change every character of <string> for which islower() is true
 *	to upper case by toggling bit 0x20 (the case bit in ASCII).
 *	The conversion is done in place and <string> is returned.
 */
{
	register char *cursor;

	for(cursor = string; *cursor != '\0'; ++cursor) {
		if(islower(*cursor))
			*cursor ^= 0x20;
	}
	return(string);
}

char *strlwr(string)
register char *string;
/*
 *	Change every character of <string> for which isupper() is true
 *	to lower case by toggling bit 0x20 (the case bit in ASCII).
 *	The conversion is done in place and <string> is returned.
 */
{
	register char *cursor;

	for(cursor = string; *cursor != '\0'; ++cursor) {
		if(isupper(*cursor))
			*cursor ^= 0x20;
	}
	return(string);
}

char *strrev(string)
char *string;
/*
 *	Reverse the characters of <string> in place, leaving the '\0'
 *	where it is, and return <string>.
 */
{
	register char *head, *tail, swap;

	if(*string == '\0')		/* nothing to reverse */
		return(string);
	head = string;
	tail = string;
	while(tail[1] != '\0')		/* park on the last character */
		++tail;
	/* exchange the ends and work towards the middle */
	for(; head < tail; ++head, --tail) {
		swap = *head;
		*head = *tail;
		*tail = swap;
	}
	return(string);
}

int strcmp(str1, str2)
register char *str1;
register char *str2;
/*
 *	Lexicographically compare two strings.  The result is 0 when
 *	both strings end together without a mismatch, otherwise it is
 *	the difference between the first pair of characters that
 *	differ:
 *		negative	str1 < str2
 *		0		str1 == str2
 *		positive	str1 > str2
 */
{
	while(*str1 == *str2) {
		if(*str1 == '\0')	/* both ended together */
			return(0);
		++str1;
		++str2;
	}
	return(*str1 - *str2);
}

int strncmp(str1, str2, limit)
register char *str1;
register char *str2;
register int limit;
/*
 *	Compare strings as with strcmp(), but look at no more than the
 *	first <limit> characters.  The result is 0 if the strings agree
 *	over that span (or both end inside it), otherwise the
 *	difference between the first pair of characters that differ.
 *	A <limit> of zero or less compares nothing and yields 0.
 */
{
	if(limit <= 0)			/* nothing to compare */
		return(0);
	/* stop on the last character to compare or on a mismatch */
	for(; ((--limit) && (*str1 == *str2)); ++str1, ++str2)
		if(*str1 == '\0')
			return(0);
	return(*str1 - *str2);
}

int stricmp(str1, str2)
register char *str1;
register char *str2;
/*
 *	Compare strings as with strcmp(), except that each character
 *	is passed through tolower() first, so the case of alphabetic
 *	characters does not matter.  The result is 0 for a match,
 *	otherwise the difference between the first pair of folded
 *	characters that differ.
 */
{
	register char c1, c2;

	for(;;) {
		c1 = tolower(*str1);
		c2 = tolower(*str2);
		if(c1 != c2)
			return(c1 - c2);
		if(c1 == '\0')		/* both ended together */
			return(0);
		++str1;
		++str2;
	}
}

int strnicmp(str1, str2, limit)
register char *str1;
register char *str2;
register int limit;
/*
 *	Compare strings as with strncmp(), except that each character
 *	is passed through tolower() first, so the case of alphabetic
 *	characters does not matter.  No more than <limit> characters
 *	are examined.  A <limit> of zero or less compares nothing and
 *	yields 0.
 */
{
	register char c1, c2;

	if(limit <= 0)			/* nothing to compare */
		return(0);
	do {
		/*
		 * The pointers are stepped outside the tolower() calls so
		 * the argument has no side effects, in case tolower() is
		 * supplied as a macro.
		 */
		c1 = tolower(*str1);
		c2 = tolower(*str2);
		++str1;
		++str2;
		/* go on while equal, not at the end, and within <limit> */
	} while((c1 == c2) && (c1 != '\0') && (--limit));
	return(c1 - c2);
}

char *strchr(string, symbol)
register char *string;
register char symbol;
/*
 *	Scan <string> from its start for <symbol> and return a pointer
 *	to the first match.  The terminating '\0' takes part in the
 *	search, so looking for '\0' yields a pointer to the terminator.
 *	NULL is returned if <symbol> does not occur.
 */
{
	for(;; ++string) {
		if(*string == symbol)
			return(string);
		if(*string == '\0')	/* ran off the end */
			return(NULL);
	}
}

/*
 *	The same function optimized for 68000 assembly.
 *
asm(".globl _strchr		");
asm("_strchr:			");
asm("	move.l	4(a7),a0	");
asm("	move.w	8(a7),d0	");
asm("strchr1:			");
asm("	cmp.b	(a0),d0		");
asm("	bne	strchr2		");
asm("	move.l	a0,d0		");
asm("	rts			");
asm("strchr2:			");
asm("	tst.b	(a0)+		");
asm("	bne	strchr1		");
asm("	clr.l	d0		");
asm("	rts			");
*/

char *strrchr(string, symbol)
register char *string;
register char symbol;
/*
 *	Scan <string> backwards, starting at its terminating '\0', and
 *	return a pointer to the first match met, i.e. the last
 *	occurrence of <symbol>.  NULL is returned if <symbol> does not
 *	occur.
 */
{
	register char *scan = string;

	while(*scan != '\0')		/* park on the terminator */
		++scan;
	for(;;) {
		if(*scan == symbol)
			return(scan);
		if(scan == string)	/* first character was checked */
			return(NULL);
		--scan;
	}
}

int strpos(string, symbol)
register char *string;
register char symbol;
/*
 *	Return the index (counting from 0) of the first occurrence of
 *	<symbol> in <string>.  The terminating '\0' takes part in the
 *	search.  -1 is returned if <symbol> does not occur.
 */
{
	register int index;

	for(index = 0; ; ++index) {
		if(string[index] == symbol)
			return(index);
		if(string[index] == '\0')	/* ran off the end */
			return(-1);
	}
}

int strrpos(string, symbol)
register char *string;
register char symbol;
/*
 *	Return the index (counting from 0) of the last occurrence of
 *	<symbol> in <string>.  The search runs backwards and begins on
 *	the terminating '\0'.  -1 is returned if <symbol> does not
 *	occur.
 */
{
	register int index = 0;

	while(string[index] != '\0')	/* index of the terminator */
		++index;
	for(; index >= 0; --index)
		if(string[index] == symbol)
			return(index);
	return(-1);
}

char *strstr(string, pattern)
register char *string;
register char *pattern;
/*
 *	Return a pointer to the first occurrence of <pattern> in
 *	<string>.  NULL is returned if <pattern> is not found.  An
 *	empty <pattern> matches at the start of <string>.
 */
{
	register int plen;

	/*
	 * Handled up front: strchr() would locate the '\0' of <pattern>
	 * at the terminator of <string>, reporting the match at the
	 * wrong end.
	 */
	if(*pattern == '\0')
		return(string);
	plen = strlen(pattern);
	/* jump to each candidate first character, then compare in full */
	while(string = strchr(string, *pattern)) {
		if(strncmp(string, pattern, plen) == 0)
			return(string);
		++string;
	}
	return(NULL);
}

char *stristr(string, pattern)
register char *string;
register char *pattern;
/*
 *	Same as strstr(), but ignore the case of any alphabetic
 *	characters: each position of <string> is tried in turn with
 *	strnicmp().  NULL is returned if <pattern> is not found.  An
 *	empty <pattern> matches at the start of <string>.
 */
{
	register int plen;

	/*
	 * Handled up front: the search loop below never runs for an
	 * empty <string> and would never report a zero-length match.
	 */
	if(*pattern == '\0')
		return(string);
	plen = strlen(pattern);
	while(*string) {
		if(strnicmp(string, pattern, plen) == 0)
			return(string);
		++string;
	}
	return(NULL);
}

int strspn(string, set)
register char *string;
register char *set;
/*
 *	Return the length of the leading part of <string> that is made
 *	up only of characters found in <set>.  The terminating '\0' of
 *	<set> never counts as a member.  0 is returned if the first
 *	character of <string> is not in <set>.
 */
{
	register int count;

	for(count = 0; string[count] != '\0'; ++count) {
		if(strchr(set, string[count]) == NULL)
			break;		/* first character outside <set> */
	}
	return(count);
}

int strcspn(string, set)
register char *string;
register char *set;
/*
 *	Return the length of the leading part of <string> that is made
 *	up only of characters NOT found in <set>.  The terminating '\0'
 *	of <set> never counts as a member.  0 is returned if the first
 *	character of <string> is in <set>.
 */
{
	register int count;

	for(count = 0; string[count] != '\0'; ++count) {
		if(strchr(set, string[count]) != NULL)
			break;		/* first character inside <set> */
	}
	return(count);
}

char *strpbrk(string, set)
register char *string;
register char *set;
/*
 *	Return a pointer to the first character of <string> that also
 *	appears in <set>.  NULL is returned if the end of <string> is
 *	reached without finding one.
 */
{
	for(; *string != '\0'; ++string)
		if(strchr(set, *string) != NULL)
			return(string);
	return(NULL);
}

char *strrpbrk(string, set)
register char *string;
register char *set;
/*
 *	Return a pointer to the last character of <string> that also
 *	appears in <set>.  The scan runs backwards from the end of
 *	<string>.  NULL is returned if the start is reached without
 *	finding one.
 */
{
	register char *scan = strrchr(string, '\0');	/* terminator */

	while(scan != string) {
		--scan;
		if(strchr(set, *scan) != NULL)
			return(scan);
	}
	return(NULL);
}

static	char	*_strtok = NULL;	/* where the next scan resumes */

char *strtok(string, delim)
register char *string;
register char *delim;
/*
 *	Return a token from <string>.  If <string> is not NULL, it is
 *	the beginning of a string from which tokens are to be extracted.
 *	Characters found in <delim> are skipped over to find the start
 *	of a token, characters are then accumulated until a character in
 *	<delim> is found, or the terminator of <string> is reached.
 *	A pointer to the '\0' terminated token is then returned.  Note
 *	that this function modifies <string> (by inserting '\0's) in
 *	the process.  Subsequent calls to strtok() may specify NULL as
 *	the <string> argument, in which case scanning resumes from the
 *	position saved in the static <_strtok> and subsequent tokens
 *	are returned, or NULL if there are no more tokens.  NULL is
 *	also returned if <string> is NULL and no position has been
 *	saved yet.
 */
{
	register char *p;

	if(string == NULL)
		string = _strtok;
	if(string == NULL)		/* nothing to resume from */
		return(NULL);
	while(*string && strchr(delim, *string))
		++string;
	if(*string == '\0') {		/* no more tokens */
		/*
		 * Remember the exhausted position so that a later
		 * strtok(NULL, ...) does not resume from an earlier
		 * string.
		 */
		_strtok = string;
		return(NULL);
	}
	p = string;
	while(*string && !strchr(delim, *string))
		++string;
	if(*string != '\0')		/* cut the token off */
		*string++ = '\0';
	_strtok = string;
	return(p);
}

char *strtrim(string, junk)
register char *string;
register char *junk;
/*
 *	Remove leading and trailing characters found in <junk>
 *	from <string>.  Characters from <junk> that lie between two
 *	kept characters are left alone.  The remaining text is moved
 *	down to the start of <string> and re-terminated.  A pointer to
 *	the modified <string> is returned.
 */
{
	register char *first = string, *last, *out = string;

	while(*first && strchr(junk, *first))	/* skip leading junk */
		++first;
	last = first;
	while(*last)				/* find the terminator */
		++last;
	/* back up over trailing junk; <last> ends one past the keeper */
	while((last != first) && strchr(junk, last[-1]))
		--last;
	while(first != last)			/* slide the rest down */
		*out++ = *first++;
	*out = '\0';
	return(string);
}

char *stradj(string, dir)
register char *string;
register int dir;
/*
 *	Adjust <string> by adding room if <dir> is positive, or removing
 *	characters if <dir> is negative.  The magnitude of <dir> is the
 *	number of character positions to add or remove, always at the
 *	beginning of <string>.
 *
 *	When adding, the whole string (with its '\0') is moved up by
 *	<dir> positions; the caller must provide the extra room.  The
 *	<dir> positions opened at the front keep whatever they held
 *	before, except that any '\0' found there is changed to a blank
 *	so the result remains one string.
 *
 *	When removing, the rest of the string is moved down over the
 *	first <dir> characters.  Asking to remove more characters than
 *	<string> holds leaves an empty string.
 *
 *	A pointer to the modified <string> is returned.
 */
{
	register char *p = string, *q;
	register int n;

	if(dir == 0)
		return(string);
	if(dir > 0) {			/* add room */
		n = 1;				/* the '\0' moves too */
		while(*p) {			/* find end */
			++p;
			++n;
		}
		/*
		 * Both pointers start one past the byte they will touch
		 * and are pre-decremented, so neither ever points below
		 * the start of <string>.
		 */
		q = p + dir + 1;
		++p;
		while(n--)			/* copy data, top down */
			*--q = *--p;
		while(q != string) {		/* replace <nul>s in gap */
			--q;
			if(*q == '\0')
				*q = ' ';
		}
	}
	else {				/* remove characters */
		dir = -dir;
		q = p;
		while((dir--) && *q)		/* set gap, stop at '\0' */
			++q;
		while(*p++ = *q++)		/* copy data */
			;
	}
	return(string);
}

int strrpl(string, ptrn, rpl, n)
char *string;
char *ptrn;
register char *rpl;
register int n;
/*
 *	Replace at most <n> occurrences of <ptrn> in <string> with <rpl>.
 *	If <n> is -1, replace all.  Return the number of replacements.
 *	Searching resumes after each inserted <rpl>, so text brought in
 *	by a replacement is not searched again.  <string> is edited in
 *	place; when <rpl> is longer than <ptrn> the caller must provide
 *	room for the growth.  An empty <ptrn> matches nothing and 0 is
 *	returned.
 */
{
	register char *p, *q = string;
	register int d, rlen, nn = 0;

	/*
	 * A zero-length match never consumes any of <string>, so with
	 * <n> == -1 the loop below would keep inserting <rpl> forever.
	 */
	if(*ptrn == '\0')
		return(0);
	rlen = strlen(rpl);
	d = rlen - strlen(ptrn);	/* change in length per replacement */
	while(n && (p = strstr(q, ptrn))) {
		++nn;
		stradj(p, d);		/* grow or shrink at the match */
		strncpy(p, rpl, rlen);	/* drop in <rpl> without its '\0' */
		q = p + rlen;
		if(n > 0)
			--n;
	}
	return(nn);
}

int strirpl(string, ptrn, rpl, n)
char *string;
char *ptrn;
register char *rpl;
register int n;
/*
 *	Same as strrpl() except that <ptrn> is located with stristr(),
 *	so the case of alphabetic characters is ignored.  At most <n>
 *	occurrences are replaced (all of them if <n> is -1) and the
 *	number of replacements is returned.  <string> is edited in
 *	place; when <rpl> is longer than <ptrn> the caller must provide
 *	room for the growth.  An empty <ptrn> matches nothing and 0 is
 *	returned.
 */
{
	register char *p, *q = string;
	register int d, rlen, nn = 0;

	/*
	 * A zero-length match never consumes any of <string>, so the
	 * loop below could not make progress on it.
	 */
	if(*ptrn == '\0')
		return(0);
	rlen = strlen(rpl);
	d = rlen - strlen(ptrn);	/* change in length per replacement */
	while(n && (p = stristr(q, ptrn))) {
		++nn;
		stradj(p, d);		/* grow or shrink at the match */
		strncpy(p, rpl, rlen);	/* drop in <rpl> without its '\0' */
		q = p + rlen;
		if(n > 0)
			--n;
	}
	return(nn);
}

--------------------------------CUT-HERE--------------------------------

                Dale Schumacher
                ..ihnp4!meccts!stag!syntel!dal