[comp.sources.bugs] scandir

rsalz@bbn.com (Rich Salz) (03/30/91)

The following unofficial patch to NNTP1.5.11 speeds up the scandir()
routine and removes the reliance on the low-water mark in the active
file.  That should make C News users happier.  It uses the dynamic array
technique more efficiently; non-dynamic is no longer an option.

You should also get rid of MAX_ARTICLES in common/conf.h

You can also get more performance by changing this line in find_group:
	if ((cond = strncmp(kludgebuf, group_array[mid], length)) < 0)
to something like this:
	if ((cond = kludgebuf[0] - group_array[mid][0]) == 0)
	    cond = strncmp(kludgebuf, group_array[mid], length);
	if (cond < 0)
I sent the scandir() replacement to Stan last week; I hope it makes it
into 1.6.  I did not send him the find_group speedup shown just above.
The improvement to group.c in this patch is also new.

Enjoy,
	/r$


diff -c .old/active.c ./active.c
*** .old/active.c	Fri Mar 29 11:47:43 1991
--- ./active.c	Fri Mar 29 11:48:29 1991
***************
*** 130,138 ****
   *			we're searching for.
   *			"num_groups" is the total number
   *			of groups in the group array.
-  *			"low_msg" and "high_msg" are
-  *			pointers to where we're supposed
-  *			to put the low and high message numbers.
   *
   *	Returns:	0 if all goes well,
   *			-1 if we can't find the group.
--- 130,135 ----
***************
*** 140,149 ****
   *	Side effects:	None.
   */
  
! find_group(group, num_groups, low_msg, high_msg)
  	char		*group;
  	int		num_groups;
- 	int		*low_msg, *high_msg;
  {
  	char		kludgebuf[MAXBUFLEN];
  	int		cond;
--- 137,145 ----
   *	Side effects:	None.
   */
  
! find_group(group, num_groups)
  	char		*group;
  	int		num_groups;
  {
  	char		kludgebuf[MAXBUFLEN];
  	int		cond;
***************
*** 163,170 ****
  		else if (cond > 0)
  			low = mid + 1;
  		else {
- 			(void) sscanf(group_array[mid], "%s %d %d",
- 				kludgebuf, high_msg, low_msg);
  			return(0);
  		}
  	}
--- 159,164 ----
diff -c .old/common.h ./common.h
*** .old/common.h	Fri Mar 29 11:19:49 1991
--- ./common.h	Fri Mar 29 11:45:39 1991
***************
*** 170,181 ****
  extern	char	*homedir;
  extern	int	ingroup;
  extern	int	maxgroups;
- #ifdef DYNAMIC_ART_ARRAY
  extern	int	*art_array;
- extern	unsigned int size_art_array;
- #else
- extern	int	art_array[];
- #endif
  extern	int	art_ptr;
  extern	FILE	*art_fp;
  extern	int	num_arts;
--- 170,176 ----
diff -c .old/globals.c ./globals.c
*** .old/globals.c	Fri Mar 29 11:19:49 1991
--- ./globals.c	Fri Mar 29 11:46:02 1991
***************
*** 36,47 ****
  int	ingroup = 0;
  int	art_ptr;
  int	num_arts;
- #ifdef DYNAMIC_ART_ARRAY
  int	*art_array = 0;		/* dynamic array */
- unsigned int size_art_array = 0;	/* current size of art_array */
- #else
- int	art_array[MAX_ARTICLES];
- #endif
  FILE	*art_fp;
  int	uid_poster, gid_poster;
  char	*home_poster;
--- 36,42 ----
diff -c .old/group.c ./group.c
*** .old/group.c	Fri Mar 29 11:47:43 1991
--- ./group.c	Fri Mar 29 11:49:55 1991
***************
*** 19,25 ****
  	char	*argv[];
  {
  	char	temp_dir[256];
- 	int	high_msg, low_msg;
  	char	*cp;
  	char	*reqlist[2];
  
--- 19,24 ----
***************
*** 42,48 ****
  		return;
  	}
  
! 	if (find_group(argv[1], num_groups, &low_msg, &high_msg) < 0) {
  		printf("%d Invalid group name (not in active).\r\n",
  			ERR_NOGROUP);
  		(void) fflush(stdout);
--- 41,47 ----
  		return;
  	}
  
! 	if (find_group(argv[1], num_groups) < 0) {
  		printf("%d Invalid group name (not in active).\r\n",
  			ERR_NOGROUP);
  		(void) fflush(stdout);
***************
*** 74,82 ****
  	syslog(LOG_INFO, "%s group %s", hostname, argv[1]);
  #endif
  
! 	while ((cp = index(argv[1], '.')) != (char *) NULL)
! 		*cp = '/';
! 
  	(void) strcpy(temp_dir, spooldir);
  	(void) strcat(temp_dir, "/");
  	(void) strcat(temp_dir, argv[1]);
--- 73,80 ----
  	syslog(LOG_INFO, "%s group %s", hostname, argv[1]);
  #endif
  
! 	for (cp = argv[1]; (cp = index(cp, '.')) != NULL; )
! 	    *cp++ = '/';
  	(void) strcpy(temp_dir, spooldir);
  	(void) strcat(temp_dir, "/");
  	(void) strcat(temp_dir, argv[1]);
***************
*** 92,104 ****
  	++grps_acsd;
  #endif
  
! 	num_arts = scan_dir(low_msg, high_msg);
  	art_ptr = 0;
- 
  	ingroup = 1;
! 
! 	while ((cp = index(argv[1], '/')) != (char *) NULL)
! 		*cp = '.';
  
  	printf("%d %d %d %d %s\r\n",
  		OK_GROUP,
--- 90,100 ----
  	++grps_acsd;
  #endif
  
! 	num_arts = scan_dir();
  	art_ptr = 0;
  	ingroup = 1;
! 	for (cp = argv[1]; (cp = index(cp, '/')) != NULL; )
! 	    *cp++ = '.';
  
  	printf("%d %d %d %d %s\r\n",
  		OK_GROUP,
diff -c .old/scandir.c ./scandir.c
*** .old/scandir.c	Fri Mar 29 11:19:50 1991
--- ./scandir.c	Fri Mar 29 11:47:30 1991
***************
*** 4,103 ****
  
  #include "common.h"
  
- /*
-  * scan_dir -- scan the current directory for news articles,
-  *	loading the article numbers into art_array.  Return
-  *	number of articles loaded.
-  *
-  *	Parameters:	"low_msg", "high_msg" are the low
-  *			and high messages numbers in this
-  *			group; we ignore numbers outside this
-  *			range.
-  *
-  *	Returns:	Number of articles loaded into
-  *			array.
-  *
-  *	Side effects:	Changes "art_array".
-  */
  
! extern	int	intcmp();
! extern char *malloc(), *realloc();
! 
! scan_dir(low_msg, high_msg)
! int	low_msg, high_msg;
  {
! 	register struct direct	*dirent;
! 	register DIR		*dirp;
! 	int			artnum;
  
! 	num_arts = 0;
  
- 	dirp = opendir(".");
  
! 	if (dirp == NULL)
! 		return (0);
  
! 	while ((dirent = readdir(dirp)) != NULL) {
! 		artnum = atoi(dirent->d_name);
! #ifdef DYNAMIC_ART_ARRAY
! 		if (artnum == 0 || artnum < low_msg || artnum > high_msg)
! 			continue;
! 		/* Expand/allocate art_array elements as necessary */
! 		if (num_arts + 1 >= size_art_array) {
! 			size_art_array += 1024;
! 			if (art_array) {
! #ifdef SYSLOG
! 				syslog(LOG_INFO,
! 				    "increasing art_array to %d elements",
! 				    size_art_array);
! #endif
! 				art_array = (int *)realloc(art_array,
! 				    size_art_array * sizeof(*art_array));
! 			} else
! 				art_array = (int *)
! 				    malloc(size_art_array * sizeof(*art_array));
! 			if (art_array == 0) {
! #ifdef SYSLOG
! 				syslog(LOG_ERR,
! 				    "scan_dir(): malloc/realloc failed");
! #endif
! 				num_arts = 0;
! 				size_art_array = 0;
! 				size_art_array = 0;
! 				closedir(dirp);
! 				return(0);
! 			}
! 		}
! 		art_array[num_arts] = artnum;
!  		++num_arts;
! #else
! 		if (artnum != 0 && artnum >= low_msg && artnum <= high_msg)
! 			art_array[num_arts++] = artnum;
! #endif
  
! 	}
! 	closedir(dirp);
  
! 	qsort((char *) art_array, num_arts, sizeof(int), intcmp);
  
! 	return (num_arts);
! }
! 
! 
! /*
!  * intcmp -- compare to integers.
!  *
!  *	Parameters:	"x", "y" point to the integers to be compared.
!  *
!  *	Returns:	-1 if "x" is less than "y",
!  *			0 if "x" equals "y", and
!  *			1 if "x" is greater than "y".
!  *
!  *	Side effects:	None.
!  */
! 
! intcmp(x, y)
! register int	*x, *y;
! {
! 	return (*x - *y);
  }
--- 4,84 ----
  
  #include "common.h"
  
  
! /*
! **  Compare two integers for qsort: <0, 0 or >0 as *p1 is <, == or > *p2.
! */
! static int
! intcmp(p1, p2)
!     char	*p1;
!     char	*p2;
  {
!     int		*i1;
!     int		*i2;
  
!     i1 = (int *)p1;
!     i2 = (int *)p2;
!     return *i1 - *i2;
! }
  
  
! /*
! **  Fill in art_array with sorted article numbers; return count, 0 on error.
! */
! int
! scan_dir()
! {
!     static int size_art_array;	/* allocated length of art_array */
!     DIR *dirp;
!     struct dirent *entry;
!     struct stat sb;
!     int i;
  
!     num_arts = 0;
  
!     /* Estimate size needed by dividing by near-minimum size of an entry.
!      * Idea taken from the freely-redistributable BSD scandir() routine. */
!     if (stat(".", &sb) < 0)
! 	return 0;
!     i = sb.st_size / 24 + 1;	/* +1 so we never ask malloc for zero bytes */
!     if (art_array == NULL) {
! 	size_art_array = i;
! 	art_array = (int *)malloc(size_art_array * sizeof *art_array);
!     }
!     else if (size_art_array < i) {
! 	size_art_array = i;
! 	art_array = (int *)realloc(art_array,
! 			    size_art_array * sizeof *art_array);
!     }
!     if (art_array == NULL) {
! 	syslog(LOG_ERR, "scan_dir:  malloc/realloc failed");
! 	size_art_array = 0;
! 	return 0;
!     }
  
!     /* Open the directory. */
!     if ((dirp = opendir(".")) == NULL)
! 	return 0;
  
!     /* Scan for entries whose names start with a digit. */
!     while ((entry = readdir(dirp)) != NULL) {
! 	if (!isdigit((unsigned char)entry->d_name[0]))
! 	    continue;
! 	if (num_arts >= size_art_array - 1) {
! 	    /* Directory grew, get more space. */
! 	    size_art_array += 100;
! 	    art_array = (int *)realloc(art_array,
! 				size_art_array * sizeof *art_array);
! 	    if (art_array == NULL) {
! 		syslog(LOG_ERR, "scan_dir: realloc failed");
! 		closedir(dirp);		/* don't leak the directory handle */
! 		num_arts = size_art_array = 0;
! 		return 0;
! 	    }
! 	}
! 	art_array[num_arts++] = atoi(entry->d_name);
!     }
!     closedir(dirp);
!     qsort((char *)art_array, num_arts, sizeof *art_array, intcmp);
!     return num_arts;
  }
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.